### Name: BSgenome-class
### Title: The BSgenome class
### Aliases: class:BSgenome BSgenome-class BSgenome organism
###   organism,BSgenome-method species species,BSgenome-method provider
###   provider,BSgenome-method providerVersion
###   providerVersion,BSgenome-method releaseDate
###   releaseDate,BSgenome-method releaseName releaseName,BSgenome-method
###   sourceUrl sourceUrl,BSgenome-method seqnames seqnames,BSgenome-method
###   seqlengths seqlengths,BSgenome-method mseqnames
###   mseqnames,BSgenome-method names,BSgenome-method masknames
###   masknames,BSgenome-method BSgenome show,BSgenome-method
###   length,BSgenome-method [[,BSgenome-method [[<-,BSgenome-method
###   $,BSgenome-method
### Keywords: methods classes

### ** Examples

  ## Attaching a BSgenome data package makes the genome object available
  ## but does not read any of its sequences into memory:
  library(BSgenome.Celegans.UCSC.ce2)

  ## How many sequences does this genome contain?
  length(Celegans)

  ## Printing the object displays a summary of its sequences:
  Celegans

  ## Names of the single sequences:
  seqnames(Celegans)

  ## Their lengths (i.e. number of nucleotides):
  seqlengths(Celegans)

  ## Bring chromosome I from disk into memory (this takes some time)
  ## and hold on to a reference to it:
  chrI <- Celegans$chrI  # equivalent to Celegans[["chrI"]]

  chrI

  class(chrI)   # a DNAString instance
  length(chrI)  # with 15080483 nucleotides

  ## Multiple sequences:
  mseqnames(Celegans)
  upstream1000 <- Celegans[["upstream1000"]]  # equivalent to
                                              # Celegans$upstream1000
  upstream1000
  class(upstream1000)  # a DNAStringSet instance
  ## Description lines of the first 4 sequences in the original FASTA
  ## file, as a character vector:
  names(upstream1000)[seq_len(4)]

  ## ---------------------------------------------------------------------
  ## PASS-BY-ADDRESS SEMANTICS, CACHING AND MEMORY USAGE
  ## ---------------------------------------------------------------------

  ## We want a message to be printed each time a sequence is removed
  ## from the cache. options() returns the previous settings, so keep
  ## them in order to restore the user's 'verbose' value at the end of
  ## this example (instead of leaving it forced to FALSE):
  old_opts <- options(verbose=TRUE)

  gc()  # nothing seems to be removed from the cache
  rm(chrI, upstream1000)
  gc()  # chrI and upstream1000 are removed from the cache (they are
        # not in use anymore)

  ## Turn verbosity off while measuring memory usage:
  options(verbose=FALSE)

  ## Get the current amount of data in memory (in Mb):
  mem0 <- gc()["Vcells", "(Mb)"]

  system.time(chrV <- Celegans[["chrV"]])  # read from disk

  gc()["Vcells", "(Mb)"] - mem0  # chrV occupies 20Mb in memory

  system.time(tmp <- Celegans[["chrV"]])  # much faster! (sequence
                                          # is in the cache)

  gc()["Vcells", "(Mb)"] - mem0  # we're still using 20Mb (sequences
                                 # have a pass-by-address semantic
                                 # i.e. the sequence data are not
                                 # duplicated)

  ## subseq() doesn't copy the sequence data either, hence it is very
  ## fast and memory efficient (but the returned object will hold a
  ## reference to chrV):
  y <- subseq(chrV, start=10, end=8000000)
  gc()["Vcells", "(Mb)"] - mem0

  ## We must remove all references to chrV before it can be removed from
  ## the cache (so the 20Mb of memory used by this sequence are freed).
  options(verbose=TRUE)
  rm(chrV, tmp)
  gc()

  ## Remember that 'y' holds a reference to chrV too:
  rm(y)
  gc()

  options(verbose=FALSE)
  gc()["Vcells", "(Mb)"] - mem0

  ## Restore the 'verbose' option to the value it had before this example:
  options(old_opts)



