### Name: nucleotideFrequency
### Title: Calculate the frequency of oligonucleotides in a DNA or RNA
###   sequence, plus some related functions
### Aliases: oligonucleotideFrequency oligonucleotideFrequency
###   oligonucleotideFrequency,XString-method
###   oligonucleotideFrequency,XStringSet-method
###   oligonucleotideFrequency,XStringViews-method
###   oligonucleotideFrequency,MaskedXString-method dinucleotideFrequency
###   trinucleotideFrequency nucleotideFrequencyAt
###   nucleotideFrequencyAt,XStringSet-method
###   nucleotideFrequencyAt,XStringViews-method oligonucleotideTransitions
###   mkAllStrings
### Keywords: methods manip

### ** Examples

  ## ---------------------------------------------------------------------
  ## A. BASIC *Frequency() EXAMPLES
  ## ---------------------------------------------------------------------
  ## Load the 'yeastSEQCHR1' dataset and wrap it in a DNAString object.
  data(yeastSEQCHR1)
  yeast1 <- DNAString(yeastSEQCHR1)

  ## Frequencies of the 2-mers, 3-mers and 4-mers found in 'yeast1'.
  dinucleotideFrequency(yeast1)
  trinucleotideFrequency(yeast1)
  oligonucleotideFrequency(yeast1, width = 4)

  ## The least and the most represented 6-mers:
  f6 <- oligonucleotideFrequency(yeast1, width = 6)
  f6[f6 == min(f6)]
  f6[f6 == max(f6)]

  ## Same trinucleotide counts, this time returned as an array that is
  ## indexed by one letter per dimension:
  tri <- trinucleotideFrequency(yeast1, as.array = TRUE)
  tri["A", "A", "C"]  # == trinucleotideFrequency(yeast1)["AAC"]
  tri["T", , ]        # frequencies of trinucleotides starting with a "T"

  ## The input can also be a set of sequences rather than a single one:
  library(drosophila2probe)
  probes <- DNAStringSet(drosophila2probe$sequence)
  dfmat <- dinucleotideFrequency(probes)  # a big matrix
  dinucleotideFrequency(probes, simplify.as = "collapsed")
  dinucleotideFrequency(probes, simplify.as = "collapsed", as.matrix = TRUE)

  ## ---------------------------------------------------------------------
  ## B. nucleotideFrequencyAt()
  ## ---------------------------------------------------------------------
  ## Tabulate the letters found at one or several positions of the probes.
  nucleotideFrequencyAt(probes, 13)
  nucleotideFrequencyAt(probes, c(13, 20))
  nucleotideFrequencyAt(probes, c(13, 20), as.array = FALSE)

  ## nucleotideFrequencyAt() can be used to answer questions like: "how
  ## many probes in the drosophila2 chip have T, G, T, A at positions
  ## 2, 4, 13 and 20, respectively?"
  nucleotideFrequencyAt(probes, c(2, 4, 13, 20))["T", "G", "T", "A"]

  ## or "what's the probability to have an A at position 25 if there is
  ## one at position 13?"
  ## nf is indexed as nf[<letter at 13>, <letter at 25>]; a single cell is
  ## already a scalar, so it is divided directly by the "A" row total.
  nf <- nucleotideFrequencyAt(probes, c(13, 25))
  nf["A", "A"] / sum(nf["A", ])

  ## Dividing the whole "A" row by its total gives, in one vectorized
  ## step, the probability of each base at position 25 if there is an A
  ## at position 13 (the "A" entry repeats the value printed just above):
  nf["A", ] / sum(nf["A", ])

  ## See ?hasLetterAt for another way to get those results.

  ## ---------------------------------------------------------------------
  ## C. oligonucleotideTransitions()
  ## ---------------------------------------------------------------------
  ## Nucleotide transition matrices for yeast1: first with the default
  ## settings, then with 2 as the second argument and freq=TRUE.
  ## NOTE(review): later Biostrings releases appear to name this argument
  ## 'as.prob' instead of 'freq' -- confirm against the installed version.
  oligonucleotideTransitions(yeast1)
  oligonucleotideTransitions(yeast1, 2, freq = TRUE)

  ## ---------------------------------------------------------------------
  ## D. ADVANCED *Frequency() EXAMPLES
  ## ---------------------------------------------------------------------
  ## Note that when dropping the dimensions of the 'tri' array, elements
  ## in the resulting vector are ordered as if they were obtained with
  ## 'fast.moving.side="left"':
  triL <- trinucleotideFrequency(yeast1, fast.moving.side = "left")
  all(as.vector(tri) == triL)  # TRUE

  ## Convert the trinucleotide frequency into the amino acid frequency
  ## based on translation: relabel every trinucleotide count through
  ## GENETIC_CODE, then add up the counts that share a label.
  tri1 <- trinucleotideFrequency(yeast1)
  names(tri1) <- GENETIC_CODE[names(tri1)]
  ## vapply() is used rather than sapply() so that the result is always a
  ## named numeric vector holding one total per label.
  ## 12512 occurrences of the stop codon:
  vapply(split(tri1, names(tri1)), sum, FUN.VALUE = numeric(1))

  ## When the returned vector is very long (e.g. width >= 10), using
  ## 'with.labels=FALSE' can improve performance significantly.
  ## Here for example, the observed speed up is between 25x and 500x:
  f12 <- oligonucleotideFrequency(yeast1, 12, with.labels = FALSE)  # very fast!

  ## Some related functions:
  dict1 <- mkAllStrings(LETTERS[1:3], 4)
  dict2 <- mkAllStrings(LETTERS[1:3], 4, fast.moving.side = "left")
  identical(reverse(dict1), dict2)  # TRUE



