### Name: match-utils
### Title: Utility functions related to pattern matching
### Aliases: match-utils neditStartingAt neditEndingAt neditAt
###   isMatchingStartingAt isMatchingEndingAt isMatchingAt
###   neditStartingAt,character-method neditStartingAt,XString-method
###   neditStartingAt,XStringSet-method neditEndingAt,character-method
###   neditEndingAt,XString-method neditEndingAt,XStringSet-method
###   isMatchingStartingAt,character-method
###   isMatchingStartingAt,XString-method
###   isMatchingStartingAt,XStringSet-method
###   isMatchingEndingAt,character-method isMatchingEndingAt,XString-method
###   isMatchingEndingAt,XStringSet-method hasLetterAt mismatch
###   mismatch,ANY,XStringViews-method nmatch
###   nmatch,ANY,XStringViews-method nmismatch
###   nmismatch,ANY,XStringViews-method coverage,MaskedXString-method
###   coverage,MIndex-method nmismatchStartingAt nmismatchEndingAt
###   isMatching
### Keywords: methods

### ** Examples

  ## ---------------------------------------------------------------------
  ## hasLetterAt()
  ## ---------------------------------------------------------------------
  x <- DNAStringSet(c("AAACGT", "AACGT", "ACGT", "TAGGA"))
  hasLetterAt(x, "AAAAAA", 1:6)

  ## hasLetterAt() can be used to answer questions like: "which elements
  ## in 'x' have an A at position 2 and a G at position 4?"
  q1 <- hasLetterAt(x, "AG", c(2, 4))
  which(rowSums(q1) == 2)

  ## or "how many probes in the drosophila2 chip have T, G, T, A at
  ## position 2, 4, 13 and 20, respectively?"
  library(drosophila2probe)
  probes <- DNAStringSet(drosophila2probe$sequence)
  q2 <- hasLetterAt(probes, "TGTA", c(2, 4, 13, 20))
  sum(rowSums(q2) == 4)
  ## or "what's the probability to have an A at position 25 if there is
  ## one at position 13?"
  q3 <- hasLetterAt(probes, "AACGT", c(13, 25, 25, 25, 25))
  sum(q3[ , 1] & q3[ , 2]) / sum(q3[ , 1])
  ## Probabilities to have other bases at position 25 if there is an A
  ## at position 13:
  sum(q3[ , 1] & q3[ , 3]) / sum(q3[ , 1])  # C
  sum(q3[ , 1] & q3[ , 4]) / sum(q3[ , 1])  # G
  sum(q3[ , 1] & q3[ , 5]) / sum(q3[ , 1])  # T

  ## See ?nucleotideFrequencyAt for another way to get those results.

  ## ---------------------------------------------------------------------
  ## neditAt() / isMatchingAt()
  ## ---------------------------------------------------------------------
  subject <- DNAString("GTATA")

  ## Pattern "AT" matches subject "GTATA" at position 3 (exact match)
  neditAt("AT", subject, at=3)
  isMatchingAt("AT", subject, at=3)

  ## ... but not at position 1
  neditAt("AT", subject)
  isMatchingAt("AT", subject)

  ## ... unless we allow 1 mismatching letter (inexact match)
  isMatchingAt("AT", subject, max.mismatch=1)

  ## Here we look at 6 different starting positions and find 3 matches if
  ## we allow 1 mismatching letter
  isMatchingAt("AT", subject, at=0:5, max.mismatch=1)

  ## No match
  neditAt("NT", subject, at=1:4)
  isMatchingAt("NT", subject, at=1:4)

  ## 2 matches if N is interpreted as an ambiguity (fixed=FALSE)
  neditAt("NT", subject, at=1:4, fixed=FALSE)
  isMatchingAt("NT", subject, at=1:4, fixed=FALSE)

  ## max.mismatch != 0 and fixed=FALSE can be used together
  neditAt("NCA", subject, at=0:5, fixed=FALSE)
  isMatchingAt("NCA", subject, at=0:5, max.mismatch=1, fixed=FALSE)

  some_starts <- c(10:-10, NA, 6)
  subject <- DNAString("ACGTGCA")
  is_matching <- isMatchingAt("CAT", subject, at=some_starts, max.mismatch=1)
  some_starts[is_matching]

  ## ---------------------------------------------------------------------
  ## mismatch() / nmismatch()
  ## ---------------------------------------------------------------------
  m <- matchPattern("NCA", subject, max.mismatch=1, fixed=FALSE)
  mismatch("NCA", m)
  nmismatch("NCA", m)

  ## ---------------------------------------------------------------------
  ## coverage()
  ## ---------------------------------------------------------------------
  coverage(m)

  ## See ?matchPDict for examples of using coverage() on an MIndex object...



