\name{basecontent}
\alias{basecontent}
\title{Obtain the ATCG content of a gene}
\description{
  This function accepts a character string representing the base
  sequence of the DNA for a gene and computes the counts of each base.
}
\usage{
basecontent(seq)
}
\arguments{
  \item{seq}{Character vector.}
}
\details{
  Each element of \code{seq} is separately decomposed into its base
  sequence components.
}
\value{
  A matrix with 4 columns and \code{length(seq)} rows.
  The columns are named \code{A}, \code{C}, \code{T},
  \code{G}, and the values in each column are the counts
  of the corresponding bases in the elements of \code{seq}.

  The elements of \code{seq} can be in upper case, lower case
  or mixed.
}

\author{R. Gentleman, W. Huber}

\seealso{\code{\link{complementSeq}},\code{\link{reverseSeq}}}

\examples{
 v<-c("AAACT", "GGGTT", "ggAtT")
 basecontent(v)
}
\keyword{manip}

\eof
\name{combine}
\alias{combine}
\title{A function to combine data from different Affymetrix genechip types
  into one AffyBatch.}
\description{
  The function takes two AffyBatches and their respective probe sequence
  information and merges the intensities from the matching probes only
  into one AffyBatch. 
  }
\usage{
combine(batch, probepkg, newcdf, verbose=TRUE)
}
\arguments{
  \item{batch}{A \code{list} of two AffyBatches.}
  \item{probepkg}{A character vector of length 2, containing the names
    of the probe sequences libraries (for example, \code{hgu133aprobe})
    that go with the AffyBatches.}
  \item{newcdf}{The name of the new CDF environment that is to be created.}
  \item{verbose}{If TRUE, messages are printed to the console.}
}
\details{
  This function is EXPERIMENTAL; no guarantee that the resulting data
  makes biological sense.
  
  You may want to have a look at the code of this function and at
  intermediate results.

  Presently the code is written for two chiptypes, but if necessary can
  now be easily generalized to \code{n}>=2.
}
\value{
  A list with two elements: \code{dat}, an \code{\link{AffyBatch}}, and
  \code{cdf}, an environment, which contains a "CDF" probe-set to probe
  mapping (as for ordinary genechips).
  \code{cdf} is obtained by restricting the probe-set to probe mapping of the
  \bold{second} \code{batch} and \code{probepkg}.
  
  A vignette is being written.
}
\author{R. Gentleman, Wolfgang Huber}


\examples{
  ## 
}
\keyword{manip}

\eof
\name{complementSeq}
\alias{complementSeq}

\title{Complementary sequence.}
\description{Function to obtain the complementary sequence.}
\usage{
complementSeq(seq, start=1, stop=0)
}
\arguments{
  \item{seq}{Character vector consisting of the letters A, C, G and T.}
  \item{start}{Numeric scalar: the sequence position at which to start
    complementing. If 1, start from the beginning.}
  \item{stop}{Numeric scalar: the sequence position at which to stop
    complementing. If 0, go until the end.}
}
\details{
  The complemented sequence for each element of the input is computed and
  returned.  The complement is given by the mapping:
  A -> T, C -> G, G -> C, T -> A.

  An important special case is \code{start=13}, \code{stop=13}:
  If \code{seq} is a vector of 25mer sequences on an Affymetrix
  GeneChip, \code{complementSeq(seq, start=13, stop=13)}
  calculates the so-called \emph{mismatch} sequences.

  The function deals only with sequences that represent DNA.
  These can consist only of the letters  \code{A}, \code{C}, \code{T}
  or \code{G}. Upper, lower or mixed case is allowed and honored.
  
}
\value{
  A character vector of the same length as \code{seq} is
  returned. Each component represents the transformed sequence for the
  input value.
}

\author{R. Gentleman, W. Huber}

\seealso{\code{\link{basecontent}}, \code{\link{reverseSeq}}}

\examples{
 seq <- c("AAACT", "GGGTT")
 complementSeq(seq)

 seq <- c("CGACTGAGACCAAGACCTACAACAG", "CCCGCATCATCTTTCCTGTGCTCTT")
 complementSeq(seq, start=13, stop=13)
}
\keyword{manip}

\eof
\name{getProbeDataAffy}
\alias{getProbeDataAffy}
\title{Read a data file describing the probe sequences on an Affymetrix genechip}
\description{Read a data file describing the probe sequences on an Affymetrix genechip}
\usage{
getProbeDataAffy(arraytype, datafile, pkgname = NULL, comparewithcdf = TRUE)
}
\arguments{
  \item{arraytype}{Character. Array type (e.g. 'HG-U133A')}
  \item{datafile}{Character. The filename of the input data file.
    If omitted a default name is constructed from \code{arraytype}
    (see this function's source code).}
  \item{pkgname}{Character. Package name. If NULL the name is derived
    from \code{arraytype}.}
  \item{comparewithcdf}{Logical. If TRUE, run a consistency check
    against a CDF package of the same name (what used to be Laurent's
    "extraparanoia".)}
}
\details{This function serves as an interface between
  the (1) representation of array probe information data in the packages
  that are generated by \code{\link{makeProbePackage}} and (2) the
  vendor- and possibly version-specific way the data are represented
  in \code{datafile}.

  \code{datafile} is a tabulator-separated file with one row per probe,
  and column names \code{'Probe X'}, \code{'Probe Y'},
  \code{'Probe Sequence'}, and \code{'Probe.Set.Name'}. See the vignette
  for an example.
}

\value{A list with three components
  \describe{
    \item{dataEnv}{an environment which contains the data frame with the
      probe sequences and the other probe data.}
    \item{symVal}{a named list of symbol value substitutions which can
      be used to customize the man pages. See
      \code{\link[Biobase:createPackage]{createPackage}}.}
    \item{pkgname}{a character with the package name; will be the same
      as the function parameter \code{pkgname} if it was specified;
      otherwise, the name is constructed from the parameter \code{arraytype}.}
  }
}

\seealso{\code{\link{makeProbePackage}}}
\examples{
  ## Please refer to the vignette
}
\keyword{IO}
\keyword{utilities}

\eof
\name{makeProbePackage}
\alias{makeProbePackage}
\title{Make a package with probe sequence related data for microarrays}
\description{Make a package with probe sequence related data for microarrays}
\usage{
makeProbePackage(arraytype,
    importfun = "getProbeDataAffy",
    maintainer,
    version,
    pkgname = NULL,
    outdir  = ".",
    force = FALSE, quiet = FALSE,
    check = TRUE, build = TRUE, unlink = TRUE, \dots)
}
\arguments{
  \item{arraytype}{Character. Name of array type (typically a vendor's
    name like "HG-U133A").}
  \item{importfun}{Character. Name of a function that can read the
    probe sequence data e.g. from a file. See
    \code{\link[matchprobes:getProbeDataAffy]{getProbeDataAffy}} for
    an example.}
  \item{maintainer}{Character. Name and email address of the maintainer.}
  \item{version}{Character. Version number for the package.}
  \item{pkgname}{Character. Name of the package. If missing, a name is
    created from \code{arraytype}.}
  \item{outdir}{Character. Path where the package is to be written.}
  \item{force}{Logical. If \code{TRUE} overrides possible warnings}
  \item{quiet}{Logical. If \code{TRUE} do not print statements on progress on the console}
  \item{check}{Logical. If \code{TRUE} call \code{R CMD check} on the package}
  \item{build}{Logical. If \code{TRUE} call \code{R CMD build} on the package}
  \item{unlink}{Logical. If \code{TRUE} unlink (remove) the \code{check}
    directory (only relevant if \code{check=TRUE})}
  \item{\dots}{Further arguments that get passed along to \code{importfun}}
}
\details{See vignette.
}
\examples{
filename <- file.path(.path.package("matchprobes"), "data", "HG-U95Av2_probe_tab.gz")
outdir   <- tempdir()
me       <- "Wolfgang Huber <w.huber@dkfz.de>"
makeProbePackage("HG-U95Av2",
                 datafile   = gzfile(filename, open="r"),
                 outdir     = outdir,
                 maintainer = me, 
                 version    = "0.0.1", 
                 force      = TRUE)
dir(outdir)
}
\keyword{IO}
\keyword{utilities}

\eof
\name{matchprobes}
\alias{matchprobes}
\title{A function to match a query sequence to the sequences of a set of
  probes.}
\description{
  The \code{query} sequence, a character string (probably representing
  a transcript of interest), is scanned for the presence of exact
  matches to the sequences in the character vector \code{records}.
  The indices of the set of matches are returned.
}
\usage{
matchprobes(query, records, probepos=FALSE)
}
\arguments{
  \item{query}{A character vector. For example, each element may represent
    a gene (transcript) of interest.}
  \item{records}{A character vector. For example, each element may represent
    the probes on a DNA array.}
  \item{probepos}{A logical value. If TRUE, return also the start
    positions of the matches in the query sequence.}
}
\details{
  The matching is done using the C library function \code{strstr}. It
  might be nice to explore other possibilities.
}
\value{
  A list.
  Its first element is a list of the same length as the input vector.
  Each element of the list is a numeric vector containing the indices of
  the probes that have a perfect match in the query
  sequence.

  If \code{probepos} is TRUE,
  the returned list has a second element: it is of the same shape
  as described above, and gives the respective positions of the
  matches.
}
\author{R. Gentleman, Laurent Gautier, Wolfgang Huber}


\examples{
  ## This function is mainly intended to be used together with the probe
  ## tables from the "probe" data packages, e.g.:
  ## > library(hgu95av2probe)
  ## > data(probe)
  ## > seq <- probe$sequence
  ##
  ## Since we do not want to be dependent on the presence of this 
  ## data package, for the sake of example we simply simulate some
  ## probe sequences:

  bases <- c("A", "C", "G", "T")
  seq   <- sapply(1:1000, function(x) paste(bases[ceiling(4*runif(256))], collapse=""))

  w1 <- seq[20:22]
  w2 <- complementSeq(w1, start=13, stop=13)
  w  <- c(w1, w2)

  matchprobes(w, seq)
  matchprobes(w, seq, probepos=TRUE)
}
\keyword{manip}

\eof
\name{print.probetable}
\alias{print.probetable}
\title{Print method for probetable objects}
\usage{
print.probetable(x, \dots)
}
\description{
  Prints class(x), nrow(x) and ncol(x), but not the elements of x.
  The motivation for having this method is that methods from the package
  \code{base} such as
  \code{\link[base:print.matrix]{print.matrix}} and
  \code{\link[base:print.data.frame]{print.data.frame}}
  will try to print the values of all elements of \code{x}, which can
  take inconveniently much time and screen space if \code{x} is large.
}
\arguments{
  \item{x}{an object of S3-class \code{probetable}.}
  \item{\dots}{further arguments that get ignored.}
}
\seealso{
  \code{\link[base:print.matrix]{print.matrix}},
  \code{\link[base:print.data.frame]{print.data.frame}}
}
\examples{
  a = as.data.frame(matrix(runif(1e6), ncol=1e3))
  class(a) = c("probetable", class(a))
  print(a)
  print(as.matrix(a[2:3, 4:6]))
}
\keyword{print}

\eof
\name{reverseSeq}
\alias{reverseSeq}

\title{Reverse Sequence.}
\description{Function to obtain the reverse sequence.}
\usage{
reverseSeq(seq)
}
\arguments{
  \item{seq}{Character vector.}
}
\details{
  The function reverses the order of the characters within each
  element of its argument.
}
\value{
  A character vector of the same length as \code{seq}.
}

\author{R. Gentleman, W. Huber}

\seealso{\code{\link{basecontent}},\code{\link{complementSeq}}}

\examples{
 w <-  c("hey there", "you silly fool")
 reverseSeq(w)

 w <- "able was I ere I saw Elba"
 reverseSeq(w)
}
\keyword{manip}

\eof
