.packageName <- "matchprobes"
##------------------------------------------------------------
## (c) Wolfgang Huber 2003
##------------------------------------------------------------
combine <- function(batch, probepkg, newcdf,
                    verbose=TRUE) {
  
   ## the "reference" chip type
   REFCHIP <- 1
  
  if(missing(newcdf))
    stop("Argument \"newcdf\" is missing, with no default.\nPlease specify the name of the new CDF environment.")

  require(affy)
  
  ## number of AffyBatches  
  nrbatch <- length(batch)
  stopifnot(length(probepkg)==nrbatch)

  ## number of arrays with each AffyBatch
  batchlength <- sapply(batch, length)
  
  upp   <- unique(probepkg)
  nrupp <- length(upp)
  if(nrupp<=1)
    stop("You have got nothing to combine.\n")
  
  for (lib in upp)
    library(lib, character.only=TRUE)
  
  uppname      = paste("package", upp, sep=":")
  probepkgname = paste("package", probepkg, sep=":")
  
  ## here we use the fact that the MM probes are always "one below"
  ## the PM probes, i.e. if PM is at (x,y), MM is at (x, y+1)
  pm.index <- function(x,y,nr)     y*nr + x + 1
  mm.index <- function(x,y,nr) (y+1)*nr + x + 1

  ## pm[[i]] and mm[[i]] map from the table probepkg[[i]] into the AffyBatch
  pm = mm = sequs = vector(mode="list", length=nrupp)
  for (i in 1:nrupp) {
    ibatch = match(upp[i], probepkg)
    if(verbose)
      cat(uppname[i], probepkg[ibatch], "\n", sep="\t")
    
    tmp <- get(upp[i], uppname[i])
    pm[[i]]  <- pm.index(tmp$x, tmp$y, nrow(batch[[ibatch]]))
    mm[[i]]  <- mm.index(tmp$x, tmp$y, nrow(batch[[ibatch]]))
    sequs[[i]] <- tmp$sequence
  }
  rm(tmp)
  
  ##----------------------------------------------------------------------
  ## cseqs: a logical vector of length = length(sequs[[REFCHIP]])
  ## TRUE if a probe is on all the arrays
  ##----------------------------------------------------------------------
  lseqs <- rep(TRUE, length(sequs[[REFCHIP]]))

  ## For i==REFCHIP, the %in% is redundant but the code is simpler so 
  for(i in 1:nrupp) {
    lseqs <-  lseqs & (sequs[[REFCHIP]] %in% sequs[[i]])
    if(verbose)
      cat(sum(lseqs), "probes in common between", paste(upp[1:i], collapse=", "), "and", probepkg[REFCHIP], "\n")
  }
  cseqs <- unique(sequs[[REFCHIP]][lseqs])
  nrcommon = length(cseqs)
  cat(nrcommon, "unique probes in common\n")

  ## pind: the matrix of first occurences of all unique probes
  pind <- matrix(NA, nrow=nrcommon, ncol=nrupp)
  for(i in 1:nrupp) {
    pind[,i] <- match(cseqs, sequs[[i]])
  }
  stopifnot(!any(is.na(pind)))

  ## debug: this may go away once the code has stabilized 
  for(i in 2:nrupp)
    stopifnot(all(sequs[[1]][pind[,1]] == sequs[[i]][pind[,i]]))
  stopifnot(!any(duplicated(sequs[[1]][pind[,1]])))
   
  ## create new CDF environment
  assign(newcdf, new.env(hash=TRUE, parent=NULL))

  use <- sequs[[REFCHIP]] %in% cseqs
  probesets <- split(sequs[[REFCHIP]][use],
                     f = get(probepkg[REFCHIP], probepkgname[REFCHIP])$Probe.Set.Name[use])
   
  psids <- names(probesets)

  for (i in 1:length(probesets)){
    ps <- probesets[[i]]  ## vector of sequences
    csi <- match(ps, cseqs)
    csi <- csi[!is.na(csi)]
    ## cat(psids[i], length(csi), "  ")
    if(length(csi)==0) next  ## probeset for which we have no information
    mat = cbind(csi, csi+length(cseqs))
    colnames(mat) = c("pm", "mm")
    assign(psids[i], mat, env=get(newcdf))
  }
   
  ##pind  = pind[ord,]
  ##psids = psids[ord]
  ##nrp = length(psids)
  ##j1  = 1
  ##while(j1 <= nrp) {
  ##  thepsid = psids[j1]
  ##  j2 <- j1
  ##  while(psids[j2]==thepsid && j2<=nrp)
  ##    j2 <- j2+1
  ##  jrange <- j1:(j2-1)
  ##  assign(thepsid, cbind(jrange, jrange+nrp), env=get(newcdf))
  ##  j1 <- j2
  ##}

  ## probepkg is the list of packages, and may contain the same ones several times
  ## upp are the unique ones
  ## pkgoff maps one to the other
  pkgoff = match(probepkg, upp)

  ## ----------------------------------------------------------------------
  ## join the expression value table
  x <- matrix(NA, nrow = nrcommon*2, ncol = sum(batchlength))
  j <- 0
  for(i in 1:nrbatch) {
    k <- pkgoff[i]
    x[, j + (1:batchlength[i])] <- exprs(batch[[i]])[c(pm[[k]][pind[,k]],
                                                       mm[[k]][pind[,k]]), ]
    j <- j + batchlength[i]
  }

  ## check!
  ##ps = "AFFX-HUMGAPDH/M33197_3_st"
  ##library(hu6800cdf)
  ##library(hgu95acdf)
  ##prp = get(ps, hu6800cdf)[, 1]
  ##prb = get(ps, hgu95acdf)[, 1])
  ##prk = get(ps, get(newcdf))[, 1])
  ##if(!all(x[prk,] == cbind(exprs(batch[[1]])[prp,], exprs(batch[[2]])[prb,])))
  ##  browser()
  
  lapply(uppname, function(x) { do.call("detach", list(x)) })

  return(list(dat = new("AffyBatch", exprs=x, cdfName = newcdf,
                        nrow=0,ncol=0),
              cdf = get(newcdf)))
}
## A function that reads tab-delimited probe sequence
## (and other stuff) files from Affymetrix
getProbeDataAffy <- function(arraytype, datafile,
                             pkgname = NULL, comparewithcdf = TRUE)
{  
  require(affy) || stop("Could not load library affy.")

  if(missing(datafile))
    datafile <- paste(arraytype, "_probe_tab", sep="")
  
  arraytype = cleancdfname(arraytype, addcdf=FALSE)
  cdfname   = cleancdfname(arraytype)
  if (is.null(pkgname))
    pkgname = paste(arraytype, "probe", sep="")

  ##LG: if (identical(grep(paste("^", .Platform$file.sep, sep=""), datafile), integer(0)))
  ##LG:   datafile <- file.path(datadir, datafile)
  ##WH: Laurent, please leave this code inactive.
  ##WH: datafile may also be a connection, and this code would break with that.
  ##WH: I like to keep things simple. If you need to paste a directory path
  ##WH: in front of 'datafile', then please do so before calling this function.
  
  what = list("character", "numeric", "numeric", "numeric", "character", "character")
  head <- scan(datafile, sep="\t", quiet=TRUE, multi.line = FALSE, nlines=1, what="character")
  dat  <- scan(datafile, sep="\t", quiet=TRUE, multi.line = FALSE, skip=1,  what=what)

  if(any(unlist(head) != c("Probe Set Name", "Probe X", "Probe Y", 
     "Probe Interrogation Position", "Probe Sequence", "Target Strandedness"))) {
      mess = paste("The data file", datafile, "does not have the expected column names",
         "in its header line. Please make sure it is the right data file. If you are",
         "positive, you may need to write a customized data import function",
         "to replace 'getProbeDataAffy'. You may use 'getProbeDataAffy' as a template.",
         "Please see the help files for the functions 'getProbeDataAffy' and",
         "'MakeProbePackage', and the vignette for the package matchprobes.\n")
      stop(mess)
    }

  for (i in which(what=="numeric")) {
    z = which(is.na(dat[[i]]))
    if(length(z)>0) 
      stop(paste("Corrupted data file: found non-number in line ", z[1],
                 " of column ", head[i], ": ", dat[z[1], i]), sep="") 
  }

  ## data frame with the probe data
  pt = data.frame(sequence = I(dat[[5]]),           ## character
                  x        = as.integer(dat[[2]]),  ## integer
                  y        = as.integer(dat[[3]]),  ## integer
                  Probe.Set.Name               = I(dat[[1]]),          ## character 
                  Probe.Interrogation.Position = as.integer(dat[[4]]), ## integer
                  Target.Strandedness          = dat[[6]])             ## factor
  class(pt) = c("probetable", class(pt))

  
  ## assign
  dataEnv = new.env()
  assign(pkgname, pt, envir=dataEnv)

  datasource = "The probe sequence data was obtained from \\\\url{http://www.affymetrix.com}."
  if(is.character(datafile))
    datasource = paste(datasource, " The file name was \\\\code{", datafile, "}.", sep="")

  symVal = list(ARRAYTYPE  = arraytype,
    DATASOURCE = datasource,
    NROW       = as.character(nrow(pt)),
    NCOL       = as.character(ncol(pt)))

  if(comparewithcdf) .lgExtraParanoia(pt, cdfname)
  
  return(list(pkgname = pkgname, symVal = symVal, dataEnv = dataEnv))
}
## ----------------------------------------------------------------------
## The table pt contains a probe to probe-set mapping (many-to-one).
## The CDF environment contains a probe-set to probe mapping (one-to-many).
## Here, we check whether they agree.
## In addition, it uses the information in the CDF to guess
## sizex, the size of the chip in x-direction.
## This is done using the fact that with current-day Affymetrix 
## for each PM probe at (x,y) there is a MM probe at (x,y+1).
## (C) Laurent Gautier, Wolfgang Huber 2003
## ----------------------------------------------------------------------
.lgExtraParanoia = function (pt, cdfname) {
  do.call("library", list(cdfname))
  thecdf = get(cdfname, envir=as.environment(paste("package", cdfname, sep=":")))

  ## Unroll CDF in order to invert the mapping from probe-set -> probe
  ## to probe -> probe-set. psnm1[i] is the probe set name for the i-th probe
  probesetnames = ls(thecdf)
  pm1   = unlist(lapply(probesetnames,
    function(ps) {get(ps, env=thecdf)[,1]}))
  mm1   = unlist(lapply(probesetnames,
    function(ps) {get(ps, env=thecdf)[,2]}))
  psnm1 = unlist(lapply(probesetnames,
    function(ps) {rep(ps, nrow(get(ps, env=thecdf)))}))
    
  ## On most chips, PM and MM probe are next to each other on the chip, at same
  ## x coordinate and at adjacent y coordinates. Then, "sizex" is always the same,
  ## namely the size of the chip in x-direction. On some chips, there are few
  ## exceptions.
  tab = table(mm1-pm1)
  sizex = as.numeric(names(tab))[ max(tab)==tab ]
  
  ## The probe indices according to pt
  pm2   =  pt$y    * sizex + pt$x + 1
  mm2   = (pt$y+1) * sizex + pt$x + 1
  psnm2 = pt[["Probe.Set.Name"]]
  
  ## Check if the probe set names that are associated with each probe
  ## are the same in both CDF and pt
  z1 = z2 = rep(NA, max(pm1, mm1, pm2, mm2))
  z1[pm1] = z1[mm1] = psnm1
  z2[pm2] = z2[mm2] = psnm2

  diffprob = which(z1 != z2)
  if(length(diffprob)>0) {
    cat("***************************************************************************\n",
        "Found different probe set names in 'CDF package' and 'probe package' for\n",
         length(diffprob), "probes.\n")
    for (i in 1:min(10, length(diffprob)))
      cat(z1[diffprob[i]], z2[diffprob[i]], "\n")
    cat("If you consider this mismatch insignificant, you may want to rerun this\n",
        "function with 'comparewithcdf  = FALSE'. Otherwise, you'll need to\n",
        "figure out the reason for this!\n")
    stop("Stopped")
  }
  
  invisible(TRUE)
}

##----------------------------------------------------------------------
## Copyright R. Gentleman and W. Huber, 2003, all rights reserved
##----------------------------------------------------------------------
makeProbePackage <- function(arraytype,
          importfun = "getProbeDataAffy",
          maintainer,
          version,
          pkgname = NULL,
          outdir  = ".",
          force = FALSE, quiet = FALSE, check = TRUE, build = TRUE, unlink = TRUE, ...)
{
  ## Bureucracy: check arguments
  if (missing(maintainer) || !is.character(maintainer))
    stop(paste("'maintainer' is missing or invalid. Please specify the maintainer of the",
               "package that you want to create in the form: Your name <you@domain>", sep="\n"))
  if (missing(version) || !is.character(version))
    stop(paste("'version' is missing or invalid. Please select a version number larger",
               "than those used for any previous versions of this package.", sep="\n"))
  if(!exists(importfun) || !is.function(get(importfun)))
    stop("'importfun' must be a function.")

  ## Call the import function
  ## importRes is a list with three elements:
  ## $pkgname : package name
  ## $dataEnv : environment containing data objects
  ## $symVal  : named list with symbol-value substitutions
  if (!quiet) cat("Importing the data.\n")
  importRes = do.call(importfun, c(arraytype = arraytype, pkgname = pkgname, list(...)))

  pkgname = importRes$pkgname
  thispkg = "matchprobes"
  desc    = package.description(thispkg)[c("Package", "Version")]
  creator = paste("package", desc[1], "version", desc[2])

  symbolValues = c(importRes$symVal, list(
    VERSION    = version,
    CREATOR    = creator,
    MAINTAINER = maintainer))

  ## Create package
  createRes = createPackage(pkgname,
    destinationDir = outdir,
    originDir = file.path(.path.package(thispkg), "Code"),
    symbolValues = symbolValues,
    unlink = unlink, quiet = quiet)

  ## Write the data objects
  if (!quiet) cat("Writing the data.\n")
  save(list  = ls(importRes$dataEnv),
       file  = file.path(createRes$pkgdir, "data", paste(pkgname, ".rda", sep="")),
       envir = importRes$dataEnv)

  ## R CMD check
  cdir <- getwd()
  setwd(outdir)
  on.exit(setwd(cdir))
  if (check) {
    if (!quiet)
      cat("Checking the package.\n")
    system(paste(file.path(R.home(),"bin","R"), "CMD check",
                 pkgname))
    logFile = file.path(paste(pkgname, "Rcheck", sep="."), "00check.log")
    if (!file.exists(logFile)) {
      stop(paste("Expected but did not find the log-file", logFile, "after R CMD check"))
    } else {
      thelines = readLines(logFile)
      warns = grep("WARNING", thelines, value=TRUE)
      errs  = grep("ERROR", thelines, value=TRUE)
      if (length(warns)>0)
        cat("*** WARNINGS ***\n", warns)
      if (length(errs)>0)
        stop(errs)
    }
    if (unlink)
      unlink(paste(pkgname, ".Rcheck", sep=""), recursive = TRUE)
  }

  ## R CMD build
  if (build) {
    if (!quiet)
      cat("Building the package.\n")
    system(paste(file.path(R.home(),"bin","R"), "CMD build",
                 ifelse(force, "-force", ""),
                 pkgname))
  }
  setwd(cdir)
  return(pkgname)
}

matchprobes <- function(query, records, probepos=FALSE) 
   .Call("MP_matchprobes", toupper(query), records, probepos, PACKAGE="matchprobes")

complementSeq <- function(seq, start=1, stop=0) 
  .Call("MP_complementSeq", seq, as.integer(start), as.integer(stop), PACKAGE="matchprobes")
  
reverseSeq  <- function(seq)
  .Call("MP_revstring", seq, PACKAGE="matchprobes")

basecontent <- function(seq) {
  rv <- .Call("MP_basecontent", seq, PACKAGE="matchprobes")
  class(rv) <- c("probetable", class(rv))
  return(rv)
}
print.probetable = function(x, ...) {
  cat("Object of class", class(x), "with", nrow(x), "rows and", ncol(x), "columns.\n")
}
.First.lib <- function(lib, pkgname, where) {
  ## load the compiled code
  library.dynam(pkgname, pkgname, lib)
}
