.packageName <- "vsn"
## Extract the intensity matrix from the argument "intensities".
## This should probably be done via S3 or S4 method dispatch,
## and may be in a future version.
## Extract a numeric intensity matrix from 'intensities', which may be a
## numeric matrix, an all-numeric data.frame, a Biobase exprSet, or a
## marrayRaw object. For marrayRaw, each slide contributes two columns of
## background-corrected intensities: Gf-Gb (odd columns), Rf-Rb (even).
##
## Bug fix: the previous implementation dispatched via
## switch(class(intensities), ...). In R >= 4.0.0 matrices have class
## c("matrix", "array"), and switch() requires a length-1 EXPR, so that
## failed; we now dispatch with is.*()/inherits().
getIntensityMatrix = function(intensities, verbose) {
  if (is.matrix(intensities)) {
    if (!is.numeric(intensities))
      stop("'intensities' was found to be a matrix, but is not numeric.")
    y = intensities
  } else if (is.data.frame(intensities)) {
    if (!all(vapply(intensities, is.numeric, logical(1))))
      stop("'intensities' was found to be a data.frame, but contains non-numeric columns.")
    y = as.matrix(intensities)
  } else if (inherits(intensities, "exprSet")) {
    y = exprs(intensities)
  } else if (inherits(intensities, "marrayRaw")) {
    nrslides = as.integer(ncol(intensities@maRf))
    nrspots  = as.integer(nrow(intensities@maRf))
    if (verbose)
      cat(sprintf("Converting marrayRaw (%d spots, %d slides) to %dx%d matrix.\n",
                  nrspots, nrslides, nrspots, as.integer(2*nrslides)),
          "Gf-Gb in odd columns, Rf-Rb in even columns.\n")
    y = matrix(NA, nrow=nrspots, ncol=2*nrslides)
    y[, (1:nrslides)*2-1 ] = intensities@maGf - intensities@maGb
    y[, (1:nrslides)*2   ] = intensities@maRf - intensities@maRb
  } else {
    stop(paste("'intensities' has class ", class(intensities)[1],
         ". Permitted are: matrix, data.frame, exprSet, marrayRaw", sep=""))
  }

  ## The multiplicative-additive error model cannot deal with missing values.
  if (any(is.na(y)))
    stop(paste("'intensities' must not contain NA values.\n",
             "This could indicate that the input data has already undergone some\n",
             "thresholding or transformation (log?), and may not satisfy the\n",
             "requirements of the multiplicative-additive noise model.\n",
             "If you are sure that it is meaningful to proceed, please\n",
             "consider calling vsn on a subset of data where all values\n",
             "are defined, and then use vsnh on the full set of data.\n"))

  ## vsn estimates calibration between arrays, so at least two are required.
  if (ncol(y)<=1)
    stop(paste("'intensities' must be a matrix with at least two columns.\n",
               "Please read the documentation and the paper\n",
               "(Huber et al., Bioinformatics 18 (2002) S96-S104).\n"))

  if(is.integer(y))
    y <- y+0.0 ## convert to double

  return(y)
}
     

## Row-wise standard deviations of a matrix.
## Extra arguments (e.g. na.rm=TRUE) are forwarded to rowMeans/rowSums.
## Rows with fewer than two non-missing values yield NA.
rowSds <- function(x, ...) {
  centered <- x - rowMeans(x, ...)
  nobs     <- rowSums(!is.na(x))
  nobs[nobs <= 1] <- NA   ## sd undefined for < 2 observations
  sqrt(rowSums(centered * centered, ...) / (nobs - 1))
}


## Plot row standard deviations versus row means, with a running median
## (red line) of the sd computed in overlapping rank windows. For data that
## satisfy the variance-stabilized model, the red line should be roughly
## horizontal.
## 'x' is a matrix or an exprSet; when an exprSet carries a vsn trimming
## selection in its preprocessing slot (and 'col' is not given), the
## selected rows are drawn in blue.
meanSdPlot = function(x,
                      ranks=TRUE,
                      xlab = ifelse(ranks, "rank(mean)", "mean"),
                      ylab = "sd",
                      pch  = ".",
                      col, ...) {
  stopifnot(is.logical(ranks), length(ranks)==1, !is.na(ranks))

  ## Point colors: an explicit 'col' wins; otherwise blue/black from the
  ## vsnTrimSelection, if present and valid.
  if (!missing(col)) {
    pcol <- col
  } else {
    pcol <- "black"
    if (is(x, "exprSet")) {
      sel <- preproc(description(x))$vsnTrimSelection
      if (!is.null(sel)) {
        selOk <- is.logical(sel) && length(sel)==nrow(exprs(x)) && !any(is.na(sel))
        if (!selOk)
          stop(paste("The element \"vsnTrimSelection\" of the preprocessing",
                     "slot of the description slot of \"x\" is not valid.",
                     "You may remove it and try again.\n"))
        pcol <- ifelse(sel, "blue", "black")
      }
    }
  }

  if (is(x, "exprSet"))
    x <- exprs(x)

  if (! is(x, "matrix"))
    stop("'x' must be a matrix or an exprSet (or it may inherit from these).")

  nrows  <- nrow(x)
  rowMu  <- rowMeans(x, na.rm=TRUE)
  rowSig <- rowSds(  x, na.rm=TRUE)
  muRank <- rank(rowMu, na.last=FALSE)

  ## Running median: window centers at dm, 2*dm, ..., each of width 2*dm
  ## on the scale of the rank fraction muRank/nrows.
  dm        <- 0.05
  midpoints <- seq(dm, 1-dm, by=dm)
  rankFrac  <- muRank / nrows
  rq.sds    <- sapply(midpoints, function(mp)
    median(rowSig[rankFrac >= mp-dm & rankFrac <= mp+dm], na.rm=TRUE))

  ## x-coordinates for the scatter (px) and the running-median line (pxl).
  if (ranks) {
    px  <- muRank
    pxl <- midpoints * nrows
  } else {
    px  <- rowMu
    pxl <- quantile(rowMu, probs=midpoints, na.rm=TRUE)
  }
  plot(px, rowSig, pch=pch, xlab=xlab, ylab=ylab, col=pcol, ...)
  lines(pxl, rq.sds, col="red", type="b", pch=19)
}


##------------------------------------------------------------
## a wrapper for vsn to be used as a normalization method in
## the package affy
##------------------------------------------------------------
## Wrapper so that vsn can be used as a normalization method in the affy
## package. Parameters are estimated with vsn() on (a subsample of) the
## perfect-match probes, and the fitted transformation is then applied via
## vsnh() to ALL probes of the AffyBatch.
## Arguments:
##   abatch     an affy AffyBatch
##   subsample  cap on the number of PM probes used for fitting (NA = all)
##   niter      number of vsn iterations
##   strata     optional integer vector of strata, one per probe
## Returns the AffyBatch with normalized, exp()-scaled intensities; the
## fitted parameters are appended to its description's preprocessing list.
normalize.AffyBatch.vsn = function (abatch, subsample = 20000, niter = 4, strata,...)  
{
  if (!("package:affy" %in% search())) 
    stop("Please load the package affy before calling normalize.AffyBatch.vsn")

  ## ind = the perfect match probes. If length(ind) is larger than the value in
  ## subsample, then only use a random sample of size subsample from these
  ind = unlist(indexProbes(abatch, "pm"))
  if (!is.na(subsample)) {
    if (!is.numeric(subsample)) 
      stop("'subsample' must be numeric.")
    if (length(ind) > subsample) 
      ind = sample(ind, subsample)
  }

  ## Stratum index for each selected probe; a single stratum if none given.
  if(missing(strata))
    strind <- rep(as.integer(1), length(ind))
  else 
    strind <- strata[ind]
  
  ## call parameter estimation (on subset of data)
  vsnres = vsn(intensity(abatch)[ind, ], niter=niter, strata=strind, ...)

  ## add parameters to preprocessing slot
  pars = preproc(description(vsnres))$vsnParams
  parsIter = preproc(description(vsnres))$vsnParamsIter # J.: added 2005/04/26
  description(abatch)@preprocessing = c(description(abatch)@preprocessing, list(vsnParams=pars, vsnParamsIter=parsIter))

  ## apply the transformation (to all data)
  ## NOTE(review): when 'strata' was not supplied, R propagates the missing
  ## argument through strata=strata into vsnh(), whose missing(strata)
  ## branch then uses a single stratum — appears intended; confirm.
  intensity(abatch) = exp(vsnh(intensity(abatch), pars, strata=strata))
  return(abatch)
}


##------------------------------------------------------------
## Simulate data for the variance-stabilization model.
## mu is the expectation of the transformed intensities in the
## statistical model; sinh(mu) is generated according to the model
## of Newton et al., with shape parameter a=1 and scale theta=1.
##------------------------------------------------------------
## Simulate raw (y) and ideally-transformed (hy) intensities for n probes
## on d arrays, with a fraction 'de' differentially expressed (fraction
## 'up' of those up-regulated), in 'nrstrata' strata.
## Returns list(y, hy, is.de, strata).
## NOTE: random draws occur in the same order as always, so results under
## a fixed seed are unchanged.
sagmbSimulateData <- function(n=8064, d=2, de=0, up=0.5, nrstrata=1) {
  stopifnot(is.numeric(n), length(n)==1, n>=1)
  stopifnot(is.numeric(d), length(d)==1, d>=2)
  stopifnot(is.numeric(de), length(de)==1, de>=0, de<=1)
  stopifnot(is.numeric(up), length(up)==1, up>=0, up<=1)
  stopifnot(is.numeric(nrstrata), length(nrstrata)==1)

  sigmaeps <- 0.2
  ## true transformed intensities: sinh(mu) ~ Gamma(shape=1, scale=1) inverse
  mu <- asinh(1/rgamma(n, shape=1, scale=1))

  ##------------------------------------------------------------
  ## the calibration parameters:
  ## offsets ~ Uniform over [-.2, +.2] * 90%-quantile of sinh(mu),
  ## log(factors) ~ Normal(0,1), overall scale exp(runif(1)*10-5);
  ## w.l.o.g. the first offset is 0 and the first factor is 1
  ##------------------------------------------------------------
  Delta.a <- 0.2 * quantile(sinh(mu), 0.9)
  npar    <- d * nrstrata
  pars    <- array(NA, dim=c(nrstrata, d, 2))
  pars[,,1] <- c(0, runif(npar-1, min=-Delta.a, max=Delta.a))
  pars[,,2] <- c(1, exp(rnorm(npar-1)))
  pars      <- pars * exp(runif(1)*10-5)

  ## simulated transformed data: array 1 is the reference; on the other
  ## arrays, differentially expressed probes are shifted up or down
  is.de  <- (runif(n) < de)
  hy     <- matrix(as.numeric(NA), nrow=n, ncol=d)
  hy[,1] <- mu + rnorm(n, sd=sigmaeps)
  for (j in 2:d) {
    sgn    <- 2 * as.numeric(runif(n) < up) - 1
    hy[,j] <- mu + as.numeric(is.de)*sgn*runif(n, min=0, max=2) + rnorm(n, sd=sigmaeps)
  }

  ## map each probe to a stratum and apply the affine calibration on the
  ## raw scale: y = offset + factor * sinh(hy)
  strata <- as.integer(ceiling(runif(n)*nrstrata))
  offs   <- pars[strata,,1]
  facs   <- pars[strata,,2]
  stopifnot(all(dim(facs)==dim(hy)), all(dim(offs)==dim(hy)))
  list(y = offs + facs * sinh(hy), hy = hy, is.de = is.de, strata = strata)
}

## assess
## Assess an estimated transformation h1 against the simulation truth:
## root-mean-square difference of the generalized log-ratios (differences
## to the first, reference array), over non-differentially-expressed rows.
## 'sim' is a list as returned by sagmbSimulateData (elements y, hy, is.de).
sagmbAssess <- function(h1, sim) {
  stopifnot(all(c("y", "hy", "is.de") %in% names(sim)))
  h2    <- sim$hy
  is.de <- sim$is.de

  stopifnot(is.matrix(h1), is.matrix(h2), is.logical(is.de))
  n <- nrow(h1)
  d <- ncol(h1)
  if (nrow(h2) != n || ncol(h2) != d)
    stop(paste("'h1' and 'h2' should be matrices of the same size, but have ",
         n, "x", d, " and ", nrow(h2), "x", ncol(h2), sep=""))
  stopifnot(length(is.de) == n)

  ## log-ratios w.r.t. the reference array (column recycling subtracts
  ## column 1 from every remaining column)
  dh1 <- h1[, -1, drop=FALSE] - h1[, 1]
  dh2 <- h2[, -1, drop=FALSE] - h2[, 1]

  ## RMS over the (d-1) ratio columns of the non-DE rows
  nsum <- (d-1) * sum(!is.de)
  sqrt(sum((dh1[!is.de, ] - dh2[!is.de, ])^2) / nsum)
}
##----------------------------------------------------------------------
## Robust calibration and variance stabilization
## (C) Wolfgang Huber <huber@ebi.ac.uk> 2002-2005
## With contributions from Markus Ruschhaupt, Dennis Kostka, David Kreil
##----------------------------------------------------------------------
##----------------------------------------------------------------------
## vsn: the main function of this library
##----------------------------------------------------------------------
## Variance stabilization and calibration for microarray data.
## Robustly fits (via least trimmed squares) per-stratum affine calibration
## parameters plus an arsinh variance-stabilizing transformation, and
## returns an exprSet whose exprs slot holds the transformed data.
## Arguments:
##   intensities   matrix, data.frame, exprSet or marrayRaw
##   lts.quantile  trimming quantile in [0.5, 1]; 1 = ordinary ML
##   verbose       print progress messages?
##   niter         number of outer LTS iterations
##   cvg.check     optional list(eps=, n=) convergence criterion (see dovsn)
##   describe.preprocessing  store parameters/selection in the result's
##                 preprocessing slot?
##   subsample     optional: fit on a random subset of this many rows
##   pstart        optional start parameters, array dim c(nrstrata, d, 2)
##   strata        optional integer vector, stratum of each row
vsn = function(intensities,
                lts.quantile = 0.5,
                verbose      = interactive(),
                niter        = 10,
                cvg.check    = NULL,
                describe.preprocessing=TRUE,
                subsample,
                pstart,
                strata) {
  y = getIntensityMatrix(intensities, verbose)
  d = ncol(y)

  ## Make sure the arguments are valid and plausible
  if (!is.numeric(lts.quantile) || (length(lts.quantile)!=1) ||
    (lts.quantile<0.5) || (lts.quantile>1))
      stop("'lts.quantile' must be a scalar between 0.5 and 1.")
  if (!is.numeric(niter) || (length(niter)!=1) || (niter<1))
    stop("'niter' must be a number >=1.")
  if (!is.logical(verbose))
    stop("'verbose' must be a logical value.")
  if(!missing(subsample)) {
    if(!is.numeric(subsample) || !(length(subsample)==1))
      stop("'subsample' must be a single number.")
    if(!(subsample>1e4))
      stop("'subsample' should not be less than 10000.")
  }
  
  ## Default: a single stratum covering all rows.
  if(missing(strata)) {
    strata = rep(as.integer(1), nrow(y))
  } else {
    if(!is.integer(strata) || !is.vector(strata) ||
       length(strata)!=nrow(y) || any(is.na(strata)))
      stop("'strata' must be an integer vector of length nrow(y) with no NAs.")
  }

  ## Start parameters: offsets 0, factors 1/IQR of each column (a robust
  ## scale estimate), unless explicitly given.
  nrstrata = max(strata)
  if(missing(pstart)) {
    pstart = array(0, dim=c(nrstrata, d, 2))
    for(i in 1:d)
      pstart[,i,2] = 1/diff(quantile(y[,i], probs=c(0.25, 0.75)))
  } else {
    if(!is.array(pstart) || length(dim(pstart))!=3)
      stop("'pstart' must be a 3D array.")
    if(!all(dim(pstart)==c(nrstrata, d, 2)))
      stop(paste("dimensions of 'pstart' do not match. They should be ",
        paste(nrstrata, d, 2, sep=" x "), ", but are ",
        paste(dim(pstart), collapse=" x "), ".", sep=""))
  }

  isReordered = FALSE
  if(nrstrata>1) {
    ## Warn if some strata are too small for a reliable trimmed fit.
    minperstratum = as.integer(42/lts.quantile)
    sstr = sum(table(strata) < minperstratum)
    if(sstr>0) {
      mess = paste("*** There are less than", minperstratum, "data points in", sstr,
        "of the strata.\n*** The fitted parameters may be unreliable.\n")
      if(lts.quantile<0.9)
        mess = paste(mess, "*** You could try to increase the value of 'lts.quantile'.\n", sep="")
      if(nrstrata>1)
        mess = paste(mess, "*** You could try to reduce the number of strata.\n", sep="")
      warning(mess)
    }

    ## reorder the rows of the matrix so that each stratum sits in a contiguous block
    ## ('reord' inverts the permutation so results can be restored below)
    ordstrata = order(strata)
    reord     = order(ordstrata)
    y         = y[ordstrata,]
    strata    = strata[ordstrata]
    isReordered = TRUE
  }
  
  ## Print welcome message
  if (verbose)
    cat("vsn: ", nrow(y), " x ", d, " matrix (", nrstrata, " strat",
        ifelse(nrstrata==1, "um", "a"), ").  0% done.", sep="")

  ##---------------------------
  ## Run the fit; on failure retry with progressively larger (less robust)
  ## trimming quantiles up to 1.
  succeed = FALSE
  ltsq = seq(lts.quantile, 1, length=3)
  for(i in seq(along=ltsq)) {
    tryCatch({
      v = dovsn(y=y, lts.quantile=ltsq[i], verbose=verbose,
        niter=niter, cvg.check=cvg.check, subsample=subsample,
        pstart=pstart, strata=strata)
      succeed = TRUE
      break
    }, error= function(e) {
      ## NOTE(review): due to the brace placement below, the "Restarting"
      ## message is also gated on 'verbose' — appears intentional.
      if(verbose) {
        cat(c("First", "Second", "Third")[i], "attempt at likelihood",
            "optimization result in the following error:\n")
        print(e)
      if(i<length(ltsq))
        cat("Restarting with lts.quantile=", signif(ltsq[i+1],2), "\n")
      }}
    ) ## tryCatch
  } ## for
             
  if(!succeed)
    stop(paste("\nThe likelihood optimization did not converge. A likely",
               "reason is that the normalization parameters are not uniquely identifiable",
               "from the provided data.\nPossibly, the columns of the data matrix",
               "are exactly co-linear or affine dependent - please verify the data",
               "to make sure there were no mix-ups."))
  
  ##---------------------------

  ## Prepare the return result: an exprSet
  ## The transformed data goes into slot exprs.
  ## If input was already an exprSet, keep the values of all the other slots.
  ## To the slot description@preprocessing, append the parameters and the
  ##    trimming selection.

  res = descr = NULL
  if (is(intensities, "exprSet")) {
    res = intensities
    if (is(description(intensities), "MIAME")) {
      descr = description(intensities)
    }
  }
  if(is.null(descr))   descr = new("MIAME")
  if(is.null(res))     res   = new("exprSet", description=descr)

  ## Undo the stratum reordering so rows match the input order.
  if(isReordered) {
    v$hy  = v$hy[reord, ]
    v$sel = v$sel[reord]
  }
  
  if (describe.preprocessing) {
    exprs(res) = v$hy
    vsnPreprocessing =  list(vsnParams      = v$par,
                           vsnParamsIter    = v$params,
                           vsnTrimSelection = v$sel)
    class(vsnPreprocessing) = c("vsnPreprocessing", class(vsnPreprocessing))
    res@description@preprocessing = append(res@description@preprocessing, vsnPreprocessing)
  }
  return(res)
}


##--------------------------------------------------
## This is the actual "workhorse" function 
##--------------------------------------------------
## Workhorse for vsn(): alternates maximum-likelihood parameter estimation
## (in C, via .Call("vsn_c", ...)) with least-trimmed-squares reselection
## of the data points used for fitting.
## Returns list(hy = transformed data, par = final parameter array,
##              params = parameter trajectory over the iterations,
##              sel = logical row selection used in the last iteration).
dovsn = function(y, lts.quantile, verbose, niter, cvg.check, subsample, pstart, strata) {

  nrstrata = dim(pstart)[1]
  d        = dim(pstart)[2]
  
  ## a place to save the trajectory of estimated parameters along the iterations:
  params = array(NA, dim=c(dim(pstart), niter))

  ## begin of the outer LL iteration loop
  optim.niter = 10
  oldhy   = Inf  ## for calculating a convergence criterion: earlier result
  cvgcCnt = 0    ## counts the number of iterations that have already met
                  ## the convergence criterion
  sel = rep(TRUE, nrow(y))
  for(lts.iter in 1:niter) {
    ## subsample? If requested, keep at most 'subsample' of the currently
    ## selected rows, drawn separately within each stratum.
    if(!missing(subsample)) {
      if(nrstrata==1) {
        ssp = list(which(sel))
      } else {
        ssp  = split(which(sel), strata[sel])
      }
      ssps = lapply(ssp, function(s) {
        if(length(s)<=subsample) {
          s
        } else {
          sample(s, subsample, replace=FALSE)
        }
      } )
      sel = unlist(ssps)
    }
    ## Fit on the selected rows only; retry optim up to optim.niter times
    ## when it reports convergence code 52 (see below).
    ysel   = y[sel,]
    istrat = calc.istrat(strata[sel], nrstrata, d)
    p0     = pstart
    for (optim.iter in 1:optim.niter) {
      ## Returns the flattened parameter array followed by the optimizer's
      ## convergence code as the last element.
      optres = .Call("vsn_c", ysel, as.vector(p0), istrat, as.integer(0), PACKAGE="vsn")
      stopifnot(length(optres)==2*nrstrata*d+1)
      conv = round(optres[length(optres)])
      par  = array(optres[-length(optres)], dim=dim(p0))
      if (conv==0)
        break
      if(conv==52) {
        ## ABNORMAL_TERMINATION_IN_LNSRCH
        ## This seems to indicate that a stepwidth to go along the gradient could not be found,
        ## probably because the start point p0 was already too close to the optimum (?). Hence, try
        ## again from a slightly different start point
        p0 = p0 + runif(length(pstart), min=0, max=0.01)
        if(verbose)
          cat("(CONV=52, restarting with new initial parameters)")
      } else {
        stop(paste("Likelihood optimization: the function optim() returned the value convergence=",
                   conv, "\nPlease make sure your data is good. ",
                   "If so, contact the package maintainer.\n", sep=""))
      }
    }

    if (conv!=0)
      stop(paste("Likelihood optimization did not converge even after", optim.niter, "calls to optim().",
           "\nPlease make sure your data is good. If the problem persists,",
           "\nplease contact the package maintainer.\n"))
    ## Factors are constrained to be positive; a violation indicates a bug.
    if (any(par[,,2]<0))
      stop(paste("Likelihood optimization produced negative parameter estimates in spite of constraints.",
           "\nPlease contact the package maintainer.\n"))

    ## Progress display: the backspaces erase the previous "NN% done." text.
    if(verbose)
      cat("\b\b\b\b\b\b\b\b\b", sprintf("%2d", as.integer(round((1-((lts.iter-niter)/niter)^2)*100))),
          "% done.", sep="")

    ## selection of points in a LTS fashion:
    ## calculate residuals
    hy     = vsnh(y, par, strata)
    hmean  = rowMeans(hy)
    sqres  = hy - hmean
    sqres  = rowSums(sqres*sqres) ## squared residuals

    ## select those data points within lts.quantile; do this separately
    ## within each stratum, and also within strata defined by hmean
    ## (see the SAGMB 2003 paper for details)
    nrslice = 5
    group   = ceiling(rank(hmean)/length(hmean)*nrslice)
    group   = factor((strata-1)*nrslice + group)
    grmed   = tapply(sqres, group, quantile, probs=lts.quantile)
    meds    = grmed[as.character(group)]
    stopifnot(!any(is.na(meds)))
    sel     = (sqres <= meds)

    ## store the trajectory; the next iteration starts from the current fit
    params[,,,lts.iter] = pstart = par

    ## Convergence check
    ## after a suggestion from David Kreil, kreil@ebi.ac.uk
    ## Stop early once the relative change of hy stays below cvg.check$eps
    ## for cvg.check$n consecutive iterations.
    if(!is.null(cvg.check)) {
      cvgc    = max(abs((hy - oldhy)/diff(range(hy))))
      cvgcCnt = ifelse( cvgc < cvg.check$eps, cvgcCnt + 1, 0 )
      if (verbose)
        cat(sprintf("iter %2d: cvgc=%.5f%%, par=", as.integer(lts.iter), cvgc),
            sapply(par, function(x) sprintf("%9.3g",x)),"\n")
      if (cvgcCnt >= cvg.check$n)
        break
      oldhy = hy
    }

  } ## end of for-loop (iter)
  if(verbose)
    cat("\n")

  return(list(hy=hy, par=par, params=params, sel=sel))
}

##------------------------------------------------------------
## istrat[j] is the starting position for stratum j
## i.e. istrat[j]...istrat[j+1] are the elements of ysel that
## belong to stratum j (using C indexing convention,
## i.e. starting at 0).
## Note: the counting over the different samples is folded into j,
## i.e., if there are 4 strata on the array and 3 samples, then
## j runs from 1:12
##------------------------------------------------------------
## Compute the 0-based start positions (for the C code) of each
## stratum/sample block; 'strata' must be sorted so each stratum occupies
## a contiguous run of rows (see the comment block above for the layout).
calc.istrat = function(strata, nrstrata, d) {
    ## 0-based index of the first element of each stratum
    istr = which(!duplicated(strata))-1
    ## Bug fix: 'istr' is 0-based, so the stratum start values are
    ## strata[istr + 1]; the previous check used strata[istr], which
    ## silently dropped the 0 index and inspected the wrong elements.
    stopifnot(length(istr)==nrstrata, all(diff(strata[istr + 1])>0))

    istrat = numeric(d*nrstrata+1)
    istrat[d*nrstrata+1] = length(strata)*d  ## end point
    ## sample i's strata start at offset (i-1)*length(strata)
    for(i in 1:d)
      istrat[(i-1)*nrstrata + (1:nrstrata)] = ((i-1)*length(strata) + istr)
    return(as.integer(istrat))
}
## Plot the trajectory of the fitted vsn parameters over the LTS
## iterations, as stored in the preprocessing slot of the exprSet 'x'.
## 'what' selects which parameter class to plot: "offsets" or "factors".
vsnPlotPar = function(x, what, xlab="iter", ylab=what, ...) {
  stopifnot(is.character(what), length(what)==1)
  stopifnot(is(x, "exprSet"))
  whatopts <- c("offsets", "factors")
  j <- match(what, whatopts)
  if(is.na(j))
    ## Bug fix: 'whatopts' has length 2, so pasting it directly produced a
    ## two-element, garbled error message; collapse the options first.
    stop(paste("Argument 'what' must be one of ",
               paste(whatopts, collapse=", "), ", is: ", what, "\n", sep=""))

  ## parameter trajectory: array dim c(nrstrata, d, 2, niter)
  pars = preproc(description(x))$vsnParamsIter
  if(is.null(pars) || !is.array(pars) || length(dim(pars))!=4)
    stop("Argument 'x' does not contain the vsn parameters!")

  ## one line per stratum/array combination, over the iterations
  niter <- dim(pars)[4]
  matplot(1:niter, t(matrix(pars[,,j,], ncol=niter)), type="b", pch=16, xlab=xlab, ylab=ylab, ...)
}
##---------------------------------------------------------------------
## The "arsinh" transformation
##
## Note: the constant -log(2*facs[1]) is added to the transformed data
## in order to achieve h_1(y) \approx log(y) for y\to\infty, that is,
## better comparability to the log transformation.
## It has no effect on the generalized log-ratios.
##--------------------------------------------------------------------
## The "arsinh" transformation: hy = asinh(a + b*y) - log(2*b_1),
## computed in C via .Call("vsn_c", ..., 2).
##
## Note: the constant -log(2*facs[1]) is added to the transformed data
## in order to achieve h_1(y) \approx log(y) for y\to\infty, that is,
## better comparability to the log transformation.
## It has no effect on the generalized log-ratios.
##
## Arguments:
##   y       numeric matrix of raw intensities
##   p       parameter array dim c(nrstrata, d, 2): [,,1] offsets,
##           [,,2] factors (must be strictly positive)
##   strata  optional integer vector, stratum of each row of y
vsnh <- function(y, p, strata) {
  if (!is.matrix(y) || !is.numeric(y))
    stop("vsnh: 'y' must be a numeric matrix.\n")
  
  if (!is.array(p) || !is.numeric(p) || any(is.na(p)))
    stop("'p' must be an array with no NAs.\n")

  if(missing(strata)) {
    strata <- rep(as.integer(1), nrow(y))
  } else {
    if(!is.integer(strata) || !is.vector(strata) || 
       length(strata)!=nrow(y) || any(is.na(strata)))
      stop("'strata' must be an integer vector of length nrow(y) with no NAs.")
  }
  nrstrata <- max(strata)
  
  ## convenience: with a single stratum, accept a 2D parameter matrix
  if(nrstrata==1 && length(dim(p))==2)
    dim(p) <- c(1, dim(p))
  
  if(length(dim(p))!=3 || dim(p)[1]!=nrstrata || dim(p)[2]!=ncol(y) || dim(p)[3]!=2)
    stop("'p' has wrong dimensions.")
  ## Bug fix: the check requires strictly positive factors (<=0 rejected),
  ## but the old message said "non-negative"; say "positive".
  if (any(p[,,2]<=0))
    stop("'p' contains invalid values: factors must be positive.")

  hy = .Call("vsn_c", y, as.vector(p), strata, as.integer(2), PACKAGE="vsn")

  ## The old, memory-wasting way:
  ## use the recycling rule: p[strata,,1], p[strata,,2], y, and the result of asinh()
  ## are all n*d-matrices. p[strata,1,2] and the result of log() is an n-vector.
  ## hy   = asinh(p[strata,,1] + p[strata,,2] * y) - log(2*p[strata,1,2])

  dimnames(hy) = dimnames(y)
  return(hy)
}

## On attach: in an interactive Windows Rgui session, add a menu entry for
## the package vignettes.
.onAttach <- function(libname, pkgname) {
  onRgui <- .Platform$OS.type == "windows" && .Platform$GUI == "Rgui"
  if (onRgui && interactive())
    addVigs2WinMenu("vsn")
}

## On load: if the affy package is attached, register "vsn" in its list of
## AffyBatch normalization methods (unless it is already registered).
.onLoad <- function(libname, pkgname) {
  if ("package:affy" %in% search()) {
    methodNames <- get("normalize.AffyBatch.methods", "package:affy")
    if (!("vsn" %in% methodNames))
      assign("normalize.AffyBatch.methods", c(methodNames, "vsn"),
             pos="package:affy")
  }
}
