.packageName <- "plgem"
"plgem.deg" <-
function(observedStn,plgemResampledStn,delta=0.001,verbose=FALSE) {
	#some checks
	if(class(observedStn)!="matrix") stop("Object observedStn in function plgem.deg is not of class matrix ")
	if(class(plgemResampledStn)!="list") stop("Object plgemResampledStn in function plgem.deg is not of class list ")
	if(class(plgemResampledStn$RESAMPLED.STN)!="matrix") stop("Object plgemResampledStn$RESAMPLED.STN in function plgem.deg is not of class matrix ")
	if(class(delta)!="numeric") stop("Argument delta in function plgem.deg is not of class numeric")
	if(!(all(delta>0) && all(delta<1))) stop("Argument delta in function plgem.deg is not in (0,1)")
	if(class(verbose)!="logical") stop("Argument verbose in function plgem.deg is not of class logical")

	if(verbose) cat("selecting significant DEG:")
	library(Biobase)

	#preparing...
	repl.number<-plgemResampledStn$REPL.NUMBER
	resampledStn<-plgemResampledStn$RESAMPLED.STN
	condition.name<-colnames(observedStn)
	condition.number<-length(condition.name)
	if(verbose) cat("found",condition.number,"condition(s) compared to the baseline.\n")
   	delta.name<-as.character(delta)
	DEG.list<-list()
	up.thr<-array(,dim=c(length(delta),ncol(resampledStn)))
	colnames(up.thr)<-colnames(resampledStn)
	down.thr<-array(,dim=c(length(delta),ncol(resampledStn)))
	colnames(down.thr)<-colnames(resampledStn)

	#calculating up and down thresholds
	for (j in 1:ncol(resampledStn)) {
	    up.thr[,j]<-quantile(resampledStn[,j],1-delta/2,na.rm=TRUE)
	    down.thr[,j]<-quantile(resampledStn[,j],delta/2,na.rm=TRUE)
	}

	#identification of differentially expressed genes (DEG)
	geneIDs <- rownames(observedStn)
	for (i in 1:length(delta)) {
		if(verbose) cat("Delta = ",delta[i],"\n")
		DEG.list[[delta.name[i]]]<-list()
		for (j in 1:condition.number) {
			if(verbose) cat("	Condition = ",condition.name[j],"\n")
			UP<-up.thr[i,as.character(repl.number[condition.name[j]])]
			DOWN<-down.thr[i,as.character(repl.number[condition.name[j]])]
			#selecting DEG
			DEG.index<-which(observedStn[,j]>UP | observedStn[,j]<DOWN)
			DEG.number<-length(DEG.index)
			DEG.stn<-observedStn[DEG.index,j]
			names(DEG.stn)<-geneIDs[DEG.index]
			if(DEG.number==0) {
				DEG.list[[delta.name[i]]][[condition.name[j]]]<-NA
			}
			else {
				DEG.list[[delta.name[i]]][[condition.name[j]]]<-DEG.stn
			}
			if(verbose) {cat("delta:",delta[i],"condition:",condition.name[j],"found",DEG.number,"DEG\n")}
		}
	}
	if(verbose) cat("done with selecting significant DEG.\n\n")
	gc()
	return(DEG.list)
}
"plgem.fit" <-
function(data, covariateNumb=1, fit.condition=1,p=10,q=0.5,fittingEval=FALSE,plot.file=FALSE,verbose=FALSE) {
	library(Biobase)
	library(MASS)

	# some checks..
	if(class(data)!="ExpressionSet") stop("Object data in function plgem.fit is not of class ExpressionSet")
	if(covariateNumb > ncol(pData(data))) stop("covariateNumb is greater than the number of covariates in phenodata of data")
	if(class(fit.condition)!="numeric" && class(fit.condition)!="integer") stop("Argument fit.condition in function plgem.fit is not of class numeric or integer")
	if(class(p)!="numeric" && class(p)!="integer") stop("Argument p in function plgem.fit is not of class numeric or integer")
	if(class(q)!="numeric" && class(q)!="integer") stop("Argument q in function plgem.fit is not of class numeric or integer")
	if(class(fittingEval)!="logical") stop("Argument fittingEval in function plgem.fit is not of class logical")
	if(class(plot.file)!="logical") stop("Argument plot.file in function plgem.fit is not of class logical")
	if(class(verbose)!="logical") stop("Argument verbose in function plgem.fit is not of class logical")

	if(verbose) cat("fitting PLGEM..","\n")

	if(verbose) cat("samples extracted for fitting:","\n")
	condition.names<-as.character(pData(data)[,covariateNumb])
	condition.name<-unique(condition.names)
	data<-data[,which(condition.names==condition.name[fit.condition])]
	if(verbose) print(pData(data))
	row<-length(featureNames(data))
	if(length(sampleNames(data))<2) stop("At least 2 replicates needed to fit PLGEM")

	# 'data' mean and standard deviation
	dataMatrix<-exprs(data)
	data.mean<-rowMeans(dataMatrix,na.rm=TRUE)
	data.mean<-sort(data.mean)
	data.sd<-sd(t(dataMatrix),na.rm=TRUE)
	data.sd<-data.sd[names(data.mean)]
	data.mean<-replace(data.mean,data.mean<=0,min(data.mean[data.mean>0]))
	data.sd<-replace(data.sd,data.sd<=0,min(data.sd[data.sd>0]))

	# Determination of Modelling Points (MP)
	if(verbose) cat("determining modelling points...\n")
	limits<-array(,dim=p+1)
	MP.x<-array(,dim=p)
	MP.y<-array(,dim=p)
	for (i in 0:p) {limits[i+1]<-round(row*i/p)}
	for (i in 1:(length(limits)-1)) {
		MP.y[i]<-quantile(data.sd[limits[i]:limits[i+1]],q,na.rm=TRUE)	
		MP.x[i]<-median(data.mean[limits[i]:limits[i+1]],na.rm=TRUE)
	}

	# fit of linear regression over the Modelling Points
	if(verbose) cat("fitting data and modelling points...\n")
	MP.lm<-lm(log(as.numeric(MP.y))~log(as.numeric(MP.x)))
	slope<-coef(MP.lm)[2]
	intercept<-coef(MP.lm)[1]
	adj.r2.mp<-summary.lm(MP.lm)$adj.r.squared
	data.pearson<-cor(log(data.mean),log(data.sd))

	if(fittingEval) {
		if(plot.file) {png(file="fittingEval.png",width=600,height=600)}
		# computation of model-residuals
		modeled.spread<-(data.mean^slope)*(exp(intercept))
		residuals<-(log(data.sd)-log(modeled.spread))
		maxResidual<-ceiling(max(abs(residuals)))

		layout(matrix(1:4, 2, 2))
		# contour plot of data & model fit
		mainTitle<-paste("PLGEM parameters:\nslope = ",signif(slope,3),"\nintercept = ",signif(intercept,3),sep="")
		meanSd.kde2d<-kde2d(log(data.mean), log(data.sd),n=50)
		contour(meanSd.kde2d,col=colors()[40:55],nlevels=15,xlab="ln(mean)",ylab="ln(sd)",cex=0.5,main=mainTitle,cex.main=0.8)
		points(log(MP.x),log(MP.y),cex=2,col="black")
		abline(intercept,slope,col="red",lwd=2)
		# contour plot of residuals vs rank of mean
		residuals.kde2d<-kde2d(rank(data.mean,ties.method="first"),residuals,n=50)
		contour(residuals.kde2d,col=colors()[40:55],nlevels=15,xlab="Rank of mean",ylab="Residuals",ylim=c(-maxResidual,maxResidual),cex=0.5)
		# histogram of residuals
		hist(residuals,breaks=100,xlab="Residuals",ylab="Counts",xlim=c(-maxResidual,maxResidual))
		# qqplot of residuals distribution versus standard normal distribution
		qqnorm(residuals,xlab="Standard Normal",ylab="Residuals",cex=0.5)
		if(plot.file) {dev.off()}
	}

	# return model parameters
	if(verbose) cat("done with fitting PLGEM.\n\n")
	gc()
	return(list(SLOPE=slope,INTERCEPT=intercept,DATA.PEARSON=data.pearson,ADJ.R2.MP=adj.r2.mp,FIT.CONDITION=fit.condition))
}
"plgem.obsStn" <-
function(data,plgemFit, covariateNumb=1, baseline.condition=1,verbose=FALSE) {
	library(Biobase)

	#some checks...
	if(class(data)!="ExpressionSet") stop("Object data in function plgem.obsStn is not of class ExpressionSet")
     if(covariateNumb > ncol(pData(data))) stop("covariateNumb is greater than the number of covariates in phenodata of data")
	if(class(plgemFit)!="list") stop("Object plgemFit in function plgem.obsStn is not of class list")
	if(class(baseline.condition)!="numeric" && class(baseline.condition)!="integer") stop("Argument baseline.condition in function plgem.obsStn is not of class numeric or integer")
	if(class(verbose)!="logical") stop("Argument verbose in function plgem.obsStn is not of class logical")
	
	if(verbose) cat("calculating observed PLGEM-STN statistics:")

	#internal functions
	stn<-function(location1,location2,spread1,spread2){
		(location2-location1)/(spread1+spread2)
	}

	plgem.spread<-function(location,slope,intercept) {
		(location^(slope))*exp(intercept)
	}
	
	#preparing...
	condition.names<-as.character(pData(data)[,covariateNumb])
	condition.name<-unique(condition.names)
	condition.number<-length(condition.name)
	if (condition.number < 2) stop("At least 2 conditions are needed in object data for function plgem.obsStn")
	if(verbose) cat("found",(condition.number-1),"condition(s) to compare to the baseline.\n")
	dataMatrix<-exprs(data)
	#replacing zero and negative values with minimum positive value
	dataMatrix<-replace(dataMatrix,dataMatrix<=0,min(dataMatrix[dataMatrix>0]))
	if(verbose) cat("working on baseline",condition.name[baseline.condition],"...\n")
	baseline.col<-which(condition.names==condition.name[baseline.condition])
	if (verbose) cat(colnames(dataMatrix)[baseline.col],"\n")
	obervedStn<-array(,dim=c(nrow(dataMatrix),condition.number-1))
	rownames(obervedStn)<-featureNames(data)
	colnames(obervedStn)<-condition.name[-baseline.condition]

	#calculating mean and modeled spread for the baseline condition
	mean.left<-rowMeans(dataMatrix[,baseline.col],na.rm=TRUE)
	spread.left<-plgem.spread(mean.left,plgemFit$SLOPE,plgemFit$INTERCEPT)

	#calculating mean and modeled spread for the remaining condition(s)
	col.counter<-0
	for (i in (1:condition.number)[-baseline.condition]) {
		col.counter<-col.counter+1
		if(verbose) cat("working on condition",condition.name[i],"...\n")
		condition.col<-which(condition.names==condition.name[i])
		if(verbose) cat(colnames(dataMatrix)[condition.col],"\n")
		if(length(condition.col)==1) {mean.right<-dataMatrix[,condition.col]}
		else {
		    mean.right<-rowMeans(dataMatrix[,condition.col],na.rm=TRUE)
		}
		spread.right<-plgem.spread(mean.right,plgemFit$SLOPE,plgemFit$INTERCEPT)
		#computation of PLGEM-STN statistics
		obervedStn[,col.counter]<-stn(mean.left,mean.right,spread.left,spread.right)
	}

	if(verbose) cat("done with calculating PLGEM-STN statistics.\n\n")
	gc()
	return(obervedStn)
}
"plgem.resampledStn" <-
function(data,plgemFit,covariateNumb=1,baseline.condition=1,iterations="automatic",verbose=FALSE) {
	library(Biobase)

	#some checks...
	if(class(data)!="ExpressionSet") stop("Object data in function plgem.resampledStn is not of class ExpressionSet")
    if(covariateNumb > ncol(pData(data))) stop("covariateNumb is greater than the number of covariates in phenodata of data")
	if(class(plgemFit)!="list") stop("Object plgemFit in function plgem.resampledStn is not of class list")
	if(class(baseline.condition)!="numeric" && class(baseline.condition)!="integer") stop("Argument baseline.condition in function plgem.resampledStn is not of class numeric or integer")
	if(iterations!="automatic" && class(iterations)!="numeric" && class(iterations)!="integer") stop("Argument iterations in function plgem.resampledStn is neither of class numeric (or integer) nor equal to 'automatic'")
	if(class(verbose)!="logical") stop("Argument verbose in function plgem.resampledStn is not of class logical")

	if(verbose) cat("calculating resampled PLGEM-STN statistics:")

	#internal functions
	stn<-function(location1,location2,spread1,spread2){
		(location2-location1)/(spread1+spread2)
	}

	plgem.spread<-function(location,slope,intercept) {
		(location^(slope))*exp(intercept)
	}

	#preparing...
	condition.names<-as.character(pData(data)[,covariateNumb])
	condition.name<-unique(condition.names)
	condition.number<-length(condition.name)
	if (condition.number < 2) stop("At least 2 conditions are needed in object data for function plgem.resampledStn")
	if(verbose) cat("found",(condition.number-1),"condition(s) to compare to the baseline.\n")
	dataMatrix<-exprs(data)
	#replacing zero and negative values with minimum positive value
	dataMatrix<-replace(dataMatrix,dataMatrix<=0,min(dataMatrix[dataMatrix>0]))
	rowNumber<-nrow(dataMatrix)
	baseline.col<-which(condition.names==condition.name[baseline.condition])
	if(verbose) cat("baseline samples:\n")
	if (verbose) cat(colnames(dataMatrix)[baseline.col],"\n")
	fit.col<-which(condition.names==condition.name[plgemFit$FIT.CONDITION])
	if(verbose) cat("resampling on samples:\n")
	if (verbose) cat(colnames(dataMatrix)[fit.col],"\n")

	#determining the number of replicates of each condition
	repl.number<-array(,dim=condition.number)
	names(repl.number)<-condition.name
	for (i in 1:condition.number)	{
		repl.number[i]<-length(which(condition.names==condition.name[i]))
	}
	repl.cases<-unique(repl.number[-baseline.condition])

	#determination of number of iterations
	if(iterations == "automatic") {
		a<-length(fit.col)
		b<-length(baseline.col)
		c<-max(repl.number[-1])
		iterations<-a^(b+c)
		iterations<-min(iterations,500)
	}
	if(verbose) cat("Using ",iterations," iterations...\n")

	#computing resampled STN statistics for each case of number of replicates
	resampledStn<-array(,dim=c(rowNumber*iterations,length(repl.cases)))
	colnames(resampledStn)<-as.character(repl.cases)
	for (i in 1:length(repl.cases)) {
		if(verbose) cat("working on cases with ",repl.cases[i]," replicates...\n")
		if(verbose) cat("     Iterations: ")
		for (j in 1:iterations){
			if(verbose) {if (j/20 == trunc(j/20)) {cat(j," ")} }
			#sampling column indices
			left.col<-sample(fit.col,length(baseline.col),replace=TRUE)
			right.col<-sample(fit.col,repl.cases[i],replace=TRUE)

			#calculating mean and modeled spread for the first artificial condition
			mean.left<-rowMeans(dataMatrix[,left.col],na.rm=TRUE)
			spread.left<-plgem.spread(mean.left,plgemFit$SLOPE,plgemFit$INTERCEPT)	   

			#calculating mean and modeled spread for the second artificial condition
			if(length(right.col)==1) {mean.right<-dataMatrix[,right.col]}
			else {
			    mean.right<-rowMeans(dataMatrix[,right.col],na.rm=TRUE)
			}
			spread.right<-plgem.spread(mean.right,plgemFit$SLOPE,plgemFit$INTERCEPT)

			#computation of resampled PLGEM-STN statistics
			rowIndex<-(rowNumber*(j-1)+1):(rowNumber*j)
			resampledStn[rowIndex,i]<-stn(mean.left,mean.right,spread.left,spread.right)
		}
		if(verbose) cat("\n")
	}

	if(verbose) cat("done with calculating resampled PLGEM-STN statistics.\n\n")
	gc()
	return(list(RESAMPLED.STN=resampledStn,REPL.NUMBER=repl.number))
}
"plgem.write.summary" <-
function(x, verbose=FALSE) {
  if(class(x)!="list" || class(x[[1]])!="list") stop("x has be the output of either the plgem.deg or the run.plgem functions, i.e. a list of list(s) of named vectors")
  if (verbose) cat("Writing files\n")
  f1 <- names(x)
  for (i in f1) {
    y <- x[[i]]
    f2 <- names(y)
    for (j in f2) {
      z <- x[[i]][[j]]
      fname <- paste(j, "-", i, ".txt", sep="")
      if (verbose) cat(fname, "\n")
      write.table(round(z,3), file=fname, sep="\t", quote=FALSE, col.names=FALSE)
      }
    }
  if (verbose) cat("to folder", getwd(), "\n")
}
"run.plgem" <-
function(esdata, signLev=0.001, rank=100, covariateNumb=1, baselineCondition=1, Iterations="automatic", fitting.eval=TRUE, plotFile=FALSE, writeFiles = FALSE, Verbose=FALSE) {
	library(Biobase)

	#some checks
	if(class(esdata)!="ExpressionSet") stop("Object esdata in function run.plgem is not of class ExpressionSet")
    if(covariateNumb > ncol(pData(esdata))) stop("covariateNumb is greater than the number of covariates in phenodata of esdata")
	if(class(signLev)!="numeric" && class(signLev)!="integer") stop("Argument signLev in function run.plgem is not of class numeric or integer")
	if(class(rank)!="numeric" && class(rank)!="integer") stop("Argument rank in function run.plgem is not of class numeric or integer")
	if(class(baselineCondition)!="numeric" && class(baselineCondition)!="integer") stop("Argument baselineCondition in function run.plgem is not of class numeric or integer")
	if(Iterations!="automatic" && class(Iterations)!="numeric" && class(Iterations)!="integer") stop("Argument Iterations in function run.plgem is neither of class numeric (or integer) nor equal to 'automatic'")
	if(class(fitting.eval)!="logical") stop("Object fitting.eval in function run.plgem is not of class logical")
	if(class(plotFile)!="logical") stop("Object plotFile in function run.plgem is not of class logical")
	if(class(Verbose)!="logical") stop("Argument Verbose in function run.plgem is not of class logical")

	condition.names<-as.character(pData(esdata)[,covariateNumb])
	condition.name<-unique(condition.names)
	condition.number<-length(condition.name)
	if (condition.number < 2) stop("At least 2 conditions are needed in object esdata for function run.plgem")
	
	#determining the number of replicates of each condition
	repl.number<-array(,dim=length(condition.name))
	for (i in 1:length(condition.name)) {
	    repl.number[i]<-length(which(condition.names==condition.name[i]))
	}
	if(max(repl.number)==1) stop ("PLGEM can not be fitted without replicates")
	names(repl.number)<-condition.name

	#determination of the best condition on which to fit the model
	if(length(which(repl.number==max(repl.number)))==1) {
		#determination of the condition with the highest number of replicates
		fit.condition<-which(repl.number==max(repl.number))
		if(Verbose) cat("fit.condition",fit.condition,"\n")
	}
	else {
		#more than one condition has the highest number of replicates, therefore the one giving the best fit is chosen
		max.indexes<-which(repl.number==max(repl.number))
		adj.r<-array(,dim=length(max.indexes))
		for(i in 1:length(adj.r)) {
			adj.r[i]<-as.numeric(plgem.fit(data=esdata, covariateNumb=covariateNumb, fit.condition=max.indexes[i],p=10,q=0.5,verbose=FALSE)$ADJ.R2.MP)
		}
		if(Verbose) cat("adj.r",adj.r,"\n")
		fit.condition<-max.indexes[which(adj.r==max(adj.r))]
		if(length(fit.condition)>1) {
			fit.condition<-fit.condition[1]
			warning("PLGEM fits equally well on more than one condition. \n Condition ",condition.name[fit.condition]," used.\n")
		}
		else {
			if(Verbose) cat("condition ",condition.name[fit.condition]," used \n")
		}
	}

	# fitting and evaluating plgem
	plgemFit<-plgem.fit(data=esdata, covariateNumb=covariateNumb, fit.condition = fit.condition,p=10,q=0.5,fittingEval=fitting.eval,plot.file=plotFile,verbose=Verbose)
	# computing observed STN statistics
	obs.stn<-plgem.obsStn(esdata,plgemFit,verbose=Verbose, covariateNumb=covariateNumb, baseline.condition=baselineCondition)

	if(repl.number[fit.condition]<3) {
		# since not enough replicates are available for resampling, selection of DEG will be based on ranking
		cat("Less than 3 replicates found in dataset: ranking genes \n")
		DEG.list<-list()
		col.counter<-0
		for(i in (1:condition.number)[-baselineCondition]) {
			col.counter<-col.counter+1
			rankedIDs<-names(sort(abs(obs.stn[,col.counter]),decreasing=TRUE))[1:rank]
			DEG.list[[condition.name[i]]]<-obs.stn[rankedIDs,col.counter]
			names(DEG.list[[condition.name[i]]])<-rankedIDs
		}
	}
	else {
		# computing resampled STN statistics
		res.stn<-plgem.resampledStn(esdata,plgemFit,iterations=Iterations,covariateNumb=covariateNumb,baseline.condition=baselineCondition,verbose=Verbose)
		# DEG selection
		DEG.list<-plgem.deg(obs.stn,res.stn,delta=signLev,verbose=Verbose)
	}

    if(writeFiles) plgem.write.summary(x = DEG.list, verbose = Verbose) # writing DEG list(s) on the disk

	return(DEG.list)
}
