.packageName <- "apComplex"
#revised LCdelta function using lists not matrices
#will only calculate for combination


LCjoinadjBin <- function(x,comp,adjMat,VBPs,VBOs,VPOs,simMat,mu,alpha,Beta){

       y <- unique(c(x,comp))
       
       adjBinC <- adjBinFUN(y,adjMat=adjMat,VBPs=VBPs,VBOs=VBOs,VPOs=VPOs,mu=mu,alpha=alpha,Beta=Beta,simMat=simMat)

       adjBinC
}

LCjoinLchange <- function(x,comp,complexes,adjMat,ccMat,VBPs,VBOs,VPOs,simMat,mu,alpha,Beta){

	      y <- unique(c(x,comp))
	      yVBPs <- intersect(y,VBPs)
	      yVBOs <- intersect(y,VBOs)
	      yVPOs <- intersect(y,VPOs)

	      exCadjMat <- ccMat[c(yVBPs,yVBOs),c(yVBPs,yVPOs)]
	      exCadjMat[yVBPs,yVBPs] <- 1
	
	      LchangeC <-  sum((1-exCadjMat)*
		      (adjMat[c(yVBPs,yVBOs),c(yVBPs,yVPOs)]*alpha-
		      log(1+exp(mu+alpha+Beta*
				simMat[c(yVBPs,yVBOs),c(yVBPs,yVPOs)])) + 
		      log(1+exp(mu+Beta*
				simMat[c(yVBPs,yVBOs),c(yVBPs,yVPOs)]))))
	  	      
 
	      LchangeC

}

LCjoinfisher <- function(x,comp,adjMat,VBPs,VBOs,VPOs,nMax,wsVal=2e7){


       y <- unique(c(x,comp))
       fisherC <-
       fisherFUN(y,adjMat=adjMat,VBPs=VBPs,VBOs=VBOs,VPOs=VPOs,nMax=nMax,wsVal=wsVal)
       fisherC	     

}
#x is a character vector of node names in a complex

adjBinFUN <- function(x,adjMat,VBPs,VBOs,VPOs,mu,alpha,Beta,simMat){

	  xVBP <- intersect(x,VBPs)
	  xVBO <- intersect(x,VBOs)
	  xVPO <- intersect(x,VPOs)	
	  
	  nVBP <- length(xVBP)
	  nVBO <- length(xVBO)
	  nVPO <- length(xVPO)

	  temp <- matrix(adjMat[c(xVBP,xVBO),c(xVBP,xVPO)],ncol=(nVBP+nVPO))
	  sim <- matrix(simMat[c(xVBP,xVBO),c(xVBP,xVPO)],ncol=(nVBP+nVPO))

	  adjBin <- sum(temp*(mu+alpha+Beta*sim)-
		 log(1+exp(mu+alpha+Beta*sim)))-
		 nVBP*(mu+alpha+Beta-log(1+exp(mu+alpha+Beta)))


	  X <- sum(temp)-nVBP

	  cX <- lgamma(nVBP*(nVBP+nVPO+nVBO-1)+nVBO*nVPO+1) - 
	     lgamma(X+1) - lgamma(nVBP*(nVBP+nVPO+nVBO-1)+nVBO*nVPO-X+1)

	  #compute penalty contribution
	  lK <- cX + adjBin	  
	  lK
}

#function to find bhmaxSubgraphs from a bait-hit adjacency matrix

#by default, unreciprocated bait-bait edges will be treated as observed

#adjMat has dimensions N by (N+M) corresponding to N baits and M hits
#adjMat is named with row and column names corresponding to proteins

#this function uses 'maxClique' from RBGL

bhmaxSubgraph <- function(adjMat,VBs=NULL,VPs=NULL,unrecip=1){

	!is.null(colnames(adjMat)) || stop("Columns of adjMat must be named")
	!is.null(rownames(adjMat))|| stop("Rows of adjMat must be named")

	if(!is.null(VBs)) stopifnot(all(VBs %in% rownames(adjMat)))
	if(!is.null(VPs)) stopifnot(all(VPs %in% colnames(adjMat)))

	#create viable bait and prey sets if not specified
	if(is.null(VBs)) VBs <- rownames(adjMat)[rowSums(adjMat)>0]
	if(is.null(VPs)) VPs <- colnames(adjMat)[colSums(adjMat)>0]

	VBPs <- intersect(VBs,VPs)
	VBOs <- setdiff(VBs,VBPs)
	VPOs <- setdiff(VPs,VBPs)
	
	allProts <- c(VBPs,VBOs,VPOs)
	nProts <- length(allProts)

	#reorder adjMat rows and columns
	adjMat <- adjMat[c(VBPs,VBOs),c(VBPs,VPOs)]
	diag(adjMat) <- 0

	#for VPOs, insert an edge if found by a common bait
	#for VBOs, insert ad edge if they find a common prey
	
	adjMatAppend <- matrix(0,nProts,nProts)
	rownames(adjMatAppend) <- allProts
	colnames(adjMatAppend) <- allProts
	adjMatAppend[VBPs,VBPs] <- adjMat[VBPs,VBPs]
	adjMatAppend[VBPs,VPOs] <- adjMat[VBPs,VPOs]
	adjMatAppend[VBOs,VBPs] <- adjMat[VBOs,VBPs]
	adjMatAppend[VBOs,VPOs] <- adjMat[VBOs,VPOs]
	adjMatAppend[VPOs,VPOs] <- (1*(t(adjMat) %*% adjMat > 0))[VPOs,VPOs]
	adjMatAppend[VBOs,VBOs] <- (1*(adjMat %*% t(adjMat) > 0))[VBOs,VBOs]
	diag(adjMatAppend) <- 0

	#make corresponding undirected graph
	g <- as(adjMatAppend,"graphNEL")
	ug <- ugraph(g)

	#find maximal cliques
	mcs <- maxClique(ug)

	#remove cliques containing only VPOs or VBOs
	vbovpoFUN <- function(x) all(x %in% VBOs) | all(x %in% VPOs) 
	vbovpoc <- which(unlist(lapply(mcs$maxCliques,FUN=vbovpoFUN)))
	if(length(vbovpoc)>0) mcs$maxCliques <- mcs$maxCliques[-vbovpoc]

	#remove cliques with 1 members -- since the diagonal=0
	#this shouldn't happen, but it seems to - bug in maxClique?
	mem1 <- which(unlist(lapply(mcs$maxCliques,FUN=length))==1)
	if(length(mem1)>0) mcs$maxCliques <- mcs$maxCliques[-mem1]
	
	mcs
}



# a function to run the entire algorithm at once

findComplexes <- 
function(adjMat,VBs=NULL,VPs=NULL,simMat=NULL,sensitivity=.75,specificity=.995, 
Beta=0,commonFrac=2/3,wsVal = 2e7){

	##find number of baits and number of hits

        !is.null(colnames(adjMat)) || stop("Columns of adjMat must be named")
        !is.null(rownames(adjMat))|| stop("Rows of adjMat must be named")
        
        if(!is.null(VBs)) stopifnot(all(VBs %in% rownames(adjMat)))
        if(!is.null(VPs)) stopifnot(all(VPs %in% colnames(adjMat)))
        
        #create viable bait and prey sets if not specified
        if(is.null(VBs)) VBs <- rownames(adjMat)[rowSums(adjMat)>0]
        if(is.null(VPs)) VPs <- colnames(adjMat)[colSums(adjMat)>0]
        
        VBPs <- intersect(VBs,VPs)
        VBOs <- setdiff(VBs,VBPs) 
        VPOs <- setdiff(VPs,VBPs) 
    
	##set parameters for logistic regression model

	mu <- log((1-specificity)/specificity)
	alpha <- log(sensitivity/(1-sensitivity))-mu

	##create simMat of zeroes with diagonal of ones if one is not specified

	if(is.null(simMat)) {
		simMat <- matrix(0,dim(adjMat)[1],dim(adjMat)[2])
		diag(simMat) <- 1
		colnames(simMat) <- colnames(adjMat)
		rownames(simMat) <- rownames(adjMat)
	}

	##find maximal BH-complete subgraphs for initial 
	##protein complex membership graph estimate

	print("Finding Initial Maximal BH-complete Subgraphs")
	PCMG <- bhmaxSubgraph(adjMat,VBs=VBs,VPs=VPs,unrecip=1*(sensitivity<specificity))

	##combine complexes using LC measure

	#put PCMG in order by number of baits in complex
	numBaitsFUN <- function(x) sum(x %in% VBPs)
	numBaits <- unlist(lapply(PCMG$maxCliques,FUN=numBaitsFUN))
	
	baitOrder <- order(numBaits,decreasing=TRUE)
	PCMGo <- PCMG
	PCMGo$maxCliques <- PCMGo$maxCliques[baitOrder]
	
	#merge complex estimates using LCdelta criteria
	print("Combining Complex Estimates")
	PCMG2 <-
	mergeComplexes(PCMGo,adjMat=adjMat,VBs=VBs,VPs=VPs,simMat=simMat,
			Beta=Beta,sensitivity=sensitivity,
			specificity=specificity,commonFrac=commonFrac,
			wsVal = wsVal)

	return(PCMG2)

}



 

#adjMat = 1 for bait-bait entries

fisherFUN <- function(x,adjMat,VBPs,VBOs,VPOs,nMax,wsVal=2e7){

	  xVBP <- intersect(x,VBPs)
	  xVBO <- intersect(x,VBOs)
	  xVPO <- intersect(x,VPOs)	
	  
	  nVBP <- length(xVBP)
	  nVBO <- length(xVBO)
	  nVPO <- length(xVPO)


	  if(length(x)<nMax & (nVBP+nVPO)>1){

	  temp <- matrix(adjMat[c(xVBP,xVBO),c(xVBP,xVPO)],ncol=(nVBP+nVPO))
	  rownames(temp) <- c(xVBP,xVBO)
	  colnames(temp) <- c(xVBP,xVPO)

	  bh1 <- colSums(temp)
	  bh1[xVBP] <- bh1[xVBP] - 1
	  bh0 <- colSums(1-temp)

	  if(length(bh0)>15) wsVal <- 2e9

	  ans <- log(fisher.test(rbind(bh0,bh1),workspace=wsVal)$p.value)

	  } else ans <- NA

	  ans
}
#revised function for mergeComplexes using lists rather than matrices

#bhmax is a list of length one names 'maxCliques'
#bhmax is output from the bhmaxSubgraph function
#bhmax$maxCliques is a list of character vectors containing the clique members

#this uses the following functions: adjBinFUN, fisherFUN,
#LCjoinadjBin, LCjoinfisher, and LCjoinLchange

#new function adjusting for viability status


mergeComplexes <- 
function(bhmax,adjMat,VBs=NULL,VPs=NULL,simMat=NULL,sensitivity=.75,specificity=.995,Beta=0,commonFrac=2/3,wsVal 
= 2e7){

	stopifnot("maxCliques" %in% names(bhmax))
	if(!is.null(VBs)) stopifnot(all(VBs %in% rownames(adjMat)))
	if(!is.null(VPs)) stopifnot(all(VPs %in% colnames(adjMat)))

	#create viable bait and prey sets if not specified
	if(is.null(VBs)) VBs <- rownames(adjMat)[rowSums(adjMat)>0]
	if(is.null(VPs)) VPs <- colnames(adjMat)[colSums(adjMat)>0]

	VBPs <- intersect(VBs,VPs)
	VBOs <- setdiff(VBs,VBPs)
	VPOs <- setdiff(VPs,VBPs)

	#initial complex estimates
	complexes <- bhmax$maxCliques

	adjMat <- adjMat[c(VBPs,VBOs),c(VBPs,VPOs)]

	#set parameters
	adjMat[VBPs,VBPs] <- 1
	mu <- log((1-specificity)/specificity)
	alpha <- log(sensitivity/(1-sensitivity))-mu

	#make complex co-membership matrix
	ccMat <- adjMat
	ccMat[VBPs,VBPs] <- pmax(adjMat[VBPs,VBPs],t(adjMat[VBPs,VBPs]))


	#make simMat with entries 0 and diagonal 1 if simMat not specified
	if(is.null(simMat)){
	simMat <- matrix(0,dim(adjMat)[1],dim(adjMat)[2])
	rownames(simMat) <- rownames(adjMat)
	colnames(simMat) <- colnames(adjMat)
	simMat[VBPs,VBPs] <- 1
	}

	#calculate adj binomial and fisher test for initial complexes
	print("calculating initial penalty terms")

	adjBin <-unlist(lapply(complexes,FUN=adjBinFUN,adjMat=adjMat,
	VBPs=VBPs,VBOs=VBOs,VPOs=VPOs,
	mu=mu,alpha=alpha,Beta=Beta,simMat=simMat))

	fisher <- unlist(lapply(complexes,FUN=fisherFUN,adjMat=adjMat,
	VBPs=VBPs,VBOs=VBOs,VPOs=VPOs,
	nMax=20,wsVal=wsVal))

	#start looking at combinations
	print("looking at complex combinations")
	i <- 1 
	K <- length(complexes)

	keepgoing <- i < K
   
 
	while(keepgoing){

	keepgoing2 <- TRUE

	while(keepgoing2){

	#for complex under consideration, 
	#narrow combination candidates to those with common members

	commonFracFUN <- function(x) length(intersect(x,complexes[[i]]))>floor(length(complexes[[i]])*commonFrac)

	testL <- lapply(complexes,FUN=commonFracFUN)
	testset <- which(unlist(testL))
	testset <- testset[-which(testset==i)]
	Ktemp <- length(testset)

	#if there are candidates, then test to see 
	#if penalized likelihood increases when combined	
	if(Ktemp>0){

	#find adjusted binomial for two complexes
	lK2 <- adjBin[i] + adjBin[testset]

	#find adjusted binomial for joined complex
	lK1adjBin <- unlist(lapply(complexes[testset],FUN=LCjoinadjBin,comp=complexes[[i]],adjMat=adjMat,VBPs=VBPs,VBOs=VBOs,VPOs=VPOs,simMat=simMat,mu=mu,alpha=alpha,Beta=Beta))

	#find change in likelihood when adding new edges to graph
	Lchange <-
	unlist(lapply(complexes[testset],FUN=LCjoinLchange,comp=complexes[[i]],complexes=complexes,adjMat=adjMat,ccMat=ccMat,VBPs=VBPs,VBOs=VBOs,VPOs=VPOs,simMat=simMat,mu=mu,alpha=alpha,Beta=Beta))

	#find total change in likelihood
	LCInc1 <- lK1adjBin+Lchange-lK2

	#for combinations where it could reasonably make a difference,
	#look at change attributable to fisher's exact test
	lK1fisher <- rep(0,length(testset))
	dofisher <- which(LCInc1>-20)
	
	if(length(dofisher)>0){

	lKfisher <-
	unlist(lapply(complexes[testset[dofisher]],FUN=LCjoinfisher,comp=complexes[[i]],adjMat=adjMat,VBPs=VBPs,VBOs=VBOs,VPOs=VPOs,nMax=20,wsVal=wsVal))
	

	lK1fisher[dofisher] <- lKfisher - fisher[i] - fisher[testset[dofisher]]
	}
	
	
	#add in fisher's exact component
	LCIncs <- LCInc1+lK1fisher

	#some fisher tests will result in NA if too many proteins
	#replace these with likelihood change without fisher component
	LCIncs[which(is.na(LCIncs))] <- LCInc1[which(is.na(LCIncs))]
	
	same <- sum(LCIncs>0)>0

	#if any combinations increase the likihood, then find the maximum
	#and make the combination
	
	if(same){
		wm <- which.max(LCIncs)
		thisone <- testset[wm]

		combo <- unique(c(complexes[[i]],complexes[[thisone]]))
		complexes[[i]] <- combo		

		#remove combined complex
		complexes <- complexes[-thisone]

		#make corresponding changes to adjBin and fisher vectors
	        adjBin[i] <- lK1adjBin[wm]
		adjBin <- adjBin[-thisone]

		fisher[i] <- lK1fisher[wm]	
		if(is.na(lKfisher[wm])) fisher[i] <- NA
		fisher <- fisher[-thisone]

		#make changes in complex comembership matrix
		comboVB <- intersect(combo,c(VBPs,VBOs))
		comboVP <- intersect(combo,c(VBPs,VPOs))
		ccMat[comboVB,comboVP] <- 1


		K <- length(complexes)
		if(thisone<i) i <- i-1
		

	}
	}else same <- FALSE
	keepgoing2 <- same
	}

	
	i <- i+1
	keepgoing <- i < K

	#print(paste("i",i,"K",K))

}
nC <- length(complexes)
names(complexes) <- paste("Complex",1:nC,sep="")
return(complexes)

}
#function to sort complex estimates into MBME, SBMH, UnRBB

sortComplexes <- function(PCMG,adjMat){

   diag(adjMat) <- 0
   bNames <- rownames(adjMat)
   nComps <- length(PCMG)

   nBFUN <- function(x) sum(x %in% bNames)
   nBs <- unlist(lapply(PCMG,FUN=nBFUN))
   nT <- unlist(lapply(PCMG,FUN=length))
	    
   SBMHi <- which(nBs==1)
   SBMH <- PCMG[SBMHi]

   UnRBBi <- which(nBs==2 & nT==2)
   keep <- rep(TRUE,length(UnRBBi))
   for (i in 1:length(UnRBBi)){
       tBs <- PCMG[[UnRBBi[i]]]
       tVBPs <- tBs[tBs %in% colnames(adjMat)]
       keep[i] <- sum(adjMat[tBs,tVBPs])==1
   }   
   UnRBBi <- UnRBBi[keep]
   UnRBB <- PCMG[UnRBBi]

   MBMEi <- c(1:nComps)[!(1:nComps) %in% c(SBMHi,UnRBBi)]
   MBME <- PCMG[MBMEi]

   if(length(MBMEi)>0){
   names(MBME) <- paste("MBME",1:length(MBMEi),sep="")
   } else MBME <- NA

   if(length(SBMHi)>0){
   names(SBMH) <- paste("SBMH",1:length(SBMHi),sep="")
   } else SBMH <- NA

   if(length(UnRBBi)>0){
   names(UnRBB) <- paste("UnRBB",1:length(UnRBBi),sep="")
   } else UnRBB <- NA


   sComps <- list(MBME=MBME,SBMH=SBMH,UnRBB=UnRBB)

   return(sComps)

}
.First.lib <- function(libname,pkgname,where){
	require("graph") || stop("Need package graph")
	require("RBGL") || stop("Need package RBGL")
}

