##Name of the package the definitions in this file belong to.
##NOTE(review): presumably consumed by R's package machinery - confirm
##before renaming or removing.
.packageName <- "simulatorAPMS"
applyDeformationError <- function(foundPrey, rateDeform){

    ##Picks the prey that a deformed bait fails to detect (false negatives
    ##due to deformation).
    ##
    ##foundPrey:  vector of the proteins (names or indices) that the bait
    ##            interacts with.
    ##rateDeform: fraction of foundPrey to lose; the number lost is
    ##            round(rateDeform * length(foundPrey)).
    ##
    ##Returns the randomly chosen elements of foundPrey, drawn without
    ##replacement from the current RNG state.

    preyCount <- length(foundPrey)
    lostCount <- round(rateDeform*preyCount)

    ##sample() draws positions 1..preyCount, which then index foundPrey
    foundPrey[sample(preyCount, lostCount)]
}
applyFNErrors <- function(foundPrey, rateFN){

    ##Chooses which of a bait's interacting proteins are lost as
    ##false negatives.
    ##
    ##foundPrey: vector of the proteins (names or indices) that the bait
    ##           interacts with.
    ##rateFN:    false negative rate; round(length(foundPrey) * rateFN)
    ##           proteins are selected.
    ##
    ##Returns the selected subset of foundPrey (random, without
    ##replacement).

    total <- length(foundPrey)
    picked <- sample(total, round(total*rateFN))
    foundPrey[picked]

}
applyFPErrors <- function(notFoundProt, rateFP){

    ##Chooses which of the proteins NOT interacting with a bait are
    ##reported anyway, i.e. the false positives.
    ##
    ##notFoundProt: vector of the proteins (names or indices) that the
    ##              bait does not interact with.
    ##rateFP:       false positive rate; the number selected is
    ##              round(length(notFoundProt) * rateFP).
    ##
    ##Returns the selected subset of notFoundProt (random, without
    ##replacement).

    poolSize <- length(notFoundProt)
    howMany <- round(poolSize*rateFP)
    chosen <- sample(poolSize, howMany)
    notFoundProt[chosen]

}
applyStickyError <- function(notFoundProt, rateSticky){

    ##Chooses the false positives caused by stickiness.
    ##
    ##notFoundProt: vector of candidates (names or indices) that currently
    ##              show no interaction.
    ##rateSticky:   fraction of the candidates to flag; the number flagged
    ##              is round(rateSticky * length(notFoundProt)).
    ##
    ##Returns the flagged subset of notFoundProt (random, without
    ##replacement).

    candidateCount <- length(notFoundProt)
    flagged <- sample(candidateCount, round(rateSticky*candidateCount))
    notFoundProt[flagged]

}
    
compIndep <- function(TSNMat, erMat, intersectMat){

    ##Computes, for every pair (column i of TSNMat, column j of erMat),
    ##the term
    ##        p_ij * log( p_ij / (p_i * q_j) )
    ##where N    = nrow(TSNMat),
    ##      p_ij = intersectMat[i,j] / N,
    ##      p_i  = colSums(TSNMat)[i] / N,
    ##      q_j  = colSums(erMat)[j] / N.
    ##
    ##TSNMat, erMat: numeric matrices.
    ##               NOTE(review): presumably 0/1 incidence matrices over
    ##               the same rows - confirm against callers.
    ##intersectMat:  ncol(TSNMat) x ncol(erMat) numeric matrix.
    ##               NOTE(review): presumably the column co-occurrence
    ##               counts of TSNMat and erMat - confirm.
    ##
    ##Returns a matrix shaped like intersectMat. Entries that evaluate to
    ##NaN (e.g. 0 * log(0)) are replaced by 0.

    N <- nrow(TSNMat)
    jointProb <- intersectMat/N

    TSNProb <- colSums(TSNMat)/N
    erProb <- colSums(erMat)/N

    ##outer product of the two sets of marginals
    prodProb <- TSNProb %*% t(erProb)

    howIndep <- jointProb * log(jointProb/prodProb)

    ##Vectorised replacement; unlike a 1:nrow / 1:ncol loop this is also
    ##safe when the matrix has no rows or no columns.
    howIndep[is.nan(howIndep)] <- 0

    howIndep

}
recordSticky <- function(simMat, vSticky, vBaits, stickyCutoff = 100){

    ##Identifies the columns of simMat that should be treated as sticky
    ##prey (known sticky proteins, plus prey seen too often).
    ##
    ##simMat:       bait-by-protein matrix. The first length(vBaits)
    ##              columns are assumed to be the baits themselves; the
    ##              remaining columns are treated as prey.
    ##vSticky:      names of proteins declared sticky by the user (may be
    ##              NULL).
    ##vBaits:       names of the baits.
    ##stickyCutoff: a prey column whose column sum exceeds this value is
    ##              flagged as sticky. Defaults to the previously
    ##              hard-coded value of 100.
    ##
    ##Returns an integer vector of column indices into simMat (named by
    ##column name when simMat has column names), so that
    ##simMat[, -result] removes exactly the sticky columns.

    numBaits <- length(vBaits)

    ##Prey occupy the columns after the baits. seq_len() keeps this empty
    ##(instead of counting backwards) when there are no prey columns.
    preyCols <- seq_len(max(ncol(simMat) - numBaits, 0)) + numBaits

    ##drop = FALSE so that a single prey column is still a matrix
    preySums <- colSums(simMat[, preyCols, drop = FALSE])

    ##Map positions inside the prey sub-matrix back to positions in
    ##simMat, so they agree with the name-based indices appended below.
    vecStickyProt <- preyCols[which(preySums > stickyCutoff)]
    names(vecStickyProt) <- colnames(simMat)[vecStickyProt]

    ##Because of apComplex, we cannot remove sticky baits...but QC should
    ##have made sure that sticky baits were never used
    stickyNBaits <- setdiff(vSticky, vBaits)

    ##Add every user-declared sticky protein that was not already flagged.
    ##Each protein is handled individually; names absent from simMat are
    ##silently ignored.
    rest <- setdiff(stickyNBaits, names(vecStickyProt))
    namesInd <- which(colnames(simMat) %in% rest)
    names(namesInd) <- colnames(simMat)[namesInd]

    c(vecStickyProt, namesInd)

}
runAPComplex <- function(errorModel, vBaits){

    ##Prepares the error model simulation output as an input for
    ##apComplex and runs the complex estimation.
    ##
    ##errorModel: bait-by-protein matrix with protein names as column
    ##            names.
    ##vBaits:     names of the baits; these columns are moved to the front.
    ##
    ##To reduce the size of the matrix, any column whose sum is zero is
    ##removed: the protein indexing that column is never found by any
    ##bait and would not be recorded in the wet-lab experiment.
    ##
    ##Returns the matrix produced by createGOMatrix() from the
    ##findComplexes() result, with every column whose sum is exactly 1
    ##removed.
    ##NOTE(review): findComplexes and createGOMatrix are not defined in
    ##this file; presumably they come from apComplex or another attached
    ##package - confirm.

    library(apComplex)

    ##Reorder the columns so that the baits come first. drop = FALSE keeps
    ##a matrix even if only one column is selected.
    prot <- colnames(errorModel)
    notBaits <- setdiff(prot, vBaits)
    errorModel <- errorModel[, c(vBaits, notBaits), drop = FALSE]

    ##Sets errorModel[i,i] to 1 along the leading diagonal.
    ##NOTE(review): this marks each bait as finding itself only if the
    ##rows are in vBaits order - confirm.
    diag(errorModel) <- 1

    cIndToDel <- which(colSums(errorModel) == 0)
    if (length(cIndToDel) != 0){
        errorModel <- errorModel[, -cIndToDel, drop = FALSE]
    }

    errorComplex <- findComplexes(errorModel)
    errComp <- createGOMatrix(errorComplex)

    ##Drop the columns that sum to exactly one.
    removeProt <- which(colSums(errComp) == 1)
    if (length(removeProt) != 0){
        errComp <- errComp[, -removeProt, drop = FALSE]
    }
    errComp

}



##should it be all proteins or just those in non-trivial complexes?

runCompIndep <- function (TSNMat, estMat, intersectMat){

    ##Aligns the rows of two matrices and passes them to compIndep().
    ##
    ##TSNMat, estMat: matrices with protein names as row names.
    ##intersectMat:   passed through unchanged to compIndep().
    ##
    ##Both matrices are extended with all-zero rows for the proteins that
    ##appear only in the other one, then reordered so that their rows are
    ##identical and in the same order (union of the row names, TSNMat's
    ##names first).
    ##
    ##Returns the value of compIndep(TSNMat, estMat, intersectMat) on the
    ##aligned matrices.

    rNamesTSN <- rownames(TSNMat)
    rNamesEr <- rownames(estMat)

    ##names of proteins present in one matrix and not the other
    onlyInTSN <- setdiff(rNamesTSN, rNamesEr)
    onlyInEr <- setdiff(rNamesEr, rNamesTSN)

    ##zero matrices indexed by the two sets above
    apendTSN <- matrix(0, nrow = length(onlyInEr), ncol = ncol(TSNMat))
    dimnames(apendTSN) <- list(onlyInEr, colnames(TSNMat))
    apendEr <- matrix(0, nrow = length(onlyInTSN), ncol = ncol(estMat))
    dimnames(apendEr) <- list(onlyInTSN, colnames(estMat))

    ##aggregate set of proteins, in a fixed order
    unionNames <- union(rNamesTSN, rNamesEr)

    ##Append the missing rows and impose the common ordering.
    ##drop = FALSE keeps a single-column input as a matrix; without it the
    ##result collapses to a vector and nrow()/colSums() in compIndep fail.
    TSNMat <- rbind(TSNMat, apendTSN)[unionNames, , drop = FALSE]
    estMat <- rbind(estMat, apendEr)[unionNames, , drop = FALSE]

    compIndep(TSNMat, estMat, intersectMat)

}
runSimulators <- function(TSNMat, vBaits, vDeform=NULL, vStky=NULL, rateFP, rateFN, rateD, rateS, missedProt=NULL, seedIn){

    ##Entry point of the simulation: puts the inputs into the form the
    ##other simulator functions expect and applies them in turn.
    ##
    ##TSNMat:     protein-by-complex incidence matrix (proteins in rows).
    ##vBaits:     the proteins used as baits (row names of TSNMat). The
    ##            diagonal fix-up and recordSticky() below treat the first
    ##            length(vBaits) columns as the baits, in vBaits order.
    ##vDeform:    deformed baits, or NULL to skip deformation errors.
    ##vStky:      sticky proteins, or NULL to skip stickiness errors.
    ##rateFP, rateFN: rates passed to simulator() for every bait.
    ##rateD:      rate(s) passed to simulatorD().
    ##rateS:      rate passed to simulatorS().
    ##missedProt: proteins that belong to no complex in TSNMat; they are
    ##            appended as all-zero rows (may be NULL).
    ##seedIn:     passed to set.seed() so runs can be regenerated. This
    ##            resets the session's RNG state.
    ##
    ##Returns the simulated bait-by-protein matrix with the columns
    ##reported by recordSticky() removed.

    ## FIXME: Tony need to fix this
    ##data("filteredProt")
    ##test = intersect(vBaits, filteredProt)
    ##if(length(test) != 0){
    ##    stop("You have used these (sticky/companion/involved in gluconeogenesis) proteins as bait.")
    ##}

    ##seed so one can regenerate random things...
    set.seed(seedIn)
    tmpMatrix <- TSNMat

    ##TSNMat generally holds complexes of size 2 or higher, so proteins
    ##posited to be singleton complexes are re-entered as all-zero rows.
    if (length(missedProt) != 0){

        appendMat <- matrix(0, nrow = length(missedProt), ncol = ncol(TSNMat))
        dimnames(appendMat) <- list(missedProt, colnames(TSNMat))
        tmpMatrix <- rbind(TSNMat, appendMat)

    }

    ##Protein-protein co-membership matrix (incidence -> adjacency).
    ##Only presence matters, so the logical/numeric round trip turns every
    ##nonzero count into 1.
    PPIMat <- tmpMatrix %*% t(tmpMatrix)
    mode(PPIMat) <- "logical"
    mode(PPIMat) <- "numeric"

    ##Only the baits index the rows of the experiment matrix.
    ##drop = FALSE keeps a matrix when a single bait is used.
    exMat <- PPIMat[vBaits, , drop = FALSE]

    ##Generate the random FP/FN errors on each bait's row.
    for (i in seq_along(vBaits)){

        exMat <- simulator(tmpMatrix, exMat, vBaits[i], rateFP, rateFN)

    }

    ##FN's due to deformation, if the user named deformed baits.
    if (!is.null(vDeform)){

        exMat <- simulatorD(vDeform, rateD, TSNMat, exMat)
    }

    ##FP's due to stickiness, if the user named sticky proteins. Sticky
    ##proteins should not be used as baits; this is handled in the QC.

    ##vStky = c(vStky, filteredProt)

    if (!is.null(vStky)){

        exMat <- simulatorS(vStky, rateS, exMat)
    }

    ##Each bait should always interact with itself by definition.
    ##Matrix indexing sets exMat[i,i] directly and, unlike diag() on a
    ##sub-matrix, also works when there is exactly one bait.
    baitIdx <- seq_along(vBaits)
    exMat[cbind(baitIdx, baitIdx)] <- 1

    ##diagnostic output kept from the original implementation
    print(dim(exMat))

    rS <- recordSticky(exMat, vStky, vBaits)
    if (length(rS) != 0){
        exMat <- exMat[, -rS, drop = FALSE]
    }

    exMat

}

simulator <- function(TSNMat, exMat, bait, rateFP, rateFN){

    ##Applies random false positive and false negative errors to the row
    ##of exMat that belongs to one bait.
    ##
    ##TSNMat: not used; kept so existing callers keep working.
    ##exMat:  bait-by-protein 0/1 matrix.
    ##bait:   the row of exMat to perturb (name or index).
    ##rateFP: rate handed to applyFPErrors() for the 0 entries of the row.
    ##rateFN: rate handed to applyFNErrors() for the 1 entries of the row.
    ##
    ##Returns exMat with the bait's row updated.

    ##Partition the row BEFORE any error is applied, so that a freshly
    ##created false positive cannot be picked as a false negative.
    baitRow <- exMat[bait, ]
    foundPrey <- which(baitRow == 1)
    notFoundByThisBait <- which(baitRow == 0)

    ##using only the non-interacting proteins, we simulate FP errors
    falsePositive <- applyFPErrors(notFoundByThisBait, rateFP)
    exMat[bait, falsePositive] <- 1

    ##using only the interacting proteins, we simulate FN errors
    falseNegative <- applyFNErrors(foundPrey, rateFN)
    exMat[bait, falseNegative] <- 0

    exMat
}



    #old code
    #colIndices = NULL
    #for(n in 1:length(bait)){
    #    colIndices <- union(colIndices, which(TSNMat[bait[n],] == 1))
    #}




simulatorD <- function(deformedBaits, rateD, TSNMat, exMat){

    ##Applies deformation (false negative) errors to the rows of exMat
    ##that belong to deformed baits.
    ##
    ##deformedBaits: rows of exMat to perturb (names or indices).
    ##rateD:         either one rate used for every deformed bait, or one
    ##               rate per deformed bait.
    ##TSNMat:        not used; kept so existing callers keep working.
    ##exMat:         bait-by-protein 0/1 matrix.
    ##
    ##Returns exMat with the selected entries set to 0. Stops with an
    ##error if rateD is neither a single value nor one value per bait.

    numDeformed <- length(deformedBaits)

    ##Use a uniform rate for deformation error...
    if (length(rateD) == 1){

        rateD <- rep(rateD, numDeformed)
    }

    ##...otherwise there must be exactly one rate per deformed bait.
    if (length(rateD) != numDeformed){

        stop("The length of rateD must be either a singleton or equal to the number of deformed baits")
    }

    ##Deformation errors are generated only on the proteins the bait
    ##currently finds, excluding the bait itself.
    for (i in seq_along(deformedBaits)){

        bait <- deformedBaits[i]

        ##which() yields column positions, so a bait given by name has to
        ##be translated to its column position before it can be excluded.
        selfCol <- if (is.character(bait)) match(bait, colnames(exMat)) else bait

        foundPrey <- setdiff(which(exMat[bait, ] == 1), selfCol)
        missPrey <- applyDeformationError(foundPrey, rateD[i])
        exMat[bait, missPrey] <- 0
    }

    exMat
}


        #old code 
        #prey <- which(PPIMat[deformedBaits[i],] == 1)
        #prey <- prey[-deformedBaits[i]]
        #missedPrey <- applyDeformationError(prey, rateD[i])
        #PPIMat[deformedBaits[i], missedPrey] = 0
        
simulatorS <- function(sticky, rateS, exMat){

    ##Generates stickiness (false positive) errors: for every sticky
    ##protein, some of the baits that did not find it are made to find
    ##it. These are the proteins that later need filtering.
    ##
    ##sticky: columns of exMat that are sticky (names or indices).
    ##rateS:  a single rate, handed to applyStickyError() for each column.
    ##exMat:  bait-by-protein 0/1 matrix.
    ##
    ##Returns exMat with the selected entries set to 1. Stops with an
    ##error if rateS is not a single value.

    if (length(rateS) != 1){

        stop("The length of rateS should be a single value")
    }

    ##seq_along() makes an empty sticky vector a no-op instead of
    ##indexing exMat with NA.
    for (i in seq_along(sticky)){

        stickyCol <- sticky[i]

        ##rows (baits) that currently do not report this protein
        notFound <- which(exMat[, stickyCol] == 0)
        err <- applyStickyError(notFound, rateS)
        exMat[err, stickyCol] <- 1
    }

    exMat
}


         
