## Stores the package name string "simulatorAPMS" in `.packageName`.
## NOTE(review): presumably emitted when the package's R sources were
## concatenated into this single file -- confirm before editing.
.packageName <- "simulatorAPMS"
JaccardCoef <- function(dataMat){

    ##Computes the Jaccard similarity coefficient, $\frac{a}{a+b+c}$, for
    ##every pair of complexes summarised in dataMat. See also the
    ##Dice-Sorenson Coefficient.
    ##
    ##dataMat: a list holding three matrices of identical dimensions (the
    ##    shape returned by compareComplex): $intersect (statistic a),
    ##    $cminusk (statistic b) and $kminusc (statistic c).
    ##
    ##Returns a numeric matrix with the dimensions and dimnames of
    ##dataMat$intersect. For non-negative counts every entry lies in the
    ##closed set [0,1]; entries whose denominator a+b+c is zero are 0.

    shared <- dataMat$intersect
    unionSize <- shared + dataMat$cminusk + dataMat$kminusc

    similarityMat <- matrix(0.00, nrow = nrow(shared), ncol = ncol(shared),
                            dimnames = dimnames(shared))

    ##Divide only where the denominator is non-zero. Working on the whole
    ##matrix at once (rather than looping over 1:nrow and 1:ncol) also
    ##keeps the function well defined for matrices with zero rows or
    ##zero columns.
    nonEmpty <- unionSize != 0
    similarityMat[nonEmpty] <- shared[nonEmpty] / unionSize[nonEmpty]

    similarityMat
}
applyDeformationError <- function(foundPrey, rateDeform){

    ##Simulates the false negatives caused by a deformed bait.
    ##
    ##foundPrey: the proteins that interact with the bait in the TSN.
    ##rateDeform: fraction of foundPrey that the deformed bait misses.
    ##
    ##Returns a random subset of foundPrey, drawn without replacement, of
    ##size round(rateDeform * length(foundPrey)): the FN Deformation
    ##errors. NB, the bait has been identified as deformed by the user.

    preyCount <- length(foundPrey)
    lostIdx <- sample(preyCount, round(rateDeform*preyCount))
    foundPrey[lostIdx]
}
applyFNErrors <- function(foundPrey, rateFN){

    ##Simulates the generic false negative errors for one bait.
    ##
    ##foundPrey: the proteins that interact with the bait in the TSN.
    ##rateFN: fraction of foundPrey that goes undetected.
    ##
    ##Returns a random subset of foundPrey, drawn without replacement, of
    ##size round(length(foundPrey) * rateFN): the FN errors.

    preyCount <- length(foundPrey)
    droppedIdx <- sample(preyCount, round(preyCount*rateFN))
    foundPrey[droppedIdx]

}
applyFPErrors <- function(notFoundProt, rateFP){

    ##Simulates the generic false positive errors for one bait.
    ##
    ##notFoundProt: the proteins that do not interact with the bait in
    ##    the TSN.
    ##rateFP: fraction of notFoundProt that is wrongly reported.
    ##
    ##Returns a random subset of notFoundProt, drawn without replacement,
    ##of size round(length(notFoundProt) * rateFP): the FP errors.

    candidateCount <- length(notFoundProt)
    spuriousIdx <- sample(candidateCount, round(candidateCount*rateFP))
    notFoundProt[spuriousIdx]

}
applyStickyError <- function(notFoundProt, rateSticky){

    ##Simulates the false positives caused by a sticky bait.
    ##
    ##notFoundProt: the proteins that do not interact with the bait in
    ##    the TSN.
    ##rateSticky: fraction of notFoundProt that the sticky bait pulls
    ##    down in error.
    ##
    ##Returns a random subset of notFoundProt, drawn without replacement,
    ##of size round(rateSticky * length(notFoundProt)): the FP Sticky
    ##errors. NB, the bait has been identified as sticky by the user.

    candidateCount <- length(notFoundProt)
    stuckIdx <- sample(candidateCount, round(rateSticky*candidateCount))
    notFoundProt[stuckIdx]

}
    
compBijection <- function (TSNMat, estMat, c2kMatrix, bijMat, counter = 1){

    ##Greedily pairs the row-complexes of c2kMatrix with its
    ##column-complexes. On every pass the largest remaining entry of
    ##c2kMatrix is located, the corresponding pair is written into row
    ##`counter` of bijMat (row name, column name, entry), and that row and
    ##column are removed from c2kMatrix. The passes stop once c2kMatrix
    ##has no rows or no columns left.
    ##
    ##TSNMat, estMat: matrices whose columns are indexed by the row names
    ##    and column names of c2kMatrix respectively; only used to break
    ##    ties between several maximal entries.
    ##c2kMatrix: matrix of pairwise coefficients with row and column names.
    ##bijMat: matrix with (at least) three columns that receives the
    ##    pairs; it must have min(dim(c2kMatrix)) + (counter - 1) rows.
    ##counter: the row of bijMat filled by the first pass.
    ##
    ##Returns the filled bijMat.

    repeat {

        ##The row count of bijMat must account for every pair still to
        ##be made plus the ones already recorded.
        if (nrow(bijMat) != min(dim(c2kMatrix)) + (counter - 1)){
            stop("The number of rows in the Bijection Matrix must be equal min(dim(c2kMatrix)).")
        }

        topScore <- max(c2kMatrix)
        topCells <- which(c2kMatrix == topScore, arr.ind = TRUE)
        rowCands <- rownames(c2kMatrix)[topCells[, "row"]]
        colCands <- colnames(c2kMatrix)[topCells[, "col"]]

        if (nrow(topCells) == 1){
            pick <- 1
        } else {
            ##Several cells share the maximum: prefer the pair whose two
            ##columns (in TSNMat and estMat) have the largest combined
            ##column sum, taking the first such pair on a further tie.
            pairSize <- colSums(TSNMat[, rowCands]) + colSums(estMat[, colCands])
            pick <- which(pairSize == max(pairSize))
            if (length(pick) > 1){
                pick <- pick[1]
            }
        }

        chosenRow <- rowCands[pick]
        chosenCol <- colCands[pick]
        bijMat[counter, 1] <- chosenRow
        bijMat[counter, 2] <- chosenCol
        bijMat[counter, 3] <- topScore

        ##Drop the matched row and column so neither can be paired again.
        c2kMatrix <- c2kMatrix[-which(rownames(c2kMatrix) == chosenRow),
                               -which(colnames(c2kMatrix) == chosenCol),
                               drop = FALSE]
        counter <- counter + 1

        if ((nrow(c2kMatrix) == 0) || (ncol(c2kMatrix) == 0)){
            break
        }
    }

    bijMat
}
    
        
compIndep <- function(TSNMat, erMat, intersectMat){

    ##Measures, for every pair (C_i, K_j) of columns of TSNMat and erMat,
    ##how far the pair is from statistical independence.
    ##
    ##TSNMat, erMat: matrices over the same N rows; column sums divided by
    ##    N are used as the marginal probabilities p_i and q_j.
    ##intersectMat: matrix of pair counts (ncol(TSNMat) by ncol(erMat));
    ##    divided by N it gives the joint probabilities p_ij.
    ##
    ##Returns a matrix shaped like intersectMat whose (i,j) entry is
    ##p_ij * log(p_ij / (p_i * q_j)). Entries that evaluate to NaN (for
    ##example 0 * log(0) when a pair shares nothing) are set to 0.

    N <- nrow(TSNMat)
    jointProb <- intersectMat/N

    TSNProb <- colSums(TSNMat)/N
    erProb <- colSums(erMat)/N

    ##Outer product of the marginals: the joint probability expected
    ##under independence.
    prodProb <- TSNProb %*% t(erProb)

    howIndep <- jointProb * log(jointProb/prodProb)

    ##Vectorised NaN clean-up; unlike a 1:nrow / 1:ncol loop this is also
    ##safe when the matrix has zero rows or zero columns.
    howIndep[is.nan(howIndep)] <- 0

    howIndep

}
compareComplex <- function(TSNMat, erMat){

    ##Compares the complexes derived from the error model (via apComplex)
    ##with the True State of Nature complexes. The C_i are the columns of
    ##TSNMat and the K_j are the columns of erMat; both matrices must
    ##carry identical row names (the proteins).
    ##
    ##Returns a list of three matrices, each with one row per C_i and one
    ##column per K_j:
    ##  intersect - number of proteins common to C_i and K_j (a)
    ##  cminusk   - number of proteins in C_i but absent from K_j (b)
    ##  kminusc   - number of proteins in K_j but absent from C_i (c)
    ##
    ##Stops with an error when the row names differ.

    if (!identical(rownames(TSNMat), rownames(erMat))){
        stop("Rownames of both matrices must be identical.")
    }

    inC <- t(TSNMat)
    notInC <- t(!TSNMat)
    notInK <- !erMat

    list(intersect = inC %*% erMat,
         cminusk = inC %*% notInK,
         kminusc = notInC %*% erMat)
}


#dlist$intersect[i, j]




































                                        #for(i in 1:dim(TSNMat)[2]){
                                        #
                                        #    complex = names(TSNMat[which(TSNMat[,i] == 1),i])
                                        #
                                        #    for (j in 1:dim(erMat)[2]){
                                        #
                                        #   
                                        #        komplex = names(erMat[which(erMat[,j] == 1),j])
                                        #
                                        #        inCNotK = setdiff(complex, komplex)
                                        #        inKNotC = setdiff(komplex, complex)
                                        #
                                        #
                                        #        b = length(inCNotK)
                                        #        c = length(inKNotC)
                                        #
                                        #
                                        #        dataArray[i,j,2] = b
                                        #        dataArray[i,j,3] = c
                                        #
                                        #        
                                        #
                                        #    }
                                        #}
                                        #
                                        #dataArray

recordSticky <- function(simMat, vSticky, threshold = 50){

    ##Collects the row indices of simMat that are to be treated as sticky.
    ##
    ##simMat: matrix with one row per bait.
    ##vSticky: NULL, or the row names of baits the user already knows to
    ##    be sticky.
    ##threshold: a row counts as sticky when its row sum is strictly
    ##    greater than this value (default 50).
    ##
    ##Returns an integer vector of row indices: first the rows whose row
    ##sum exceeds threshold (named by their row names), followed by the
    ##indices of any rows named in vSticky that were not already detected.

    vecStickyProt <- which(rowSums(simMat) > threshold)

    if (!is.null(vSticky)){

        ##Work on the whole set of user supplied names at once. Testing
        ##them in a scalar `if` and matching with `==` is only valid for
        ##a single name.
        rest <- setdiff(vSticky, names(vecStickyProt))

        if (length(rest) > 0){
            namesInd <- which(rownames(simMat) %in% rest)
            vecStickyProt <- c(vecStickyProt, namesInd)
        }
    }

    vecStickyProt

}
runAPComplex <- function(errorModel, vBaits){

    ##This function prepares the error model simulation output
    ##as an input for apComplex and runs findComplexes on it.
    ##
    ##errorModel: matrix with column names; the simulated bait-prey
    ##    observations.
    ##vBaits: names of the bait columns; they are moved to the front of
    ##    the column order.
    ##
    ##To reduce the size of the matrix, any column with zero sum is
    ##removed since this implies that the protein indexing that
    ##particular column is never found by any baits used in the AP-MS
    ##technology, and hence, would not be recorded in the wet-lab
    ##experiment.
    ##
    ##Returns the output of findComplexes with every column whose sum is
    ##exactly 1 removed.
    ##NOTE(review): presumably that output is the protein-by-complex
    ##matrix of the bipartite graph -- confirm against apComplex.

    library(apComplex)

    prot <- colnames(errorModel)
    notBaits <- setdiff(prot, vBaits)
    newIndexing <- c(vBaits, notBaits)
    ##drop = FALSE throughout: a matrix left with a single column must
    ##stay a matrix, otherwise diag<- and colSums below fail.
    errorModel <- errorModel[, newIndexing, drop = FALSE]

    diag(errorModel) <- 1
    cIndToDel <- which(colSums(errorModel) == 0)
    if (length(cIndToDel) != 0){
        errorModel <- errorModel[, -cIndToDel, drop = FALSE]
    }
    errorComplex <- findComplexes(errorModel)
    removeProt <- which(colSums(errorComplex) == 1)
    if (length(removeProt) != 0){
        errorComplex <- errorComplex[, -removeProt, drop = FALSE]
    }
    errorComplex

}


runAlignment <- function(TSNMat, estMat, c2kMat){

    ##Sets up an empty three-column bijection matrix with one row per
    ##pair that can be formed from c2kMat (min(dim(c2kMat)) rows) and
    ##hands it to compBijection to be filled, starting at row 1.
    ##
    ##TSNMat, estMat, c2kMat: passed on unchanged to compBijection.
    ##
    ##Returns the matrix produced by compBijection.

    nPairs <- min(dim(c2kMat))
    emptyBij <- matrix(nrow = nPairs, ncol = 3,
                       dimnames = list(1:nPairs,
                                       c("ISMO-Complexes", "Est-Complexes", "Coefficient")))

    compBijection(TSNMat, estMat, c2kMat, emptyBij, 1)

}
##should it be all proteins or just those in non-trivial complexes?

runCompIndep <- function (TSNMat, estMat, intersectMat){

    ##This function prepares two matrix representations of bipartite
    ##graphs for comparison and then calls compIndep on them. The rows
    ##of TSNMat and estMat need to be identical for the comparison to be
    ##meaningful. To that end, both matrices are extended with zero rows
    ##so that all proteins are listed, and in the same order.
    ##
    ##TSNMat, estMat: matrices with row names (proteins).
    ##intersectMat: passed on unchanged to compIndep.
    ##
    ##Returns the value of compIndep.

    rNamesTSN <- rownames(TSNMat)
    rNamesEr <- rownames(estMat)

    ##grabbing names of proteins in one matrix and not the other...
    onlyInTSN <- setdiff(rNamesTSN, rNamesEr)
    onlyInEr <- setdiff(rNamesEr, rNamesTSN)

    ##creating zero matrices indexed by the two sets above....
    apendTSN <- matrix(0, nrow = length(onlyInEr), ncol = ncol(TSNMat))
    dimnames(apendTSN) <- list(onlyInEr, colnames(TSNMat))
    apendEr <- matrix(0, nrow = length(onlyInTSN), ncol = ncol(estMat))
    dimnames(apendEr) <- list(onlyInTSN, colnames(estMat))

    ##binding the matrices created above to TSNMat and estMat...
    TSNMat <- rbind(TSNMat, apendTSN)
    estMat <- rbind(estMat, apendEr)

    ##aggregate set of proteins listed in some order
    unionNames <- union(rNamesTSN, rNamesEr)

    ##makes sure ordering in TSNMat and estMat are identical; drop = FALSE
    ##keeps a one-column matrix from collapsing into a plain vector.
    TSNMat <- TSNMat[unionNames, , drop = FALSE]
    estMat <- estMat[unionNames, , drop = FALSE]

    compIndep(TSNMat, estMat, intersectMat)

}
runCompareComplex <- function(TSNMat, erComplex){

    ##This function prepares two matrix representations of bipartite
    ##graphs for comparison and then calls compareComplex on them. The
    ##rows of TSNMat and erComplex need to be identical for the
    ##comparison to be meaningful. To that end, both matrices are
    ##extended with zero rows so that all proteins are listed, and in the
    ##same order.
    ##
    ##TSNMat, erComplex: matrices with row names (proteins).
    ##
    ##Returns the value of compareComplex.

    rNamesTSN <- rownames(TSNMat)
    rNamesEr <- rownames(erComplex)

    ##grabbing names of proteins in one matrix and not the other...
    onlyInTSN <- setdiff(rNamesTSN, rNamesEr)
    onlyInEr <- setdiff(rNamesEr, rNamesTSN)

    ##creating zero matrices indexed by the two sets above....
    apendTSN <- matrix(0, nrow = length(onlyInEr), ncol = ncol(TSNMat))
    dimnames(apendTSN) <- list(onlyInEr, colnames(TSNMat))
    apendEr <- matrix(0, nrow = length(onlyInTSN), ncol = ncol(erComplex))
    dimnames(apendEr) <- list(onlyInTSN, colnames(erComplex))

    ##binding the matrices created above to TSNMat and erComplex...
    TSNMat <- rbind(TSNMat, apendTSN)
    erComplex <- rbind(erComplex, apendEr)

    ##aggregate set of proteins listed in some order
    unionNames <- union(rNamesTSN, rNamesEr)

    ##makes sure ordering in TSNMat and erComplex are identical;
    ##drop = FALSE keeps a one-column matrix from collapsing into a plain
    ##vector, which would lose the row names compareComplex checks.
    TSNMat <- TSNMat[unionNames, , drop = FALSE]
    erComplex <- erComplex[unionNames, , drop = FALSE]

    ##calls compareComplex and gets the necessary statistics
    compareComplex(TSNMat, erComplex)

}
    
runSimulators <- function(TSNMat, vBaits, vDeform=NULL, vStky=NULL, rateFP, rateFN, rateD, rateS, missedProt=NULL, seedIn){

    ##This function sets up the inputs in the form the other simulator
    ##functions expect, runs them, and returns the simulated bait-prey
    ##matrix.
    ##
    ##TSNMat: protein-by-complex matrix of the True State of Nature.
    ##vBaits: the proteins used as baits (rows selected from the PPI
    ##    matrix).
    ##vDeform, vStky: NULL, or the baits known to be deformed / sticky.
    ##rateFP, rateFN: rates handed to simulator for every bait.
    ##rateD, rateS: rates handed to simulatorD / simulatorS.
    ##missedProt: NULL, or names of proteins that belong to no complex
    ##    in TSNMat; they are appended as all-zero rows.
    ##seedIn: value passed to set.seed.
    ##
    ##Returns the simulated matrix after the rows (and the columns with
    ##the same indices) reported by recordSticky have been removed.

    ##seed so one can regenerate random things...
    set.seed(seedIn)
    tmpMatrix <- TSNMat

    ##The TSNMat will generally include complexes of size 2 or higher...so it
    ##is necessary to re-enter the proteins that are singleton complexes (or
    ##so this is what we posit). The if statement appends rows indexed by such
    ##proteins that don't belong to any non-trivial complex.
    if (length(missedProt) != 0){

        appendMat <- matrix(0, nrow = length(missedProt), ncol = ncol(TSNMat))
        dimnames(appendMat) <- list(missedProt, colnames(TSNMat))
        tmpMatrix <- rbind(TSNMat, appendMat)

    }

    ##We calculate the protein-protein interaction matrix using the appended TSNMat.
    ##Because we only care about interactions or not (and not how many) we change
    ##any nonzero element to unity.
    PPIMat <- tmpMatrix %*% t(tmpMatrix)
    mode(PPIMat) <- "logical"
    mode(PPIMat) <- "numeric"

    ##In the experiment, only the baits will be present in the rows of the PPI matrix
    ##since these are the proteins the user has preselected to conduct the experiment.
    ##drop = FALSE keeps a matrix even when there is a single bait.
    exMat <- PPIMat[vBaits, , drop = FALSE]

    ##For each bait, we call the simulator function to generate the random FP/FN errors
    ##on each row indexed by that bait. seq_along makes this a no-op for zero baits.
    for (i in seq_along(vBaits)){

        exMat <- simulator(tmpMatrix, exMat, vBaits[i], rateFP, rateFN)

    }

    ##If the user enters known deformed baits, then this function calls the deformation error
    ##simulator to generate FN's due to deformation.
    if (!is.null(vDeform)){

        exMat <- simulatorD(vDeform, rateD, TSNMat, exMat)
    }

    ##If the user enters known sticky baits, then this function calls the sticky error
    ##simulator to generate FP's due to stickiness.
    if (!is.null(vStky)){

        exMat <- simulatorS(vStky, rateS, TSNMat, exMat)
    }

    ##Each bait should always interact with itself by definition. This sets
    ##entry [k, k] for k = 1..length(vBaits), written as a matrix index so it
    ##also works for a single bait.
    ##NOTE(review): [k, k] is the bait's own column only if the baits are the
    ##first length(vBaits) columns, in vBaits order -- confirm with callers.
    baitIdx <- seq_along(vBaits)
    exMat[cbind(baitIdx, baitIdx)] <- 1

    ##recordSticky returns row indices; the same indices are removed from the
    ##columns as well.
    rS <- recordSticky(exMat, vStky)
    if (length(rS) != 0){
        exMat <- exMat[-rS, -rS, drop = FALSE]
    }

    exMat


}

simulator <- function(TSNMat, exMat, bait, rateFP, rateFN){

    ##Applies random false positive and false negative errors to the row
    ##of exMat that belongs to one bait.
    ##
    ##TSNMat: not used by the computation; kept so existing callers that
    ##    pass it positionally keep working.
    ##exMat: bait-by-protein matrix of 0/1 observations.
    ##bait: the row of exMat (name or index) to perturb.
    ##rateFP, rateFN: rates handed to applyFPErrors and applyFNErrors.
    ##
    ##Returns exMat with the bait's row updated.

    ##now we take all the proteins that interact with this bait
    ##as well as all those that do not; both sets are taken before any
    ##error is applied.
    baitRow <- exMat[bait,]
    foundPrey <- which(baitRow == 1)
    notFoundByThisBait <- which(baitRow == 0)

    ##using only the non-interacting proteins, we simulate FP errors
    falsePositive <- applyFPErrors(notFoundByThisBait, rateFP)
    exMat[bait, falsePositive] <- 1

    ##using only the interacting proteins, we simulate FN errors
    falseNegative <- applyFNErrors(foundPrey, rateFN)
    exMat[bait, falseNegative] <- 0

    exMat
}



    #old code
    #colIndices = NULL
    #for(n in 1:length(bait)){
    #    colIndices <- union(colIndices, which(TSNMat[bait[n],] == 1))
    #}




simulatorD <- function(deformedBaits, rateD, TSNMat, exMat){

    ##Applies deformation (false negative) errors to the rows of exMat
    ##that belong to the deformed baits.
    ##
    ##deformedBaits: the rows of exMat (names or indices) to perturb.
    ##rateD: a single rate used for every deformed bait, or one rate per
    ##    deformed bait.
    ##TSNMat: not used by the computation; kept for interface
    ##    compatibility.
    ##exMat: bait-by-protein matrix of 0/1 observations.
    ##
    ##Returns exMat with the selected entries of each deformed bait's row
    ##set to 0. Stops when rateD is neither a singleton nor as long as
    ##deformedBaits.

    ##Use a uniform rate for deformation error...
    if (length(rateD) == 1){

        rateD <- rep(rateD, length(deformedBaits))
    }

    ##After the expansion above, the only valid length is one rate per
    ##deformed bait.
    if (length(rateD) != length(deformedBaits)){

        stop("The length of rateD must be either a singleton or equal to the number of deformed baits")
    }

    ##For every deformed bait, take the proteins currently recorded as
    ##interacting with it and generate the deformation errors on those
    ##only. seq_along makes the loop a no-op for an empty bait vector.
    for (i in seq_along(deformedBaits)){

        foundPrey <- which(exMat[deformedBaits[i],] == 1)
        ##NOTE(review): foundPrey holds column indices, so this removes the
        ##bait itself only when deformedBaits holds indices -- confirm.
        foundPrey <- setdiff(foundPrey, deformedBaits[i])
        missPrey <- applyDeformationError(foundPrey, rateD[i])
        exMat[deformedBaits[i], missPrey] <- 0
    }

    exMat
}


        #old code 
        #prey <- which(PPIMat[deformedBaits[i],] == 1)
        #prey <- prey[-deformedBaits[i]]
        #missedPrey <- applyDeformationError(prey, rateD[i])
        #PPIMat[deformedBaits[i], missedPrey] = 0
        
simulatorS <- function(stickyBaits, rateS, TSNMat, exMat){

    ##Applies sticky (false positive) errors to the rows of exMat that
    ##belong to the sticky baits.
    ##
    ##stickyBaits: the rows of exMat (names or indices) to perturb.
    ##rateS: a single rate used for every sticky bait, or one rate per
    ##    sticky bait.
    ##TSNMat: not used by the computation; kept for interface
    ##    compatibility.
    ##exMat: bait-by-protein matrix of 0/1 observations.
    ##
    ##Returns exMat with the selected entries of each sticky bait's row
    ##set to 1. Stops when rateS is neither a singleton nor as long as
    ##stickyBaits.

    ##Use a uniform rate for sticky error...
    if (length(rateS) == 1){

        rateS <- rep(rateS, length(stickyBaits))
    }

    ##After the expansion above, the only valid length is one rate per
    ##sticky bait.
    if (length(rateS) != length(stickyBaits)){

        stop("The length of rateS should be either a singleton or the same as the number of sticky baits.")
    }

    ##For every sticky bait, take the proteins currently recorded as not
    ##interacting with it and generate the sticky errors on those only.
    ##seq_along makes the loop a no-op for an empty bait vector.
    for (i in seq_along(stickyBaits)){

        notFound <- which(exMat[stickyBaits[i],] == 0)
        ##NOTE(review): notFound holds column indices, so this removes the
        ##bait itself only when stickyBaits holds indices -- confirm.
        notFound <- setdiff(notFound, stickyBaits[i])
        errPrey <- applyStickyError(notFound, rateS[i])
        exMat[stickyBaits[i], errPrey] <- 1
    }

    exMat
}


        #old code
        #notPrey <- which(PPIMat[stickyBaits[i], ] == 0)
        #errPrey <- applyStickyError(notPrey, rateS[i])
        #PPIMat[stickyBaits[i], errPrey] = 1
        
