suppressPackageStartupMessages(library(optparse))
suppressPackageStartupMessages(library(futile.logger))
suppressPackageStartupMessages(library(BiocParallel))

### Parsing command line ------------------------------------------------------

# Command line interface of this script. The defaults of --method and
# --keepduplicates are read from the formals of the PureCN functions they are
# handed to, so the script follows the package defaults.
option_list <- list(
    make_option("--bam", action = "store", type = "character",
        default = NULL,
        help = "Input BAM file"),
    make_option("--bai", action = "store", type = "character",
        default = NULL,
        help = "BAM index file. Only necessary for non-standard file naming."),
    make_option("--coverage", action = "store", type = "character",
        default = NULL,
        help = "Input coverage file (supported file formats are GATK and CNVkit)"),
    make_option("--gcgene", action = "store", type = "character",
        default = NULL,
        help = "Interval file as generated by IntervalFile.R"),
    make_option("--method", action = "store",
        default = formals(PureCN::correctCoverageBias)$method[[2]],
        help = "GC-normalization method [default %default]"),
    make_option("--keepduplicates", action = "store_true",
        default = formals(PureCN::calculateBamCoverageByInterval)$keep.duplicates,
        help = "Count reads marked as duplicates [default %default]"),
    make_option("--outdir", action = "store", type = "character",
        default = NULL,
        help = "Output directory to which results should be written"),
    make_option("--cpu", action = "store", type = "integer",
        default = 1,
        help = "Number of CPUs to use when --bam is a list of BAM files [default %default]"),
    make_option("--seed", action = "store", type = "integer",
        default = NULL,
        help = "Seed for random number generator [default %default]"),
    make_option(c("-v", "--version"), action = "store_true",
        default = FALSE,
        help = "Print PureCN version"),
    make_option(c("-f", "--force"), action = "store_true",
        default = FALSE,
        help = "Overwrite existing files")
)

# Parse the actual command line into a named list of option values.
opt <- parse_args(OptionParser(option_list = option_list))

# --version: print the installed PureCN version and stop. Nothing failed, so
# the script exits with status 0.
if (opt$version) {
    message(as.character(packageVersion("PureCN")))
    q(status = 0)
}

# Make any random steps reproducible when a seed was requested.
if (!is.null(opt$seed)) {
    set.seed(opt$seed)
}

force <- opt$force

bam.file <- opt$bam
index.file <- opt$bai

gatk.coverage <- opt$coverage
gc.gene.file <- opt$gcgene

# Both paths are normalized below with mustWork=TRUE; check for missing
# arguments first so the user gets a clear message instead of an
# "invalid 'path' argument" error from normalizePath(NULL).
if (is.null(opt$outdir)) stop("Need --outdir")
if (is.null(gc.gene.file)) stop("Need --gcgene")

outdir <- normalizePath(opt$outdir, mustWork = TRUE)

gc.gene.file <- normalizePath(gc.gene.file, mustWork = TRUE)

### Calculate coverage from BAM files -----------------------------------------

# Read a list file and return the paths it contains.
#
# The file is parsed as tab-delimited text without a header; only the first
# column is used. Entries are always read as character strings, so that names
# that look like numbers are not type-converted.
#
# Arguments:
#   file  path to the list file
#
# Returns a character vector of the listed paths. Stops with an error that
# names the offending entries if any of them does not exist.
.checkFileList <- function(file) {
    files <- read.delim(file, header = FALSE, colClasses = "character")[, 1]
    # NA entries (e.g. a literal "NA" line) can never exist on disk; report
    # them together with the genuinely missing paths.
    not.found <- files[is.na(files) | !file.exists(files)]
    if (length(not.found) > 0) {
        stop("File(s) listed in ", file, " do not exist: ",
            paste(not.found, collapse = ", "))
    }
    files
}

# Calculate interval coverage for a set of BAM files via BiocParallel.
#
# Arguments:
#   bamFiles         character vector of BAM file paths
#   indexFiles       character vector of BAM index paths, parallel to
#                    bamFiles, or NULL to derive the index from each BAM path
#   outdir           directory receiving the "<bam basename>_coverage.txt"
#                    files
#   gc.gene.file     interval file, passed on as interval.file
#   force            if TRUE, recalculate even when the output file exists
#   cpu              number of workers requested from the BiocParallel backend
#   keep.duplicates  passed on to PureCN::calculateBamCoverageByInterval
#
# Returns a character vector with one coverage file path per BAM file.
getCoverageBams <- function(bamFiles, indexFiles, outdir, gc.gene.file, 
    force=FALSE, cpu=1, keep.duplicates=FALSE) {

    # Function arguments are evaluated lazily. Self-assignment evaluates them
    # here, before the worker closure below is handed to bplapply().
    # keep.duplicates is read by that closure through its enclosing
    # environment, so it is evaluated up front as well.
    bamFiles <- bamFiles
    indexFiles <- indexFiles
    outdir <- outdir
    gc.gene.file <- gc.gene.file
    force <- force
    keep.duplicates <- keep.duplicates

    if (cpu > 1) {
        flog.info("Using %i CPUs with %s.", cpu, class(bpparam())[1])
    }

    # Worker: calculate (or reuse) the coverage file of a single BAM file and
    # return its path.
    .getCoverageBam <- function(bam.file, index.file=NULL, outdir, 
        gc.gene.file, force) {
        output.file <- file.path(outdir, gsub("\\.bam$", "_coverage.txt", 
            basename(bam.file)))
        futile.logger::flog.info("Processing %s...", output.file)
        # The index is passed on without its ".bai" suffix.
        # NOTE(review): presumably the BAM reader appends ".bai" itself --
        # confirm against the Rsamtools documentation.
        if (!is.null(index.file)) {
            index.file <- normalizePath(index.file, mustWork=TRUE)
            index.file <- sub("\\.bai$", "", index.file)
        } else if (file.exists(sub("\\.bam$", ".bai", bam.file))) {
            # Index named sample.bai instead of sample.bam.bai
            index.file <- sub("\\.bam$", "", bam.file)
        } else {
            index.file <- bam.file
        }
        if (file.exists(output.file) && !force) {
            futile.logger::flog.info("%s exists. Skipping... (--force will overwrite)", output.file)
        } else {
            PureCN::calculateBamCoverageByInterval(bam.file=bam.file, 
                interval.file=gc.gene.file, output.file=output.file,
                index.file=index.file, keep.duplicates=keep.duplicates)
        }
        output.file
    }

    # Same backend class as the registered default, with the requested number
    # of workers.
    param <- new(class(bpparam()), workers=cpu)
    # indexFiles[i] is NULL when indexFiles is NULL, which selects the
    # index auto-detection in the worker.
    coverageFiles <- unlist(
        bplapply(seq_along(bamFiles), 
            function(i) .getCoverageBam(bamFiles[i], indexFiles[i], outdir, 
                gc.gene.file, force), 
            BPPARAM=param)
    )

    coverageFiles
}

coverageFiles <- NULL
indexFiles <- NULL

flog.info("Loading PureCN...")
suppressPackageStartupMessages(library(PureCN))

# When --bam was given, calculate coverage for the single BAM file or for
# every BAM file named in a ".list" file.
if (!is.null(bam.file)) {
    bam.file <- normalizePath(bam.file, mustWork=TRUE)
    if (grepl(".list$", bam.file)) {
        bamFiles <- .checkFileList(bam.file)
        if (!is.null(index.file)) {
            if (!grepl(".list$", index.file)) {
                stop("list of BAM files requires list of BAI files.")
            }
            indexFiles <- .checkFileList(index.file)
        }
    } else {
        bamFiles <- bam.file
        # Honour --bai for a single BAM file as well; without this the
        # user-supplied index would be silently ignored.
        if (!is.null(index.file)) {
            if (grepl(".list$", index.file)) {
                indexFiles <- .checkFileList(index.file)
            } else {
                indexFiles <- index.file
            }
        }
    }
    # With explicit index files there must be exactly one per BAM file.
    if (!is.null(indexFiles) && length(bamFiles) != length(indexFiles)) {
        stop("List of BAM files and BAI files of different length.")
    }

    coverageFiles <- getCoverageBams(bamFiles, indexFiles, outdir, 
        gc.gene.file, force, opt$cpu, opt$keepduplicates) 

}

### GC-normalize coverage -----------------------------------------------------

# GC-normalize one coverage file and save the GC-bias plot next to the result.
#
# Arguments:
#   gatk.coverage  path to the input coverage file
#   gc.gene.file   interval file passed on to correctCoverageBias
#   method         normalization method; its lower-cased name becomes part of
#                  the output file name
#   outdir         directory receiving the normalized coverage and the PNG
#   force          if TRUE, overwrite an existing output file
#
# Returns the path of the normalized coverage file, invisibly.
.gcNormalize <- function(gatk.coverage, gc.gene.file, method, outdir, force) {
    suffix <- paste0("_", tolower(method), ".txt")
    pattern <- ".txt$|_interval_summary"
    in.name <- basename(gatk.coverage)
    # If the input name matches neither pattern (e.g. a different file
    # extension), append the suffix so the output never shares the input's
    # file name.
    out.name <- if (grepl(pattern, in.name)) {
        gsub(pattern, suffix, in.name)
    } else {
        paste0(in.name, suffix)
    }
    output.file <- file.path(outdir, out.name)
    # The plot must not share a path with the coverage output, otherwise
    # closing the device would overwrite the normalized coverage.
    outpng.file <- if (grepl("txt$", output.file)) {
        sub("txt$", "png", output.file)
    } else {
        paste0(output.file, ".png")
    }
    if (file.exists(output.file) && !force) {
        flog.info("%s exists. Skipping... (--force will overwrite)", output.file)
    } else {
        png(outpng.file, width=800)
        # Close the device even if the normalization stops with an error.
        on.exit(dev.off(), add=TRUE)
        correctCoverageBias(gatk.coverage, gc.gene.file,
            output.file=output.file, method=method, plot.gc.bias=TRUE)
    }
    invisible(output.file)
}

# Run the GC-normalization when there is anything to normalize: either
# coverage files calculated from BAM files above, or files given via
# --coverage.
if (!(is.null(gatk.coverage) && is.null(coverageFiles))) {
    # No coverage calculated from BAM files: take the input from --coverage,
    # which is either a single coverage file or a ".list" file naming several.
    if (is.null(coverageFiles)) {
        coverageFiles <- if (grepl(".list$", gatk.coverage)) {
            .checkFileList(gatk.coverage)
        } else {
            gatk.coverage
        }
    }
    for (gatk.coverage in coverageFiles) {
        .gcNormalize(gatk.coverage, gc.gene.file, opt$method, outdir, force)
    }
}
    
