## ----style, eval=TRUE, echo=FALSE, results="asis"---------------------------------------
BiocStyle::latex()

## ----knitr, echo=FALSE, results="hide"--------------------------------------------------
library("knitr")
opts_chunk$set(tidy=FALSE,dev="png",fig.show="hide",
               fig.width=4,fig.height=4.5,
               message=FALSE, highlight=FALSE)

## ----Quick start------------------------------------------------------------------------
library(STAN)
data(example)
# Fitting a HMM
hmm_fitted = fitHMM(observations, hmm_ex)
# Fitting a bdHMM
bdhmm_fitted = fitHMM(observations, bdhmm_ex, dirFlags=flags)
# Calculating the viterbi paths
viterbi_bdhmm = getViterbi(bdhmm_fitted$hmm, observations)
viterbi_hmm = getViterbi(hmm_fitted$hmm, observations)

## ----Loading library, results="hide"----------------------------------------------------
library(STAN)

## ----yeast example data, echo=TRUE------------------------------------------------------
data(yeastTF_databychrom_ex)
str(yeastTF_databychrom_ex)
data(yeastTF_probeAnno_ex)
str(yeastTF_probeAnno_ex)

## ----State labeling yeast, echo=TRUE----------------------------------------------------
stateLabel = c("F1", "F2", "F3", "F4", "F5", "R1", "R2", "R3", "R4", "R5", "U1", "U2")

## ----Initial emissions, echo=TRUE-------------------------------------------------------
nStates = 12
data(yeastTF_initMeans)
data(yeastTF_initCovs)
gaussEmission = HMMEmission(type="Gaussian",
                            parameters=list(mean=yeastTF_initMeans, cov=yeastTF_initCovs),
                            nStates=nStates)
gaussEmission

## ----Initial transitions, echo=TRUE-----------------------------------------------------
transMat = matrix(1/nStates, nrow=nStates, ncol=nStates)
initProb = rep(1/nStates, nStates)

## ----Defining the yeast model, echo=TRUE, results="hide"--------------------------------
dirobs = as.integer(c(rep(0,10), 1, 1))

## ----Fitting the bdHMM to yeast data, echo=TRUE, results="hide"-------------------------
bdhmm = bdHMM(initProb=initProb, transMat=transMat,
        emission=gaussEmission, nStates=nStates,
        status="initial", stateLabel=stateLabel,
        transitionsOptim="analytical", directedObs=dirobs)
bdhmm_fitted = fitHMM(yeastTF_databychrom_ex, bdhmm, maxIters=100, verbose=FALSE)

## ----Plotting fitted in yeast, echo=TRUE, eval=FALSE, results="hide"--------------------
#  means_fitted = do.call("rbind",bdhmm_fitted$hmm@emission@parameters$mean, )[c(1:5, 11:12),]
#  library(gplots)
#  heat = c("dark green", "dark green", "dark green", "gold", "orange", "red")
#  colfct = colorRampPalette(heat)
#  colpal = colfct(200)
#  colnames(means_fitted) = colnames(yeastTF_databychrom_ex[[1]])
#  rownames(means_fitted) = c(paste("F", 1:5, sep=""), paste("U", 1:2, sep=""))
#  heatmap.2(means_fitted, col=colpal, trace="none", cexCol=0.9, cexRow=0.9,
#            cellnote=round(means_fitted,1), notecol="black", dendrogram="row",
#            Rowv=TRUE, Colv=FALSE, notecex=0.9)

## ----Plotting the means, echo=FALSE, results="hide"-------------------------------------
library(gplots)
pdf("means.pdf")
means_fitted = do.call("rbind",bdhmm_fitted$hmm@emission@parameters$mean, )[c(1:5, 11:12),]
library(gplots)
heat = c("dark green", "dark green", "dark green", "gold", "orange", "red")
colfct = colorRampPalette(heat)
colpal = colfct(200)
colnames(means_fitted) = colnames(yeastTF_databychrom_ex[[1]])
rownames(means_fitted) = c(paste("F", 1:5, sep=""), paste("U", 1:2, sep=""))
heatmap.2(means_fitted, col=colpal, trace="none", cexCol=0.9, cexRow=0.9,
          cellnote=round(means_fitted,1), notecol="black", dendrogram="row", 
          Rowv=TRUE, Colv=FALSE, notecex=0.9)
dev.off()

## ----Calculating viterbi path in yeast, echo=TRUE, results="hide"-----------------------
yeastTF_viterbi = getViterbi(bdhmm_fitted$hmm, yeastTF_databychrom_ex)
str(yeastTF_viterbi)

## ----Retrieving UCSC genome annotation, echo=TRUE, eval=FALSE, results="hide"-----------
#  library(Gviz)
#  chr = "chrIV"
#  gen = "sacCer3"
#  gtrack <- GenomeAxisTrack()
#  yeastTF_SGDGenes <- UcscTrack(genome = gen, chromosome = chr,
#          track = "SGD Gene", from=1217060, to=1225000, trackType = "GeneRegionTrack",
#          rstarts = "exonStarts", rends = "exonEnds", gene = "name",
#          symbol = "name", transcript = "name", strand = "strand",
#          fill = "blue", name = "SGD Genes", col="black")

## ----Creating yeast Gviz data tracks, echo=TRUE, eval=FALSE, results="hide"-------------
#  faccols = hcl(h = seq(15, 375 - 360/dim(yeastTF_databychrom_ex$chr04)[2],
#                length = dim(yeastTF_databychrom_ex$chr04)[2])%%360, c = 100, l = 65)
#  names(faccols) = colnames(yeastTF_databychrom_ex$chr04)
#  
#  dlist=list()
#  for(n in colnames(yeastTF_databychrom_ex$chr04)) {
#      dlist[[n]] = DataTrack(data = yeastTF_databychrom_ex$chr04[,n],
#                   start = yeastTF_probeAnno_ex$chr04,
#                   end = yeastTF_probeAnno_ex$chr04+8,
#                   chromosome = "chrIV", genome=gen,
#                   name = n, type="h", col=faccols[n])
#  }

## ----Creating yeast Gviz viterbi tracks, echo=TRUE, eval=FALSE, results="hide"----------
#  library(GenomicRanges)
#  library(IRanges)
#  myViterbiDirs = list(F=c("F1", "F2", "F3", "F4", "F5"), U=c("U1", "U2"),
#                       R=c("R1", "R2", "R3", "R4", "R5"))
#  myViterbiPanels = list()
#  cols = rainbow(7)
#  cols = cols[c(1:5,1:5,6:7)]
#  names(cols) = stateLabel
#  
#  for(dir in c("F", "U", "R")) {
#      myPos = yeastTF_probeAnno_ex$chr04 >= 1217060 & yeastTF_probeAnno_ex$chr04 <= 1225000
#      myRle = Rle(yeastTF_viterbi$chr04[myPos])
#      currItems = which(myRle@values %in% myViterbiDirs[[dir]])
#  
#      start = yeastTF_probeAnno_ex$chr04[myPos][start(myRle)][currItems]
#      width = myRle@lengths[currItems]
#      ids = as.character(myRle@values[currItems])
#      values = as.character(myRle@values[currItems])
#  
#      myViterbiPanels[[dir]] = AnnotationTrack(range=GRanges(seqnames=rep("chrIV",
#          length(currItems)), ranges=IRanges(start=start, width=width*8, names=values)),
#          genome=gen, chromosome=chr, name=paste("Viterbi\n", "(", dir, ")", sep=""),
#          id=ids[order(start)], shape="box",fill=cols[values[order(start)]], col="black",
#          stacking="dense")
#  }

## ----Gviz plot  yeast, echo=TRUE, eval=FALSE, results="hide"----------------------------
#  sizes = rep(1,17)
#  sizes[14] = 2
#  sizes[15:17] = 0.7
#  sizes[1] = 1.3
#  plotTracks(c(list(gtrack), dlist, list(yeastTF_SGDGenes), myViterbiPanels),
#             from=1217060, to=1225000, sizes=sizes, showFeatureId=TRUE, featureAnnotation="id",
#             fontcolor.feature="black", cex.feature=0.7, background.title="darkgrey", showId=TRUE)

## ----Plotting the result2, echo=FALSE, results="hide"-----------------------------------
#library(colorspace)
pdf("yeast_ex.pdf", width=7*1.2, height=7*1.5)
library(Gviz)
chr = "chrIV"
gen = "sacCer3"
gtrack <- GenomeAxisTrack()
data(yeastTF_SGDGenes)


faccols = hcl(h = seq(15, 375 - 360/dim(yeastTF_databychrom_ex$chr04)[2], 
				length = dim(yeastTF_databychrom_ex$chr04)[2])%%360, c = 100, l = 65)
names(faccols) = colnames(yeastTF_databychrom_ex$chr04)

dlist=list()
for(n in colnames(yeastTF_databychrom_ex$chr04)) {
	dlist[[n]] = DataTrack(data = yeastTF_databychrom_ex$chr04[,n], 
			start = yeastTF_probeAnno_ex$chr04, 
			end = yeastTF_probeAnno_ex$chr04+8, 
			chromosome = "chrIV", genome=gen,
			name = n, type="h", col=faccols[n])
}

library(GenomicRanges)
library(IRanges)
myViterbiDirs = list(F=c("F1", "F2", "F3", "F4", "F5"), U=c("U1", "U2"), 
		R=c("R1", "R2", "R3", "R4", "R5"))
myViterbiPanels = list()
cols = rainbow(7)
cols = cols[c(1:5,1:5,6:7)]
names(cols) = stateLabel

for(dir in c("F", "U", "R")) {
	myPos = yeastTF_probeAnno_ex$chr04 >= 1217060 & yeastTF_probeAnno_ex$chr04 <= 1225000
	myRle = Rle(yeastTF_viterbi$chr04[myPos])
	currItems = which(myRle@values %in% myViterbiDirs[[dir]])
	
	start = yeastTF_probeAnno_ex$chr04[myPos][start(myRle)][currItems]
	width = myRle@lengths[currItems]
	ids = as.character(myRle@values[currItems])
	values = as.character(myRle@values[currItems])
	
	myViterbiPanels[[dir]] = AnnotationTrack(range=GRanges(seqnames=rep("chrIV", 
							length(currItems)), ranges=IRanges(start=start, width=width*8, names=values)),
			genome=gen, chromosome=chr, name=paste("Viterbi\n", "(", dir, ")", sep=""), 
			id=ids[order(start)], shape="box",fill=cols[values[order(start)]], col="black", 
			stacking="dense")
}

sizes = rep(1,17)
sizes[14] = 2
sizes[15:17] = 0.7
sizes[1] = 1.3
plotTracks(c(list(gtrack), dlist, list(yeastTF_SGDGenes), myViterbiPanels), 
		from=1217060, to=1225000, sizes=sizes, showFeatureId=TRUE, featureAnnotation="id", 
		fontcolor.feature="black", cex.feature=0.7, background.title="darkgrey", showId=TRUE)

dev.off()

## ----Loading human data, echo=TRUE, results="hide"--------------------------------------
data(humanCD4T_signal_ex)
data(humanCD4T_probeAnno_ex)
nStates = 8
stateLabel = c("F1", "F2", "F3", "F4", "R1", "R2", "R3", "R4")

## ----Showing the flag sequence, echo=TRUE-----------------------------------------------
data(humanCD4T_flags_ex)
humanCD4T_flags_ex$chr7[600:650]

## ----Fitting the bdHMM to the human CD4T cell data, echo=TRUE, results="hide"-----------
data(humanCD4T_initMeans)
data(humanCD4T_initCovs)
gaussEmission = HMMEmission(type="Gaussian",
		parameters=list(mean=humanCD4T_initMeans, cov=humanCD4T_initCovs),
		nStates=nStates)
transMat = matrix(1/nStates, nrow=nStates, ncol=nStates)
initProb = rep(1/nStates, nStates)
dirobs = as.integer(rep(0,13))
bdhmm = bdHMM(initProb=initProb, transMat=transMat,
		emission=gaussEmission, nStates=nStates,
		status="initial", stateLabel=stateLabel,
		transitionsOptim="analytical", directedObs=dirobs)
mybdHMM_CD4T = fitHMM(humanCD4T_signal_ex, bdhmm, maxIters=1000, dirFlags=humanCD4T_flags_ex)

## ----Defining state directionality, echo=TRUE, results="hide", eval=FALSE---------------
#  posterior_bdhmm = getPosterior(mybdHMM_CD4T$hmm, obs=humanCD4T_signal_ex)
#  
#  myTwins = 5:8
#  score = rep(0,4)
#  for(i in 1:(nStates/2)) {
#      numer = sum(abs(posterior_bdhmm$chr7[,i]-posterior_bdhmm$chr7[,myTwins[i]]))
#      denom = sum((posterior_bdhmm$chr7[,i]+posterior_bdhmm$chr7[,myTwins[i]]))
#      score[i] = numer/denom
#  }
#  
#  names(score) = c("F1/R1", "F2/R2", "F3/R3", "F4/R4")
#  barplot(score, col=rainbow(4), cex.names=0.8, ylim=c(0,1))
#  abline(h=0.5, lty=3)

## ----Calculating the viterbi path, echo=TRUE, results="hide"----------------------------
humanCD4T_viterbi = getViterbi(mybdHMM_CD4T$hmm, humanCD4T_signal_ex)
stateLabel2 = c("F1", "F2", "U1", "U2", "R1", "R2", "U1", "U2")
humanCD4T_viterbi_collapsed = lapply(humanCD4T_viterbi, 
                              function(chrom) factor(stateLabel2[chrom]))

## ----Plotting data and viterbi state path 1, echo=TRUE, results="hide", eval=FALSE------
#  library(Gviz)
#  data(humanCD4T_ucscGenes)
#  chr = "chr7"
#  gen = "hg18"
#  gtrack <- GenomeAxisTrack()
#  humanCD4T_ideogramChr7 <- IdeogramTrack(genome = gen, chromosome = chr)
#  
#  faccols = c("#FB9EB1", "#FB9EB1", "#FB9EB1", "#FB9EB1", "#FB9EB1", "#DAB36A", "#DAB36A",
#              "#DAB36A", "#7FBFF5", "#7FBFF5", "#7FBFF5", "#7FBFF5", "#7FBFF5")
#  names(faccols) = colnames(humanCD4T_signal_ex$chr7)
#  dlist=list()
#  for(n in colnames(humanCD4T_signal_ex$chr7)) {
#      dlist[[n]] = DataTrack(data=humanCD4T_signal_ex$chr7[,n], start=humanCD4T_probeAnno_ex$chr7,
#                   end = humanCD4T_probeAnno_ex$chr7+200, chromosome="chr7", genome=gen, name=n,
#                   type="h", col=faccols[n])
#  }
#  
#  myViterbiDirs = list(F=c("F1", "F2"), U=c("U1", "U2"), R=c("R1", "R2"))
#  myViterbiPanels = list()
#  cols = rainbow(4)
#  cols = cols[c(1:2,1:2,3:4)]
#  
#  for(dir in c("F", "U", "R")) {
#      myRle = Rle(humanCD4T_viterbi_collapsed$chr7)
#      currItems = which(myRle@values %in% myViterbiDirs[[dir]])
#  	
#      start = humanCD4T_probeAnno_ex$chr7[start(myRle)][currItems]
#      width = myRle@lengths[currItems]
#      ids = myRle@values[currItems]
#      values = myRle@values[currItems]
#      currGRange = GRanges(seqnames=rep("chr7", length(currItems)),ranges=IRanges(start=start,
#                           width=width*200, names=values[currItems]))
#  	
#      myViterbiPanels[[dir]] = AnnotationTrack(range=currGRange, genome="hg18", chromosome="chr7",
#                     name=paste("Viterbi\n", "(", dir, ")", sep=""), id=ids[order(start)],
#                     shape="box",fill=cols[values[order(start)]], col="black", stacking="dense")
#  }
#  
#  flagpos = humanCD4T_probeAnno_ex$chr7[range(which(humanCD4T_flags_ex$chr7 == "R"))]
#  flagpanel = list(AnnotationTrack(start=flagpos[1], end=flagpos[2], feature="flags",
#                  id="flags-1", strand="-", chromosome="chr7", genome="hg18",
#                  stacking="squish", name="flags", fill="coral", col="black", shape="arrow"))
#  
#  
#  sizes = rep(1,20)
#  sizes[16] = 2
#  sizes[17:20] = 0.7
#  sizes[1] = 0.5
#  plotTracks(c(list(humanCD4T_ideogramChr7), list(gtrack), dlist, list(humanCD4T_ucscGenes),
#                    flagpanel, myViterbiPanels), from=134600000, to=135350000, sizes=sizes,
#                    showFeatureId=TRUE, featureAnnotation="id", fontcolor.feature="black",
#                    cex.feature=0.7, background.title="darkgrey")

## ----Real plotting, echo=FALSE, results="hide", eval=TRUE-------------------------------
posterior_bdhmm = getPosterior(mybdHMM_CD4T$hmm, obs=humanCD4T_signal_ex)

myTwins = 5:8
score = rep(0,4)
for(i in 1:(nStates/2)) {
	numer = sum(abs(posterior_bdhmm$chr7[,i]-posterior_bdhmm$chr7[,myTwins[i]]))
	denom = sum((posterior_bdhmm$chr7[,i]+posterior_bdhmm$chr7[,myTwins[i]]))
	score[i] = numer/denom
}

names(score) = c("F1/R1", "F2/R2", "F3/R3", "F4/R4")
pdf("dir_score_human.pdf")
barplot(score, col=rainbow(4), cex.names=0.8, ylim=c(0,1))
abline(h=0.5, lty=3)
dev.off()


pdf("human_results.pdf", width=7*1.2, height=7*1.5)

library(Gviz)
data(humanCD4T_ucscGenes)
chr = "chr7"
gen = "hg18"
gtrack <- GenomeAxisTrack()
humanCD4T_ideogramChr7 <- IdeogramTrack(genome = gen, chromosome = chr)

faccols = c("#FB9EB1", "#FB9EB1", "#FB9EB1", "#FB9EB1", "#FB9EB1", "#DAB36A", "#DAB36A",
		"#DAB36A", "#7FBFF5", "#7FBFF5", "#7FBFF5", "#7FBFF5", "#7FBFF5")
names(faccols) = colnames(humanCD4T_signal_ex$chr7)
dlist=list()
for(n in colnames(humanCD4T_signal_ex$chr7)) {
	dlist[[n]] = DataTrack(data=humanCD4T_signal_ex$chr7[,n], start=humanCD4T_probeAnno_ex$chr7, 
			end = humanCD4T_probeAnno_ex$chr7+200, chromosome="chr7", genome=gen, name=n, 
			type="h", col=faccols[n])
}

myViterbiDirs = list(F=c("F1", "F2"), U=c("U1", "U2"), R=c("R1", "R2"))
myViterbiPanels = list()
cols = rainbow(4)
cols = cols[c(1:2,1:2,3:4)]

for(dir in c("F", "U", "R")) {
	myRle = Rle(humanCD4T_viterbi_collapsed$chr7)
	currItems = which(myRle@values %in% myViterbiDirs[[dir]])
	
	start = humanCD4T_probeAnno_ex$chr7[start(myRle)][currItems]
	width = myRle@lengths[currItems]
	ids = myRle@values[currItems]
	values = myRle@values[currItems]
	currGRange = GRanges(seqnames=rep("chr7", length(currItems)),ranges=IRanges(start=start, 
					width=width*200, names=values[currItems]))
	
	myViterbiPanels[[dir]] = AnnotationTrack(range=currGRange, genome="hg18", chromosome="chr7", 
			name=paste("Viterbi\n", "(", dir, ")", sep=""), id=ids[order(start)],  
			shape="box",fill=cols[values[order(start)]], col="black", stacking="dense")
}

flagpos = humanCD4T_probeAnno_ex$chr7[range(which(humanCD4T_flags_ex$chr7 == "R"))]
flagpanel = list(AnnotationTrack(start=flagpos[1], end=flagpos[2], feature="flags",
                 id="flags-1", strand="-", chromosome="chr7", genome="hg18",
                 stacking="squish", name="flags", fill="coral", col="black", shape="arrow"))


sizes = rep(1,20)
sizes[16] = 2
sizes[17:20] = 0.7
sizes[1] = 0.5
plotTracks(c(list(humanCD4T_ideogramChr7), list(gtrack), dlist, list(humanCD4T_ucscGenes), 
           flagpanel, myViterbiPanels), from=134600000, to=135350000, sizes=sizes,  
           showFeatureId=TRUE, featureAnnotation="id", fontcolor.feature="black",  
		   cex.feature=0.7, background.title="darkgrey")
dev.off()

## ----kmeans, echo=TRUE, eval=TRUE-------------------------------------------------------
nStates = 7
myMat = do.call("rbind", yeastTF_databychrom_ex)
myMat = myMat[apply(myMat, 1, function(x) all(! is.na(x))),]
myMat[, c("YPDexprW", "YPDexprC")] = t(apply(myMat[, c("YPDexprW", "YPDexprC")], 1,
                                             sort, decreasing=TRUE))
km = kmeans(myMat, centers=nStates, iter.max=1000, nstart=100)$centers
km[order(km[,"YPDexprW"], decreasing=TRUE),]

## ----sessInfo, results="asis", echo=FALSE-----------------------------------------------
toLatex(sessionInfo())

