
## ----echo=FALSE,results="hide"-------------------------------------------
# Evaluate date(), which returns the current system date and time as a string.
# The chunk options above (echo=FALSE, results="hide") keep both the call and
# its value out of the rendered document.
date()


## ----echo=FALSE,results="hide"-------------------------------------------
# Placeholder definition: accepts any arguments and always returns NULL.
# The only visible use of this name is in a commented-out (eval=FALSE) chunk
# further down. Original author's open question: does commented, unevaluated
# code get checked?
scoresCis <- function(...) {
  NULL
}
suppressPackageStartupMessages({
library(knitcitations)
library(bibtex)
allbib = read.bibtex("allbib.bib")
library(GGtools)
library(yri1kgv)
library(snpStats)
library(scatterplot3d)
library(lumi)
library(parallel)
library(foreach)
library(biglm)
library(rmeta)
})


## ----dolo,echo=FALSE-----------------------------------------------------
# Restore the objects stored in f1.rda into the current environment.
# NOTE(review): the `f1` object inspected in the next evaluated chunk is
# presumably supplied by this file -- confirm.
load(file = "f1.rda")


## ----lkex,eval=FALSE-----------------------------------------------------
## cc = new("CisConfig") # take a default configuration
## chrnames(cc) = "21"   # confine to chr21
## estimates(cc) = FALSE # no point estimates needed
## f1 <- All.cis( cc )   # compute the tests; can be slow without attention
##                       # to parallelization


## ----lookf1,echo=TRUE----------------------------------------------------
# Inspect f1: number of elements, the first three elements, and its metadata.
length(f1)
f1[seq_len(3)]
metadata(f1)


## ----demoy,fig=TRUE,fig.width=7,fig.height=4,echo=FALSE,results="hide"----
# Two side-by-side plot_EvG() panels for one probe / SNP pair on chr20:
# the left panel uses the object returned by getSS(), the right panel uses the
# result of clipPCs(c20, 1:10).
library(yri1kgv)

# Reuse c20 / c20f if they already exist, so re-running this chunk skips the
# getSS() and clipPCs() calls.
if (!exists("c20")) {
  c20 <- getSS("yri1kgv", "chr20")
}
if (!exists("c20f")) {
  c20f <- clipPCs(c20, 1:10)
}

# Switch to a 1 x 2 layout, remembering the previous setting so it can be
# restored afterwards; otherwise the two-panel layout leaks into every later
# plot drawn on the same device.
old_par <- par(mfrow = c(1, 2))
plot_EvG(probeId("o67h4JQSuEa02CJJIQ"), rsid("rs2259928"), c20,
  main = "observed expr.")
plot_EvG(probeId("o67h4JQSuEa02CJJIQ"), rsid("rs2259928"), c20f,
  main = "10 expr. PC removed")
par(old_par)


## ----bag,fig=TRUE,fig.width=4,fig.height=4,echo=FALSE--------------------
# 3-D scatterplot built from the output of g2post() on raw codes 1..253.
library(snpStats)
library(scatterplot3d)

# Raw byte codes 1 through 253 are handed to g2post(); the result is used
# below as a matrix with three columns.
tmp <- as.raw(1:253)
yy <- g2post(tmp)

# Weight the three columns by 0, 1 and 2 to obtain one value per row.
EB <- yy %*% c(0, 1, 2)

# Columns 1 and 3 go on the horizontal axes, the weighted sum on the vertical.
scatterplot3d(
  yy[, 1], yy[, 3], EB,
  xlab = "Pr(A/A)",
  ylab = "Pr(B/B)",
  zlab = "mean num. B"
)


## ----bag2----------------------------------------------------------------
# Obtain the chr22 object from the yri1kgv package, display it, and keep the
# first five feature names for the lookups in later chunks.
library(GGtools)
library(yri1kgv)
library(lumiHumanAll.db)

# Skip the getSS() call when y22 is already defined.
if (!exists("y22")) {
  y22 <- getSS("yri1kgv", "chr22")
}
y22
dim(exprs(y22))

# First five feature names of y22.
fn <- featureNames(y22)[seq_len(5)]


## ----getseq--------------------------------------------------------------
library(lumi)
# Get the 50mer for each probe.
id2seq(fn)
# Annotation for the same probes is looked up next.


## ----getann--------------------------------------------------------------
# Query lumiHumanAll.db with the selected probe IDs as keys, returning the
# SYMBOL, CHR and ENTREZID columns.
select(
  lumiHumanAll.db,
  keys = fn,
  keytype = "PROBEID",
  columns = c("SYMBOL", "CHR", "ENTREZID")
)


## ----getgen--------------------------------------------------------------
# Access to genotypes: take the first element of smList(y22).
gt22 <- smList(y22)[[1]]

# Show the top-left 5 x 5 corner coerced to character.
as(gt22[1:5, 1:5], "character")

# Some information on genotypes: col.summary() of gt22, first ten rows shown.
cs22 <- col.summary(gt22)
cs22[1:10, ]


## ----showscript,eval=FALSE-----------------------------------------------
## library(parallel)
## newcl = makePSOCKcluster(c("master", paste0("node00", 1:3)))
## library(foreach)
## library(doParallel)
## registerDoParallel(cores=8)  # may want to keep at 5
## 
## library(GGtools)
## ceuDemoRecov = try(ciseqByCluster( newcl,
##    chromsToRun=19:22, finaltag="partceu100k",
##    outprefix="ceurun",
##    ncoresPerNode=8, targetfolder="/freshdata/CEU_DEMO"  ))
## save(ceuDemoRecov, file="ceuDemoRecov.rda")
## stopCluster(newcl)
## stopImplicitCluster()
## sessionInfo()


## ----coded,eval=FALSE----------------------------------------------------
## library(data.table)
## load("partceu100k_dt.rda")
## scoresCis("CPNE1", partceu100k_dt)


## ----disc----------------------------------------------------------------
# Load partceu100k_dt.rda and derive categorical predictors from the columns
# of partceu100k_dt.
load("partceu100k_dt.rda")

# mindist binned at the fixed break points below.
distcat <- cut(
  partceu100k_dt$mindist,
  breaks = c(-1, 1, 1000, 5000, 10000, 50000, 100001)
)

# fdr binned, then the top bin "(0.2,1.01]" is made the reference level.
fdrcat <- relevel(
  cut(partceu100k_dt$fdr, breaks = c(-.01, .005, .05, .1, .2, 1.01)),
  ref = "(0.2,1.01]"
)

# MAF binned at the fixed break points below.
mafcat <- cut(partceu100k_dt$MAF, breaks = c(0, .05, .1, .2, .3, .51))

# Numeric 0/1 flag: 1 when chromcat878 is one of the two promoter labels.
approm <- as.numeric(
  partceu100k_dt$chromcat878 %in% c("1_Active_Promoter", "3_Poised_Promoter")
)


## ----fit-----------------------------------------------------------------
# Append the derived predictors to the table, draw a reproducible random half
# of the row indices as the training set, and fit a binomial model for
# isgwashit on those rows with bigglm().
partceu100k_dt <- cbind(partceu100k_dt, distcat, fdrcat, mafcat, approm)

# Fixed seed so the train / hold-out split is the same on every run.
set.seed(1234)
# seq_len() rather than 1:nrow(): for a zero-row table 1:0 would be c(1, 0).
train <- sample(seq_len(nrow(partceu100k_dt)),
  size = floor(nrow(partceu100k_dt) / 2), replace = FALSE)

library(biglm)
# `family` is spelled out in full; the earlier `fam=` only worked through
# partial argument matching.
b1 <- bigglm(isgwashit ~ distcat + fdrcat + mafcat + approm,
  family = binomial(), data = partceu100k_dt[train, ], maxit = 30)


## ----cali----------------------------------------------------------------
# Predict on the rows not used for training, bin the predictions, and compare
# each bin with the mean of isgwashit among the rows that fall in it.
pp <- predict(b1, newdata = partceu100k_dt[-train, ], type = "response")
summary(pp)

# Fixed bin edges; predictions outside (0, 0.21] get NA and belong to no bin.
cpp <- cut(pp, breaks = c(0, .025, .05, .12, .21))
table(cpp)

# Mean of isgwashit per prediction bin, over the held-out rows.
sapply(split(partceu100k_dt$isgwashit[-train], cpp), mean)


## ----demomodco,fig=TRUE,fig.width=7,fig.height=4-------------------------
# Forest plot of the coefficients of b1.
# summary(b1)$mat is computed once and reused for both the row labels and the
# estimates (first column).
coef_mat <- summary(b1)$mat
# One-column label matrix; `ncol` is spelled out (was the partial match `nc`).
tmat <- matrix(rownames(coef_mat), ncol = 1)
est <- coef_mat[, 1]

library(rmeta)
# NOTE(review): the lower/upper bounds are the estimates shifted by a fixed
# .01, not intervals taken from the fit -- confirm this is intended.
forestplot(tmat, est, est - .01, est + .01, xlog = TRUE,
  boxsize = .35, graphwidth = unit(3, "inches"),
  xticks = exp(seq(-4, 2, 2)))


## ----results='asis',echo=FALSE-------------------------------------------
# Emit the bibliography; the chunk option results='asis' writes the output
# into the document unmodified.
# NOTE(review): presumably knitcitations::bibliography() (knitcitations is
# attached at the top of the file) -- confirm. The trailing comment is a
# disabled style="markdown" argument left by the author.
bibliography() #style="markdown")


## ----sess----------------------------------------------------------------
# Print the R version, platform and attached/loaded packages so the run can
# be reproduced.
sessionInfo()


