## ----echo=FALSE----------------------------------------------------------
suppressPackageStartupMessages(library(org.Hs.eg.db))

## ------------------------------------------------------------------------
library(org.Hs.eg.db)

## ------------------------------------------------------------------------
## Show the columns reported by the org.Hs.eg.db annotation object.
columns(org.Hs.eg.db)

## ------------------------------------------------------------------------
## Show the key types reported by the same object.
keytypes(org.Hs.eg.db)

## ------------------------------------------------------------------------
## Peek at the first few ENTREZID keys.
head(keys(org.Hs.eg.db, keytype = "ENTREZID"))

## ------------------------------------------------------------------------
## Keep those first few ENTREZID keys for the select() calls below.
ids <- head(keys(org.Hs.eg.db, keytype = "ENTREZID"))

## ------------------------------------------------------------------------
## Look up a single column for the saved keys.
select(
  org.Hs.eg.db,
  keys = ids,
  columns = "SYMBOL",
  keytype = "ENTREZID"
)

## ------------------------------------------------------------------------
## Look up two columns at once for the same keys.
select(
  org.Hs.eg.db,
  keys = ids,
  columns = c("GENENAME", "SYMBOL"),
  keytype = "ENTREZID"
)

## ------------------------------------------------------------------------
## Load the exonicParts example data set shipped with parathyroidSE and show
## its first three elements.
library(parathyroidSE)
data("exonicParts")
exonicParts[1:3]

## ------------------------------------------------------------------------
## Flatten the gene_id metadata column into a plain vector of ids.
ids <- unlist(mcols(exonicParts)[["gene_id"]])
head(ids)

## ------------------------------------------------------------------------
## Use those ids as ENSEMBL keys and ask for the matching SYMBOL column.
res <- select(
  org.Hs.eg.db,
  keys = ids,
  columns = "SYMBOL",
  keytype = "ENSEMBL"
)
head(res)

## ------------------------------------------------------------------------
## select() can return more than one row for a key (1:many mappings), in which
## case a positional cbind() of `res` onto the metadata columns would fail or
## misalign. Align the symbols to the ranges by key instead, keeping the first
## SYMBOL reported for each ENSEMBL id (NA where nothing matched).
symbols <- res$SYMBOL[match(ids, res$ENSEMBL)]
## Print the check as before, and also stop if there is not exactly one
## symbol per exonic part.
length(symbols) == length(exonicParts)
stopifnot(length(symbols) == length(exonicParts))
newMcols <- mcols(exonicParts)
newMcols$SYMBOL <- symbols
mcols(exonicParts) <- newMcols
exonicParts[1:3]

## ------------------------------------------------------------------------
## Take the first gene id and request its GO column, keyed by ENSEMBL.
id <- ids[1]
res <- select(
  org.Hs.eg.db,
  keys = id,
  columns = "GO",
  keytype = "ENSEMBL"
)
head(res)

## ------------------------------------------------------------------------
library(GO.db)
## The GO column of the previous result supplies the keys for GO.db.
head(res[["GO"]])
head(select(GO.db, keys = res[["GO"]], columns = "TERM", keytype = "GOID"))

## ----echo=FALSE----------------------------------------------------------
suppressPackageStartupMessages(library(hgu95av2.db))

## ------------------------------------------------------------------------
## Affymetrix HG-U95Av2 array probe IDs of interest; these might be
## obtained from
##
##   tbl <- topTable(efit, coef=2)
##   ids <- tbl[["ID"]]
##
## as part of a more extensive workflow.
ids <- c(
  "39730_at",
  "1635_at",
  "1674_at",
  "40504_at",
  "40202_at"
)

## ------------------------------------------------------------------------
library(hgu95av2.db)

## ------------------------------------------------------------------------
## Inspect what the chip package offers, then fetch PFAM and SYMBOL for the
## probe ids defined above.
columns(hgu95av2.db)
keytypes(hgu95av2.db)
columns <- c("PFAM", "SYMBOL")
select(hgu95av2.db, keys = ids, columns = columns, keytype = "PROBEID")

## ----echo=FALSE----------------------------------------------------------
suppressPackageStartupMessages(library(TxDb.Hsapiens.UCSC.hg19.knownGene))

## ------------------------------------------------------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
## Shorter alias for the TxDb object.
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
## Use the first two GENEID keys and request three transcript columns.
keys <- head(keys(txdb, keytype = "GENEID"), n = 2)
columns <- c("TXNAME", "TXSTART", "TXSTRAND")
select(txdb, keys = keys, columns = columns, keytype = "GENEID")

## ------------------------------------------------------------------------
## Range-based accessor: transcripts.
transcripts(txdb)

## ------------------------------------------------------------------------
## Range-based accessor: exons.
exons(txdb)

## ------------------------------------------------------------------------
## Transcripts again, this time asking for specific metadata columns.
transcripts(txdb, columns = c("tx_id", "tx_name", "gene_id"))

## ------------------------------------------------------------------------
## Transcripts grouped with by = "gene".
transcriptsBy(txdb, by = "gene")

## ----echo=FALSE----------------------------------------------------------
suppressPackageStartupMessages(library(Homo.sapiens))

## ------------------------------------------------------------------------
library(Homo.sapiens)
## First two ENTREZID keys, used to fetch SYMBOL and TXNAME in one select().
keys <- head(keys(Homo.sapiens, keytype = "ENTREZID"), n = 2)
columns <- c("SYMBOL", "TXNAME")
select(Homo.sapiens, keys = keys, columns = columns, keytype = "ENTREZID")

## ------------------------------------------------------------------------
## The same two columns requested through the transcripts() accessor.
transcripts(Homo.sapiens, columns = c("TXNAME", "SYMBOL"))

## ----eval=FALSE----------------------------------------------------------
#  gd <- list(join1 = c(GO.db="GOID", org.Hs.eg.db="GO"),
#        	   join2 = c(org.Hs.eg.db="ENTREZID",
#             TxDb.Hsapiens.UCSC.hg19.knownGene="GENEID"))
#  
#  makeOrganismPackage(pkgname = "Homo.sapiens",
#  	            graphData = gd,
#  		    organism = "Homo sapiens",
#  		    version = "1.0.0",
#  		    maintainer = "Package Maintainer<maintainer@somewhere.org>",
#  		    author = "Some Body",
#  		    destDir = ".",
#  		    license = "Artistic-2.0")

## ------------------------------------------------------------------------
library(Homo.sapiens)
keys <- head(keys(Homo.sapiens, keytype = "ENTREZID"), n = 2)

## ------------------------------------------------------------------------
## ENTREZID keys restricted by the pattern "^2".
head(keys(Homo.sapiens, keytype = "ENTREZID", pattern = "^2"), n = 6)

## ------------------------------------------------------------------------
## SYMBOL keys restricted by the pattern "^MS".
head(keys(Homo.sapiens, keytype = "SYMBOL", pattern = "^MS"), n = 6)

## ------------------------------------------------------------------------
## The same request with fuzzy = TRUE.
head(
  keys(Homo.sapiens, keytype = "SYMBOL", pattern = "^MS", fuzzy = TRUE),
  n = 6
)

## ------------------------------------------------------------------------
## Request ENSEMBL keys while passing column = "SYMBOL" alongside the pattern,
## then look up the SYMBOL for each returned key.
keys <- head(
  keys(
    Homo.sapiens,
    keytype = "ENSEMBL",
    pattern = "^MS",
    column = "SYMBOL"
  ),
  n = 6
)
keys
select(Homo.sapiens, keys = keys, columns = "SYMBOL", keytype = "ENSEMBL")

## ------------------------------------------------------------------------
library(AnnotationHub)

## Create the hub object used by the AnnotationHub examples.
ah <- AnnotationHub()

## ------------------------------------------------------------------------
## Pull one resource out of the hub by name and print it.
res <- ah$goldenpath.hg19.encodeDCC.wgEncodeUwTfbs.wgEncodeUwTfbsMcf7CtcfStdPkRep1.narrowPeak_0.0.1.RData

res

## ------------------------------------------------------------------------
## Size of the hub before any filter is set.
length(ah)

## ------------------------------------------------------------------------
## Filters currently set on the hub.
filters(ah)

## ------------------------------------------------------------------------
columns(ah)

## ------------------------------------------------------------------------
## First few values available for the "Species" key type.
head(keys(ah, keytype = "Species"))

## ------------------------------------------------------------------------
## Set a Species filter, then check the size of the hub again.
filters(ah) <- list(Species = "Bos taurus")

length(ah)

## ----eval=FALSE----------------------------------------------------------
#  d <- display(ah)

## ------------------------------------------------------------------------
library(biomaRt)
## List the first few marts and connect to the "ensembl" mart.
head(listMarts())
ensembl <- useMart("ensembl")
ensembl

## ------------------------------------------------------------------------
## List the first few datasets, then reconnect with the human gene dataset
## selected.
head(listDatasets(ensembl))
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
ensembl

## ------------------------------------------------------------------------
## First few filters offered by the selected dataset.
head(listFilters(ensembl))

## ------------------------------------------------------------------------
## First few attributes offered by the selected dataset.
head(listAttributes(ensembl))

## ------------------------------------------------------------------------
## Map three Affymetrix hg-u133-plus-2 probe ids to Entrez gene ids.
## The Ensembl mart attribute formerly called 'entrezgene' is now named
## 'entrezgene_id'; the old name is rejected as an invalid attribute.
affyids <- c("202763_at", "209310_s_at", "207500_at")
getBM(attributes = c("affy_hg_u133_plus_2", "entrezgene_id"),
      filters = "affy_hg_u133_plus_2",
      values = affyids,
      mart = ensembl)

## ------------------------------------------------------------------------
## Without filters/values, getBM() is asked for the attribute across the whole
## dataset; only the first few rows are shown.
head(getBM(attributes = "affy_hg_u133_plus_2", mart = ensembl))

## ------------------------------------------------------------------------
library(BSgenome.Hsapiens.UCSC.hg19)
## List what the package exposes. Name the package environment explicitly
## rather than relying on it sitting at position 2 of the search path, which
## is not the case if the package was already attached earlier in the session.
ls("package:BSgenome.Hsapiens.UCSC.hg19")
Hsapiens

## ------------------------------------------------------------------------
## Fetch the sequences named by the first two sequence names.
seqNms <- seqnames(Hsapiens)
head(seqNms)
getSeq(Hsapiens, seqNms[1:2])

## ------------------------------------------------------------------------
## Build two stranded ranges and fetch the sequence under each of them.
rngs <- GRanges(
  seqnames = c("chr1", "chr4"),
  ranges = IRanges(
    start = c(100000, 300000),
    end = c(100023, 300037)
  ),
  strand = c("+", "-")
)
rngs
res <- getSeq(Hsapiens, rngs)
res

## ----eval=FALSE----------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install(c("hgu95av2.db", "GO.db"))

## ----eval=FALSE----------------------------------------------------------
#  library(AnnotationDbi)
#  library(GO.db)

## ----eval=FALSE----------------------------------------------------------
#  browseVignettes(package="AnnotationDbi")

## ----eval=FALSE----------------------------------------------------------
#  help.start()

## ------------------------------------------------------------------------
## Look up the ENTREZID and CHR columns for the gene symbol MSX2.
keys <- "MSX2"
columns <- c("ENTREZID", "CHR")
select(org.Hs.eg.db, keys = keys, columns = columns, keytype = "SYMBOL")

## ------------------------------------------------------------------------
## Start from every gene symbol known to org.Hs.eg.db ...
orgSymbols <- keys(org.Hs.eg.db, keytype = "SYMBOL")
## ... and map each of them to its Entrez gene ID(s).
egr <- select(
  org.Hs.eg.db,
  keys = orgSymbols,
  columns = "ENTREZID",
  keytype = "SYMBOL"
)
## Total vs. distinct Entrez gene IDs in the result:
length(egr[["ENTREZID"]])
length(unique(egr[["ENTREZID"]]))
## Total vs. distinct symbols in the result:
length(egr[["SYMBOL"]])
length(unique(egr[["SYMBOL"]]))
## Collect the symbols that occur more than once and look at them more closely.
redund <- egr[["SYMBOL"]]
badSymbols <- redund[duplicated(redund)]
select(
  org.Hs.eg.db,
  keys = badSymbols,
  columns = "ENTREZID",
  keytype = "SYMBOL"
)

## ------------------------------------------------------------------------
## Compare how many SYMBOL keys the chip package and the org package report.
chipSymbols <- keys(hgu95av2.db, keytype = "SYMBOL")
orgSymbols <- keys(org.Hs.eg.db, keytype = "SYMBOL")
length(orgSymbols)
length(chipSymbols)

## ------------------------------------------------------------------------
## Dimensions of the SYMBOL -> ENTREZID table from each package.
dim(
  select(
    org.Hs.eg.db,
    keys = orgSymbols,
    columns = "ENTREZID",
    keytype = "SYMBOL"
  )
)
dim(
  select(
    hgu95av2.db,
    keys = chipSymbols,
    columns = "ENTREZID",
    keytype = "SYMBOL"
  )
)

## ------------------------------------------------------------------------
## Does the org package report fewer columns than the chip package?
length(columns(org.Hs.eg.db)) < length(columns(hgu95av2.db))

## ------------------------------------------------------------------------
## First few SYMBOL -> PROBEID rows from the chip package.
head(
  select(
    hgu95av2.db,
    keys = chipSymbols,
    columns = "PROBEID",
    keytype = "SYMBOL"
  )
)

## ------------------------------------------------------------------------
## Table-style retrieval: every TXID key with its gene id, transcript name and
## chromosome.
res1 <- select(
  TxDb.Hsapiens.UCSC.hg19.knownGene,
  keys = keys(TxDb.Hsapiens.UCSC.hg19.knownGene, keytype = "TXID"),
  columns = c("GENEID", "TXNAME", "TXCHROM"),
  keytype = "TXID"
)

head(res1)

## ------------------------------------------------------------------------
## Range-style retrieval through transcripts(), with gene id and transcript
## name as metadata columns.
res2 <- transcripts(
  TxDb.Hsapiens.UCSC.hg19.knownGene,
  columns = c("gene_id", "tx_name")
)
head(res2)

## ------------------------------------------------------------------------
## The same accessor applied to a different TxDb package.
library(TxDb.Athaliana.BioMart.plantsmart22)
res <- transcripts(
  TxDb.Athaliana.BioMart.plantsmart22,
  columns = "gene_id"
)

## ----eval=FALSE----------------------------------------------------------
#  library(Homo.sapiens)
#  keys <- keys(Homo.sapiens, keytype="TXID")
#  res1 <- select(Homo.sapiens,
#                 keys= keys,
#         	       columns=c("SYMBOL","TXSTART","TXCHROM"), keytype="TXID")
#  
#  head(res1)

## ------------------------------------------------------------------------
library(Homo.sapiens)
## Transcripts with SYMBOL requested as a metadata column.
res2 <- transcripts(Homo.sapiens, columns = "SYMBOL")
head(res2)

## ------------------------------------------------------------------------
## Columns reported by the combined object and by two of the packages used
## above.
columns(Homo.sapiens)
columns(org.Hs.eg.db)
columns(TxDb.Hsapiens.UCSC.hg19.knownGene)
## You might also want to look at this:
transcripts(Homo.sapiens, columns = c("SYMBOL", "CHRLOC"))

## ------------------------------------------------------------------------
library(Homo.sapiens)
## Request ENTREZID keys while passing pattern = "X" and column = "SYMBOL".
xk <- head(
  keys(
    Homo.sapiens,
    keytype = "ENTREZID",
    pattern = "X",
    column = "SYMBOL"
  )
)
xk

## ------------------------------------------------------------------------
## Show the SYMBOL for each of those ENTREZID keys.
select(Homo.sapiens, keys = xk, columns = "SYMBOL", keytype = "ENTREZID")

## ------------------------------------------------------------------------
## Key types reported by the hub object.
keytypes(ah)

## ------------------------------------------------------------------------
## Values available for two of those key types.
keys(ah, keytype = "DataProvider")
head(keys(ah, keytype = "Genome"))

## ------------------------------------------------------------------------
## Clear any existing filters, then set three filters together and check the
## size of the hub.
filters(ah) <- NULL
filters(ah) <- list(
  Species = "Homo sapiens",
  DataProvider = "hgdownload.cse.ucsc.edu",
  Genome = "hg19"
)
length(ah)

## ------------------------------------------------------------------------
## Pull one resource out of the hub by name.
res <- ah$goldenpath.hg19.database.oreganno_0.0.1.RData

## ------------------------------------------------------------------------
library("biomaRt")
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
## Fetch the GO ids recorded for Entrez gene "1".
## The Ensembl mart attribute/filter formerly called 'entrezgene' is now named
## 'entrezgene_id'; the old name is rejected as invalid.
ids <- c("1")
getBM(attributes = c("go_id", "entrezgene_id"),
      filters = "entrezgene_id",
      values = ids,
      mart = ensembl)


## ------------------------------------------------------------------------
library(org.Hs.eg.db)
## The same gene id, this time looked up in org.Hs.eg.db for its GO column.
ids <- "1"
select(
  org.Hs.eg.db,
  keys = ids,
  columns = "GO",
  keytype = "ENTREZID"
)

## ------------------------------------------------------------------------
## Record the R version and attached packages.
sessionInfo()

