| hpGeneListComp {PCAN} | R Documentation |
This function compares a whole gene list to a set of HP terms using a matrix of semantic similarity.
hpGeneListComp(geneList, ssMatByGene, geneSSScore = NULL, ...)
geneList |
a vector providing the genes of interest. |
ssMatByGene |
a list (one element per gene) of matrices
of semantic similarity between HP terms as returned by
compareHPSets. |
geneSSScore |
a vector of semantic similarity scores for all the genes in ssMatByGene list. If not provided these scores are computed from ssMatByGene. |
... |
parameters for hpSetCompSummary. |
A list with the following elements:
The original HP of interest.
The distribution of scores for all genes for the HP of interest.
The semantic similarity by gene.
For each gene, the related HP terms which best fit the HP of interest (colnames of the elements of ssMatByGene).
The median of scores.
According to a wilcox.test comparing genes
of interest to all the other genes.
Gene with the highest score among the genes of interest.
Maximum score.
Quantile of the scores compared to the whole list of gene.
Adjusted quantiles according to Benjamini-Hochberg
(p.adjust).
Patrice Godard
hpGeneHeatmap, compareHPSets,
hpSetCompSummary and hpSetCompBestMatch
## Load every dataset used in the first part of the example from PCAN
data(
    geneByHp, hp_descendants,
    hp_ancestors, hpDef,
    traitDef, geneDef,
    hpByTrait,
    package = "PCAN"
)
## Split the entrez column of geneByHp by its hp column
geneByHp <- unstack(geneByHp, entrez ~ hp)

###########################################
## Compute information content of each HP according to associated genes
ic <- computeHpIC(geneByHp, hp_descendants)
###########################################
## Use case: comparing a gene and a disease
omim <- "612285"   # looked up below in traitDef$id and hpByTrait$id
entrez <- "57545"  # looked up below in geneDef$entrez

## Show the rows describing the disease, then those describing the gene
traitDef[which(traitDef$id == omim), ]
geneDef[which(geneDef$entrez == entrez), ]

## Get HP terms associated to the disease
hpOfInterest <- hpByTrait$hp[which(hpByTrait$id == omim)]

## Get HP terms associated to the gene:
## geneByHp is flipped (stack then unstack) to be indexed by gene
hpByGene <- unstack(stack(geneByHp), ind ~ values)
geneHps <- hpByGene[[entrez]]
## HP Comparison
## Resnik comparison of all the HP terms having an IC value (hpSet1)
## with the HP terms of interest (hpSet2)
hpGeneResnik <- compareHPSets(
    hpSet1 = names(ic),
    hpSet2 = hpOfInterest,
    IC = ic,
    ancestors = hp_ancestors,
    method = "Resnik",
    BPPARAM = SerialParam()
)

## One sub-matrix per gene: the rows of hpGeneResnik selected by the HP
## terms of that gene (drop = FALSE keeps a matrix even for a single term)
hpMatByGene <- lapply(
    hpByGene,
    function(hps) hpGeneResnik[hps, , drop = FALSE]
)

## Summarize each gene matrix with hpSetCompSummary
resnSss <- unlist(
    lapply(hpMatByGene, hpSetCompSummary, method = "bma", direction = "symSim")
)

## Score of the candidate gene
candScore <- resnSss[entrez]
###########################################
## The pathway consensus approach
## What about genes belonging to the same pathways as the candidate
data(rPath, hsEntrezByRPath, package = "PCAN")

## Names of the elements of hsEntrezByRPath which contain the candidate gene
candPath <- names(hsEntrezByRPath)[
    vapply(hsEntrezByRPath, function(genes) entrez %in% genes, logical(1))
]
rPath[which(rPath$Pathway %in% candPath), ]

## NOTE(review): `[[candPath]]` presumes that the candidate belongs to
## exactly one pathway -- confirm before reusing with another gene
rPathRes <- hpGeneListComp(
    geneList = hsEntrezByRPath[[candPath]],
    ssMatByGene = hpMatByGene,
    geneSSScore = resnSss
)
## Density histogram of the scores of all the genes (grey background)
hist(
    resnSss,
    breaks = 100, col = "grey",
    ylim = c(0, 5),
    xlab = expression(Sim[sym]),
    ylab = "Density",
    main = paste(
        "Distribution of symmetric semantic similarity scores for all the",
        length(resnSss), "genes"
    ),
    probability = TRUE
)
## Bin the scores of the genes of the pathway without plotting them...
toAdd <- hist(
    rPathRes$scores,
    breaks = 100,
    plot = FALSE
)
## ...and overlay one translucent bar per bin. rect() is vectorized over
## its coordinates, so no index loop (and no 1:length() idiom) is needed:
## bin k spans breaks[k] to breaks[k + 1] with height density[k].
rect(
    xleft = toAdd$breaks[-length(toAdd$breaks)],
    ybottom = 0,
    xright = toAdd$breaks[-1],
    ytop = toAdd$density,
    col = "#BE000040",
    border = "#800000FF"
)
## Legend: pathway, number of scored genes / number of genes, and p-value
legend(
    "topright",
    paste0(
        "Genes belonging to the ", candPath, " pathway:\n'",
        rPath[which(rPath$Pathway %in% candPath), "Pathway_name"],
        "'\nand with a symmetric semantic similarity score (",
        sum(!is.na(rPathRes$scores)),
        "/",
        length(rPathRes$scores),
        ")\n",
        "p-value: ", signif(rPathRes$p.value, 2)
    ),
    pch = 15,
    col = "#BE000040",
    bty = "n",
    cex = 0.6
)
## Assessing the symmetric semantic similarity for each gene in the pathway
## Keep the genes having a score and label them with their symbol
pathSss <- rPathRes$scores[!is.na(rPathRes$scores)]
names(pathSss) <- geneDef[match(names(pathSss), geneDef$entrez), "symbol"]

## Probabilities of the reference quantiles drawn over the barplot
p <- c(0.25, 0.5, 0.75, 0.95)

## Set the plot margins for this figure; par(opar) restores them at the end
opar <- par(mar = c(7.1, 4.1, 4.1, 2.1))
barplot(
    sort(pathSss),
    las = 2,
    ylab = expression(Sim[sym]),
    main = rPath[which(rPath$Pathway %in% candPath), "Pathway_name"]
)
## Horizontal lines at the quantiles of the scores of all the genes
abline(
    h = quantile(resnSss, probs = p),
    col = "#BE0000",
    lty = c(2, 1, 2, 2),
    lwd = c(2, 2, 2, 1)
)
## Write each probability at x = 0, at the height of its quantile
text(
    x = rep(0, 4),
    y = quantile(resnSss, probs = p),
    labels = p,
    pos = 3,
    offset = 0,
    col = "#BE0000",
    cex = 0.6
)
legend(
    "topleft",
    paste0(
        "Quantiles of the distribution of symmetric semantic similarity\n",
        "scores for all the genes."
    ),
    lty = 1,
    col = "#BE0000",
    cex = 0.6
)
par(opar)
## A heatmap showing the best HP match for each gene in the pathway
## Gene symbols named by entrez ID (first occurrence of each entrez ID only)
geneLabels <- setNames(
    geneDef$symbol[!duplicated(geneDef$entrez)],
    geneDef$entrez[!duplicated(geneDef$entrez)]
)
## HP names named by HP ID
hpLabels <- setNames(hpDef$name, hpDef$id)

hpGeneHeatmap(
    rPathRes,
    genesOfInterest = entrez,
    geneLabels = geneLabels,
    hpLabels = hpLabels,
    clustByGene = TRUE,
    clustByHp = TRUE,
    palFun = colorRampPalette(c("white", "red")),
    goiCol = "blue",
    main = rPath[which(rPath$Pathway %in% candPath), "Pathway_name"]
)
###########################################
## What about genes interacting with the candidate (including itself)
data(hqStrNw, package = "PCAN")

## The candidate can be in either column of hqStrNw: take the gene of the
## other column in both cases, then append the candidate itself
neighbors <- unique(
    c(
        hqStrNw$gene1[which(hqStrNw$gene2 == entrez)],
        hqStrNw$gene2[which(hqStrNw$gene1 == entrez)],
        entrez
    )
)

## Compare the interacting genes to all the genes, reusing the scores
## already computed in resnSss
neighRes <- hpGeneListComp(
    geneList = neighbors,
    ssMatByGene = hpMatByGene,
    geneSSScore = resnSss
)
## Density histogram of the scores of all the genes (grey background)
hist(
    resnSss,
    breaks = 100, col = "grey",
    ylim = c(0, 10),
    xlab = expression(Sim[sym]),
    ylab = "Density",
    main = paste(
        "Distribution of symmetric semantic similarity scores for all the",
        length(resnSss), "genes"
    ),
    probability = TRUE
)
## Bin the scores of the interacting genes without plotting them...
toAdd <- hist(
    neighRes$scores,
    breaks = 100,
    plot = FALSE
)
## ...and overlay one translucent bar per bin. rect() is vectorized over
## its coordinates, so no index loop (and no 1:length() idiom) is needed:
## bin k spans breaks[k] to breaks[k + 1] with height density[k].
rect(
    xleft = toAdd$breaks[-length(toAdd$breaks)],
    ybottom = 0,
    xright = toAdd$breaks[-1],
    ytop = toAdd$density,
    col = "#BE000040",
    border = "#800000FF"
)
## Legend: candidate gene, number of scored genes / number of genes, p-value
legend(
    "topright",
    paste0(
        "Genes interacting with ",
        geneDef[which(geneDef$entrez == entrez), "symbol"],
        " (", entrez, ")",
        "\nand with a symmetric semantic similarity score (",
        sum(!is.na(neighRes$scores)),
        "/",
        length(neighRes$scores),
        ")\n",
        "p-value: ", signif(neighRes$p.value, 2)
    ),
    pch = 15,
    col = "#BE000040",
    bty = "n",
    cex = 0.6
)
## Assessing the symmetric semantic similarity score for each interacting gene
## Keep the genes having a score and label them with their symbol
neighSss <- neighRes$scores[!is.na(neighRes$scores)]
names(neighSss) <- geneDef[match(names(neighSss), geneDef$entrez), "symbol"]

## Probabilities of the reference quantiles drawn over the barplot
p <- c(0.25, 0.5, 0.75, 0.95)

## Set the plot margins for this figure; par(opar) restores them at the end
opar <- par(mar = c(7.1, 4.1, 4.1, 2.1))
barplot(
    sort(neighSss),
    las = 2,
    ylab = expression(Sim[sym]),
    main = paste0(
        "Genes interacting with ",
        geneDef[which(geneDef$entrez == entrez), "symbol"],
        " (", entrez, ")"
    )
)
## Horizontal lines at the quantiles of the scores of all the genes
abline(
    h = quantile(resnSss, probs = p),
    col = "#BE0000",
    lty = c(2, 1, 2, 2),
    lwd = c(2, 2, 2, 1)
)
## Write each probability at x = 0, at the height of its quantile
text(
    x = rep(0, 4),
    y = quantile(resnSss, probs = p),
    labels = p,
    pos = 3,
    offset = 0,
    col = "#BE0000",
    cex = 0.6
)
legend(
    "topleft",
    paste0(
        "Quantiles of the distribution of symmetric semantic similarity\n",
        "scores for all the genes."
    ),
    lty = 1,
    col = "#BE0000",
    cex = 0.6
)
par(opar)
## A heatmap showing the best HP match for each neighbor gene
## The title names the candidate gene whose interactors are displayed,
## like the barplot of the interacting genes, rather than the pathway
## name, which describes a different gene list (rPathRes).
hpGeneHeatmap(
    neighRes,
    genesOfInterest = entrez,
    geneLabels = geneLabels,
    hpLabels = hpLabels,
    clustByGene = TRUE,
    clustByHp = TRUE,
    palFun = colorRampPalette(c("white", "red")),
    goiCol = "blue",
    main = paste0(
        "Genes interacting with ",
        geneDef[which(geneDef$entrez == entrez), "symbol"],
        " (", entrez, ")"
    )
)