| plotPositives {benchdamic} | R Documentation |
Plot the difference between the number of true positives (TP)
and false positives (FP) for each method and for each 'top' threshold
provided by the createPositives() function.
plotPositives(positives, cols = NULL)
positives | the object returned by the createPositives() function.
cols | named vector of colors (default cols = NULL).
Value: a ggplot2 object.
See also: getPositives, createPositives.
# Load the example 16S plaque dataset and the genus-level metabolism table
data("ps_plaque_16S")
data("microbial_metabolism")
# Index the metabolism table by genus so rows can be looked up by name
row.names(microbial_metabolism) <- microbial_metabolism$Genus
# Pull the "GENUS" column out of the phyloseq tax_table slot
genera <- phyloseq::tax_table(ps_plaque_16S)[, "GENUS"]
# Pair every OTU with the metabolism type recorded for its genus
priorInfo <- data.frame(
  genera,
  Type = microbial_metabolism[genera, "Type"]
)
# Genera without a match are labelled "Unknown"
unknown_metabolism <- is.na(priorInfo$Type)
priorInfo$Type[unknown_metabolism] <- "Unknown"
priorInfo[["Type"]] <- factor(priorInfo[["Type"]])
# Build a more informative name from the row name and the genus
priorInfo[["newNames"]] <- paste0(
  rownames(priorInfo),
  priorInfo[["GENUS"]]
)
# Differential abundance (DA) analysis
# Attach the scaling factors used by the DA calls below
ps_plaque_16S <- norm_edgeR(object = ps_plaque_16S, method = "TMM")
ps_plaque_16S <- norm_CSS(object = ps_plaque_16S, method = "median")
# Run limma once per normalization and collect the results in a list
Plaque_16S_DA <- within(list(), {
  # limma using the "TMM" normalization
  da.limma <- DA_limma(
    object = ps_plaque_16S,
    design = ~ 1 + HMP_BODY_SUBSITE,
    coef = 2,
    norm = "TMM"
  )
  # limma using the "CSSmedian" normalization
  da.limma.css <- DA_limma(
    object = ps_plaque_16S,
    design = ~ 1 + HMP_BODY_SUBSITE,
    coef = 2,
    norm = "CSSmedian"
  )
})
# Tally TPs and FPs among the top 1, top 2, ..., top 20 features.
# Because a direction column is supplied, features are ranked by the
# absolute value of "logFC".
positives <- createPositives(
  object = Plaque_16S_DA,
  priorKnowledge = priorInfo,
  enrichmentCol = "Type",
  namesCol = "newNames",
  slot = "pValMat",
  colName = "rawP",
  type = "pvalue",
  direction = "logFC",
  threshold_pvalue = 1,
  threshold_logfc = 0,
  top = 1:20,
  alternative = "greater",
  verbose = FALSE,
  # Direction/type pairs counted as true positives
  TP = list(
    c("DOWN Abundant", "Anaerobic"),
    c("UP Abundant", "Aerobic")
  ),
  # Direction/type pairs counted as false positives
  FP = list(
    c("DOWN Abundant", "Aerobic"),
    c("UP Abundant", "Anaerobic")
  )
)
# Draw the TP - FP difference for each method at every 'top' threshold
plotPositives(positives = positives)