## ----setup, include=FALSE--------------------------------------------------
# Set the global knitr chunk option 'echo' to TRUE for this document.
knitr::opts_chunk$set(echo = TRUE)

## ----eval=TRUE, message=FALSE, results='hide'------------------------------
#-- Call packages
library(geneplast)
library(geneplast.data.string.v91)
library(Fletcher2013b)
library(ggplot2)
library(ggpubr)

## ----eval=FALSE, message=FALSE, warning=FALSE------------------------------
#  #-- Load orthology data from the 'geneplast.data.string.v91' package
#  data(gpdata_string_v91)
#  
#  #-- Create an object of class 'OGR' for a reference 'spid'
#  ogr <- groot.preprocess(cogdata=cogdata, phyloTree=phyloTree, spid="9606")

## ----eval=FALSE, message=FALSE, warning=FALSE------------------------------
#  #-- Run the 'groot' function and infer the evolutionary roots
#  ogr <- groot(ogr, nPermutations=1000, verbose=TRUE)

## ----eval=FALSE, message=FALSE, warning=FALSE------------------------------
#  #-- Load regulons
#  data("rtni1st")
#  tni.regulon.summary(rtni1st)

## ----eval=FALSE, message=FALSE, warning=FALSE------------------------------
#  ## This regulatory network comprised of 809 regulons.
#  ## regulatoryElements            Targets              Edges
#  ##                809              14131              47012
#  ## Min.  1st Qu.  Median    Mean  3rd Qu.   Max.
#  ## 0.0     10.0     37.0    58.1    80.0  523.0
#  ## regulatoryElements            Targets              Edges
#  ##                809              14131             617672
#  ## Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#  ##   0      43      449     764   1245    4148
#  ## ---

## ----eval=FALSE, message=FALSE, warning=FALSE------------------------------
#  #-- Put regulons into an 'igraph' object
#  #-- Note: small regulons (n<15 targets) are removed in this step.
#  graph <- tni.graph(rtni1st, gtype = "rmap")
#  
#  #-- Map the 'ogr' object to the 'igraph' object
#  graph <- ogr2igraph(ogr, cogdata, graph, idkey = "ENTREZ")
#  
#  #-- Make a data frame with the gene roots
#  roots_df <- data.frame(SYMBOL = V(graph)$SYMBOL,
#                         ENTREZ = V(graph)$ENTREZ,
#                         Root = V(graph)$Root,
#                         TF_Targets = c("Targets","TFs")[V(graph)$tfs+1])

## ----eval=FALSE------------------------------------------------------------
#  #-- Remove NAs from missing annotation
#  roots_df <- roots_df[complete.cases(roots_df),]
#  
#  #-- Remove genes rooted at the base of the phylogenetic tree
#  roots_df <- roots_df[roots_df$Root<max(roots_df$Root),]
#  rownames(roots_df) <- 1:nrow(roots_df)
#  
#  #-- Check TF and target counts
#  table(roots_df$TF_Targets)

## ----eval=FALSE------------------------------------------------------------
#  ## Targets     TFs
#  ##    6308     307

## ----eval=FALSE------------------------------------------------------------
#  head(roots_df)

## ----eval=FALSE------------------------------------------------------------
#  ##     SYMBOL ENTREZ Root TF_Targets
#  ## 1   CEBPG   1054   19        TFs
#  ## 2   NR4A2   4929   17        TFs
#  ## 3     EN1   2019   17        TFs
#  ## 4    TP53   7157   20        TFs
#  ## 5 GATAD2A  54815   19        TFs
#  ## 6     DR1   1810   23        TFs

## ----eval=FALSE------------------------------------------------------------
#  tail(roots_df)

## ----eval=FALSE------------------------------------------------------------
#  ##         SYMBOL ENTREZ Root TF_Targets
#  ## 6610      F11   2160   19    Targets
#  ## 6611   KCNK18 338567   24    Targets
#  ## 6612  TMEM220 388335   14    Targets
#  ## 6613 C1orf170  84808    7    Targets
#  ## 6614 C16orf96 342346    6    Targets
#  ## 6615    PANX3 116337   13    Targets

## ----eval=FALSE------------------------------------------------------------
#  wilcox.test(Root ~ TF_Targets, data=roots_df)

## ----eval=FALSE------------------------------------------------------------
#  ## Wilcoxon rank sum test with continuity correction
#  ## data:  Root by TF_Targets
#  ## W = 812534, p-value = 1.6e-06
#  ## alternative hypothesis: true location shift is not equal to 0

## ----eval=FALSE------------------------------------------------------------
#  #-- Set roots to display in y-axis
#  roots <- c(4,8,11,13,19,21,25)
#  
#  #-- Set a summary function to display dispersion within the violins
#  data_summary <- function(x) {
#    y <- mean(x); ymin <- y-sd(x); ymax <- y+sd(x)
#    return(c(y=y,ymin=ymin,ymax=ymax))
#  }
#  
#  #-- Build a ggplot object
#  p <- ggplot(roots_df, aes(x=TF_Targets, y=Root)) +
#    geom_violin(aes(fill=TF_Targets), adjust=2, show.legend=F) +
#    scale_y_continuous(breaks=roots, labels=paste("root",roots)) +
#    scale_fill_manual(values=c("#c7eae5","#dfc27d")) +
#    labs(x="TRN elements", y="Root distribution") +
#    scale_x_discrete(limits=c("TFs","Targets")) +
#    theme_classic() +
#    theme(text=element_text(size=20)) +
#    stat_summary(fun.data = data_summary)
#  
#  #-- Generate violin plots
#  p + stat_compare_means(method="wilcox.test",
#                         comparisons =list(c("Targets","TFs")),
#                         label = "p.signif")

## ----eval=FALSE, include=FALSE---------------------------------------------
#  pdf(file = "regulon_roots.pdf", width =  5.5, height = 5)
#  p + stat_compare_means(method="wilcox.test",
#                         comparisons =list(c("Targets","TFs")),
#                         label = "p.signif")
#  dev.off()

## ----eval=FALSE------------------------------------------------------------
#  
#  #-- Get roots for TFs
#  idx <- roots_df$TF_Targets=="TFs"
#  tfroots <- roots_df$Root[idx]
#  names(tfroots) <- roots_df$SYMBOL[idx]
#  
#  #-- Get roots for TF-target genes
#  regulons <- tni.get(rtni1st, what = "regulons", idkey = "ENTREZ")[names(tfroots)]
#  regroots <- lapply(regulons, function(reg){
#    roots_df$Root[roots_df$ENTREZ%in%reg]
#  })
#  tfroots <- tfroots[names(regroots)]
#  
#  #-- Compute root distance between a TF and its targets
#  rootdist <- lapply(names(regroots), function(reg){
#    regroots[[reg]]-tfroots[reg]
#  })
#  names(rootdist) <- names(regroots)
#  
#  #-- Compute median root distance
#  med_rootdist <- unlist(lapply(rootdist, median))
#  
#  #-- Plot a pie chart grouping regulons based on
#  #-- the median root distance
#  cols = c("#1c92d5","grey","#98d1f2")
#  n <- as.numeric(table(sign(med_rootdist)))
#  pie(n, labels = paste(n,"regulons"), col = cols, border="white", cex=1.5)
#  legend("bottomleft",
#         legend = c("TF-target genes rooted before the TF",
#                    "TF-target genes rooted with the TF",
#                    "TF-target genes rooted after the TF"),
#         fill = rev(cols), bty = "n")

## ----eval=FALSE, include=FALSE---------------------------------------------
#  cols = c("#1c92d5","grey","#98d1f2")
#  n <- as.numeric(table(sign(med_rootdist)))
#  pdf(file = "pie_regulon_roots.pdf")
#  pie(n, labels = paste(n,"regulons"), col = cols, border="white", cex=1.5)
#  legend("bottomleft",
#         legend = c("TF-target genes rooted before the TF",
#                    "TF-target genes rooted with the TF",
#                    "TF-target genes rooted after the TF"),
#         fill = rev(cols), bty = "n")
#  dev.off()

## ----eval=FALSE------------------------------------------------------------
#  #-- Plot a boxplot showing individual regulons
#  
#  #-- Sort regulons based on the calculated root distances
#  med_rootdist <- sort(med_rootdist, decreasing = T)
#  rootdist <- rootdist[names(med_rootdist)]
#  regroots <- regroots[names(med_rootdist)]
#  tfroots <- tfroots[names(med_rootdist)]
#  
#  #-- Generate the boxplot
#  plot.new()
#  par(usr=c(c(0,length(rootdist)),range(rootdist)))
#  idx <- sign(med_rootdist)+2
#  cols = c("#1c92d5","grey","#98d1f2")
#  boxplot(rootdist, horizontal= F, outline=FALSE, las=2, axes=FALSE, add=T,
#          pars = list(boxwex = 0.6, boxcol=cols[idx], whiskcol=cols[idx]),
#          pch="|", lty=1, lwd=0.75,
#          col = cols[idx])
#  abline(h=0, lmitre=5, col="#E69F00", lwd=3, lt=2)
#  par(mgp=c(2,0.1,0))
#  axis(side=1, cex.axis=1.2, padj=0.5, hadj=0.5, las=1, lwd=1.5, tcl= -0.2)
#  par(mgp=c(2.5,1.2,0.5))
#  axis(side=2, cex.axis=1.2, padj=0.5, hadj=0.5, las=1, lwd=1.5, tcl= -0.2)
#  legend("topright",legend = c("TF-target genes rooted before the TF",
#                               "TF-target genes rooted with the TF",
#                               "TF-target genes rooted after the TF"), fill = rev(cols), bty = "n")
#  title(xlab = "Regulons sorted by the median distance to TF root", ylab = "Distance to TF root")

## ----label='Session information', eval=TRUE, echo=FALSE--------------------
# Report the R session details (R version, platform and the packages in use)
# so the environment used to run this script is recorded with its output.
sessionInfo()

