maSigPro              package:maSigPro              R Documentation

_W_r_a_p_p_i_n_g _f_u_n_c_t_i_o_n _f_o_r _i_d_e_n_t_i_f_y_i_n_g _s_i_g_n_i_f_i_c_a_n_t _d_i_f_f_e_r_e_n_t_i_a_l _g_e_n_e _e_x_p_r_e_s_s_i_o_n _p_r_o_f_i_l_e_s _i_n _m_i_c_o_r_a_r_r_a_y _t_i_m_e _c_o_u_r_s_e _e_x_p_e_r_i_m_e_n_t_s

_D_e_s_c_r_i_p_t_i_o_n:

     'maSigPro' performs a whole maSigPro analysis for a time series
     gene expression experiment. The function successively calls the
     functions 'make.design.matrix' (optional), 'p.vector', 'T.fit',
     'get.siggenes' and 'see.genes'.

_U_s_a_g_e:

     maSigPro(data, edesign, matrix = "AUTO", groups.vector = NULL, 
         degree = 2, time.col = 1, repl.col = 2, group.cols = c(3:ncol(edesign)), 
         Q = 0.05, alfa = Q, nvar.correction = FALSE, step.method = "backward", rsq = 0.7,
         min.obs = 3, vars = "groups", significant.intercept = "dummy", cluster.data = 1, 
         add.IDs = FALSE, IDs = NULL, matchID.col = 1, only.names = FALSE, k = 9, m = 1.45, 
         cluster.method = "hclust", distance = "cor", agglo.method = "ward", iter.max = 500, 
         summary.mode = "median", color.mode = "rainbow", trat.repl.spots = "none",
         index = IDs[, (matchID.col + 1)], match = IDs[, matchID.col], rs = 0.7, 
         show.fit = TRUE, show.lines = TRUE, pdf = TRUE, cexlab = 0.8, 
         legend = TRUE, main = NULL, ...)

_A_r_g_u_m_e_n_t_s:

    data: matrix with normalized gene expression data. Genes must be in
          rows and arrays in columns. Row names must contain geneIDs

          (argument of 'p.vector')

 edesign: matrix of experimental design. Row names must contain
          arrayIDs 

          (argument of 'make.design.matrix' and 'see.genes')

  matrix: design matrix for regression analysis. By default design is
          calculated with make.design.matrix

          (argument of 'p.vector' and 'T.fit', by default computed by
          'make.design.matrix')

groups.vector: vector indicating experimental group of each variable 

          (argument of 'get.siggenes' and 'see.genes', by default
          computed by 'make.design.matrix')

  degree: the degree of the regression fit polynomial. 'degree' = 1
          returns linear regression, 'degree' = 2 returns quadratic
          regression, etc... 

          (argument of 'make.design.matrix') 

time.col: column in edesign containing time values. Default is first
          column 

          (argument of 'make.design.matrix' and 'see.genes') 

repl.col: column in edesign containing coding for replicates arrays.
          Default is second column 

          (argument of 'make.design.matrix' and 'see.genes') 

group.cols: columns in 'edesign' indicating the coding for each group
          of the experiment (see 'make.design.matrix')

          (argument of 'make.design.matrix' and 'see.genes') 

       Q: level of false discovery rate (FDR) control

          (argument of 'p.vector')

    alfa: significance level used for variable selection in the
          stepwise regression

          (argument of 'T.fit')

nvar.correction: logical indicating whether the stepwise regression
          significance level is corrected  (argument of 'T.fit')

step.method: argument to be passed to the step function. 

          Can be either '"backward"', '"forward"',
          '"two.ways.backward"' or '"two.ways.forward"' 

     rsq: cut-off level at the R-squared value for the stepwise
          regression fit. 

          Only genes with R-squared greater than 'rsq' are selected 

 min.obs: genes with less than this number of true numerical values
          will be excluded from the analysis  

          (argument of 'p.vector' and 'T.fit')

    vars: variables for which to extract significant genes

          (argument of 'get.siggenes')

significant.intercept: experimental groups for which significant
          intercept coefficients are considered 

          (argument of 'get.siggenes')

cluster.data: Type of data used by the cluster algorithm

          (argument of 'see.genes') 

 add.IDs: logical indicating whether to include additional gene id's in
          the significant genes result  

          (argument of 'get.siggenes') 

     IDs: matrix containing additional gene id information (required
          when 'add.IDs' is TRUE) 

          (argument of 'get.siggenes') 

matchID.col: number of matching column in matrix IDs for adding genes
          ids  (argument of 'get.siggenes') 

only.names: logical. If TRUE, expression values are omitted in the
          significant genes result

          (argument of 'get.siggenes') 

       k: number of clusters 

          (argument of 'see.genes') 

       m: m parameter when '"mfuzz"' clustering algorithm is used. See
          'mfuzz' 

          (argument of 'see.genes') 

cluster.method: clustering method for data partitioning

          (argument of 'see.genes') 

distance: distance measurement function used when 'cluster.method' is
          '"hclust"' 

          (argument of 'see.genes') 

agglo.method: aggregation method used when 'cluster.method' is
          '"hclust"'  (argument of 'see.genes') 

iter.max: number of iterations when 'cluster.method' is '"kmeans"' 

          (argument of 'see.genes') 

summary.mode: the method to condense expression information when more
          than one gene is present in the data.  

          Possible values are '"representative"' and '"median"' 

          (argument of 'PlotGroups') 

color.mode: color scale for plotting profiles. Can be either
          '"rainbow"' or '"gray"' 

          (argument of 'PlotProfiles') 

trat.repl.spots: treatment given to replicate spots. Possible values
          are '"none"' and '"average"'

          (argument of 'get.siggenes') 

   index: argument of the 'average.rows' function to use when
          'trat.repl.spots' is '"average"'

          (argument of 'get.siggenes') 

   match: argument of the 'average.rows' function to use when
          'trat.repl.spots' is '"average"'

          (argument of 'get.siggenes') 

      rs: minimum Pearson correlation coefficient for replicated spots
          profiles to be averaged

          (argument of 'get.siggenes') 

show.fit: logical indicating whether regression fit curves must be
          plotted

          (argument of 'see.genes') 

show.lines: logical indicating whether a line must be drawn joining
          plotted data points for each group

          (argument of 'see.genes')

     pdf: logical indicating whether a pdf results file must be
          generated

          (argument of 'see.genes')

  cexlab: graphical parameter magnification to be used for x labels in
          plotting functions 

  legend: logical indicating whether legend must be added when plotting
          profiles 

          (argument of 'see.genes') 

    main: title for pdf results file 

     ...: other graphical function arguments

_D_e_t_a_i_l_s:

     maSigPro finds and displays genes with significant profile
     differences in time series gene expression experiments. The main,
     compulsory, input parameters for this function are a matrix of
     gene expression data (see 'p.vector' for details) and a matrix
     describing the experimental design (see 'make.design.matrix' or
     'p.vector' for details). If extended gene ID information is to
     be included in the result of significant genes, a third
     IDs matrix containing this information is required (see
     'get.siggenes' for details).

     Basically, the function calls the successive steps of the maSigPro
     approach, which are:

        *  Make a general regression model with dummies to indicate
           different experimental groups. 

        *  Select significant genes on the basis of this general model,
           applying fdr control. 

        *  Find significant variables for each gene, using stepwise
           regression. 

        *  Extract and display significant genes for any set of
           variables or experimental groups. 

_V_a_l_u_e:

 summary: a vector or matrix listing significant genes for the
          variables given by the function parameters

sig.genes: a list with detailed information on the significant genes
          found for the variables given by the function parameters.
          Each element of the list is also a list containing:

          'sig.profiles': expression values of significant genes. The
          cluster assignment of each gene is given in the last column

          'coefficients': regression coefficients for significant genes

          't.score': value of the t statistics of significant genes

          'sig.pvalues': p-values of the regression coefficients for
          significant genes

          'g': number of genes

          ... :arguments passed by previous functions

input.data : input analysis data

       G: number of input genes

 edesign: matrix of experimental design

     dis: regression design matrix

 min.obs: input value for minimal number of true observations

p.vector: vector containing the computed p-values of the general
          regression model for each gene

variables : variables in the general regression model

       g: number of significant genes

p.vector.alfa : p-value at FDR = 'Q' control

step.method: input step method for stepwise regression

       Q: input value for false discovery rate (FDR) control

step.alfa: input significance level in stepwise regression

influ.info : data frame of genes containing influential data

_A_u_t_h_o_r(_s):

     Ana Conesa, aconesa@ivia.es; Maria Jose Nueda, mj.nueda@ua.es

_R_e_f_e_r_e_n_c_e_s:

     Conesa, A., Nueda M.J., Alberto Ferrer, A., Talon, T. 2005.
     maSigPro: a Method to Identify Significant Differential Expression
     Profiles in Time-Course Microarray Experiments.

_S_e_e _A_l_s_o:

     'make.design.matrix', 'p.vector', 'T.fit', 'get.siggenes',
     'see.genes'

_E_x_a_m_p_l_e_s:

     #### GENERATE TIME COURSE DATA
     ## tc.GENE simulates n random gene expression profiles for a data set with
     ## one control plus 3 treatments, 3 time points and r replicates per time point.
     ##
     ## Arguments:
     ##   n           number of genes (rows) to simulate
     ##   r           number of replicates per group and time point
     ##   varGT       spread for group G (1 = Ctl, 2..4 = Tr1..Tr3) at time point T;
     ##               it is handed to rnorm() as its 'sd' argument, so despite the
     ##               name it acts as a standard deviation
     ##   aG, bG, cG  mean expression of group G at time points 1, 2 and 3
     ##
     ## Value: numeric matrix with n rows (all named "gene") and 12 * r columns,
     ## ordered Ctl, Tr1, Tr2, Tr3 and, within each group, by time point.

     tc.GENE <- function(n, r,
                  var11 = 0.01, var12 = 0.01,var13 = 0.01,
                  var21 = 0.01, var22 = 0.01, var23 =0.01,
                  var31 = 0.01, var32 = 0.01, var33 = 0.01,
                  var41 = 0.01, var42 = 0.01, var43 = 0.01,
                  a1 = 0, a2 = 0, a3 = 0, a4 = 0,
                  b1 = 0, b2 = 0, b3 = 0, b4 = 0,
                  c1 = 0, c2 = 0, c3 = 0, c4 = 0)
     {
       ## Means and spreads of the 12 group/time cells, group-major then by time
       means <- c(a1, b1, c1, a2, b2, c2, a3, b3, c3, a4, b4, c4)
       sds <- c(var11, var12, var13, var21, var22, var23,
                var31, var32, var33, var41, var42, var43)
       n.col <- length(means) * r

       ## A 1:n loop would run over c(1, 0) for n = 0 and yield two rows;
       ## return an empty matrix instead
       if (n < 1) {
         return(matrix(numeric(0), nrow = 0, ncol = n.col))
       }

       ## One vectorised draw replaces the gene-by-gene rbind() loop.
       ## rnorm() recycles mean/sd across genes and fills values in order, so
       ## filling the matrix by row gives one gene per row, drawn in the same
       ## order as the loop did, without repeatedly copying a growing matrix.
       values <- rnorm(n * n.col,
                       mean = rep(means, each = r),
                       sd = rep(sds, each = r))
       matrix(values, nrow = n, ncol = n.col, byrow = TRUE,
              dimnames = list(rep("gene", n), NULL))
     }

     ## 270 flat profiles (all group/time means left at their defaults)
     flat <- tc.GENE(n = 270, r = 3)
     ## 10 genes whose Tr1 profile differs from Ctl
     twodiff <- tc.GENE(n = 10, r = 3, b2 = 0.5, c2 = 1.3)
     ## 10 genes whose Tr2 and Tr3 profiles differ from Ctl
     threediff <- tc.GENE(
       n = 10, r = 3,
       b3 = 0.8, c3 = -1,
       a4 = -0.1, b4 = -0.8, c4 = -1.2
     )
     ## 10 genes whose Tr2 profile differs from Ctl, with a different variance
     vardiff <- tc.GENE(
       n = 10, r = 3,
       a3 = 0.7, b3 = 1, c3 = 1.2,
       var32 = 0.03, var33 = 0.03
     )
     ## Stack the four gene sets into one data set and label rows and columns
     tc.DATA <- rbind(flat, twodiff, threediff, vardiff)
     rownames(tc.DATA) <- paste0("feature", 1:300)
     colnames(tc.DATA) <- paste0("Array", 1:36)
     ## Set 300 randomly chosen cells to NA to introduce missing values
     tc.DATA[sample(300 * 36, 300)] <- NA

     #### CREATE EXPERIMENTAL DESIGN
     ## 36 arrays laid out group by group: 4 groups x 3 time points x 3 replicates
     Time <- rep(rep(1:3, each = 3), times = 4)
     Replicates <- rep(1:12, each = 3)
     ## 0/1 indicator columns, one per experimental group (9 arrays each)
     Control <- rep(c(1, 0), times = c(9, 27))
     Treat1 <- rep(c(0, 1, 0), times = c(9, 9, 18))
     Treat2 <- rep(c(0, 1, 0), times = c(18, 9, 9))
     Treat3 <- rep(c(0, 1), times = c(27, 9))
     edesign <- cbind(Time, Replicates, Control, Treat1, Treat2, Treat3)
     rownames(edesign) <- paste0("Array", 1:36)

     #### RUN maSigPro
     tc.test <- maSigPro(data = tc.DATA, edesign = edesign, degree = 2,
                         vars = "groups", main = "Test")

     ## Total number of significant genes
     tc.test$g
     ## Significant genes by experimental group
     tc.test$summary
     ## p-values of the significant coefficients in the regression models of
     ## the significant genes for the Control.vs.Treat1 comparison
     tc.test$sig.genes$Treat1$sig.pvalues

