applyCdfGroups          package:affxparser          R Documentation

_A_p_p_l_i_e_s _a _f_u_n_c_t_i_o_n _o_v_e_r _t_h_e _g_r_o_u_p_s _i_n _a _C_D_F _s_t_r_u_c_t_u_r_e

_D_e_s_c_r_i_p_t_i_o_n:

     Applies a function over the groups in a CDF structure.

_U_s_a_g_e:

     applyCdfGroups(cdf, fcn, ...)

_A_r_g_u_m_e_n_t_s:

     cdf: A CDF 'list' structure.

     fcn: A 'function' that takes a 'list' structure of group elements
          and returns an updated 'list' of groups.

     ...: Arguments passed to the 'fcn' function.

_V_a_l_u_e:

     Returns an updated CDF 'list' structure.

_P_r_e-_d_e_f_i_n_e_d _r_e_s_t_r_u_c_t_u_r_i_n_g _f_u_n_c_t_i_o_n_s:


           *  'cdfGetFields'() - Gets a subset of group fields in a
              CDF structure.

           *  'cdfGetGroups'() - Gets a subset of groups in a CDF
              structure.

           *  'cdfOrderBy'() - Orders the fields according to the value
              of another field in the same CDF group.

           *  'cdfOrderColumnsBy'() - Orders the columns of fields
              according to the values in a certain row of another field
              in the same CDF group.

           *  'cdfAddBaseMmCounts'() - Adds the number of allele A and
              allele B mismatching nucleotides of the probes in a CDF
              structure.

           *  'cdfAddProbeOffsets'() - Adds probe offsets to the groups
              in a CDF structure.

           *  'cdfGtypeCelToPQ'() - Function to imitate Affymetrix'
              'gtype_cel_to_pq' software.

           *  'cdfMergeAlleles'() - Function to join CDF allele A and
              allele B groups strand by strand.

           *  'cdfMergeStrands'() - Function to join CDF groups with
              the same names.


     We appreciate contributions.

_A_u_t_h_o_r(_s):

     Henrik Bengtsson (<URL: http://www.braju.com/R/>)

_E_x_a_m_p_l_e_s:

     ##############################################################
     if (require("AffymetrixDataTestFiles")) {            # START #
     ##############################################################

     # Locate the CDF file for the chip type used in this example
     cdfFile <- findCdf("Mapping10K_Xba131")

     # Look up the index of the unit that has the given name
     unitName <- "SNP_A-1509436"
     unit <- which(readCdfUnitNames(cdfFile) == unitName)

     # Read that unit from the CDF file, stratified by "pmmm" and
     # with 'readType' and 'readDirection' turned off
     cdf0 <- readCdfUnits(
       cdfFile,
       units=unit,
       stratifyBy="pmmm",
       readType=FALSE,
       readDirection=FALSE
     )
     cat("Default CDF structure:\n")
     print(cdf0)

     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     # Tabulate each group: coerce every group to a data frame
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     cdf <- applyCdfGroups(
       readCdfUnits(cdfFile, units=unit),
       lapply, as.data.frame
     )
     print(cdf)

     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     # Infer the (true or relative) offsets of the probe quartets.
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     cdf <- applyCdfGroups(cdf0, cdfAddProbeOffsets)
     cat("Probe offsets:\n")
     print(cdf)

     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     # Count the nucleotides that mismatch the allele A and the
     # allele B sequences, respectively.
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     cdf <- applyCdfGroups(cdf, cdfAddBaseMmCounts)
     cat("Allele A & B target sequence mismatch counts:\n")
     print(cdf)


     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     # Combine the signals from the sense and the anti-sense
     # strands in SNP CEL files.
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     # As a first step, join the strands in the CDF structure.
     cdf <- applyCdfGroups(cdf, cdfMergeStrands)
     cat("Joined CDF structure:\n")
     print(cdf)

     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     # Rearrange the values of the group fields into quartets.
     # The values must already be arranged as PMs and MMs, which
     # is why this starts over from 'cdf0'.
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     cdf <- applyCdfGroups(cdf0, cdfMergeAlleles)
     cat("Probe quartets:\n")
     print(cdf)

     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     # Extract the (zero-based) x and y cell locations
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     xs <- unlist(applyCdfGroups(cdf, cdfGetFields, "x"), use.names=FALSE)
     ys <- unlist(applyCdfGroups(cdf, cdfGetFields, "y"), use.names=FALSE)

     # Validate: turn the zero-based (x,y) positions into one-based
     # cell indices and compare them with the indices read directly
     # from the CDF file.
     nbrOfColumns <- readCdfHeader(cdfFile)$cols
     cells <- sort(as.integer(ys*nbrOfColumns + xs + 1))

     cells0 <- sort(unlist(
       readCdfCellIndices(cdfFile, units=unit),
       use.names=FALSE
     ))

     stopifnot(identical(cells0, cells))

     ##############################################################
     }                                                     # STOP #
     ##############################################################

