readCdfNbrOfCellsPerUnitGroup   package:affxparser   R Documentation

_G_e_t_s _t_h_e _n_u_m_b_e_r _o_f _c_e_l_l_s (_p_r_o_b_e_s) _t_h_a_t _e_a_c_h _g_r_o_u_p _o_f _e_a_c_h _u_n_i_t _i_n _a _C_D_F _f_i_l_e

_D_e_s_c_r_i_p_t_i_o_n:

     Gets the number of cells (probes) in each group of each unit in
     a CDF file.

_U_s_a_g_e:

     readCdfNbrOfCellsPerUnitGroup(filename, units=NULL, verbose=0)

_A_r_g_u_m_e_n_t_s:

filename: The filename of the CDF file.

   units: An 'integer' 'vector' of unit indices specifying which units
          are to be read.  If 'NULL', all units are read.

 verbose: An 'integer' specifying the verbose level. If 0, the file is
          parsed quietly.  The higher the number, the more details are
          reported.

_V_a_l_u_e:

     A named 'list' of named 'integer' vectors.  The names of the list
     elements are unit names and the names of each integer vector are
     group names.

_A_u_t_h_o_r(_s):

     Henrik Bengtsson (<URL: http://www.braju.com/R/>)

_E_x_a_m_p_l_e_s:

     for (zzz in 0) {

     # Locate the CDF file for the chip type.  If findCdf() gives
     # NULL, leave this one-pass loop and skip the rest of the example.
     cdfFile <- findCdf("Mapping50K_Xba240")
     if (is.null(cdfFile)) {
       break
     }

     # Cell counts for every group of every unit in the file
     groups <- readCdfNbrOfCellsPerUnitGroup(cdfFile)

     # How many units were read?
     print(length(groups))
     #   59015

     # A closer look at two of the units
     print(groups[56:57])
     # $`SNP_A-1650338`
     #  C  G  C  G
     #  8  8 12 12
     #
     # $`SNP_A-1716667`
     #  A  G  A  G
     # 10 10 10 10

     # Tabulate the groups by their number of cells
     print(table(unlist(groups)))
     #      6     8    10    12    14    16    60
     #  16348 59462 84344 59462 16348    20     4

     # Total number of cells in each unit (summed over its groups)
     nbrOfCellsPerUnit <- unlist(lapply(groups, sum))
     print(table(nbrOfCellsPerUnit))
     #   16    40    60
     #   20 58991     4

     # Number of groups in each unit
     nbrOfGroupsPerUnit <- unlist(lapply(groups, length))

     # A closer look at a few units
     print(nbrOfGroupsPerUnit[20:30])
     # AFFX-barcodeP AFFX-barcodeQ AFFX-barcodeR AFFX-barcodeS AFFX-barcodeT
     #             1             1             1             1             1
     #   AFFX-601964   AFFX-656757   AFFX-721431   AFFX-737848  AFFX-1329481
     #             4             4             4             4             4
     #  AFFX-1375402
     #             4

     # Tabulate the units by their number of groups
     print(table(nbrOfGroupsPerUnit))
     #   1     4
     #  24 58991

     # For each distinct group count, collect the matching units in a
     # matrix with one row per group and one column (named by unit)
     # per unit.  The matrices are stored in 'x', keyed by group count.
     x <- list()
     for (size in unique(nbrOfGroupsPerUnit)) {
       hasSize <- (nbrOfGroupsPerUnit == size)
       unitsOfSize <- groups[hasSize]
       counts <- matrix(unlist(unitsOfSize), nrow = size)
       colnames(counts) <- names(unitsOfSize)
       x[[as.character(size)]] <- counts
       rm(hasSize, unitsOfSize, counts)
     }

     # Among the units with four groups (quartets), count the units
     # in which the paired groups differ in their number of cells.
     # Group 1 versus Group 2
     print(sum((x[["4"]][1, ] - x[["4"]][2, ]) != 0))
     # 0

     # Group 3 versus Group 4
     print(sum((x[["4"]][3, ] - x[["4"]][4, ]) != 0))
     # 0

     # Remove the objects created by this example
     rm(cdfFile, groups, nbrOfCellsPerUnit, nbrOfGroupsPerUnit, x)
     } # for (zzz in 0)

