maskMotif             package:Biostrings             R Documentation

_M_a_s_k_i_n_g _b_y _c_o_n_t_e_n_t (_o_r _b_y _p_o_s_i_t_i_o_n)

_D_e_s_c_r_i_p_t_i_o_n:

     Functions for masking a sequence by content (or by position).

_U_s_a_g_e:

       maskMotif(x, motif, min.block.width=1)
       mask(x, start=NA, end=NA, pattern)

_A_r_g_u_m_e_n_t_s:

       x: The sequence to mask. 

   motif: The motif to mask in the sequence. 

min.block.width: The minimum width of the blocks to mask. 

   start: An integer vector containing the starting positions of the
          regions to mask. 

     end: An integer vector containing the ending positions of the
          regions to mask. 

 pattern: The motif to mask in the sequence. 

_V_a_l_u_e:

     A MaskedXString object for 'maskMotif' and an XStringViews object
     for 'mask'.

_A_u_t_h_o_r(_s):

     H. Pages

_S_e_e _A_l_s_o:

     'read.Mask', XString-class, MaskedXString-class,
     XStringViews-class, MaskCollection-class

_E_x_a_m_p_l_e_s:

       ## ---------------------------------------------------------------------
       ## EXAMPLE 1
       ## ---------------------------------------------------------------------

       maskMotif(BString("AbcbbcbEEE"), "bcb")
       maskMotif(BString("AbcbcbEEE"), "bcb")

       ## maskMotif() can be used in an incremental way to mask more than 1
       ## motif. Note that maskMotif() does not try to mask again what's
       ## already masked (i.e. the new mask will never overlap with the
       ## previous masks) so the order in which the motifs are masked actually
       ## matters as it will affect the total set of masked positions.
       x0 <- BString("AbcbEEEEEbcbbEEEcbbcbc")
       x1 <- maskMotif(x0, "E")
       x1
       x2 <- maskMotif(x1, "bcb")
       x2
       x3 <- maskMotif(x2, "b")
       x3
       ## Note that inverting the order in which "b" and "bcb" are masked would
       ## lead to a different final set of masked positions.
       ## Also note that the order doesn't matter if the motifs to mask don't
       ## overlap (we assume that the motifs are unique) i.e. if the prefix of
       ## each motif is not the suffix of any other motif. This is of course
       ## the case when all the motifs have only 1 letter.

       ## ---------------------------------------------------------------------
       ## EXAMPLE 2
       ## ---------------------------------------------------------------------

       x <- DNAString("ACACAACTAGATAGNACTNNGAGAGACGC")

       ## Mask the N-blocks
       x1 <- maskMotif(x, "N")
       x1
       as(x1, "XStringViews")
       gaps(x1)
       as(gaps(x1), "XStringViews")

       ## Mask the AC-blocks 
       x2 <- maskMotif(x1, "AC")
       x2
       gaps(x2)

       ## Mask the GA-blocks
       x3 <- maskMotif(x2, "GA", min.block.width=5)
       x3  # masks 2 and 3 overlap
       gaps(x3)

       ## ---------------------------------------------------------------------
       ## EXAMPLE 3
       ## ---------------------------------------------------------------------

       library(BSgenome.Dmelanogaster.UCSC.dm3)
       chrU <- Dmelanogaster$chrU
       chrU
       alphabetFrequency(chrU)
       chrU <- maskMotif(chrU, "N")
       chrU
       alphabetFrequency(chrU)
       as(chrU, "XStringViews")
       as(gaps(chrU), "XStringViews")

       mask2 <- Mask(mask.width=length(chrU), start=c(50000, 350000, 543900), width=25000)
       names(mask2) <- "some ugly regions"
       masks(chrU) <- append(masks(chrU), mask2)
       chrU
       as(chrU, "XStringViews")
       as(gaps(chrU), "XStringViews")

       ## ---------------------------------------------------------------------
       ## EXAMPLE 4
       ## ---------------------------------------------------------------------
       ## Note that unlike maskMotif(), mask() returns an XStringViews object!

       ## masking "by position"
       mask("AxyxyxBC", 2, 6)

       ## masking "by content"
       mask("AxyxyxBC", "xyx")
       noN_chrU <- mask(chrU, "N")
       noN_chrU
       alphabetFrequency(noN_chrU, collapse=TRUE)

