| fine_clustering {CellaRepertorium} | R Documentation |
Perform additional clustering of sequences within groups
fine_clustering(ccdb, sequence_key, type, max_affinity = NULL, keep_clustering_details = FALSE, ...)
ccdb |
A ContigCellDB() object |
sequence_key |
|
type |
'AA' or 'DNA' |
max_affinity |
|
keep_clustering_details |
|
... |
Arguments passed on to
|
ContigCellDB() object with updated contig_tbl and cluster_tbl
library(dplyr)
data(ccdb_ex)

# Work on a copy of the example data whose cell_tbl is cut down to its
# first 200 rows.
ccdb_ex_small <- ccdb_ex
ccdb_ex_small$cell_tbl <- ccdb_ex_small$cell_tbl[seq_len(200), ]

# Cluster the 'cdr3_nt' sequences with cdhit_ccdb(), then refine those
# clusters with fine_clustering().
ccdb_ex_small <- ccdb_ex_small %>%
  cdhit_ccdb(
    sequence_key = 'cdr3_nt',
    type = 'DNA',
    cluster_name = 'DNA97',
    identity = 0.965,
    min_length = 12,
    G = 1
  ) %>%
  fine_clustering(sequence_key = 'cdr3_nt', type = 'DNA')

# Canonicalizing with the medoid contig is probably the most common choice.
ccdb_medoid <- canonicalize_cluster(ccdb_ex_small)

# Other choices are possible.
# Filter conditions that must all hold ("AND") have to be combined with `&`.
ccdb_umi <- canonicalize_cluster(
  ccdb_ex_small,
  contig_filter_args = chain == 'TRA' & length > 500,
  tie_break_keys = 'umis',
  contig_fields = c('chain', 'length')
)

# Summarise the chain and length columns of the resulting cluster_tbl.
summary(dplyr::select(ccdb_umi$cluster_tbl, chain, length))