21 #ifndef TESSERACT_TEXTORD_TEXTORD_H__
22 #define TESSERACT_TEXTORD_TEXTORD_H__
46 int width,
int height, Pix* pix,
47 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
54 return use_cjk_fp_model_;
57 use_cjk_fp_model_ = flag;
72 void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
82 bool use_cjk_fp_model_;
87 int width,
int height, TO_BLOCK_LIST* to_blocks);
89 void MakeBlockRows(
int min_spacing,
int max_spacing,
93 void fit_rows(
float gradient,
ICOORD page_tr, TO_BLOCK_LIST *blocks);
94 void cleanup_rows_fitting(
ICOORD page_tr,
100 void compute_block_xheight(
TO_BLOCK *block,
float gradient);
101 void compute_row_xheight(
TO_ROW *row,
104 int block_line_size);
105 void make_spline_rows(
TO_BLOCK *block,
112 void make_old_baselines(
TO_BLOCK *block,
115 void correlate_lines(
TO_BLOCK *block,
float gradient);
116 void correlate_neighbours(
TO_BLOCK *block,
119 int correlate_with_stats(
TO_ROW **rows,
122 void find_textlines(
TO_BLOCK *block,
128 void block_spacing_stats(
TO_BLOCK *block,
130 BOOL8 &old_text_ord_proportional,
132 inT16 &block_space_gap_width,
134 inT16 &block_non_space_gap_width
136 void row_spacing_stats(
TO_ROW *row,
141 inT16 block_space_gap_width,
143 inT16 block_non_space_gap_width
145 void old_to_method(
TO_ROW *row,
146 STATS *all_gap_stats,
147 STATS *space_gap_stats,
148 STATS *small_gap_stats,
149 inT16 block_space_gap_width,
151 inT16 block_non_space_gap_width
155 STATS *all_gap_stats,
156 BOOL8 suspected_table,
160 void improve_row_threshold(
TO_ROW *row,
STATS *all_gap_stats);
165 inT16 real_current_gap,
166 inT16 within_xht_current_gap,
172 BOOL8& prev_gap_was_a_space,
173 BOOL8& break_at_next_gap);
177 void peek_at_next_gap(
TO_ROW *row,
181 inT16 &next_within_xht_gap);
182 void mark_gap(
TBOX blob,
185 inT16 prev_blob_width,
187 inT16 next_blob_width,
189 float find_mean_blob_spacing(
WERD *word);
201 float filter_noise_blobs(BLOBNBOX_LIST *src_list,
202 BLOBNBOX_LIST *noise_list,
203 BLOBNBOX_LIST *small_list,
204 BLOBNBOX_LIST *large_list);
205 void cleanup_blocks(BLOCK_LIST *blocks);
206 BOOL8 clean_noise_from_row(
ROW *row);
207 void clean_noise_from_words(
ROW *row);
210 void clean_small_noise_from_words(
ROW *row);
214 "Script has no xheight, so use a single mode for horizontal text");
218 "Constrain relative values of inter and intra-word gaps for "
221 "Block stats to use fixed pitch rows?");
223 "Force word breaks on punct to break long lines in non-space "
226 "Space stats use prechopping?");
228 "Fix suspected bug in old code");
230 "Only stat OBVIOUS spaces");
232 "Only stat OBVIOUS spaces");
234 "Only stat OBVIOUS spaces");
236 "Only stat OBVIOUS spaces");
238 "Use row alone when inadequate cert spaces");
242 "Dont restrict kn->sp fuzzy limit to tables");
244 "Use within xht gap for wd breaks");
246 "Use within xht gap for wd breaks");
248 "Only use within xht gap for wd breaks");
250 "Dont chng kn to space next to punct");
254 "Enable improvement heuristic");
257 "or should we use mean");
259 "No.samples reqd to reestimate for row");
261 "No.gaps reqd with 1 large gap to treat as a table");
263 "No.gaps reqd with few cert spaces to use certs");
266 "Factor for defining space threshold in terms of space and "
269 "how far between kern and space?");
271 "how far between kern and space?");
273 "Fract of xheight for narrow");
275 "narrow if w/h less than this");
278 "wide if w/h less than this");
280 "Fract of xheight for fuzz sp");
282 "Fract of xheight for fuzz sp");
284 "Fract of xheight for fuzz sp");
287 "gap ratio to flip kern->sp");
289 "gap ratio to flip kern->sp");
291 "gap ratio to flip kern->sp");
296 "Fract of kerns reqd for isolated row stats");
298 "Min difference of kn & sp in table");
300 "Expect spaces bigger than this");
302 "Fuzzy if less than this");
306 "Dont trust spaces less than this time kn");
308 "Thresh guess - mult kn by this");
310 "Thresh guess - mult xht by this");
312 "Multiplier on kn to limit thresh");
314 "Dont autoflip kn to sp when large separation");
316 "Limit use of xht gap with large kns");
318 "Limit use of xht gap with odd small kns");
320 "Dont reduce box if the top left is non blank");
322 "Dont let sp minus kn get too small");
324 "How wide fuzzies need context");
332 "Fraction of bounding box for noise");
344 "xh fract width error for norm blobs");
346 "Height fraction to discard outlines as speckle noise");
355 #endif // TESSERACT_TEXTORD_TEXTORD_H__
int textord_max_noise_size
int tosp_enough_space_samples_for_median
double tosp_narrow_aspect_ratio
bool tosp_stats_use_xht_gaps
double textord_initialasc_ile
double tosp_ignore_very_big_gaps
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on)
double tosp_old_sp_kn_th_factor
double textord_noise_area_ratio
#define BOOL_VAR_H(name, val, comment)
bool textord_noise_rejwords
ROW * make_blob_words(TO_ROW *row, FCOORD rotation)
double tosp_table_kn_sp_ratio
double textord_noise_hfract
double tosp_dont_fool_with_small_kerns
double textord_blob_size_bigile
bool textord_noise_rejrows
double tosp_kern_gap_factor2
bool tosp_only_small_gaps_for_kern
bool textord_single_height_mode
void TextordPage(PageSegMode pageseg_mode, int width, int height, Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
double tosp_fuzzy_space_factor2
double textord_noise_syfract
double textord_noise_rowratio
double tosp_wide_fraction
double tosp_kern_gap_factor1
double tosp_threshold_bias2
bool tosp_force_wordbreak_on_punct
double tosp_fuzzy_space_factor
#define double_VAR_H(name, val, comment)
bool tosp_recovery_isolated_row_stats
double textord_noise_normratio
ROW * make_prop_words(TO_ROW *row, FCOORD rotation)
bool tosp_narrow_blobs_not_cert
double tosp_pass_wide_fuzz_sp_to_context
bool tosp_row_use_cert_spaces1
double tosp_fuzzy_kn_fraction
double tosp_ignore_big_gaps
double textord_blshift_xfraction
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
double textord_noise_sxfract
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
Textord(CCStruct *ccstruct)
double tosp_enough_small_gaps
double textord_blob_size_smallile
bool tosp_all_flips_fuzzy
bool tosp_fuzzy_limit_all
bool tosp_row_use_cert_spaces
double tosp_max_sane_kn_thresh
bool tosp_flip_fuzz_sp_to_kn
double tosp_table_fuzzy_kn_sp_ratio
double textord_blshift_maxshift
double tosp_table_xht_sp_ratio
bool tosp_use_pre_chopping
int textord_noise_translimit
double tosp_init_guess_kn_mult
double tosp_min_sane_kn_sp
int textord_noise_sizefraction
bool tosp_flip_fuzz_kn_to_sp
void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res)
#define INT_VAR_H(name, val, comment)
int textord_noise_sncount
double tosp_wide_aspect_ratio
bool tosp_old_to_constrain_sp_kn
double textord_initialx_ile
double tosp_large_kerning
double tosp_threshold_bias1
double textord_noise_sizelimit
bool tosp_only_use_xht_gaps
double tosp_narrow_fraction
bool tosp_rule_9_test_punct
double tosp_kern_gap_factor3
bool tosp_block_use_cert_spaces
double tosp_fuzzy_space_factor1
double tosp_fuzzy_sp_fraction
double tosp_silly_kn_sp_gap
bool tosp_only_use_prop_rows
void set_use_cjk_fp_model(bool flag)
double tosp_init_guess_xht_mult
bool use_cjk_fp_model() const