Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::Textord Class Reference

#include <textord.h>

Public Member Functions

 Textord (CCStruct *ccstruct)
 
 ~Textord ()
 
void TextordPage (PageSegMode pageseg_mode, int width, int height, Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void CleanupSingleRowResult (PageSegMode pageseg_mode, PAGE_RES *page_res)
 
bool use_cjk_fp_model () const
 
void set_use_cjk_fp_model (bool flag)
 
void to_spacing (ICOORD page_tr, TO_BLOCK_LIST *blocks)
 
ROW * make_prop_words (TO_ROW *row, FCOORD rotation)
 
ROW * make_blob_words (TO_ROW *row, FCOORD rotation)
 
void find_components (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void filter_blobs (ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on)
 

Public Attributes

bool textord_single_height_mode = false
 
bool tosp_old_to_method = false
 
bool tosp_old_to_constrain_sp_kn = false
 
bool tosp_only_use_prop_rows = true
 
bool tosp_force_wordbreak_on_punct = false
 
bool tosp_use_pre_chopping = false
 
bool tosp_old_to_bug_fix = false
 
bool tosp_block_use_cert_spaces = true
 
bool tosp_row_use_cert_spaces = true
 
bool tosp_narrow_blobs_not_cert = true
 
bool tosp_row_use_cert_spaces1 = true
 
bool tosp_recovery_isolated_row_stats = true
 
bool tosp_only_small_gaps_for_kern = false
 
bool tosp_all_flips_fuzzy = false
 
bool tosp_fuzzy_limit_all = true
 
bool tosp_stats_use_xht_gaps = true
 
bool tosp_use_xht_gaps = true
 
bool tosp_only_use_xht_gaps = false
 
bool tosp_rule_9_test_punct = false
 
bool tosp_flip_fuzz_kn_to_sp = true
 
bool tosp_flip_fuzz_sp_to_kn = true
 
bool tosp_improve_thresh = false
 
int tosp_debug_level = 0
 
int tosp_enough_space_samples_for_median = 3
 
int tosp_redo_kern_limit = 10
 
int tosp_few_samples = 40
 
int tosp_short_row = 20
 
int tosp_sanity_method = 1
 
double tosp_old_sp_kn_th_factor = 2.0
 
double tosp_threshold_bias1 = 0
 
double tosp_threshold_bias2 = 0
 
double tosp_narrow_fraction = 0.3
 
double tosp_narrow_aspect_ratio = 0.48
 
double tosp_wide_fraction = 0.52
 
double tosp_wide_aspect_ratio = 0.0
 
double tosp_fuzzy_space_factor = 0.6
 
double tosp_fuzzy_space_factor1 = 0.5
 
double tosp_fuzzy_space_factor2 = 0.72
 
double tosp_gap_factor = 0.83
 
double tosp_kern_gap_factor1 = 2.0
 
double tosp_kern_gap_factor2 = 1.3
 
double tosp_kern_gap_factor3 = 2.5
 
double tosp_ignore_big_gaps = -1
 
double tosp_ignore_very_big_gaps = 3.5
 
double tosp_rep_space = 1.6
 
double tosp_enough_small_gaps = 0.65
 
double tosp_table_kn_sp_ratio = 2.25
 
double tosp_table_xht_sp_ratio = 0.33
 
double tosp_table_fuzzy_kn_sp_ratio = 3.0
 
double tosp_fuzzy_kn_fraction = 0.5
 
double tosp_fuzzy_sp_fraction = 0.5
 
double tosp_min_sane_kn_sp = 1.5
 
double tosp_init_guess_kn_mult = 2.2
 
double tosp_init_guess_xht_mult = 0.28
 
double tosp_max_sane_kn_thresh = 5.0
 
double tosp_flip_caution = 0.0
 
double tosp_large_kerning = 0.19
 
double tosp_dont_fool_with_small_kerns = -1
 
double tosp_near_lh_edge = 0
 
double tosp_silly_kn_sp_gap = 0.2
 
double tosp_pass_wide_fuzz_sp_to_context = 0.75
 
bool textord_no_rejects = false
 
bool textord_show_blobs = false
 
bool textord_show_boxes = false
 
int textord_max_noise_size = 7
 
double textord_blob_size_bigile = 95
 
double textord_noise_area_ratio = 0.7
 
double textord_blob_size_smallile = 20
 
double textord_initialx_ile = 0.75
 
double textord_initialasc_ile = 0.90
 
int textord_noise_sizefraction = 10
 
double textord_noise_sizelimit = 0.5
 
int textord_noise_translimit = 16
 
double textord_noise_normratio = 2.0
 
bool textord_noise_rejwords = true
 
bool textord_noise_rejrows = true
 
double textord_noise_syfract = 0.2
 
double textord_noise_sxfract = 0.4
 
double textord_noise_hfract = 1.0/64
 
int textord_noise_sncount = 1
 
double textord_noise_rowratio = 6.0
 
bool textord_noise_debug = FALSE
 
double textord_blshift_maxshift = 0.00
 
double textord_blshift_xfraction = 9.99
 

Detailed Description

Definition at line 39 of file textord.h.

Constructor & Destructor Documentation

tesseract::Textord::Textord ( CCStruct * ccstruct)
explicit

Definition at line 34 of file textord.cpp.

35  : ccstruct_(ccstruct), use_cjk_fp_model_(false),
36  // makerow.cpp ///////////////////////////////////////////
38  "Script has no xheight, so use a single mode",
39  ccstruct_->params()),
40  // tospace.cpp ///////////////////////////////////////////
41  BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?",
42  ccstruct_->params()),
44  "Constrain relative values of inter and intra-word gaps for "
45  "old_to_method.",
46  ccstruct_->params()),
48  "Block stats to use fixed pitch rows?",
49  ccstruct_->params()),
51  "Force word breaks on punct to break long lines in non-space "
52  "delimited langs",
53  ccstruct_->params()),
55  "Space stats use prechopping?",
56  ccstruct_->params()),
57  BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
58  ccstruct_->params()),
60  "Only stat OBVIOUS spaces",
61  ccstruct_->params()),
62  BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
63  ccstruct_->params()),
65  "Only stat OBVIOUS spaces",
66  ccstruct_->params()),
67  BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
68  ccstruct_->params()),
70  "Use row alone when inadequate cert spaces",
71  ccstruct_->params()),
72  BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess",
73  ccstruct_->params()),
74  BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
75  ccstruct_->params()),
77  "Dont restrict kn->sp fuzzy limit to tables",
78  ccstruct_->params()),
80  "Use within xht gap for wd breaks",
81  ccstruct_->params()),
82  BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
83  ccstruct_->params()),
85  "Only use within xht gap for wd breaks",
86  ccstruct_->params()),
88  "Dont chng kn to space next to punct",
89  ccstruct_->params()),
90  BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
91  ccstruct_->params()),
92  BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
93  ccstruct_->params()),
94  BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
95  ccstruct_->params()),
96  INT_MEMBER(tosp_debug_level, 0, "Debug data",
97  ccstruct_->params()),
99  "or should we use mean",
100  ccstruct_->params()),
102  "No.samples reqd to reestimate for row",
103  ccstruct_->params()),
105  "No.gaps reqd with 1 large gap to treat as a table",
106  ccstruct_->params()),
108  "No.gaps reqd with few cert spaces to use certs",
109  ccstruct_->params()),
110  INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly",
111  ccstruct_->params()),
113  "Factor for defining space threshold in terms of space and "
114  "kern sizes",
115  ccstruct_->params()),
117  "how far between kern and space?",
118  ccstruct_->params()),
120  "how far between kern and space?",
121  ccstruct_->params()),
122  double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
123  ccstruct_->params()),
125  "narrow if w/h less than this",
126  ccstruct_->params()),
127  double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
128  ccstruct_->params()),
129  double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
130  ccstruct_->params()),
132  "Fract of xheight for fuzz sp",
133  ccstruct_->params()),
135  "Fract of xheight for fuzz sp",
136  ccstruct_->params()),
138  "Fract of xheight for fuzz sp",
139  ccstruct_->params()),
140  double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
141  ccstruct_->params()),
142  double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
143  ccstruct_->params()),
144  double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp",
145  ccstruct_->params()),
146  double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp",
147  ccstruct_->params()),
148  double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier",
149  ccstruct_->params()),
150  double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier",
151  ccstruct_->params()),
152  double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space",
153  ccstruct_->params()),
155  "Fract of kerns reqd for isolated row stats",
156  ccstruct_->params()),
158  "Min difference of kn & sp in table",
159  ccstruct_->params()),
161  "Expect spaces bigger than this",
162  ccstruct_->params()),
164  "Fuzzy if less than this",
165  ccstruct_->params()),
166  double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
167  ccstruct_->params()),
168  double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
169  ccstruct_->params()),
171  "Dont trust spaces less than this time kn",
172  ccstruct_->params()),
174  "Thresh guess - mult kn by this",
175  ccstruct_->params()),
177  "Thresh guess - mult xht by this",
178  ccstruct_->params()),
180  "Multiplier on kn to limit thresh",
181  ccstruct_->params()),
183  "Dont autoflip kn to sp when large separation",
184  ccstruct_->params()),
186  "Limit use of xht gap with large kns",
187  ccstruct_->params()),
189  "Limit use of xht gap with odd small kns",
190  ccstruct_->params()),
192  "Dont reduce box if the top left is non blank",
193  ccstruct_->params()),
195  "Dont let sp minus kn get too small",
196  ccstruct_->params()),
198  "How wide fuzzies need context",
199  ccstruct_->params()),
200  // tordmain.cpp ///////////////////////////////////////////
201  BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
202  ccstruct_->params()),
203  BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs",
204  ccstruct_->params()),
205  BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs",
206  ccstruct_->params()),
207  INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
208  ccstruct_->params()),
209  double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs",
210  ccstruct_->params()),
212  "Fraction of bounding box for noise",
213  ccstruct_->params()),
215  "Percentile for small blobs",
216  ccstruct_->params()),
218  "Ile of sizes for xheight guess",
219  ccstruct_->params()),
221  "Ile of sizes for xheight guess",
222  ccstruct_->params()),
224  "Fraction of size for maxima",
225  ccstruct_->params()),
227  "Fraction of x for big t count",
228  ccstruct_->params()),
229  INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
230  ccstruct_->params()),
232  "Dot to norm ratio for deletion",
233  ccstruct_->params()),
234  BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
235  ccstruct_->params()),
236  BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
237  ccstruct_->params()),
239  "xh fract height error for norm blobs",
240  ccstruct_->params()),
242  "xh fract width error for norm blobs",
243  ccstruct_->params()),
245  "Height fraction to discard outlines as speckle noise",
246  ccstruct_->params()),
247  INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
248  ccstruct_->params()),
250  "Dot to norm ratio for deletion",
251  ccstruct_->params()),
252  BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
253  ccstruct_->params()),
254  double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
255  ccstruct_->params()),
257  "Min size of baseline shift",
258  ccstruct_->params()) {
259 }
bool textord_show_blobs
Definition: textord.h:327
int textord_max_noise_size
Definition: textord.h:329
int tosp_enough_space_samples_for_median
Definition: textord.h:257
double tosp_narrow_aspect_ratio
Definition: textord.h:275
bool tosp_stats_use_xht_gaps
Definition: textord.h:244
int tosp_redo_kern_limit
Definition: textord.h:259
double textord_initialasc_ile
Definition: textord.h:335
double tosp_ignore_very_big_gaps
Definition: textord.h:293
double tosp_gap_factor
Definition: textord.h:285
double tosp_old_sp_kn_th_factor
Definition: textord.h:267
double textord_noise_area_ratio
Definition: textord.h:332
bool tosp_improve_thresh
Definition: textord.h:254
bool textord_noise_rejwords
Definition: textord.h:340
double tosp_table_kn_sp_ratio
Definition: textord.h:298
bool textord_show_boxes
Definition: textord.h:328
double tosp_near_lh_edge
Definition: textord.h:320
bool tosp_use_xht_gaps
Definition: textord.h:246
double textord_noise_hfract
Definition: textord.h:346
double tosp_dont_fool_with_small_kerns
Definition: textord.h:318
double textord_blob_size_bigile
Definition: textord.h:330
bool textord_noise_rejrows
Definition: textord.h:341
double tosp_kern_gap_factor2
Definition: textord.h:289
bool tosp_only_small_gaps_for_kern
Definition: textord.h:239
bool textord_noise_debug
Definition: textord.h:349
bool textord_single_height_mode
Definition: textord.h:214
double tosp_rep_space
Definition: textord.h:294
double tosp_flip_caution
Definition: textord.h:314
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:272
double tosp_fuzzy_space_factor2
Definition: textord.h:284
double textord_noise_syfract
Definition: textord.h:342
double textord_noise_rowratio
Definition: textord.h:348
double tosp_wide_fraction
Definition: textord.h:276
double tosp_kern_gap_factor1
Definition: textord.h:287
double tosp_threshold_bias2
Definition: textord.h:271
bool tosp_force_wordbreak_on_punct
Definition: textord.h:224
double tosp_fuzzy_space_factor
Definition: textord.h:280
bool tosp_recovery_isolated_row_stats
Definition: textord.h:238
double textord_noise_normratio
Definition: textord.h:339
bool tosp_narrow_blobs_not_cert
Definition: textord.h:234
double tosp_pass_wide_fuzz_sp_to_context
Definition: textord.h:324
bool tosp_row_use_cert_spaces1
Definition: textord.h:236
double tosp_fuzzy_kn_fraction
Definition: textord.h:303
double tosp_ignore_big_gaps
Definition: textord.h:292
double textord_blshift_xfraction
Definition: textord.h:351
double textord_noise_sxfract
Definition: textord.h:344
double tosp_enough_small_gaps
Definition: textord.h:296
double textord_blob_size_smallile
Definition: textord.h:333
bool tosp_all_flips_fuzzy
Definition: textord.h:240
bool tosp_fuzzy_limit_all
Definition: textord.h:242
bool tosp_row_use_cert_spaces
Definition: textord.h:232
double tosp_max_sane_kn_thresh
Definition: textord.h:312
bool tosp_flip_fuzz_sp_to_kn
Definition: textord.h:252
double tosp_table_fuzzy_kn_sp_ratio
Definition: textord.h:302
double textord_blshift_maxshift
Definition: textord.h:350
double tosp_table_xht_sp_ratio
Definition: textord.h:300
bool tosp_use_pre_chopping
Definition: textord.h:226
bool tosp_old_to_method
Definition: textord.h:216
int tosp_sanity_method
Definition: textord.h:264
bool textord_no_rejects
Definition: textord.h:326
int textord_noise_translimit
Definition: textord.h:338
bool tosp_old_to_bug_fix
Definition: textord.h:228
double tosp_init_guess_kn_mult
Definition: textord.h:308
double tosp_min_sane_kn_sp
Definition: textord.h:306
int textord_noise_sizefraction
Definition: textord.h:336
bool tosp_flip_fuzz_kn_to_sp
Definition: textord.h:251
int textord_noise_sncount
Definition: textord.h:347
double tosp_wide_aspect_ratio
Definition: textord.h:278
bool tosp_old_to_constrain_sp_kn
Definition: textord.h:219
double textord_initialx_ile
Definition: textord.h:334
double tosp_large_kerning
Definition: textord.h:316
double tosp_threshold_bias1
Definition: textord.h:269
double textord_noise_sizelimit
Definition: textord.h:337
bool tosp_only_use_xht_gaps
Definition: textord.h:248
double tosp_narrow_fraction
Definition: textord.h:273
bool tosp_rule_9_test_punct
Definition: textord.h:250
double tosp_kern_gap_factor3
Definition: textord.h:291
bool tosp_block_use_cert_spaces
Definition: textord.h:230
double tosp_fuzzy_space_factor1
Definition: textord.h:282
double tosp_fuzzy_sp_fraction
Definition: textord.h:304
double tosp_silly_kn_sp_gap
Definition: textord.h:322
bool tosp_only_use_prop_rows
Definition: textord.h:221
ParamsVectors * params()
Definition: ccutil.h:65
double tosp_init_guess_xht_mult
Definition: textord.h:310
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:275
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:281
tesseract::Textord::~Textord ( )

Definition at line 261 of file textord.cpp.

// Destructor body is empty: no statements are executed here.
261  {
262 }

Member Function Documentation

void tesseract::Textord::CleanupSingleRowResult ( PageSegMode  pageseg_mode,
PAGE_RES * page_res 
)

Definition at line 339 of file textord.cpp.

// Reduces page_res to the words of a single "best" row.
//   pageseg_mode: if PSM_LINE_FIND_ENABLED(pageseg_mode) is true the function
//                 returns at once without touching page_res.
//   page_res:     iterated twice through a PAGE_RES_IT; words are deleted from
//                 it in place.
// Pass 1 averages word->best_choice->certainty() over the words of each row
// and remembers the row with the greatest mean. Pass 2 deletes every word
// whose row is not that row.
340  {
341  if (PSM_LINE_FIND_ENABLED(pageseg_mode))
342  return; // No cleanup required.
343  PAGE_RES_IT it(page_res);
344  // Find the best row, being the greatest mean word conf.
345  float row_total_conf = 0.0f;
346  int row_word_count = 0;
347  ROW_RES* best_row = NULL;
348  float best_conf = 0.0f;
349  for (it.restart_page(); it.word() != NULL; it.forward()) {
350  WERD_RES* word = it.word();
351  row_total_conf += word->best_choice->certainty();
352  ++row_word_count;
// The following row differs from the current one, so this word closes its
// row: turn the running total into a mean and compare it with the best so far.
353  if (it.next_row() != it.row()) {
354  row_total_conf /= row_word_count;
// The best_row == NULL test lets the first completed row be accepted even
// when its mean is not greater than the 0.0f that best_conf starts with.
355  if (best_row == NULL || best_conf < row_total_conf) {
356  best_row = it.row();
357  best_conf = row_total_conf;
358  }
// Reset the accumulators for the next row.
359  row_total_conf = 0.0f;
360  row_word_count = 0;
361  }
362  }
363  // Now eliminate any word not in the best row.
364  for (it.restart_page(); it.word() != NULL; it.forward()) {
365  if (it.row() != best_row)
366  it.DeleteCurrentWord();
367  }
368 }
float certainty() const
Definition: ratngs.h:234
#define NULL
Definition: host.h:144
#define PSM_LINE_FIND_ENABLED(pageseg_mode)
Definition: publictypes.h:175
WERD * word
Definition: pageres.h:334
ROW * row
Definition: pageres.h:286
WERD_CHOICE * best_choice
Definition: pageres.h:359
void tesseract::Textord::filter_blobs ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks,
BOOL8  testing_on 
)

Definition at line 239 of file tordmain.cpp.

241  { // for plotting
242  TO_BLOCK_IT block_it = blocks; // destination iterator
243  TO_BLOCK *block; // created block
244 
245  #ifndef GRAPHICS_DISABLED
246  if (to_win != NULL)
247  to_win->Clear();
248  #endif // GRAPHICS_DISABLED
249 
250  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
251  block_it.forward()) {
252  block = block_it.data();
253  block->line_size = filter_noise_blobs(&block->blobs,
254  &block->noise_blobs,
255  &block->small_blobs,
256  &block->large_blobs);
257  block->line_spacing = block->line_size *
264 
265  #ifndef GRAPHICS_DISABLED
266  if (textord_show_blobs && testing_on) {
267  if (to_win == NULL)
268  create_to_win(page_tr);
269  block->plot_graded_blobs(to_win);
270  }
271  if (textord_show_boxes && testing_on) {
272  if (to_win == NULL)
273  create_to_win(page_tr);
278  }
279  #endif // GRAPHICS_DISABLED
280  }
281 }
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:737
bool textord_show_blobs
Definition: textord.h:327
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:999
void create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:49
void Clear()
Definition: scrollview.cpp:590
BLOBNBOX_LIST blobs
Definition: blobbox.h:735
EXTERN ScrollView * to_win
Definition: drawtord.cpp:40
double textord_min_linesize
Definition: makerow.cpp:84
bool textord_show_boxes
Definition: textord.h:328
float line_size
Definition: blobbox.h:748
#define NULL
Definition: host.h:144
float line_spacing
Definition: blobbox.h:742
static const double kXHeightFraction
Definition: ccstruct.h:35
double textord_excess_blobsize
Definition: makerow.cpp:86
float max_blob_size
Definition: blobbox.h:749
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:738
static const double kAscenderFraction
Definition: ccstruct.h:36
void plot_box_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour)
Definition: drawtord.cpp:70
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:739
static const double kDescenderFraction
Definition: ccstruct.h:34
void tesseract::Textord::find_components ( Pix *  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 208 of file tordmain.cpp.

// Extracts edges for the blocks of a page image and hands the resulting blobs
// to filter_blobs().
//   pix:       source image; its width and height are read with
//              pixGetWidth()/pixGetHeight().
//   blocks:    list iterated with a BLOCK_IT; extract_edges() is called on
//              each block that has no poly_block() or whose poly_block()
//              reports IsText().
//   to_blocks: passed to assign_blobs_to_blocks2() and then to filter_blobs().
// If either image dimension exceeds MAX_INT16 a message is printed and the
// function returns without processing anything.
209  {
210  int width = pixGetWidth(pix);
211  int height = pixGetHeight(pix);
212  if (width > MAX_INT16 || height > MAX_INT16) {
213  tprintf("Input image too large! (%d, %d)\n", width, height);
214  return; // Can't handle it.
215  }
216 
// NOTE(review): source line 217 is absent from this rendered listing. The
// reference list that follows the listing names set_global_loc_code and
// LOC_EDGE_PROG, neither of which appears in the visible lines, so the
// missing line presumably calls one with the other - confirm against
// tordmain.cpp.
218 
219  BLOCK_IT block_it(blocks); // iterator
220  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
221  block_it.forward()) {
222  BLOCK* block = block_it.data();
223  if (block->poly_block() == NULL || block->poly_block()->IsText()) {
224  extract_edges(pix, block);
225  }
226  }
227 
228  assign_blobs_to_blocks2(pix, blocks, to_blocks);
// The page top-right corner is built from the image size; the testing_on
// argument of filter_blobs() is the negation of textord_test_landscape.
229  ICOORD page_tr(width, height);
230  filter_blobs(page_tr, to_blocks, !textord_test_landscape);
231 }
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:335
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on)
Definition: tordmain.cpp:239
#define NULL
Definition: host.h:144
void assign_blobs_to_blocks2(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: tordmain.cpp:156
bool textord_test_landscape
Definition: makerow.cpp:51
void set_global_loc_code(int loc_code)
Definition: globaloc.cpp:100
POLY_BLOCK * poly_block() const
Definition: pdblock.h:62
Definition: ocrblock.h:31
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
integer coordinate
Definition: points.h:30
#define MAX_INT16
Definition: host.h:119
bool IsText() const
Definition: polyblk.h:54
#define LOC_EDGE_PROG
Definition: errcode.h:44
ROW * tesseract::Textord::make_blob_words ( TO_ROW * row,
FCOORD  rotation 
)

Definition at line 1183 of file tospace.cpp.

// Builds a ROW from the blobs of a TO_ROW, making one WERD per blob group.
//   row:      source row; its blob_list(), line_m(), line_c(), kern_size and
//             space_size are read.
//   rotation: not referenced anywhere in this body.
// Returns the newly allocated ROW, or NULL when row's blob list is empty.
// A group is a blob followed by any blobs flagged joined_to_prev(); every
// word is constructed with a blanks count of 1. coeffs[] is filled in but not
// read again within this function.
1186  {
1187  bool bol; // start of line
1188  ROW *real_row; // output row
1189  C_OUTLINE_IT cout_it;
1190  C_BLOB_LIST cblobs;
1191  C_BLOB_IT cblob_it = &cblobs;
1192  WERD_LIST words;
1193  WERD_IT word_it; // new words
1194  WERD *word; // new word
1195  double coeffs[3]; // quadratic
1196  BLOBNBOX *bblob; // current blob
1197  TBOX blob_box; // bounding box
1198  BLOBNBOX_IT box_it; // iterator
1199  inT16 word_count = 0;
1200 
1201  cblob_it.set_to_list(&cblobs);
1202  box_it.set_to_list(row->blob_list());
1203  word_it.set_to_list(&words);
1204  bol = TRUE;
1205  if (!box_it.empty()) {
1206 
1207  do {
1208  bblob = box_it.data();
1209  blob_box = bblob->bounding_box();
// A joined blob contributes its outlines to the C_BLOB that cblob_it
// currently points at, after which its own C_BLOB is deleted; any other blob
// is appended to cblobs as a new entry.
1210  if (bblob->joined_to_prev()) {
1211  if (bblob->cblob() != NULL) {
1212  cout_it.set_to_list(cblob_it.data()->out_list());
1213  cout_it.move_to_last();
1214  cout_it.add_list_after(bblob->cblob()->out_list());
1215  delete bblob->cblob();
1216  }
1217  } else {
1218  if (bblob->cblob() != NULL)
1219  cblob_it.add_after_then_move(bblob->cblob());
1220  }
1221  box_it.forward(); // next one
1222  bblob = box_it.data();
1223  blob_box = bblob->bounding_box();
1224 
// The blob now under the iterator is not joined to its predecessor, so the
// blobs collected so far are turned into a word.
1225  if (!bblob->joined_to_prev() && !cblobs.empty()) {
1226  word = new WERD(&cblobs, 1, NULL);
1227  word_count++;
1228  word_it.add_after_then_move(word);
1229  if (bol) {
1230  word->set_flag(W_BOL, TRUE);
1231  bol = FALSE;
1232  }
1233  if (box_it.at_first()) { // at end of line
1234  word->set_flag(W_EOL, TRUE);
1235  }
1236  }
1237  }
1238  while (!box_it.at_first()); // until back at start
1239  /* Setup the row with created words. */
1240  coeffs[0] = 0;
1241  coeffs[1] = row->line_m();
1242  coeffs[2] = row->line_c();
1243  real_row = new ROW(row, (inT16) row->kern_size, (inT16) row->space_size);
1244  word_it.set_to_list(real_row->word_list());
1245  //put words in row
1246  word_it.add_list_after(&words);
1247  real_row->recalc_bounding_box();
1248  if (tosp_debug_level > 4) {
1249  tprintf ("Row:Made %d words in row ((%d,%d)(%d,%d))\n",
1250  word_count,
1251  real_row->bounding_box().left(),
1252  real_row->bounding_box().bottom(),
1253  real_row->bounding_box().right(),
1254  real_row->bounding_box().top());
1255  }
1256  return real_row;
1257  }
1258  return NULL;
1259 }
float line_m() const
Definition: blobbox.h:542
C_BLOB * cblob() const
Definition: blobbox.h:245
const TBOX & bounding_box() const
Definition: blobbox.h:208
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:42
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
float line_c() const
Definition: blobbox.h:545
Definition: rect.h:29
#define FALSE
Definition: capi.h:28
inT16 right() const
Definition: rect.h:74
float space_size
Definition: blobbox.h:639
void recalc_bounding_box()
Definition: ocrrow.cpp:91
float kern_size
Definition: blobbox.h:638
Definition: ocrrow.h:32
Definition: werd.h:35
bool joined_to_prev() const
Definition: blobbox.h:233
inT16 top() const
Definition: rect.h:53
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:123
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
Definition: werd.h:60
short inT16
Definition: host.h:100
TBOX bounding_box() const
Definition: ocrrow.h:85
WERD_LIST * word_list()
Definition: ocrrow.h:52
Definition: werd.h:36
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
#define TRUE
Definition: capi.h:27
inT16 bottom() const
Definition: rect.h:60
ROW * tesseract::Textord::make_prop_words ( TO_ROW * row,
FCOORD  rotation 
)

Definition at line 886 of file tospace.cpp.

889  {
890  BOOL8 bol; //start of line
891  /* prev_ values are for start of word being built. non prev_ values are for
892  the gap between the word being built and the next one. */
893  BOOL8 prev_fuzzy_sp; //probably space
894  BOOL8 prev_fuzzy_non; //probably not
895  uinT8 prev_blanks; //in front of word
896  BOOL8 fuzzy_sp; //probably space
897  BOOL8 fuzzy_non; //probably not
898  uinT8 blanks; //in front of word
899  BOOL8 prev_gap_was_a_space = FALSE;
900  BOOL8 break_at_next_gap = FALSE;
901  ROW *real_row; //output row
902  C_OUTLINE_IT cout_it;
903  C_BLOB_LIST cblobs;
904  C_BLOB_IT cblob_it = &cblobs;
905  WERD_LIST words;
906  WERD_IT word_it; //new words
907  WERD *word; //new word
908  WERD_IT rep_char_it; //repeated char words
909  inT32 next_rep_char_word_right = MAX_INT32;
910  float repetition_spacing; //gap between repetitions
911  inT32 xstarts[2]; //row ends
912  double coeffs[3]; //quadratic
913  inT32 prev_x; //end of prev blob
914  BLOBNBOX *bblob; //current blob
915  TBOX blob_box; //bounding box
916  BLOBNBOX_IT box_it; //iterator
917  TBOX prev_blob_box;
918  TBOX next_blob_box;
919  inT16 prev_gap = MAX_INT16;
920  inT16 current_gap = MAX_INT16;
921  inT16 next_gap = MAX_INT16;
922  inT16 prev_within_xht_gap = MAX_INT16;
923  inT16 current_within_xht_gap = MAX_INT16;
924  inT16 next_within_xht_gap = MAX_INT16;
925  inT16 word_count = 0;
926 
927  rep_char_it.set_to_list (&(row->rep_words));
928  if (!rep_char_it.empty ()) {
929  next_rep_char_word_right =
930  rep_char_it.data ()->bounding_box ().right ();
931  }
932 
933  prev_x = -MAX_INT16;
934  cblob_it.set_to_list (&cblobs);
935  box_it.set_to_list (row->blob_list ());
936  word_it.set_to_list (&words);
937  bol = TRUE;
938  prev_blanks = 0;
939  prev_fuzzy_sp = FALSE;
940  prev_fuzzy_non = FALSE;
941  if (!box_it.empty ()) {
942  xstarts[0] = box_it.data ()->bounding_box ().left ();
943  if (xstarts[0] > next_rep_char_word_right) {
944  /* We need to insert a repeated char word at the start of the row */
945  word = rep_char_it.extract ();
946  word_it.add_after_then_move (word);
947  /* Set spaces before repeated char word */
948  word->set_flag (W_BOL, TRUE);
949  bol = FALSE;
950  word->set_blanks (0);
951  //NO uncertainty
952  word->set_flag (W_FUZZY_SP, FALSE);
953  word->set_flag (W_FUZZY_NON, FALSE);
954  xstarts[0] = word->bounding_box ().left ();
955  /* Set spaces after repeated char word (and leave current word set) */
956  repetition_spacing = find_mean_blob_spacing (word);
957  current_gap = box_it.data ()->bounding_box ().left () -
958  next_rep_char_word_right;
959  current_within_xht_gap = current_gap;
960  if (current_gap > tosp_rep_space * repetition_spacing) {
961  prev_blanks = (uinT8) floor (current_gap / row->space_size);
962  if (prev_blanks < 1)
963  prev_blanks = 1;
964  }
965  else
966  prev_blanks = 0;
967  if (tosp_debug_level > 5)
968  tprintf ("Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
969  box_it.data ()->bounding_box ().left (),
970  box_it.data ()->bounding_box ().bottom (),
971  repetition_spacing, current_gap);
972  prev_fuzzy_sp = FALSE;
973  prev_fuzzy_non = FALSE;
974  if (rep_char_it.empty ()) {
975  next_rep_char_word_right = MAX_INT32;
976  }
977  else {
978  rep_char_it.forward ();
979  next_rep_char_word_right =
980  rep_char_it.data ()->bounding_box ().right ();
981  }
982  }
983 
984  peek_at_next_gap(row,
985  box_it,
986  next_blob_box,
987  next_gap,
988  next_within_xht_gap);
989  do {
990  bblob = box_it.data ();
991  blob_box = bblob->bounding_box ();
992  if (bblob->joined_to_prev ()) {
993  if (bblob->cblob () != NULL) {
994  cout_it.set_to_list (cblob_it.data ()->out_list ());
995  cout_it.move_to_last ();
996  cout_it.add_list_after (bblob->cblob ()->out_list ());
997  delete bblob->cblob ();
998  }
999  } else {
1000  if (bblob->cblob() != NULL)
1001  cblob_it.add_after_then_move (bblob->cblob ());
1002  prev_x = blob_box.right ();
1003  }
1004  box_it.forward (); //next one
1005  bblob = box_it.data ();
1006  blob_box = bblob->bounding_box ();
1007 
1008  if (!bblob->joined_to_prev() && bblob->cblob() != NULL) {
1009  /* Real Blob - not multiple outlines or pre-chopped */
1010  prev_gap = current_gap;
1011  prev_within_xht_gap = current_within_xht_gap;
1012  prev_blob_box = next_blob_box;
1013  current_gap = next_gap;
1014  current_within_xht_gap = next_within_xht_gap;
1015  peek_at_next_gap(row,
1016  box_it,
1017  next_blob_box,
1018  next_gap,
1019  next_within_xht_gap);
1020 
1021  inT16 prev_gap_arg = prev_gap;
1022  inT16 next_gap_arg = next_gap;
1023  if (tosp_only_use_xht_gaps) {
1024  prev_gap_arg = prev_within_xht_gap;
1025  next_gap_arg = next_within_xht_gap;
1026  }
1027  // Decide if a word-break should be inserted
1028  if (blob_box.left () > next_rep_char_word_right ||
1029  make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box,
1030  current_gap, current_within_xht_gap,
1031  next_blob_box, next_gap_arg,
1032  blanks, fuzzy_sp, fuzzy_non,
1033  prev_gap_was_a_space,
1034  break_at_next_gap) ||
1035  box_it.at_first()) {
1036  /* Form a new word out of the blobs collected */
1037  word = new WERD (&cblobs, prev_blanks, NULL);
1038  word_count++;
1039  word_it.add_after_then_move (word);
1040  if (bol) {
1041  word->set_flag (W_BOL, TRUE);
1042  bol = FALSE;
1043  }
1044  if (prev_fuzzy_sp)
1045  //probably space
1046  word->set_flag (W_FUZZY_SP, TRUE);
1047  else if (prev_fuzzy_non)
1048  word->set_flag (W_FUZZY_NON, TRUE);
1049  //probably not
1050 
1051  if (blob_box.left () > next_rep_char_word_right) {
1052  /* We need to insert a repeated char word */
1053  word = rep_char_it.extract ();
1054  word_it.add_after_then_move (word);
1055 
1056  /* Set spaces before repeated char word */
1057  repetition_spacing = find_mean_blob_spacing (word);
1058  current_gap = word->bounding_box ().left () - prev_x;
1059  current_within_xht_gap = current_gap;
1060  if (current_gap > tosp_rep_space * repetition_spacing) {
1061  blanks =
1062  (uinT8) floor (current_gap / row->space_size);
1063  if (blanks < 1)
1064  blanks = 1;
1065  }
1066  else
1067  blanks = 0;
1068  if (tosp_debug_level > 5)
1069  tprintf
1070  ("Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
1071  word->bounding_box ().left (),
1072  word->bounding_box ().bottom (),
1073  repetition_spacing, current_gap, blanks);
1074  word->set_blanks (blanks);
1075  //NO uncertainty
1076  word->set_flag (W_FUZZY_SP, FALSE);
1077  word->set_flag (W_FUZZY_NON, FALSE);
1078 
1079  /* Set spaces after repeated char word (and leave current word set) */
1080  current_gap =
1081  blob_box.left () - next_rep_char_word_right;
1082  if (current_gap > tosp_rep_space * repetition_spacing) {
1083  blanks = (uinT8) (current_gap / row->space_size);
1084  if (blanks < 1)
1085  blanks = 1;
1086  }
1087  else
1088  blanks = 0;
1089  if (tosp_debug_level > 5)
1090  tprintf (" Rgap:%d (%d blanks)\n",
1091  current_gap, blanks);
1092  fuzzy_sp = FALSE;
1093  fuzzy_non = FALSE;
1094 
1095  if (rep_char_it.empty ()) {
1096  next_rep_char_word_right = MAX_INT32;
1097  }
1098  else {
1099  rep_char_it.forward ();
1100  next_rep_char_word_right =
1101  rep_char_it.data ()->bounding_box ().right ();
1102  }
1103  }
1104 
1105  if (box_it.at_first () && rep_char_it.empty ()) {
1106  //at end of line
1107  word->set_flag (W_EOL, TRUE);
1108  xstarts[1] = prev_x;
1109  }
1110  else {
1111  prev_blanks = blanks;
1112  prev_fuzzy_sp = fuzzy_sp;
1113  prev_fuzzy_non = fuzzy_non;
1114  }
1115  }
1116  }
1117  }
1118  while (!box_it.at_first ()); //until back at start
1119 
1120  /* Insert any further repeated char words */
1121  while (!rep_char_it.empty ()) {
1122  word = rep_char_it.extract ();
1123  word_it.add_after_then_move (word);
1124 
1125  /* Set spaces before repeated char word */
1126  repetition_spacing = find_mean_blob_spacing (word);
1127  current_gap = word->bounding_box ().left () - prev_x;
1128  if (current_gap > tosp_rep_space * repetition_spacing) {
1129  blanks = (uinT8) floor (current_gap / row->space_size);
1130  if (blanks < 1)
1131  blanks = 1;
1132  }
1133  else
1134  blanks = 0;
1135  if (tosp_debug_level > 5)
1136  tprintf
1137  ("Repch wd at EOL (%d,%d). rep spacing %d; Lgap:%d (%d blanks)\n",
1138  word->bounding_box ().left (), word->bounding_box ().bottom (),
1139  repetition_spacing, current_gap, blanks);
1140  word->set_blanks (blanks);
1141  //NO uncertainty
1142  word->set_flag (W_FUZZY_SP, FALSE);
1143  word->set_flag (W_FUZZY_NON, FALSE);
1144  prev_x = word->bounding_box ().right ();
1145  if (rep_char_it.empty ()) {
1146  //at end of line
1147  word->set_flag (W_EOL, TRUE);
1148  xstarts[1] = prev_x;
1149  }
1150  else {
1151  rep_char_it.forward ();
1152  }
1153  }
1154  coeffs[0] = 0;
1155  coeffs[1] = row->line_m ();
1156  coeffs[2] = row->line_c ();
1157  real_row = new ROW (row,
1158  (inT16) row->kern_size, (inT16) row->space_size);
1159  word_it.set_to_list (real_row->word_list ());
1160  //put words in row
1161  word_it.add_list_after (&words);
1162  real_row->recalc_bounding_box ();
1163 
1164  if (tosp_debug_level > 4) {
1165  tprintf ("Row: Made %d words in row ((%d,%d)(%d,%d))\n",
1166  word_count,
1167  real_row->bounding_box ().left (),
1168  real_row->bounding_box ().bottom (),
1169  real_row->bounding_box ().right (),
1170  real_row->bounding_box ().top ());
1171  }
1172  return real_row;
1173  }
1174  return NULL;
1175 }
TBOX bounding_box()
Definition: werd.cpp:164
void set_blanks(uinT8 new_blanks)
Definition: werd.h:107
float line_m() const
Definition: blobbox.h:542
C_BLOB * cblob() const
Definition: blobbox.h:245
const TBOX & bounding_box() const
Definition: blobbox.h:208
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:42
unsigned char BOOL8
Definition: host.h:113
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
float line_c() const
Definition: blobbox.h:545
int inT32
Definition: host.h:102
Definition: rect.h:29
double tosp_rep_space
Definition: textord.h:294
#define FALSE
Definition: capi.h:28
inT16 right() const
Definition: rect.h:74
float space_size
Definition: blobbox.h:639
void recalc_bounding_box()
Definition: ocrrow.cpp:91
#define MAX_INT32
Definition: host.h:120
WERD_LIST rep_words
Definition: blobbox.h:640
float kern_size
Definition: blobbox.h:638
Definition: ocrrow.h:32
Definition: werd.h:35
bool joined_to_prev() const
Definition: blobbox.h:233
inT16 top() const
Definition: rect.h:53
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:123
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
Definition: werd.h:60
short inT16
Definition: host.h:100
TBOX bounding_box() const
Definition: ocrrow.h:85
WERD_LIST * word_list()
Definition: ocrrow.h:52
unsigned char uinT8
Definition: host.h:99
#define MAX_INT16
Definition: host.h:119
bool tosp_only_use_xht_gaps
Definition: textord.h:248
Definition: werd.h:36
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
#define TRUE
Definition: capi.h:27
inT16 bottom() const
Definition: rect.h:60
void tesseract::Textord::set_use_cjk_fp_model ( bool  flag)
inline

Definition at line 56 of file textord.h.

56  {
57  use_cjk_fp_model_ = flag;
58  }
void tesseract::Textord::TextordPage ( PageSegMode  pageseg_mode,
int  width,
int  height,
Pix *  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 265 of file textord.cpp.

267  {
268  page_tr_.set_x(width);
269  page_tr_.set_y(height);
270  if (to_blocks->empty()) {
271  // AutoPageSeg was not used, so we need to find_components first.
272  find_components(pix, blocks, to_blocks);
273  } else {
274  // AutoPageSeg does not need to find_components as it did that already.
275  // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
276  filter_blobs(page_tr_, to_blocks, true);
277  }
278 
279  ASSERT_HOST(!to_blocks->empty());
280  if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
281  const FCOORD anticlockwise90(0.0f, 1.0f);
282  const FCOORD clockwise90(0.0f, -1.0f);
283  TO_BLOCK_IT it(to_blocks);
284  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
285  TO_BLOCK* to_block = it.data();
286  BLOCK* block = to_block->block;
287  // Create a fake poly_block in block from its bounding box.
288  block->set_poly_block(new POLY_BLOCK(block->bounding_box(),
289  PT_VERTICAL_TEXT));
290  // Rotate the to_block along with its contained block and blobnbox lists.
291  to_block->rotate(anticlockwise90);
292  // Set the block's rotation values to obey the convention followed in
293  // layout analysis for vertical text.
294  block->set_re_rotation(clockwise90);
295  block->set_classify_rotation(clockwise90);
296  }
297  }
298 
299  TO_BLOCK_IT to_block_it(to_blocks);
300  TO_BLOCK* to_block = to_block_it.data();
301  // Make the rows in the block.
302  float gradient;
303  // Do it the old fashioned way.
304  if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
305  gradient = make_rows(page_tr_, to_blocks);
306  } else {
307  // SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
308  gradient = make_single_row(page_tr_, to_block, to_blocks);
309  }
310  // Now fit baselines. For now only old mode is available.
311  fit_rows(gradient, page_tr_, to_blocks);
312  // Now make the words in the lines.
313  if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
314  // SINGLE_LINE uses the old word maker on the single line.
315  make_words(this, page_tr_, gradient, blocks, to_blocks);
316  } else {
317  // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
318  // single word, and in SINGLE_CHAR mode, all the outlines
319  // go in a single blob.
320  TO_BLOCK* to_block = to_block_it.data();
321  make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
322  to_block->get_rows(), to_block->block->row_list());
323  }
324  cleanup_blocks(blocks); // Remove empties.
325 
326  // Compute the margins for each row in the block, to be used later for
327  // paragraph detection.
328  BLOCK_IT b_it(blocks);
329  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
330  b_it.data()->compute_row_margins();
331  }
332 #ifndef GRAPHICS_DISABLED
333  close_to_win();
334 #endif
335 }
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:197
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on)
Definition: tordmain.cpp:239
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:66
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:121
#define f(xc, yc)
Definition: imgscale.cpp:39
#define PSM_LINE_FIND_ENABLED(pageseg_mode)
Definition: publictypes.h:175
void set_classify_rotation(const FCOORD &rotation)
Definition: ocrblock.h:148
void rotate(const FCOORD &rotation)
Definition: blobbox.h:682
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: tordmain.cpp:208
Treat the image as a single character.
Definition: publictypes.h:160
void set_y(inT16 yin)
rewrite function
Definition: points.h:65
Definition: ocrblock.h:31
void close_to_win()
Definition: drawtord.cpp:56
#define PSM_WORD_FIND_ENABLED(pageseg_mode)
Definition: publictypes.h:177
void set_re_rotation(const FCOORD &rotation)
Definition: ocrblock.h:142
Definition: points.h:189
void set_x(inT16 xin)
rewrite function
Definition: points.h:61
float make_single_row(ICOORD page_tr, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:167
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
#define ASSERT_HOST(x)
Definition: errcode.h:84
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:70
BLOCK * block
Definition: blobbox.h:740
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition: wordseg.cpp:61
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: wordseg.cpp:105
void tesseract::Textord::to_spacing ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks 
)

Definition at line 35 of file tospace.cpp.

38  {
39  TO_BLOCK_IT block_it; //iterator
40  TO_BLOCK *block; //current block;
41  TO_ROW_IT row_it; //row iterator
42  TO_ROW *row; //current row
43  int block_index; //block number
44  int row_index; //row number
45  //estimated width of real spaces for whole block
46  inT16 block_space_gap_width;
47  //estimated width of non space gaps for whole block
48  inT16 block_non_space_gap_width;
49  BOOL8 old_text_ord_proportional;//old fixed/prop result
50  GAPMAP *gapmap = NULL; //map of big vert gaps in blk
51 
52  block_it.set_to_list (blocks);
53  block_index = 1;
54  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
55  block_it.forward ()) {
56  block = block_it.data ();
57  gapmap = new GAPMAP (block);
58  block_spacing_stats(block,
59  gapmap,
60  old_text_ord_proportional,
61  block_space_gap_width,
62  block_non_space_gap_width);
63  // Make sure relative values of block-level space and non-space gap
64  // widths are reasonable. The ratio of 1:3 is also used in
65  // block_spacing_stats, to correct the block_space_gap_width
66  // Useful for arabic and hindi, when the non-space gap width is
67  // often over-estimated and should not be trusted. A similar ratio
68  // is found in block_spacing_stats.
69  if (tosp_old_to_method && tosp_old_to_constrain_sp_kn &&
70  (float) block_space_gap_width / block_non_space_gap_width < 3.0) {
71  block_non_space_gap_width = (inT16) floor (block_space_gap_width / 3.0);
72  }
73  row_it.set_to_list (block->get_rows ());
74  row_index = 1;
75  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
76  row = row_it.data ();
77  if ((row->pitch_decision == PITCH_DEF_PROP) ||
78  (row->pitch_decision == PITCH_CORR_PROP)) {
79  if ((tosp_debug_level > 0) && !old_text_ord_proportional)
80  tprintf ("Block %d Row %d: Now Proportional\n",
81  block_index, row_index);
82  row_spacing_stats(row,
83  gapmap,
84  block_index,
85  row_index,
86  block_space_gap_width,
87  block_non_space_gap_width);
88  }
89  else {
90  if ((tosp_debug_level > 0) && old_text_ord_proportional)
91  tprintf
92  ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
93  block_index, row_index, row->pitch_decision,
94  row->fixed_pitch);
95  }
96 #ifndef GRAPHICS_DISABLED
97  if (textord_show_initial_words)
98  plot_word_decisions (to_win, (inT16) row->fixed_pitch, row);
99 #endif
100  row_index++;
101  }
102  delete gapmap;
103  block_index++;
104  }
105 }
EXTERN ScrollView * to_win
Definition: drawtord.cpp:40
unsigned char BOOL8
Definition: host.h:113
void plot_word_decisions(ScrollView *win, inT16 pitch, TO_ROW *row)
Definition: drawtord.cpp:250
#define NULL
Definition: host.h:144
Definition: gap_map.h:7
PITCH_TYPE pitch_decision
Definition: blobbox.h:622
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
bool tosp_old_to_method
Definition: textord.h:216
EXTERN bool textord_show_initial_words
Definition: tovars.cpp:26
short inT16
Definition: host.h:100
bool tosp_old_to_constrain_sp_kn
Definition: textord.h:219
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
float fixed_pitch
Definition: blobbox.h:623
bool tesseract::Textord::use_cjk_fp_model ( ) const
inline

Definition at line 53 of file textord.h.

53  {
54  return use_cjk_fp_model_;
55  }

Member Data Documentation

double tesseract::Textord::textord_blob_size_bigile = 95

"Percentile for large blobs"

Definition at line 330 of file textord.h.

double tesseract::Textord::textord_blob_size_smallile = 20

"Percentile for small blobs"

Definition at line 333 of file textord.h.

double tesseract::Textord::textord_blshift_maxshift = 0.00

"Max baseline shift"

Definition at line 350 of file textord.h.

double tesseract::Textord::textord_blshift_xfraction = 9.99

"Min size of baseline shift"

Definition at line 351 of file textord.h.

double tesseract::Textord::textord_initialasc_ile = 0.90

"Ile of sizes for xheight guess"

Definition at line 335 of file textord.h.

double tesseract::Textord::textord_initialx_ile = 0.75

"Ile of sizes for xheight guess"

Definition at line 334 of file textord.h.

int tesseract::Textord::textord_max_noise_size = 7

"Pixel size of noise"

Definition at line 329 of file textord.h.

bool tesseract::Textord::textord_no_rejects = false

"Don't remove noise blobs"

Definition at line 326 of file textord.h.

double tesseract::Textord::textord_noise_area_ratio = 0.7

"Fraction of bounding box for noise"

Definition at line 332 of file textord.h.

bool tesseract::Textord::textord_noise_debug = FALSE

"Debug row garbage detector"

Definition at line 349 of file textord.h.

double tesseract::Textord::textord_noise_hfract = 1.0/64

"Height fraction to discard outlines as speckle noise"

Definition at line 346 of file textord.h.

double tesseract::Textord::textord_noise_normratio = 2.0

"Dot to norm ratio for deletion"

Definition at line 339 of file textord.h.

bool tesseract::Textord::textord_noise_rejrows = true

"Reject noise-like rows"

Definition at line 341 of file textord.h.

bool tesseract::Textord::textord_noise_rejwords = true

"Reject noise-like words"

Definition at line 340 of file textord.h.

double tesseract::Textord::textord_noise_rowratio = 6.0

"Dot to norm ratio for deletion"

Definition at line 348 of file textord.h.

int tesseract::Textord::textord_noise_sizefraction = 10

"Fraction of size for maxima"

Definition at line 336 of file textord.h.

double tesseract::Textord::textord_noise_sizelimit = 0.5

"Fraction of x for big t count"

Definition at line 337 of file textord.h.

int tesseract::Textord::textord_noise_sncount = 1

"super norm blobs to save row"

Definition at line 347 of file textord.h.

double tesseract::Textord::textord_noise_sxfract = 0.4

"xh fract width error for norm blobs"

Definition at line 344 of file textord.h.

double tesseract::Textord::textord_noise_syfract = 0.2

"xh fract error for norm blobs"

Definition at line 342 of file textord.h.

int tesseract::Textord::textord_noise_translimit = 16

"Transitions for normal blob"

Definition at line 338 of file textord.h.

bool tesseract::Textord::textord_show_blobs = false

"Display unsorted blobs"

Definition at line 327 of file textord.h.

bool tesseract::Textord::textord_show_boxes = false

"Display boxes"

Definition at line 328 of file textord.h.

bool tesseract::Textord::textord_single_height_mode = false

"Script has no xheight, so use a single mode for horizontal text"

Definition at line 214 of file textord.h.

bool tesseract::Textord::tosp_all_flips_fuzzy = false

"Pass ANY flip to context?"

Definition at line 240 of file textord.h.

bool tesseract::Textord::tosp_block_use_cert_spaces = true

"Only stat OBVIOUS spaces"

Definition at line 230 of file textord.h.

int tesseract::Textord::tosp_debug_level = 0

"Debug data"

Definition at line 255 of file textord.h.

double tesseract::Textord::tosp_dont_fool_with_small_kerns = -1

"Limit use of xht gap with odd small kns"

Definition at line 318 of file textord.h.

double tesseract::Textord::tosp_enough_small_gaps = 0.65

"Fract of kerns reqd for isolated row stats"

Definition at line 296 of file textord.h.

int tesseract::Textord::tosp_enough_space_samples_for_median = 3

"or should we use mean"

Definition at line 257 of file textord.h.

int tesseract::Textord::tosp_few_samples = 40

"No.gaps reqd with 1 large gap to treat as a table"

Definition at line 261 of file textord.h.

double tesseract::Textord::tosp_flip_caution = 0.0

"Dont autoflip kn to sp when large separation"

Definition at line 314 of file textord.h.

bool tesseract::Textord::tosp_flip_fuzz_kn_to_sp = true

"Default flip"

Definition at line 251 of file textord.h.

bool tesseract::Textord::tosp_flip_fuzz_sp_to_kn = true

"Default flip"

Definition at line 252 of file textord.h.

bool tesseract::Textord::tosp_force_wordbreak_on_punct = false

"Force word breaks on punct to break long lines in non-space delimited langs"

Definition at line 224 of file textord.h.

double tesseract::Textord::tosp_fuzzy_kn_fraction = 0.5

"New fuzzy kn alg"

Definition at line 303 of file textord.h.

bool tesseract::Textord::tosp_fuzzy_limit_all = true

"Dont restrict kn->sp fuzzy limit to tables"

Definition at line 242 of file textord.h.

double tesseract::Textord::tosp_fuzzy_sp_fraction = 0.5

"New fuzzy sp alg"

Definition at line 304 of file textord.h.

double tesseract::Textord::tosp_fuzzy_space_factor = 0.6

"Fract of xheight for fuzz sp"

Definition at line 280 of file textord.h.

double tesseract::Textord::tosp_fuzzy_space_factor1 = 0.5

"Fract of xheight for fuzz sp"

Definition at line 282 of file textord.h.

double tesseract::Textord::tosp_fuzzy_space_factor2 = 0.72

"Fract of xheight for fuzz sp"

Definition at line 284 of file textord.h.

double tesseract::Textord::tosp_gap_factor = 0.83

"gap ratio to flip sp->kern"

Definition at line 285 of file textord.h.

double tesseract::Textord::tosp_ignore_big_gaps = -1

"xht multiplier"

Definition at line 292 of file textord.h.

double tesseract::Textord::tosp_ignore_very_big_gaps = 3.5

"xht multiplier"

Definition at line 293 of file textord.h.

bool tesseract::Textord::tosp_improve_thresh = false

"Enable improvement heuristic"

Definition at line 254 of file textord.h.

double tesseract::Textord::tosp_init_guess_kn_mult = 2.2

"Thresh guess - mult kn by this"

Definition at line 308 of file textord.h.

double tesseract::Textord::tosp_init_guess_xht_mult = 0.28

"Thresh guess - mult xht by this"

Definition at line 310 of file textord.h.

double tesseract::Textord::tosp_kern_gap_factor1 = 2.0

"gap ratio to flip kern->sp"

Definition at line 287 of file textord.h.

double tesseract::Textord::tosp_kern_gap_factor2 = 1.3

"gap ratio to flip kern->sp"

Definition at line 289 of file textord.h.

double tesseract::Textord::tosp_kern_gap_factor3 = 2.5

"gap ratio to flip kern->sp"

Definition at line 291 of file textord.h.

double tesseract::Textord::tosp_large_kerning = 0.19

"Limit use of xht gap with large kns"

Definition at line 316 of file textord.h.

double tesseract::Textord::tosp_max_sane_kn_thresh = 5.0

"Multiplier on kn to limit thresh"

Definition at line 312 of file textord.h.

double tesseract::Textord::tosp_min_sane_kn_sp = 1.5

"Dont trust spaces less than this time kn"

Definition at line 306 of file textord.h.

double tesseract::Textord::tosp_narrow_aspect_ratio = 0.48

"narrow if w/h less than this"

Definition at line 275 of file textord.h.

bool tesseract::Textord::tosp_narrow_blobs_not_cert = true

"Only stat OBVIOUS spaces"

Definition at line 234 of file textord.h.

double tesseract::Textord::tosp_narrow_fraction = 0.3

"Fract of xheight for narrow"

Definition at line 273 of file textord.h.

double tesseract::Textord::tosp_near_lh_edge = 0

"Dont reduce box if the top left is non blank"

Definition at line 320 of file textord.h.

double tesseract::Textord::tosp_old_sp_kn_th_factor = 2.0

"Factor for defining space threshold in terms of space and kern sizes"

Definition at line 267 of file textord.h.

bool tesseract::Textord::tosp_old_to_bug_fix = false

"Fix suspected bug in old code"

Definition at line 228 of file textord.h.

bool tesseract::Textord::tosp_old_to_constrain_sp_kn = false

"Constrain relative values of inter and intra-word gaps for old_to_method."

Definition at line 219 of file textord.h.

bool tesseract::Textord::tosp_old_to_method = false

"Space stats use prechopping?"

Definition at line 216 of file textord.h.

bool tesseract::Textord::tosp_only_small_gaps_for_kern = false

"Better guess"

Definition at line 239 of file textord.h.

bool tesseract::Textord::tosp_only_use_prop_rows = true

"Block stats to use fixed pitch rows?"

Definition at line 221 of file textord.h.

bool tesseract::Textord::tosp_only_use_xht_gaps = false

"Only use within xht gap for wd breaks"

Definition at line 248 of file textord.h.

double tesseract::Textord::tosp_pass_wide_fuzz_sp_to_context = 0.75

"How wide fuzzies need context"

Definition at line 324 of file textord.h.

bool tesseract::Textord::tosp_recovery_isolated_row_stats = true

"Use row alone when inadequate cert spaces"

Definition at line 238 of file textord.h.

int tesseract::Textord::tosp_redo_kern_limit = 10

"No.samples reqd to reestimate for row"

Definition at line 259 of file textord.h.

double tesseract::Textord::tosp_rep_space = 1.6

"rep gap multiplier for space"

Definition at line 294 of file textord.h.

bool tesseract::Textord::tosp_row_use_cert_spaces = true

"Only stat OBVIOUS spaces"

Definition at line 232 of file textord.h.

bool tesseract::Textord::tosp_row_use_cert_spaces1 = true

"Only stat OBVIOUS spaces"

Definition at line 236 of file textord.h.

bool tesseract::Textord::tosp_rule_9_test_punct = false

"Dont chng kn to space next to punct"

Definition at line 250 of file textord.h.

int tesseract::Textord::tosp_sanity_method = 1

"How to avoid being silly"

Definition at line 264 of file textord.h.

int tesseract::Textord::tosp_short_row = 20

"No.gaps reqd with few cert spaces to use certs"

Definition at line 263 of file textord.h.

double tesseract::Textord::tosp_silly_kn_sp_gap = 0.2

"Dont let sp minus kn get too small"

Definition at line 322 of file textord.h.

bool tesseract::Textord::tosp_stats_use_xht_gaps = true

"Use within xht gap for wd breaks"

Definition at line 244 of file textord.h.

double tesseract::Textord::tosp_table_fuzzy_kn_sp_ratio = 3.0

"Fuzzy if less than this"

Definition at line 302 of file textord.h.

double tesseract::Textord::tosp_table_kn_sp_ratio = 2.25

"Min difference of kn & sp in table"

Definition at line 298 of file textord.h.

double tesseract::Textord::tosp_table_xht_sp_ratio = 0.33

"Expect spaces bigger than this"

Definition at line 300 of file textord.h.

double tesseract::Textord::tosp_threshold_bias1 = 0

"how far between kern and space?"

Definition at line 269 of file textord.h.

double tesseract::Textord::tosp_threshold_bias2 = 0

"how far between kern and space?"

Definition at line 271 of file textord.h.

bool tesseract::Textord::tosp_use_pre_chopping = false

"Space stats use prechopping?"

Definition at line 226 of file textord.h.

bool tesseract::Textord::tosp_use_xht_gaps = true

"Use within xht gap for wd breaks"

Definition at line 246 of file textord.h.

double tesseract::Textord::tosp_wide_aspect_ratio = 0.0

"wide if w/h less than this"

Definition at line 278 of file textord.h.

double tesseract::Textord::tosp_wide_fraction = 0.52

"Fract of xheight for wide"

Definition at line 276 of file textord.h.


The documentation for this class was generated from the following files: