Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
WERD Class Reference

#include <werd.h>

Inheritance diagram for WERD:
ELIST2_LINK

Public Member Functions

 WERD ()
 
 WERD (C_BLOB_LIST *blob_list, uinT8 blanks, const char *text)
 
 WERD (C_BLOB_LIST *blob_list, WERD *clone)
 
WERD * ConstructFromSingleBlob (bool bol, bool eol, C_BLOB *blob)
 
 ~WERD ()
 
WERD & operator= (const WERD &source)
 
WERD * ConstructWerdWithNewBlobs (C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs)
 
C_BLOB_LIST * rej_cblob_list ()
 
C_BLOB_LIST * cblob_list ()
 
uinT8 space ()
 
void set_blanks (uinT8 new_blanks)
 
int script_id () const
 
void set_script_id (int id)
 
TBOX bounding_box ()
 
const char * text () const
 
void set_text (const char *new_text)
 
BOOL8 flag (WERD_FLAGS mask) const
 
void set_flag (WERD_FLAGS mask, BOOL8 value)
 
BOOL8 display_flag (uinT8 flag) const
 
void set_display_flag (uinT8 flag, BOOL8 value)
 
WERD * shallow_copy ()
 
void move (const ICOORD vec)
 
void join_on (WERD *other)
 
void copy_on (WERD *other)
 
void print ()
 
void plot (ScrollView *window, ScrollView::Color colour)
 
void plot (ScrollView *window)
 
void plot_rej_blobs (ScrollView *window)
 
- Public Member Functions inherited from ELIST2_LINK
 ELIST2_LINK ()
 
 ELIST2_LINK (const ELIST2_LINK &)
 
void operator= (const ELIST2_LINK &)
 

Static Public Member Functions

static ScrollView::Color NextColor (ScrollView::Color colour)
 

Detailed Description

Definition at line 60 of file werd.h.

Constructor & Destructor Documentation

WERD::WERD ( )
inline

Definition at line 62 of file werd.h.

62 {}
WERD::WERD ( C_BLOB_LIST *  blob_list,
uinT8  blank_count,
const char *  text 
)

WERD::WERD

Constructor to build a WERD from a list of C_BLOBs.

Parameters:
blob_list — The C_BLOBs (in word order) are not copied; we take its elements and put them in our lists.
blank_count — blanks in front of the word.
text — correct text, outlives this WERD.

Definition at line 49 of file werd.cpp.

50  : blanks(blank_count),
51  flags(0),
52  script_id_(0),
53  correct(text) {
54  C_BLOB_IT start_it = blob_list;
55  C_BLOB_IT end_it = blob_list;
56  C_BLOB_IT rej_cblob_it = &rej_cblobs;
57  C_OUTLINE_IT c_outline_it;
58  inT16 inverted_vote = 0;
59  inT16 non_inverted_vote = 0;
60 
61  // Move blob_list's elements into cblobs.
62  while (!end_it.at_last())
63  end_it.forward();
64  cblobs.assign_to_sublist(&start_it, &end_it);
65 
66  /*
67  Set white on black flag for the WERD, moving any duff blobs onto the
68  rej_cblobs list.
69  First, walk the cblobs checking the inverse flag for each outline of each
70  cblob. If a cblob has inconsistent flag settings for its different
71  outlines, move the blob to the reject list. Otherwise, increment the
72  appropriate w-on-b or b-on-w vote for the word.
73 
74  Now set the inversion flag for the WERD by maximum vote.
75 
76  Walk the blobs again, moving any blob whose inversion flag does not agree
77  with the concencus onto the reject list.
78  */
79  start_it.set_to_list(&cblobs);
80  if (start_it.empty())
81  return;
82  for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
83  BOOL8 reject_blob = FALSE;
84  BOOL8 blob_inverted;
85 
86  c_outline_it.set_to_list(start_it.data()->out_list());
87  blob_inverted = c_outline_it.data()->flag(COUT_INVERSE);
88  for (c_outline_it.mark_cycle_pt();
89  !c_outline_it.cycled_list() && !reject_blob;
90  c_outline_it.forward()) {
91  reject_blob = c_outline_it.data()->flag(COUT_INVERSE) != blob_inverted;
92  }
93  if (reject_blob) {
94  rej_cblob_it.add_after_then_move(start_it.extract());
95  } else {
96  if (blob_inverted)
97  inverted_vote++;
98  else
99  non_inverted_vote++;
100  }
101  }
102 
103  flags.set_bit(W_INVERSE, (inverted_vote > non_inverted_vote));
104 
105  start_it.set_to_list(&cblobs);
106  if (start_it.empty())
107  return;
108  for (start_it.mark_cycle_pt(); !start_it.cycled_list(); start_it.forward()) {
109  c_outline_it.set_to_list(start_it.data()->out_list());
110  if (c_outline_it.data()->flag(COUT_INVERSE) != flags.bit(W_INVERSE))
111  rej_cblob_it.add_after_then_move(start_it.extract());
112  }
113 }
void set_bit(uinT8 bit_num, BOOL8 value)
Definition: bits16.h:47
unsigned char BOOL8
Definition: host.h:113
const char * text() const
Definition: werd.h:119
#define FALSE
Definition: capi.h:28
Definition: werd.h:44
short inT16
Definition: host.h:100
BOOL8 bit(uinT8 bit_num) const
Definition: bits16.h:56
WERD::WERD ( C_BLOB_LIST *  blob_list,
WERD *  clone 
)

WERD::WERD

Constructor to build a WERD from a list of C_BLOBs. The C_BLOBs are not copied so the source list is emptied.

Definition at line 123 of file werd.cpp.

125  : flags(clone->flags),
126  script_id_(clone->script_id_),
127  correct(clone->correct) {
128  C_BLOB_IT start_it = blob_list; // iterator
129  C_BLOB_IT end_it = blob_list; // another
130 
131  while (!end_it.at_last ())
132  end_it.forward (); //move to last
133  ((C_BLOB_LIST *) (&cblobs))->assign_to_sublist (&start_it, &end_it);
134  //move to our list
135  blanks = clone->blanks;
136  // fprintf(stderr,"Wrong constructor!!!!\n");
137 }
WERD::~WERD ( )
inline

Definition at line 78 of file werd.h.

78  {
79  }

Member Function Documentation

TBOX WERD::bounding_box ( )

WERD::bounding_box

Return the bounding box of the WERD. This is quite a mess to compute! ORIGINALLY, REJECT CBLOBS WERE EXCLUDED, however, this led to bugs when the words on the row were re-sorted. The original words were built with reject blobs included. The FUZZY SPACE flags were set accordingly. If ALL the blobs in a word are rejected the BB for the word is NULL, causing the sort to screw up, leading to the erroneous possibility of the first word in a row being marked as FUZZY space.

Definition at line 164 of file werd.cpp.

164  {
165  TBOX box; // box being built
166  C_BLOB_IT rej_cblob_it = &rej_cblobs; // rejected blobs
167 
168  for (rej_cblob_it.mark_cycle_pt(); !rej_cblob_it.cycled_list();
169  rej_cblob_it.forward()) {
170  box += rej_cblob_it.data()->bounding_box();
171  }
172 
173  C_BLOB_IT it = &cblobs; // blobs of WERD
174  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
175  box += it.data()->bounding_box();
176  }
177  return box;
178 }
Definition: rect.h:29
C_BLOB_LIST* WERD::cblob_list ( )
inline

Definition at line 100 of file werd.h.

100  { // get compact blobs
101  return &cblobs;
102  }
WERD * WERD::ConstructFromSingleBlob ( bool  bol,
bool  eol,
C_BLOB *  blob 
)

Definition at line 141 of file werd.cpp.

141  {
142  C_BLOB_LIST temp_blobs;
143  C_BLOB_IT temp_it(&temp_blobs);
144  temp_it.add_after_then_move(blob);
145  WERD* blob_word = new WERD(&temp_blobs, this);
146  blob_word->set_flag(W_BOL, bol);
147  blob_word->set_flag(W_EOL, eol);
148  return blob_word;
149 }
WERD()
Definition: werd.h:62
Definition: werd.h:35
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:123
Definition: werd.h:60
Definition: werd.h:36
WERD * WERD::ConstructWerdWithNewBlobs ( C_BLOB_LIST *  all_blobs,
C_BLOB_LIST *  orphan_blobs 
)

WERD::ConstructWerdWithNewBlobs()

This method returns a new werd constructed using the blobs in the input all_blobs list, which correspond to the blobs in this werd object. The blobs used to construct the new word are consumed and removed from the input all_blobs list. Returns NULL if the word couldn't be constructed. Returns original blobs for which no matches were found in the output list orphan_blobs (appends).

Definition at line 402 of file werd.cpp.

403  {
404  C_BLOB_LIST current_blob_list;
405  C_BLOB_IT werd_blobs_it(&current_blob_list);
406  // Add the word's c_blobs.
407  werd_blobs_it.add_list_after(cblob_list());
408 
409  // New blob list. These contain the blobs which will form the new word.
410  C_BLOB_LIST new_werd_blobs;
411  C_BLOB_IT new_blobs_it(&new_werd_blobs);
412 
413  // not_found_blobs contains the list of current word's blobs for which a
414  // corresponding blob wasn't found in the input all_blobs list.
415  C_BLOB_LIST not_found_blobs;
416  C_BLOB_IT not_found_it(&not_found_blobs);
417  not_found_it.move_to_last();
418 
419  werd_blobs_it.move_to_first();
420  for (werd_blobs_it.mark_cycle_pt(); !werd_blobs_it.cycled_list();
421  werd_blobs_it.forward()) {
422  C_BLOB* werd_blob = werd_blobs_it.extract();
423  TBOX werd_blob_box = werd_blob->bounding_box();
424  bool found = false;
425  // Now find the corresponding blob for this blob in the all_blobs
426  // list. For now, follow the inefficient method of pairwise
427  // comparisons. Ideally, one can pre-bucket the blobs by row.
428  C_BLOB_IT all_blobs_it(all_blobs);
429  for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list();
430  all_blobs_it.forward()) {
431  C_BLOB* a_blob = all_blobs_it.data();
432  // Compute the overlap of the two blobs. If major, a_blob should
433  // be added to the new blobs list.
434  TBOX a_blob_box = a_blob->bounding_box();
435  if (a_blob_box.null_box()) {
436  tprintf("Bounding box couldn't be ascertained\n");
437  }
438  if (werd_blob_box.contains(a_blob_box) ||
439  werd_blob_box.major_overlap(a_blob_box)) {
440  // Old blobs are from minimal splits, therefore are expected to be
441  // bigger. The new small blobs should cover a significant portion.
442  // This is it.
443  all_blobs_it.extract();
444  new_blobs_it.add_after_then_move(a_blob);
445  found = true;
446  }
447  }
448  if (!found) {
449  not_found_it.add_after_then_move(werd_blob);
450  } else {
451  delete werd_blob;
452  }
453  }
454  // Iterate over all not found blobs. Some of them may be due to
455  // under-segmentation (which is OK, since the corresponding blob is already
456  // in the list in that case.
457  not_found_it.move_to_first();
458  for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list();
459  not_found_it.forward()) {
460  C_BLOB* not_found = not_found_it.data();
461  TBOX not_found_box = not_found->bounding_box();
462  C_BLOB_IT existing_blobs_it(new_blobs_it);
463  for (existing_blobs_it.mark_cycle_pt(); !existing_blobs_it.cycled_list();
464  existing_blobs_it.forward()) {
465  C_BLOB* a_blob = existing_blobs_it.data();
466  TBOX a_blob_box = a_blob->bounding_box();
467  if ((not_found_box.major_overlap(a_blob_box) ||
468  a_blob_box.major_overlap(not_found_box)) &&
469  not_found_box.y_overlap(a_blob_box)) {
470  // Already taken care of.
471  delete not_found_it.extract();
472  break;
473  }
474  }
475  }
476  if (orphan_blobs) {
477  C_BLOB_IT orphan_blobs_it(orphan_blobs);
478  orphan_blobs_it.move_to_last();
479  orphan_blobs_it.add_list_after(&not_found_blobs);
480  }
481 
482  // New blobs are ready. Create a new werd object with these.
483  WERD* new_werd = NULL;
484  if (!new_werd_blobs.empty()) {
485  new_werd = new WERD(&new_werd_blobs, this);
486  } else {
487  // Add the blobs back to this word so that it can be reused.
488  C_BLOB_IT this_list_it(cblob_list());
489  this_list_it.add_list_after(&not_found_blobs);
490  }
491  return new_werd;
492 }
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
bool null_box() const
Definition: rect.h:45
bool contains(const FCOORD pt) const
Definition: rect.h:323
#define NULL
Definition: host.h:144
Definition: rect.h:29
WERD()
Definition: werd.h:62
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
Definition: werd.h:60
bool major_overlap(const TBOX &box) const
Definition: rect.h:358
TBOX bounding_box()
Definition: stepblob.cpp:192
bool y_overlap(const TBOX &box) const
Definition: rect.h:418
void WERD::copy_on ( WERD *  other)

WERD::copy_on

Copy blobs from other word onto this one.

Definition at line 224 of file werd.cpp.

224  {
225  bool reversed = other->bounding_box().left() < bounding_box().left();
226  C_BLOB_IT c_blob_it(&cblobs);
227  C_BLOB_LIST c_blobs;
228 
229  c_blobs.deep_copy(&other->cblobs, &C_BLOB::deep_copy);
230  if (reversed) {
231  c_blob_it.add_list_before(&c_blobs);
232  } else {
233  c_blob_it.move_to_last();
234  c_blob_it.add_list_after(&c_blobs);
235  }
236  if (!other->rej_cblobs.empty()) {
237  C_BLOB_IT rej_c_blob_it(&rej_cblobs);
238  C_BLOB_LIST new_rej_c_blobs;
239 
240  new_rej_c_blobs.deep_copy(&other->rej_cblobs, &C_BLOB::deep_copy);
241  if (reversed) {
242  rej_c_blob_it.add_list_before(&new_rej_c_blobs);
243  } else {
244  rej_c_blob_it.move_to_last();
245  rej_c_blob_it.add_list_after(&new_rej_c_blobs);
246  }
247  }
248 }
TBOX bounding_box()
Definition: werd.cpp:164
inT16 left() const
Definition: rect.h:67
static C_BLOB * deep_copy(const C_BLOB *src)
Definition: stepblob.h:76
BOOL8 WERD::display_flag ( uinT8  flag) const
inline

Definition at line 125 of file werd.h.

125 { return disp_flags.bit(flag); }
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:122
BOOL8 bit(uinT8 bit_num) const
Definition: bits16.h:56
BOOL8 WERD::flag ( WERD_FLAGS  mask) const
inline

Definition at line 122 of file werd.h.

122 { return flags.bit(mask); }
BOOL8 bit(uinT8 bit_num) const
Definition: bits16.h:56
void WERD::join_on ( WERD *  other)

WERD::join_on

Join other word onto this one. Delete the old word.

Definition at line 201 of file werd.cpp.

201  {
202  C_BLOB_IT blob_it(&cblobs);
203  C_BLOB_IT src_it(&other->cblobs);
204  C_BLOB_IT rej_cblob_it(&rej_cblobs);
205  C_BLOB_IT src_rej_it(&other->rej_cblobs);
206 
207  while (!src_it.empty()) {
208  blob_it.add_to_end(src_it.extract());
209  src_it.forward();
210  }
211  while (!src_rej_it.empty()) {
212  rej_cblob_it.add_to_end(src_rej_it.extract());
213  src_rej_it.forward();
214  }
215 }
void WERD::move ( const ICOORD  vec)

WERD::move

Reposition WERD by vector. NOTE!! REJECT CBLOBS ARE NOT MOVED.

Definition at line 188 of file werd.cpp.

188  {
189  C_BLOB_IT cblob_it(&cblobs); // cblob iterator
190 
191  for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward())
192  cblob_it.data()->move(vec);
193 }
ScrollView::Color WERD::NextColor ( ScrollView::Color  colour)
static

Definition at line 296 of file werd.cpp.

296  {
297  ScrollView::Color next = static_cast<ScrollView::Color>(colour + 1);
298  if (next >= LAST_COLOUR || next < FIRST_COLOUR)
299  next = FIRST_COLOUR;
300  return next;
301 }
#define LAST_COLOUR
Definition: werd.cpp:32
#define FIRST_COLOUR
Definition: werd.cpp:31
WERD & WERD::operator= ( const WERD &  source)

WERD::operator=

Assign a word, DEEP copying the blob list

Definition at line 359 of file werd.cpp.

359  {
360  this->ELIST2_LINK::operator= (source);
361  blanks = source.blanks;
362  flags = source.flags;
363  script_id_ = source.script_id_;
364  dummy = source.dummy;
365  correct = source.correct;
366  if (!cblobs.empty())
367  cblobs.clear();
368  cblobs.deep_copy(&source.cblobs, &C_BLOB::deep_copy);
369 
370  if (!rej_cblobs.empty())
371  rej_cblobs.clear();
372  rej_cblobs.deep_copy(&source.rej_cblobs, &C_BLOB::deep_copy);
373  return *this;
374 }
void operator=(const ELIST2_LINK &)
Definition: elst2.h:76
static C_BLOB * deep_copy(const C_BLOB *src)
Definition: stepblob.h:76
void WERD::plot ( ScrollView *  window,
ScrollView::Color  colour 
)

WERD::plot

Draw the WERD in the given colour.

Definition at line 287 of file werd.cpp.

287  {
288  C_BLOB_IT it = &cblobs;
289  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
290  it.data()->plot(window, colour, colour);
291  }
292  plot_rej_blobs(window);
293 }
void plot_rej_blobs(ScrollView *window)
Definition: werd.cpp:327
void WERD::plot ( ScrollView *  window)

WERD::plot

Draw the WERD in rainbow colours in window.

Definition at line 309 of file werd.cpp.

309  {
310  ScrollView::Color colour = FIRST_COLOUR;
311  C_BLOB_IT it = &cblobs;
312  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
313  it.data()->plot(window, colour, CHILD_COLOUR);
314  colour = NextColor(colour);
315  }
316  plot_rej_blobs(window);
317 }
void plot_rej_blobs(ScrollView *window)
Definition: werd.cpp:327
#define FIRST_COLOUR
Definition: werd.cpp:31
static ScrollView::Color NextColor(ScrollView::Color colour)
Definition: werd.cpp:296
#define CHILD_COLOUR
Definition: werd.cpp:33
void WERD::plot_rej_blobs ( ScrollView *  window)

WERD::plot_rej_blobs

Draw the WERD rejected blobs in window - ALWAYS GREY

Definition at line 327 of file werd.cpp.

327  {
328  C_BLOB_IT it = &rej_cblobs;
329  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
330  it.data()->plot(window, ScrollView::GREY, ScrollView::GREY);
331  }
332 }
void WERD::print ( )

WERD::print

Display members

Definition at line 256 of file werd.cpp.

256  {
257  tprintf("Blanks= %d\n", blanks);
258  bounding_box().print();
259  tprintf("Flags = %d = 0%o\n", flags.val, flags.val);
260  tprintf(" W_SEGMENTED = %s\n", flags.bit(W_SEGMENTED) ? "TRUE" : "FALSE ");
261  tprintf(" W_ITALIC = %s\n", flags.bit(W_ITALIC) ? "TRUE" : "FALSE ");
262  tprintf(" W_BOL = %s\n", flags.bit(W_BOL) ? "TRUE" : "FALSE ");
263  tprintf(" W_EOL = %s\n", flags.bit(W_EOL) ? "TRUE" : "FALSE ");
264  tprintf(" W_NORMALIZED = %s\n",
265  flags.bit(W_NORMALIZED) ? "TRUE" : "FALSE ");
266  tprintf(" W_SCRIPT_HAS_XHEIGHT = %s\n",
267  flags.bit(W_SCRIPT_HAS_XHEIGHT) ? "TRUE" : "FALSE ");
268  tprintf(" W_SCRIPT_IS_LATIN = %s\n",
269  flags.bit(W_SCRIPT_IS_LATIN) ? "TRUE" : "FALSE ");
270  tprintf(" W_DONT_CHOP = %s\n", flags.bit(W_DONT_CHOP) ? "TRUE" : "FALSE ");
271  tprintf(" W_REP_CHAR = %s\n", flags.bit(W_REP_CHAR) ? "TRUE" : "FALSE ");
272  tprintf(" W_FUZZY_SP = %s\n", flags.bit(W_FUZZY_SP) ? "TRUE" : "FALSE ");
273  tprintf(" W_FUZZY_NON = %s\n", flags.bit(W_FUZZY_NON) ? "TRUE" : "FALSE ");
274  tprintf("Correct= %s\n", correct.string());
275  tprintf("Rejected cblob count = %d\n", rej_cblobs.length());
276  tprintf("Script = %d\n", script_id_);
277 }
TBOX bounding_box()
Definition: werd.cpp:164
Definition: werd.h:33
uinT16 val
Definition: bits16.h:28
Definition: werd.h:35
const char * string() const
Definition: strngs.cpp:156
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
BOOL8 bit(uinT8 bit_num) const
Definition: bits16.h:56
Definition: werd.h:36
void print() const
Definition: rect.h:263
C_BLOB_LIST* WERD::rej_cblob_list ( )
inline

Definition at line 95 of file werd.h.

95  { // compact format
96  return &rej_cblobs;
97  }
int WERD::script_id ( ) const
inline

Definition at line 110 of file werd.h.

110  {
111  return script_id_;
112  }
void WERD::set_blanks ( uinT8  new_blanks)
inline

Definition at line 107 of file werd.h.

107  {
108  blanks = new_blanks;
109  }
void WERD::set_display_flag ( uinT8  flag,
BOOL8  value 
)
inline

Definition at line 126 of file werd.h.

126  {
127  disp_flags.set_bit(flag, value);
128  }
void set_bit(uinT8 bit_num, BOOL8 value)
Definition: bits16.h:47
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:122
void WERD::set_flag ( WERD_FLAGS  mask,
BOOL8  value 
)
inline

Definition at line 123 of file werd.h.

123 { flags.set_bit(mask, value); }
void set_bit(uinT8 bit_num, BOOL8 value)
Definition: bits16.h:47
void WERD::set_script_id ( int  id)
inline

Definition at line 113 of file werd.h.

113  {
114  script_id_ = id;
115  }
void WERD::set_text ( const char *  new_text)
inline

Definition at line 120 of file werd.h.

120 { correct = new_text; }
WERD * WERD::shallow_copy ( )

WERD::shallow_copy()

Make a shallow copy of a word

Definition at line 342 of file werd.cpp.

342  {
343  WERD *new_word = new WERD;
344 
345  new_word->blanks = blanks;
346  new_word->flags = flags;
347  new_word->dummy = dummy;
348  new_word->correct = correct;
349  return new_word;
350 }
WERD()
Definition: werd.h:62
Definition: werd.h:60
uinT8 WERD::space ( )
inline

Definition at line 104 of file werd.h.

104  { // access function
105  return blanks;
106  }
const char* WERD::text ( ) const
inline

Definition at line 119 of file werd.h.

119 { return correct.string(); }
const char * string() const
Definition: strngs.cpp:156

The documentation for this class was generated from the following files: