Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::LTRResultIterator Class Reference

#include <ltrresultiterator.h>

Inheritance diagram for tesseract::LTRResultIterator:
tesseract::PageIterator tesseract::ResultIterator tesseract::MutableIterator

Public Member Functions

 LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
 
virtual ~LTRResultIterator ()
 
char * GetUTF8Text (PageIteratorLevel level) const
 
void SetLineSeparator (const char *new_line)
 
void SetParagraphSeparator (const char *new_para)
 
float Confidence (PageIteratorLevel level) const
 
const char * WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
 
const char * WordRecognitionLanguage () const
 
StrongScriptDirection WordDirection () const
 
bool WordIsFromDictionary () const
 
bool WordIsNumeric () const
 
bool HasBlamerInfo () const
 
void * GetParamsTrainingBundle () const
 
const char * GetBlamerDebug () const
 
const char * GetBlamerMisadaptionDebug () const
 
char * WordTruthUTF8Text () const
 
const char * WordLattice (int *lattice_size) const
 
bool SymbolIsSuperscript () const
 
bool SymbolIsSubscript () const
 
bool SymbolIsDropcap () const
 
- Public Member Functions inherited from tesseract::PageIterator
 PageIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
 
virtual ~PageIterator ()
 
 PageIterator (const PageIterator &src)
 
const PageIterator & operator= (const PageIterator &src)
 
bool PositionedAtSameWord (const PAGE_RES_IT *other) const
 
virtual void Begin ()
 
virtual void RestartParagraph ()
 
bool IsWithinFirstTextlineOfParagraph () const
 
virtual void RestartRow ()
 
virtual bool Next (PageIteratorLevel level)
 
virtual bool IsAtBeginningOf (PageIteratorLevel level) const
 
virtual bool IsAtFinalElement (PageIteratorLevel level, PageIteratorLevel element) const
 
int Cmp (const PageIterator &other) const
 
bool BoundingBox (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
 
bool BoundingBoxInternal (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
 
bool Empty (PageIteratorLevel level) const
 
PolyBlockType BlockType () const
 
Pix * GetBinaryImage (PageIteratorLevel level) const
 
Pix * GetImage (PageIteratorLevel level, int padding, int *left, int *top) const
 
bool Baseline (PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
 
void Orientation (tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
 
void ParagraphInfo (tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const
 

Protected Attributes

const char * line_separator_
 
const char * paragraph_separator_
 
- Protected Attributes inherited from tesseract::PageIterator
PAGE_RES * page_res_
 
Tesseract * tesseract_
 
PAGE_RES_IT * it_
 
WERD * word_
 
int word_length_
 
int blob_index_
 
C_BLOB_IT * cblob_it_
 
int scale_
 
int scaled_yres_
 
int rect_left_
 
int rect_top_
 
int rect_width_
 
int rect_height_
 

Friends

class ChoiceIterator
 

Additional Inherited Members

- Protected Member Functions inherited from tesseract::PageIterator
TESS_LOCAL void BeginWord (int offset)
 

Detailed Description

Definition at line 46 of file ltrresultiterator.h.

Constructor & Destructor Documentation

tesseract::LTRResultIterator::LTRResultIterator ( PAGE_RES *  page_res,
Tesseract *  tesseract,
int  scale,
int  scaled_yres,
int  rect_left,
int  rect_top,
int  rect_width,
int  rect_height 
)

Definition at line 30 of file ltrresultiterator.cpp.

34  : PageIterator(page_res, tesseract, scale, scaled_yres,
35  rect_left, rect_top, rect_width, rect_height),
36  line_separator_("\n"),
37  paragraph_separator_("\n") {
38 }
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
tesseract::LTRResultIterator::~LTRResultIterator ( )
virtual

Definition at line 40 of file ltrresultiterator.cpp.

40  {
41 }

Member Function Documentation

float tesseract::LTRResultIterator::Confidence ( PageIteratorLevel  level) const

Definition at line 94 of file ltrresultiterator.cpp.

94  {
95  if (it_->word() == NULL) return 0.0f; // Already at the end!
96  float mean_certainty = 0.0f;
97  int certainty_count = 0;
98  PAGE_RES_IT res_it(*it_);
99  WERD_CHOICE* best_choice = res_it.word()->best_choice;
100  ASSERT_HOST(best_choice != NULL);
101  switch (level) {
102  case RIL_BLOCK:
103  do {
104  best_choice = res_it.word()->best_choice;
105  ASSERT_HOST(best_choice != NULL);
106  mean_certainty += best_choice->certainty();
107  ++certainty_count;
108  res_it.forward();
109  } while (res_it.block() == res_it.prev_block());
110  break;
111  case RIL_PARA:
112  do {
113  best_choice = res_it.word()->best_choice;
114  ASSERT_HOST(best_choice != NULL);
115  mean_certainty += best_choice->certainty();
116  ++certainty_count;
117  res_it.forward();
118  } while (res_it.block() == res_it.prev_block() &&
119  res_it.row()->row->para() == res_it.prev_row()->row->para());
120  break;
121  case RIL_TEXTLINE:
122  do {
123  best_choice = res_it.word()->best_choice;
124  ASSERT_HOST(best_choice != NULL);
125  mean_certainty += best_choice->certainty();
126  ++certainty_count;
127  res_it.forward();
128  } while (res_it.row() == res_it.prev_row());
129  break;
130  case RIL_WORD:
131  mean_certainty += best_choice->certainty();
132  ++certainty_count;
133  break;
134  case RIL_SYMBOL:
135  BLOB_CHOICE_LIST_CLIST* choices = best_choice->blob_choices();
136  if (choices != NULL) {
137  BLOB_CHOICE_LIST_C_IT blob_choices_it(choices);
138  for (int blob = 0; blob < blob_index_; ++blob)
139  blob_choices_it.forward();
140  BLOB_CHOICE_IT choice_it(blob_choices_it.data());
141  for (choice_it.mark_cycle_pt();
142  !choice_it.cycled_list();
143  choice_it.forward()) {
144  if (choice_it.data()->unichar_id() ==
145  best_choice->unichar_id(blob_index_))
146  break;
147  }
148  mean_certainty += choice_it.data()->certainty();
149  } else {
150  mean_certainty += best_choice->certainty();
151  }
152  ++certainty_count;
153  }
154  if (certainty_count > 0) {
155  mean_certainty /= certainty_count;
156  float confidence = 100 + 5 * mean_certainty;
157  if (confidence < 0.0f) confidence = 0.0f;
158  if (confidence > 100.0f) confidence = 100.0f;
159  return confidence;
160  }
161  return 0.0f;
162 }
BLOB_CHOICE_LIST_CLIST * blob_choices()
Definition: ratngs.h:244
float certainty() const
Definition: ratngs.h:234
#define NULL
Definition: host.h:144
#define f(xc, yc)
Definition: imgscale.cpp:39
WERD_RES * word() const
Definition: pageres.h:757
#define ASSERT_HOST(x)
Definition: errcode.h:84
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:220
const char * tesseract::LTRResultIterator::GetBlamerDebug ( ) const

Definition at line 254 of file ltrresultiterator.cpp.

254  {
255  return it_->word()->blamer_bundle->debug.string();
256 }
WERD_RES * word() const
Definition: pageres.h:757
const char * string() const
Definition: strngs.cpp:156
STRING debug
Definition: pageres.h:178
BlamerBundle * blamer_bundle
Definition: pageres.h:367
const char * tesseract::LTRResultIterator::GetBlamerMisadaptionDebug ( ) const

Definition at line 260 of file ltrresultiterator.cpp.

260  {
261  return it_->word()->blamer_bundle->misadaption_debug.string();
262 }
WERD_RES * word() const
Definition: pageres.h:757
const char * string() const
Definition: strngs.cpp:156
STRING misadaption_debug
Definition: pageres.h:180
BlamerBundle * blamer_bundle
Definition: pageres.h:367
void * tesseract::LTRResultIterator::GetParamsTrainingBundle ( ) const

Definition at line 247 of file ltrresultiterator.cpp.

247  {
248  return (it_->word() != NULL && it_->word()->blamer_bundle != NULL) ?
249  &(it_->word()->blamer_bundle->params_training_bundle) : NULL;
250 }
#define NULL
Definition: host.h:144
WERD_RES * word() const
Definition: pageres.h:757
tesseract::ParamsTrainingBundle params_training_bundle
Definition: pageres.h:199
BlamerBundle * blamer_bundle
Definition: pageres.h:367
char * tesseract::LTRResultIterator::GetUTF8Text ( PageIteratorLevel  level) const

Definition at line 45 of file ltrresultiterator.cpp.

45  {
46  if (it_->word() == NULL) return NULL; // Already at the end!
47  STRING text;
48  PAGE_RES_IT res_it(*it_);
49  WERD_CHOICE* best_choice = res_it.word()->best_choice;
50  ASSERT_HOST(best_choice != NULL);
51  if (level == RIL_SYMBOL) {
52  text = res_it.word()->BestUTF8(blob_index_, false);
53  } else if (level == RIL_WORD) {
54  text = best_choice->unichar_string();
55  } else {
56  bool eol = false; // end of line?
57  bool eop = false; // end of paragraph?
58  do { // for each paragraph in a block
59  do { // for each text line in a paragraph
60  do { // for each word in a text line
61  best_choice = res_it.word()->best_choice;
62  ASSERT_HOST(best_choice != NULL);
63  text += best_choice->unichar_string();
64  text += " ";
65  res_it.forward();
66  eol = res_it.row() != res_it.prev_row();
67  } while (!eol);
68  text.truncate_at(text.length() - 1);
69  text += line_separator_;
70  eop = res_it.block() != res_it.prev_block() ||
71  res_it.row()->row->para() != res_it.prev_row()->row->para();
72  } while (level != RIL_TEXTLINE && !eop);
73  if (eop) text += paragraph_separator_;
74  } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
75  }
76  int length = text.length() + 1;
77  char* result = new char[length];
78  strncpy(result, text.string(), length);
79  return result;
80 }
void truncate_at(inT32 index)
Definition: strngs.cpp:223
const STRING & unichar_string() const
Definition: ratngs.h:395
inT32 length() const
Definition: strngs.cpp:151
#define NULL
Definition: host.h:144
WERD_RES * word() const
Definition: pageres.h:757
const char * string() const
Definition: strngs.cpp:156
Definition: strngs.h:40
#define ASSERT_HOST(x)
Definition: errcode.h:84
bool tesseract::LTRResultIterator::HasBlamerInfo ( ) const

Definition at line 239 of file ltrresultiterator.cpp.

239  {
240  return (it_->word() != NULL && it_->word()->blamer_bundle != NULL &&
241  (it_->word()->blamer_bundle->debug.length() > 0 ||
242  it_->word()->blamer_bundle->misadaption_debug.length() > 0));
243 }
inT32 length() const
Definition: strngs.cpp:151
#define NULL
Definition: host.h:144
WERD_RES * word() const
Definition: pageres.h:757
STRING debug
Definition: pageres.h:178
STRING misadaption_debug
Definition: pageres.h:180
BlamerBundle * blamer_bundle
Definition: pageres.h:367
void tesseract::LTRResultIterator::SetLineSeparator ( const char *  new_line)

Definition at line 83 of file ltrresultiterator.cpp.

83  {
84  line_separator_ = new_line;
85 }
#define new_line()
Definition: cutil.h:83
void tesseract::LTRResultIterator::SetParagraphSeparator ( const char *  new_para)

Definition at line 88 of file ltrresultiterator.cpp.

88  {
89  paragraph_separator_ = new_para;
90 }
bool tesseract::LTRResultIterator::SymbolIsDropcap ( ) const

Definition at line 312 of file ltrresultiterator.cpp.

312  {
313  if (cblob_it_ == NULL && it_->word() != NULL)
314  return it_->word()->box_word->BlobPosition(blob_index_) == SP_DROPCAP;
315  return false;
316 }
#define NULL
Definition: host.h:144
WERD_RES * word() const
Definition: pageres.h:757
ScriptPos BlobPosition(int index) const
Definition: boxword.h:105
tesseract::BoxWord * box_word
Definition: pageres.h:387
bool tesseract::LTRResultIterator::SymbolIsSubscript ( ) const

Definition at line 303 of file ltrresultiterator.cpp.

303  {
304  if (cblob_it_ == NULL && it_->word() != NULL)
305  return it_->word()->box_word->BlobPosition(blob_index_) == SP_SUBSCRIPT;
306  return false;
307 }
#define NULL
Definition: host.h:144
WERD_RES * word() const
Definition: pageres.h:757
ScriptPos BlobPosition(int index) const
Definition: boxword.h:105
tesseract::BoxWord * box_word
Definition: pageres.h:387
bool tesseract::LTRResultIterator::SymbolIsSuperscript ( ) const

Definition at line 294 of file ltrresultiterator.cpp.

294  {
295  if (cblob_it_ == NULL && it_->word() != NULL)
296  return it_->word()->box_word->BlobPosition(blob_index_) == SP_SUPERSCRIPT;
297  return false;
298 }
#define NULL
Definition: host.h:144
WERD_RES * word() const
Definition: pageres.h:757
ScriptPos BlobPosition(int index) const
Definition: boxword.h:105
tesseract::BoxWord * box_word
Definition: pageres.h:387
StrongScriptDirection tesseract::LTRResultIterator::WordDirection ( ) const

Definition at line 210 of file ltrresultiterator.cpp.

210  {
211  if (it_->word() == NULL) return DIR_NEUTRAL;
212  bool has_rtl = it_->word()->AnyRtlCharsInWord();
213  bool has_ltr = it_->word()->AnyLtrCharsInWord();
214  if (has_rtl && !has_ltr)
215  return DIR_RIGHT_TO_LEFT;
216  if (has_ltr && !has_rtl)
217  return DIR_LEFT_TO_RIGHT;
218  if (!has_ltr && !has_rtl)
219  return DIR_NEUTRAL;
220  return DIR_MIX;
221 }
#define NULL
Definition: host.h:144
WERD_RES * word() const
Definition: pageres.h:757
bool AnyLtrCharsInWord() const
Definition: pageres.h:523
bool AnyRtlCharsInWord() const
Definition: pageres.h:506
const char * tesseract::LTRResultIterator::WordFontAttributes ( bool *  is_bold,
bool *  is_italic,
bool *  is_underlined,
bool *  is_monospace,
bool *  is_serif,
bool *  is_smallcaps,
int *  pointsize,
int *  font_id 
) const

Definition at line 172 of file ltrresultiterator.cpp.

179  {
180  if (it_->word() == NULL) return NULL; // Already at the end!
181  if (it_->word()->fontinfo == NULL) {
182  *font_id = -1;
183  return NULL; // No font information.
184  }
185  const FontInfo& font_info = *it_->word()->fontinfo;
186  *font_id = font_info.universal_id;
187  *is_bold = font_info.is_bold();
188  *is_italic = font_info.is_italic();
189  *is_underlined = false; // TODO(rays) fix this!
190  *is_monospace = font_info.is_fixed_pitch();
191  *is_serif = font_info.is_serif();
192  *is_smallcaps = it_->word()->small_caps;
193  float row_height = it_->row()->row->x_height() +
194  it_->row()->row->ascenders() - it_->row()->row->descenders();
195  // Convert from pixels to printers points.
196  *pointsize = scaled_yres_ > 0
197  ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
198  : 0;
199 
200  return font_info.name;
201 }
ROW_RES * row() const
Definition: pageres.h:760
bool is_serif() const
Definition: fontinfo.h:87
const FontInfo * fontinfo
Definition: pageres.h:424
bool is_italic() const
Definition: fontinfo.h:84
#define NULL
Definition: host.h:144
bool is_fixed_pitch() const
Definition: fontinfo.h:86
WERD_RES * word() const
Definition: pageres.h:757
bool is_bold() const
Definition: fontinfo.h:85
const int kPointsPerInch
Definition: publictypes.h:33
float ascenders() const
Definition: ocrrow.h:79
float x_height() const
Definition: ocrrow.h:61
float descenders() const
Definition: ocrrow.h:82
ROW * row
Definition: pageres.h:286
bool small_caps
Definition: pageres.h:420
bool tesseract::LTRResultIterator::WordIsFromDictionary ( ) const

Definition at line 224 of file ltrresultiterator.cpp.

224  {
225  if (it_->word() == NULL) return false; // Already at the end!
226  int permuter = it_->word()->best_choice->permuter();
227  return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM ||
228  permuter == USER_DAWG_PERM;
229 }
#define NULL
Definition: host.h:144
WERD_RES * word() const
Definition: pageres.h:757
uinT8 permuter() const
Definition: ratngs.h:237
WERD_CHOICE * best_choice
Definition: pageres.h:359
bool tesseract::LTRResultIterator::WordIsNumeric ( ) const

Definition at line 232 of file ltrresultiterator.cpp.

232  {
233  if (it_->word() == NULL) return false; // Already at the end!
234  int permuter = it_->word()->best_choice->permuter();
235  return permuter == NUMBER_PERM;
236 }
#define NULL
Definition: host.h:144
WERD_RES * word() const
Definition: pageres.h:757
uinT8 permuter() const
Definition: ratngs.h:237
WERD_CHOICE * best_choice
Definition: pageres.h:359
const char * tesseract::LTRResultIterator::WordLattice ( int *  lattice_size) const

Definition at line 284 of file ltrresultiterator.cpp.

284  {
285  if (it_->word() == NULL) return NULL; // Already at the end!
286  if (it_->word()->blamer_bundle == NULL) return NULL;
287  *lattice_size = it_->word()->blamer_bundle->lattice_size;
288  return it_->word()->blamer_bundle->lattice_data;
289 }
int lattice_size
Definition: pageres.h:197
#define NULL
Definition: host.h:144
WERD_RES * word() const
Definition: pageres.h:757
char * lattice_data
Definition: pageres.h:196
BlamerBundle * blamer_bundle
Definition: pageres.h:367
const char * tesseract::LTRResultIterator::WordRecognitionLanguage ( ) const

Definition at line 204 of file ltrresultiterator.cpp.

204  {
205  if (it_->word() == NULL || it_->word()->tesseract == NULL) return NULL;
206  return it_->word()->tesseract->lang.string();
207 }
tesseract::Tesseract * tesseract
Definition: pageres.h:403
#define NULL
Definition: host.h:144
WERD_RES * word() const
Definition: pageres.h:757
const char * string() const
Definition: strngs.cpp:156
STRING lang
Definition: ccutil.h:69
char * tesseract::LTRResultIterator::WordTruthUTF8Text ( ) const

Definition at line 266 of file ltrresultiterator.cpp.

266  {
267  if (it_->word() == NULL) return NULL; // Already at the end!
268  if (it_->word()->blamer_bundle == NULL ||
269  it_->word()->blamer_bundle->incorrect_result_reason == IRR_NO_TRUTH) {
270  return NULL; // no truth information for this word
271  }
272  const GenericVector<STRING> &truth_vec =
273  it_->word()->blamer_bundle->truth_text;
274  STRING truth_text;
275  for (int i = 0; i < truth_vec.size(); ++i) truth_text += truth_vec[i];
276  int length = truth_text.length() + 1;
277  char* result = new char[length];
278  strncpy(result, truth_text.string(), length);
279  return result;
280 }
IncorrectResultReason incorrect_result_reason
Definition: pageres.h:176
inT32 length() const
Definition: strngs.cpp:151
#define NULL
Definition: host.h:144
WERD_RES * word() const
Definition: pageres.h:757
GenericVector< STRING > truth_text
Definition: pageres.h:174
const char * string() const
Definition: strngs.cpp:156
Definition: strngs.h:40
int size() const
Definition: genericvector.h:59
BlamerBundle * blamer_bundle
Definition: pageres.h:367

Friends And Related Function Documentation

friend class ChoiceIterator
friend

Definition at line 47 of file ltrresultiterator.h.

Member Data Documentation

const char* tesseract::LTRResultIterator::line_separator_
protected

Definition at line 165 of file ltrresultiterator.h.

const char* tesseract::LTRResultIterator::paragraph_separator_
protected

Definition at line 166 of file ltrresultiterator.h.


The documentation for this class was generated from the following files: