Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
WERD_RES Class Reference

#include <pageres.h>

Inheritance diagram for WERD_RES:
ELIST_LINK

Public Member Functions

 WERD_RES ()
 
 WERD_RES (WERD *the_word)
 
 WERD_RES (const WERD_RES &source)
 
 ~WERD_RES ()
 
const char *const BestUTF8 (int blob_index, bool in_rtl_context) const
 
const char *const RawUTF8 (int blob_index) const
 
UNICHARSET::Direction SymbolDirection (int blob_index) const
 
bool AnyRtlCharsInWord () const
 
bool AnyLtrCharsInWord () const
 
bool UnicharsInReadingOrder () const
 
void InitNonPointers ()
 
void InitPointers ()
 
void Clear ()
 
void ClearResults ()
 
WERD_RES & operator= (const WERD_RES &source)
 
void CopySimpleFields (const WERD_RES &source)
 
void InitForRetryRecognition (const WERD_RES &source)
 
bool SetupForTessRecognition (const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, bool numeric_mode, bool use_body_size, ROW *row, BLOCK *block)
 
bool SetupForCubeRecognition (const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, const BLOCK *block)
 
void SetupFake (const UNICHARSET &uch)
 
void SetupWordScript (const UNICHARSET &unicharset_in)
 
void SetupBlamerBundle ()
 
void ConsumeWordResults (WERD_RES *word)
 
void ReplaceBestChoice (const WERD_CHOICE &choice, const GenericVector< int > &segmentation_state)
 
void RebuildBestState ()
 
void CloneChoppedToRebuild ()
 
void SetupBoxWord ()
 
void SetScriptPositions ()
 
void WithoutFootnoteSpan (int *start, int *end) const
 
void WithoutFootnoteSpan (const WERD_CHOICE &choice, const GenericVector< int > &state, int *start, int *end) const
 
void FakeClassifyWord (int blob_count, BLOB_CHOICE **choices)
 
void BestChoiceToCorrectText ()
 
bool ConditionalBlobMerge (TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb, BLOB_CHOICE_LIST_CLIST *blob_choices)
 
UNICHAR_ID BothQuotes (UNICHAR_ID id1, UNICHAR_ID id2)
 
void fix_quotes (BLOB_CHOICE_LIST_CLIST *blob_choices)
 
UNICHAR_ID BothHyphens (UNICHAR_ID id1, UNICHAR_ID id2)
 
bool HyphenBoxesOverlap (const TBOX &box1, const TBOX &box2)
 
void fix_hyphens (BLOB_CHOICE_LIST_CLIST *blob_choices)
 
UNICHAR_ID BothSpaces (UNICHAR_ID id1, UNICHAR_ID id2)
 
void merge_tess_fails ()
 
void copy_on (WERD_RES *word_res)
 
bool PiecesAllNatural (int start, int count) const
 
- Public Member Functions inherited from ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Static Public Member Functions

static WERD_RES * deep_copy (const WERD_RES *src)
 

Public Attributes

WERD * word
 
tesseract::BoxWord * bln_boxes
 
DENORM denorm
 
const UNICHARSET * uch_set
 
TWERD * chopped_word
 
SEAMS seam_array
 
WERD_CHOICE * best_choice
 
WERD_CHOICE * raw_choice
 
GenericVector< WERD_CHOICE * > alt_choices
 
GenericVector< GenericVector< int > > alt_states
 
BlamerBundle * blamer_bundle
 
TWERD * rebuild_word
 
tesseract::BoxWord * box_word
 
GenericVector< int > best_state
 
GenericVector< STRING > correct_text
 
tesseract::Tesseract * tesseract
 
WERD_CHOICE * ep_choice
 
REJMAP reject_map
 
BOOL8 tess_failed
 
BOOL8 tess_accepted
 
BOOL8 tess_would_adapt
 
BOOL8 done
 
bool small_caps
 
inT8 italic
 
inT8 bold
 
const FontInfo * fontinfo
 
const FontInfo * fontinfo2
 
inT8 fontinfo_id_count
 
inT8 fontinfo_id2_count
 
BOOL8 guessed_x_ht
 
BOOL8 guessed_caps_ht
 
CRUNCH_MODE unlv_crunch_mode
 
float x_height
 
float caps_height
 
BOOL8 combination
 
BOOL8 part_of_combo
 
BOOL8 reject_spaces
 
GenericVector< inT8 > best_choice_fontinfo_ids
 

Detailed Description

Definition at line 314 of file pageres.h.

Constructor & Destructor Documentation

WERD_RES::WERD_RES ( )
inline

Definition at line 456 of file pageres.h.

456  {
457  InitNonPointers();
458  InitPointers();
459  }
void InitNonPointers()
Definition: pageres.cpp:750
void InitPointers()
Definition: pageres.cpp:775
WERD_RES::WERD_RES ( WERD * the_word)
inline

Definition at line 460 of file pageres.h.

460  {
461  InitNonPointers();
462  InitPointers();
463  word = the_word;
464  }
void InitNonPointers()
Definition: pageres.cpp:750
void InitPointers()
Definition: pageres.cpp:775
WERD * word
Definition: pageres.h:334
WERD_RES::WERD_RES ( const WERD_RES & source)
inline

Definition at line 465 of file pageres.h.

465  {
466  InitPointers();
467  *this = source; // see operator=
468  }
void InitPointers()
Definition: pageres.cpp:775
WERD_RES::~WERD_RES ( )

Definition at line 746 of file pageres.cpp.

746  {
747  Clear();
748 }
void Clear()
Definition: pageres.cpp:789

Member Function Documentation

bool WERD_RES::AnyLtrCharsInWord ( ) const
inline

Definition at line 523 of file pageres.h.

523  {
524  if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
525  return false;
526  for (int id = 0; id < best_choice->length(); id++) {
527  int unichar_id = best_choice->unichar_id(id);
528  if (unichar_id < 0 || unichar_id >= uch_set->size())
529  continue; // Ignore illegal chars.
530  UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
531  if (dir == UNICHARSET::U_LEFT_TO_RIGHT)
532  return true;
533  }
534  return false;
535  }
int length() const
Definition: ratngs.h:214
int size() const
Definition: unicharset.h:264
#define NULL
Definition: host.h:144
const UNICHARSET * uch_set
Definition: pageres.h:348
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:579
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:220
WERD_CHOICE * best_choice
Definition: pageres.h:359
bool WERD_RES::AnyRtlCharsInWord ( ) const
inline

Definition at line 506 of file pageres.h.

506  {
507  if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
508  return false;
509  for (int id = 0; id < best_choice->length(); id++) {
510  int unichar_id = best_choice->unichar_id(id);
511  if (unichar_id < 0 || unichar_id >= uch_set->size())
512  continue; // Ignore illegal chars.
514  uch_set->get_direction(unichar_id);
515  if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
518  return true;
519  }
520  return false;
521  }
int length() const
Definition: ratngs.h:214
int size() const
Definition: unicharset.h:264
#define NULL
Definition: host.h:144
const UNICHARSET * uch_set
Definition: pageres.h:348
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:579
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:220
WERD_CHOICE * best_choice
Definition: pageres.h:359
void WERD_RES::BestChoiceToCorrectText ( )

Definition at line 572 of file pageres.cpp.

572  {
575  for (int i = 0; i < best_choice->length(); ++i) {
576  UNICHAR_ID choice_id = best_choice->unichar_id(i);
577  const char* blob_choice = uch_set->id_to_unichar(choice_id);
578  correct_text.push_back(STRING(blob_choice));
579  }
580 }
int length() const
Definition: ratngs.h:214
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
int UNICHAR_ID
Definition: unichar.h:31
virtual void clear()
#define NULL
Definition: host.h:144
int push_back(T object)
const UNICHARSET * uch_set
Definition: pageres.h:348
Definition: strngs.h:40
GenericVector< STRING > correct_text
Definition: pageres.h:396
#define ASSERT_HOST(x)
Definition: errcode.h:84
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:220
WERD_CHOICE * best_choice
Definition: pageres.h:359
const char* const WERD_RES::BestUTF8 ( int  blob_index,
bool  in_rtl_context 
) const
inline

Definition at line 477 of file pageres.h.

477  {
478  if (blob_index < 0 || blob_index >= best_choice->length())
479  return NULL;
480  UNICHAR_ID id = best_choice->unichar_id(blob_index);
481  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
482  return NULL;
483  UNICHAR_ID mirrored = uch_set->get_mirror(id);
484  if (in_rtl_context && mirrored > 0 && mirrored != INVALID_UNICHAR_ID)
485  id = mirrored;
486  return uch_set->id_to_unichar_ext(id);
487  }
int length() const
Definition: ratngs.h:214
const char *const id_to_unichar_ext(UNICHAR_ID id) const
Definition: unicharset.cpp:241
int UNICHAR_ID
Definition: unichar.h:31
int size() const
Definition: unicharset.h:264
#define NULL
Definition: host.h:144
const UNICHARSET * uch_set
Definition: pageres.h:348
UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const
Definition: unicharset.h:586
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:220
WERD_CHOICE * best_choice
Definition: pageres.h:359
UNICHAR_ID WERD_RES::BothHyphens ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 683 of file pageres.cpp.

683  {
684  const char *ch = uch_set->id_to_unichar(id1);
685  const char *next_ch = uch_set->id_to_unichar(id2);
686  if (strlen(ch) == 1 && strlen(next_ch) == 1 &&
687  (*ch == '-' || *ch == '~') && (*next_ch == '-' || *next_ch == '~'))
688  return uch_set->unichar_to_id("-");
689  return INVALID_UNICHAR_ID;
690 }
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:176
const UNICHARSET * uch_set
Definition: pageres.h:348
UNICHAR_ID WERD_RES::BothQuotes ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 660 of file pageres.cpp.

660  {
661  const char *ch = uch_set->id_to_unichar(id1);
662  const char *next_ch = uch_set->id_to_unichar(id2);
663  if (is_simple_quote(ch, strlen(ch)) &&
664  is_simple_quote(next_ch, strlen(next_ch)))
665  return uch_set->unichar_to_id("\"");
666  return INVALID_UNICHAR_ID;
667 }
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:176
const UNICHARSET * uch_set
Definition: pageres.h:348
UNICHAR_ID WERD_RES::BothSpaces ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 713 of file pageres.cpp.

713  {
714  if (id1 == id2 && id1 == uch_set->unichar_to_id(" "))
715  return id1;
716  else
717  return INVALID_UNICHAR_ID;
718 }
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:176
const UNICHARSET * uch_set
Definition: pageres.h:348
void WERD_RES::Clear ( )

Definition at line 789 of file pageres.cpp.

789  {
790  if (word != NULL && combination) {
791  delete word;
792  }
793  word = NULL;
794  delete blamer_bundle;
796  ClearResults();
797 }
void ClearResults()
Definition: pageres.cpp:799
#define NULL
Definition: host.h:144
BOOL8 combination
Definition: pageres.h:450
WERD * word
Definition: pageres.h:334
BlamerBundle * blamer_bundle
Definition: pageres.h:367
void WERD_RES::ClearResults ( )

Definition at line 799 of file pageres.cpp.

799  {
800  done = false;
801  fontinfo = NULL;
802  fontinfo2 = NULL;
803  fontinfo_id_count = 0;
804  fontinfo_id2_count = 0;
805  if (bln_boxes != NULL) {
806  delete bln_boxes;
807  bln_boxes = NULL;
808  }
809  if (chopped_word != NULL) {
810  delete chopped_word;
811  chopped_word = NULL;
812  }
813  if (rebuild_word != NULL) {
814  delete rebuild_word;
815  rebuild_word = NULL;
816  }
817  if (box_word != NULL) {
818  delete box_word;
819  box_word = NULL;
820  }
821  best_state.clear();
823  if (seam_array != NULL) {
825  seam_array = NULL;
826  }
827  if (best_choice != NULL) {
828  delete best_choice;
829  delete raw_choice;
830  best_choice = NULL;
831  raw_choice = NULL;
832  }
833  if (!alt_choices.empty()) {
835  alt_choices.clear();
836  }
837  alt_states.clear();
838  if (ep_choice != NULL) {
839  delete ep_choice;
840  ep_choice = NULL;
841  }
843 }
void delete_data_pointers()
TWERD * rebuild_word
Definition: pageres.h:381
BOOL8 done
Definition: pageres.h:419
const FontInfo * fontinfo
Definition: pageres.h:424
virtual void clear()
const FontInfo * fontinfo2
Definition: pageres.h:425
GenericVector< WERD_CHOICE * > alt_choices
Definition: pageres.h:363
void ClearResults()
Definition: pageres.h:103
#define NULL
Definition: host.h:144
GenericVector< int > best_state
Definition: pageres.h:392
GenericVector< GenericVector< int > > alt_states
Definition: pageres.h:364
SEAMS seam_array
Definition: pageres.h:358
WERD_CHOICE * ep_choice
Definition: pageres.h:407
bool empty() const
Definition: genericvector.h:68
tesseract::BoxWord * bln_boxes
Definition: pageres.h:343
void free_seam_list(SEAMS seam_list)
Definition: seam.cpp:200
inT8 fontinfo_id2_count
Definition: pageres.h:427
WERD_CHOICE * raw_choice
Definition: pageres.h:360
GenericVector< STRING > correct_text
Definition: pageres.h:396
tesseract::BoxWord * box_word
Definition: pageres.h:387
TWERD * chopped_word
Definition: pageres.h:357
inT8 fontinfo_id_count
Definition: pageres.h:426
BlamerBundle * blamer_bundle
Definition: pageres.h:367
WERD_CHOICE * best_choice
Definition: pageres.h:359
void WERD_RES::CloneChoppedToRebuild ( )

Definition at line 480 of file pageres.cpp.

480  {
481  if (rebuild_word != NULL)
482  delete rebuild_word;
484  SetupBoxWord();
485  int word_len = box_word->length();
486  best_state.reserve(word_len);
487  correct_text.reserve(word_len);
488  for (int i = 0; i < word_len; ++i) {
491  }
492 }
TWERD * rebuild_word
Definition: pageres.h:381
const int length() const
Definition: boxword.h:99
#define NULL
Definition: host.h:144
Definition: blobs.h:233
GenericVector< int > best_state
Definition: pageres.h:392
int push_back(T object)
void reserve(int size)
Definition: strngs.h:40
GenericVector< STRING > correct_text
Definition: pageres.h:396
tesseract::BoxWord * box_word
Definition: pageres.h:387
TWERD * chopped_word
Definition: pageres.h:357
void SetupBoxWord()
Definition: pageres.cpp:495
bool WERD_RES::ConditionalBlobMerge ( TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *  class_cb,
TessResultCallback2< bool, const TBOX &, const TBOX & > *  box_cb,
BLOB_CHOICE_LIST_CLIST *  blob_choices 
)

Definition at line 587 of file pageres.cpp.

591  {
592  bool modified = false;
593  for (int i = 0; i + 1 < best_choice->length(); ++i) {
594  UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i),
595  best_choice->unichar_id(i+1));
596  if (new_id != INVALID_UNICHAR_ID &&
597  (box_cb == NULL || box_cb->Run(box_word->BlobBox(i),
598  box_word->BlobBox(i + 1)))) {
599  if (reject_map.length() == best_choice->length())
601  best_choice->set_unichar_id(new_id, i);
603  raw_choice->set_unichar_id(new_id, i);
605  modified = true;
606  rebuild_word->MergeBlobs(i, i + 2);
607  box_word->MergeBoxes(i, i + 2);
608  if (i + 1 < best_state.length()) {
609  best_state[i] += best_state[i + 1];
610  best_state.remove(i + 1);
611  }
612 
613  BLOB_CHOICE_LIST_C_IT blob_choices_it(blob_choices);
614  for (int j = 0; j < i; ++j)
615  blob_choices_it.forward();
616  BLOB_CHOICE_IT it1(blob_choices_it.data()); // first choices
617  BLOB_CHOICE_LIST* target_choices = blob_choices_it.data_relative(1);
618  BLOB_CHOICE_IT it2(target_choices); // second choices
619  float certainty = it2.data()->certainty();
620  float rating = it2.data()->rating();
621  if (it1.data()->certainty() < certainty) {
622  certainty = it1.data()->certainty();
623  rating = it1.data()->rating();
624  target_choices = blob_choices_it.data();
625  blob_choices_it.forward();
626  }
627  delete blob_choices_it.extract(); // get rid of spare
628  // TODO(rays) Fix the choices so they contain the desired result.
629  // Do we really need to ? Only needed for fix_quotes, which should be
630  // going away.
631  }
632  }
633  delete class_cb;
634  delete box_cb;
635  return modified;
636 }
int length() const
Definition: ratngs.h:214
TWERD * rebuild_word
Definition: pageres.h:381
int UNICHAR_ID
Definition: unichar.h:31
REJMAP reject_map
Definition: pageres.h:408
#define NULL
Definition: host.h:144
void MergeBoxes(int start, int end)
Definition: boxword.cpp:177
GenericVector< int > best_state
Definition: pageres.h:392
virtual R Run(A1, A2)=0
void remove_pos(inT16 pos)
Definition: rejctmap.cpp:371
void remove_unichar_id(int index)
Definition: ratngs.h:357
WERD_CHOICE * raw_choice
Definition: pageres.h:360
int length() const
Definition: genericvector.h:63
tesseract::BoxWord * box_word
Definition: pageres.h:387
const TBOX & BlobBox(int index) const
Definition: boxword.h:102
void set_unichar_id(UNICHAR_ID unichar_id, int index)
Definition: ratngs.h:247
virtual void remove(int index)
inT32 length() const
Definition: rejctmap.h:238
void MergeBlobs(int start, int end)
Definition: blobs.cpp:494
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:220
WERD_CHOICE * best_choice
Definition: pageres.h:359
void WERD_RES::ConsumeWordResults ( WERD_RES * word)

Definition at line 411 of file pageres.cpp.

411  {
412  denorm = word->denorm;
413  MovePointerData(&chopped_word, &word->chopped_word);
414  MovePointerData(&rebuild_word, &word->rebuild_word);
415  MovePointerData(&box_word, &word->box_word);
416  if (seam_array != NULL)
418  seam_array = word->seam_array;
419  word->seam_array = NULL;
420  best_state.move(&word->best_state);
422  MovePointerData(&best_choice, &word->best_choice);
423  MovePointerData(&raw_choice, &word->raw_choice);
425  alt_choices.move(&word->alt_choices);
426  alt_states.move(&word->alt_states);
427  reject_map = word->reject_map;
428  if (word->blamer_bundle != NULL) {
429  assert(blamer_bundle != NULL);
431  }
432  CopySimpleFields(*word);
433 }
void delete_data_pointers()
TWERD * rebuild_word
Definition: pageres.h:381
GenericVector< WERD_CHOICE * > alt_choices
Definition: pageres.h:363
REJMAP reject_map
Definition: pageres.h:408
#define NULL
Definition: host.h:144
GenericVector< int > best_state
Definition: pageres.h:392
GenericVector< GenericVector< int > > alt_states
Definition: pageres.h:364
SEAMS seam_array
Definition: pageres.h:358
void free_seam_list(SEAMS seam_list)
Definition: seam.cpp:200
void CopyResults(const BlamerBundle &other)
Definition: pageres.h:124
DENORM denorm
Definition: pageres.h:346
WERD_CHOICE * raw_choice
Definition: pageres.h:360
void move(GenericVector< T > *from)
GenericVector< STRING > correct_text
Definition: pageres.h:396
tesseract::BoxWord * box_word
Definition: pageres.h:387
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:234
TWERD * chopped_word
Definition: pageres.h:357
BlamerBundle * blamer_bundle
Definition: pageres.h:367
WERD_CHOICE * best_choice
Definition: pageres.h:359
void WERD_RES::copy_on ( WERD_RES * word_res)
inline

Definition at line 674 of file pageres.h.

674  { //from this word
675  word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL));
676  word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL));
677  word->copy_on(word_res->word);
678  }
void copy_on(WERD *other)
Definition: werd.cpp:224
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:122
WERD * word
Definition: pageres.h:334
Definition: werd.h:35
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:123
Definition: werd.h:36
void WERD_RES::CopySimpleFields ( const WERD_RES & source)

Definition at line 234 of file pageres.cpp.

234  {
235  tess_failed = source.tess_failed;
236  tess_accepted = source.tess_accepted;
238  done = source.done;
240  small_caps = source.small_caps;
241  italic = source.italic;
242  bold = source.bold;
243  fontinfo = source.fontinfo;
244  fontinfo2 = source.fontinfo2;
247  x_height = source.x_height;
248  caps_height = source.caps_height;
249  guessed_x_ht = source.guessed_x_ht;
251  reject_spaces = source.reject_spaces;
252  uch_set = source.uch_set;
253  tesseract = source.tesseract;
254 }
BOOL8 done
Definition: pageres.h:419
const FontInfo * fontinfo
Definition: pageres.h:424
const FontInfo * fontinfo2
Definition: pageres.h:425
tesseract::Tesseract * tesseract
Definition: pageres.h:403
BOOL8 guessed_x_ht
Definition: pageres.h:428
BOOL8 tess_would_adapt
Definition: pageres.h:418
BOOL8 reject_spaces
Definition: pageres.h:452
inT8 bold
Definition: pageres.h:422
const UNICHARSET * uch_set
Definition: pageres.h:348
BOOL8 guessed_caps_ht
Definition: pageres.h:429
float x_height
Definition: pageres.h:431
inT8 fontinfo_id2_count
Definition: pageres.h:427
BOOL8 tess_accepted
Definition: pageres.h:417
inT8 italic
Definition: pageres.h:421
inT8 fontinfo_id_count
Definition: pageres.h:426
BOOL8 tess_failed
Definition: pageres.h:409
float caps_height
Definition: pageres.h:432
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:430
bool small_caps
Definition: pageres.h:420
static WERD_RES* WERD_RES::deep_copy ( const WERD_RES * src)
inline static

Definition at line 668 of file pageres.h.

668  {
669  return new WERD_RES(*src);
670  }
WERD_RES()
Definition: pageres.h:456
void WERD_RES::FakeClassifyWord ( int  blob_count,
BLOB_CHOICE **  choices 
)

Definition at line 549 of file pageres.cpp.

549  {
550  // Setup the WERD_RES.
552  ASSERT_HOST(blob_count == box_word->length());
554  BLOB_CHOICE_LIST_CLIST* word_choices = new BLOB_CHOICE_LIST_CLIST;
555  BLOB_CHOICE_LIST_C_IT bc_it(word_choices);
556  for (int c = 0; c < blob_count; ++c) {
558  choices[c]->unichar_id(), 1,
559  choices[c]->rating(), choices[c]->certainty());
560  BLOB_CHOICE_LIST* choice_list = new BLOB_CHOICE_LIST;
561  BLOB_CHOICE_IT choice_it(choice_list);
562  choice_it.add_after_then_move(choices[c]);
563  bc_it.add_after_then_move(choice_list);
564  }
565  best_choice->set_blob_choices(word_choices);
566  delete raw_choice;
568  reject_map.initialise(blob_count);
569 }
const int length() const
Definition: boxword.h:99
void append_unichar_id(UNICHAR_ID unichar_id, char fragment_length, float rating, float certainty)
Definition: ratngs.cpp:313
REJMAP reject_map
Definition: pageres.h:408
#define NULL
Definition: host.h:144
WERD_CHOICE * raw_choice
Definition: pageres.h:360
tesseract::BoxWord * box_word
Definition: pageres.h:387
void set_blob_choices(BLOB_CHOICE_LIST_CLIST *blob_choices)
Definition: ratngs.cpp:184
#define ASSERT_HOST(x)
Definition: errcode.h:84
void initialise(inT16 length)
Definition: rejctmap.cpp:324
WERD_CHOICE * best_choice
Definition: pageres.h:359
void WERD_RES::fix_hyphens ( BLOB_CHOICE_LIST_CLIST *  blob_choices)

Definition at line 700 of file pageres.cpp.

700  {
701  if (!uch_set->contains_unichar("-") ||
703  return; // Don't create it if it is disallowed.
704 
708  blob_choices);
709 }
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:176
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb, BLOB_CHOICE_LIST_CLIST *blob_choices)
Definition: pageres.cpp:587
bool HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2)
Definition: pageres.cpp:694
UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:683
const UNICHARSET * uch_set
Definition: pageres.h:348
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:543
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:747
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
void WERD_RES::fix_quotes ( BLOB_CHOICE_LIST_CLIST *  blob_choices)

Definition at line 670 of file pageres.cpp.

670  {
671  if (!uch_set->contains_unichar("\"") ||
673  return; // Don't create it if it is disallowed.
674 
677  NULL,
678  blob_choices);
679 }
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:176
UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:660
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb, BLOB_CHOICE_LIST_CLIST *blob_choices)
Definition: pageres.cpp:587
#define NULL
Definition: host.h:144
const UNICHARSET * uch_set
Definition: pageres.h:348
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:543
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:747
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool WERD_RES::HyphenBoxesOverlap ( const TBOX & box1,
const TBOX & box2 
)

Definition at line 694 of file pageres.cpp.

694  {
695  return box1.right() >= box2.left();
696 }
inT16 left() const
Definition: rect.h:67
inT16 right() const
Definition: rect.h:74
void WERD_RES::InitForRetryRecognition ( const WERD_RES & source)

Definition at line 260 of file pageres.cpp.

260  {
261  word = source.word;
262  CopySimpleFields(source);
263  if (source.blamer_bundle != NULL) {
264  blamer_bundle = new BlamerBundle();
266  }
267 }
#define NULL
Definition: host.h:144
void CopyTruth(const BlamerBundle &other)
Definition: pageres.h:117
WERD * word
Definition: pageres.h:334
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:234
BlamerBundle * blamer_bundle
Definition: pageres.h:367
void WERD_RES::InitNonPointers ( )

Definition at line 750 of file pageres.cpp.

750  {
751  tess_failed = FALSE;
754  done = FALSE;
756  small_caps = false;
757  italic = FALSE;
758  bold = FALSE;
759  // The fontinfos and tesseract count as non-pointers as they point to
760  // data owned elsewhere.
761  fontinfo = NULL;
762  fontinfo2 = NULL;
763  tesseract = NULL;
764  fontinfo_id_count = 0;
765  fontinfo_id2_count = 0;
766  x_height = 0.0;
767  caps_height = 0.0;
768  guessed_x_ht = TRUE;
770  combination = FALSE;
773 }
BOOL8 done
Definition: pageres.h:419
const FontInfo * fontinfo
Definition: pageres.h:424
const FontInfo * fontinfo2
Definition: pageres.h:425
BOOL8 part_of_combo
Definition: pageres.h:451
tesseract::Tesseract * tesseract
Definition: pageres.h:403
#define NULL
Definition: host.h:144
BOOL8 combination
Definition: pageres.h:450
BOOL8 guessed_x_ht
Definition: pageres.h:428
#define FALSE
Definition: capi.h:28
BOOL8 tess_would_adapt
Definition: pageres.h:418
BOOL8 reject_spaces
Definition: pageres.h:452
inT8 bold
Definition: pageres.h:422
BOOL8 guessed_caps_ht
Definition: pageres.h:429
float x_height
Definition: pageres.h:431
inT8 fontinfo_id2_count
Definition: pageres.h:427
BOOL8 tess_accepted
Definition: pageres.h:417
inT8 italic
Definition: pageres.h:421
inT8 fontinfo_id_count
Definition: pageres.h:426
BOOL8 tess_failed
Definition: pageres.h:409
float caps_height
Definition: pageres.h:432
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:430
bool small_caps
Definition: pageres.h:420
#define TRUE
Definition: capi.h:27
void WERD_RES::InitPointers ( )

Definition at line 775 of file pageres.cpp.

775  {
776  word = NULL;
777  bln_boxes = NULL;
778  uch_set = NULL;
779  chopped_word = NULL;
780  rebuild_word = NULL;
781  box_word = NULL;
782  seam_array = NULL;
783  best_choice = NULL;
784  raw_choice = NULL;
785  ep_choice = NULL;
787 }
TWERD * rebuild_word
Definition: pageres.h:381
#define NULL
Definition: host.h:144
SEAMS seam_array
Definition: pageres.h:358
WERD * word
Definition: pageres.h:334
const UNICHARSET * uch_set
Definition: pageres.h:348
WERD_CHOICE * ep_choice
Definition: pageres.h:407
tesseract::BoxWord * bln_boxes
Definition: pageres.h:343
WERD_CHOICE * raw_choice
Definition: pageres.h:360
tesseract::BoxWord * box_word
Definition: pageres.h:387
TWERD * chopped_word
Definition: pageres.h:357
BlamerBundle * blamer_bundle
Definition: pageres.h:367
WERD_CHOICE * best_choice
Definition: pageres.h:359
void WERD_RES::merge_tess_fails ( )

Definition at line 721 of file pageres.cpp.

721  {
725  int len = best_choice->length();
726  ASSERT_HOST(reject_map.length() == len);
727  ASSERT_HOST(box_word->length() == len);
728  }
729 }
int length() const
Definition: ratngs.h:214
const int length() const
Definition: boxword.h:99
BLOB_CHOICE_LIST_CLIST * blob_choices()
Definition: ratngs.h:244
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb, BLOB_CHOICE_LIST_CLIST *blob_choices)
Definition: pageres.cpp:587
REJMAP reject_map
Definition: pageres.h:408
#define NULL
Definition: host.h:144
UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:713
tesseract::BoxWord * box_word
Definition: pageres.h:387
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
inT32 length() const
Definition: rejctmap.h:238
#define ASSERT_HOST(x)
Definition: errcode.h:84
WERD_CHOICE * best_choice
Definition: pageres.h:359
WERD_RES & WERD_RES::operator= ( const WERD_RES & source)

Definition at line 177 of file pageres.cpp.

177  {
178  this->ELIST_LINK::operator=(source);
179  Clear();
180  if (source.combination) {
181  word = new WERD;
182  *word = *(source.word); // deep copy
183  } else {
184  word = source.word; // pt to same word
185  }
186  if (source.bln_boxes != NULL)
187  bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
188  if (source.chopped_word != NULL)
189  chopped_word = new TWERD(*source.chopped_word);
190  if (source.rebuild_word != NULL)
191  rebuild_word = new TWERD(*source.rebuild_word);
192  // TODO(rays) Do we ever need to copy the seam_array?
193  denorm = source.denorm;
194  if (source.box_word != NULL)
195  box_word = new tesseract::BoxWord(*source.box_word);
196  best_state = source.best_state;
197  correct_text = source.correct_text;
198 
199  if (source.best_choice != NULL) {
200  best_choice = new WERD_CHOICE(*source.best_choice);
201  raw_choice = new WERD_CHOICE(*source.raw_choice);
203  }
204  else {
205  best_choice = NULL;
206  raw_choice = NULL;
209  }
210  }
211  for (int i = 0; i < source.alt_choices.length(); ++i) {
212  const WERD_CHOICE *choice = source.alt_choices[i];
213  ASSERT_HOST(choice != NULL);
214  alt_choices.push_back(new WERD_CHOICE(*choice));
215  }
216  alt_states = source.alt_states;
217  if (source.ep_choice != NULL) {
218  ep_choice = new WERD_CHOICE(*source.ep_choice);
219  } else {
220  ep_choice = NULL;
221  }
222  reject_map = source.reject_map;
223  combination = source.combination;
224  part_of_combo = source.part_of_combo;
225  CopySimpleFields(source);
226  if (source.blamer_bundle != NULL) {
227  blamer_bundle = new BlamerBundle(*(source.blamer_bundle));
228  }
229  return *this;
230 }
TWERD * rebuild_word
Definition: pageres.h:381
virtual void clear()
BOOL8 part_of_combo
Definition: pageres.h:451
GenericVector< WERD_CHOICE * > alt_choices
Definition: pageres.h:363
REJMAP reject_map
Definition: pageres.h:408
#define NULL
Definition: host.h:144
Definition: blobs.h:233
BOOL8 combination
Definition: pageres.h:450
GenericVector< int > best_state
Definition: pageres.h:392
int push_back(T object)
GenericVector< GenericVector< int > > alt_states
Definition: pageres.h:364
GenericVector< inT8 > best_choice_fontinfo_ids
Definition: pageres.h:454
WERD * word
Definition: pageres.h:334
WERD_CHOICE * ep_choice
Definition: pageres.h:407
bool empty() const
Definition: genericvector.h:68
tesseract::BoxWord * bln_boxes
Definition: pageres.h:343
DENORM denorm
Definition: pageres.h:346
WERD_CHOICE * raw_choice
Definition: pageres.h:360
Definition: werd.h:60
int length() const
Definition: genericvector.h:63
GenericVector< STRING > correct_text
Definition: pageres.h:396
tesseract::BoxWord * box_word
Definition: pageres.h:387
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:234
TWERD * chopped_word
Definition: pageres.h:357
void Clear()
Definition: pageres.cpp:789
#define ASSERT_HOST(x)
Definition: errcode.h:84
void operator=(const ELIST_LINK &)
Definition: elst.h:102
BlamerBundle * blamer_bundle
Definition: pageres.h:367
WERD_CHOICE * best_choice
Definition: pageres.h:359
bool WERD_RES::PiecesAllNatural ( int  start,
int  count 
) const

Definition at line 733 of file pageres.cpp.

733  {
734  // all seams must have no splits.
735  for (int index = start; index < start + count - 1; ++index) {
736  if (index >= 0 && index < array_count(seam_array)) {
737  SEAM* seam = reinterpret_cast<SEAM *>(array_value(seam_array, index));
738  if (seam != NULL && seam->split1 != NULL)
739  return false;
740  }
741  }
742  return true;
743 }
#define NULL
Definition: host.h:144
SEAMS seam_array
Definition: pageres.h:358
SPLIT * split1
Definition: seam.h:46
#define array_count(a)
Definition: tessarray.h:74
#define array_value(a, i)
Definition: tessarray.h:132
int count(LIST var_list)
Definition: oldlist.cpp:108
const char* const WERD_RES::RawUTF8 ( int  blob_index) const
inline

Definition at line 489 of file pageres.h.

489  {
490  if (blob_index < 0 || blob_index >= raw_choice->length())
491  return NULL;
492  UNICHAR_ID id = raw_choice->unichar_id(blob_index);
493  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
494  return NULL;
495  return uch_set->id_to_unichar(id);
496  }
int length() const
Definition: ratngs.h:214
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
int UNICHAR_ID
Definition: unichar.h:31
int size() const
Definition: unicharset.h:264
#define NULL
Definition: host.h:144
const UNICHARSET * uch_set
Definition: pageres.h:348
WERD_CHOICE * raw_choice
Definition: pageres.h:360
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:220
void WERD_RES::RebuildBestState ( )

Definition at line 452 of file pageres.cpp.

452  {
453  if (rebuild_word != NULL)
454  delete rebuild_word;
455  rebuild_word = new TWERD;
456  if (seam_array == NULL) {
457  seam_array = start_seam_list(chopped_word->blobs);
458  }
459  TBLOB* prev_blob = NULL;
460  int start = 0;
461  for (int i = 0; i < best_state.size(); ++i) {
462  int length = best_state[i];
463  join_pieces(chopped_word->blobs, seam_array, start, start + length - 1);
464  TBLOB* blob = chopped_word->blobs;
465  for (int i = 0; i < start; ++i)
466  blob = blob->next;
467  TBLOB* copy_blob = new TBLOB(*blob);
468  if (prev_blob == NULL)
469  rebuild_word->blobs = copy_blob;
470  else
471  prev_blob->next = copy_blob;
472  prev_blob = copy_blob;
473  break_pieces(blob, seam_array, start, start + length - 1);
474  start += length;
475  }
476 }
TWERD * rebuild_word
Definition: pageres.h:381
SEAMS start_seam_list(TBLOB *blobs)
Definition: seam.cpp:175
void break_pieces(TBLOB *blobs, SEAMS seams, inT16 start, inT16 end)
Definition: seam.cpp:535
#define NULL
Definition: host.h:144
Definition: blobs.h:233
GenericVector< int > best_state
Definition: pageres.h:392
TBLOB * blobs
Definition: blobs.h:274
void join_pieces(TBLOB *piece_blobs, SEAMS seams, inT16 start, inT16 end)
Definition: seam.cpp:564
SEAMS seam_array
Definition: pageres.h:358
Definition: blobs.h:174
int size() const
Definition: genericvector.h:59
TWERD * chopped_word
Definition: pageres.h:357
TBLOB * next
Definition: blobs.h:228
void WERD_RES::ReplaceBestChoice ( const WERD_CHOICE &  choice,
const GenericVector< int > &  segmentation_state 
)

Definition at line 436 of file pageres.cpp.

438  {
439  delete best_choice;
440  best_choice = new WERD_CHOICE(choice);
441  best_state = segmentation_state;
442  RebuildBestState();
443  SetupBoxWord();
444  // Make up a fake reject map of the right length to keep the
445  // rejection pass happy.
446  reject_map.initialise(segmentation_state.length());
447  done = tess_accepted = tess_would_adapt = true;
448  SetScriptPositions();
449 }
BOOL8 done
Definition: pageres.h:419
REJMAP reject_map
Definition: pageres.h:408
GenericVector< int > best_state
Definition: pageres.h:392
BOOL8 tess_would_adapt
Definition: pageres.h:418
void RebuildBestState()
Definition: pageres.cpp:452
int length() const
Definition: genericvector.h:63
BOOL8 tess_accepted
Definition: pageres.h:417
void SetScriptPositions()
Definition: pageres.cpp:505
void SetupBoxWord()
Definition: pageres.cpp:495
void initialise(inT16 length)
Definition: rejctmap.cpp:324
WERD_CHOICE * best_choice
Definition: pageres.h:359
void WERD_RES::SetScriptPositions ( )

Definition at line 505 of file pageres.cpp.

505  {
506  box_word->SetScriptPositions(*uch_set, small_caps, rebuild_word,
507  best_choice);
508 }
TWERD * rebuild_word
Definition: pageres.h:381
void SetScriptPositions(const UNICHARSET &unicharset, bool small_caps, TWERD *tessword, WERD_CHOICE *best_choice)
Definition: boxword.cpp:108
const UNICHARSET * uch_set
Definition: pageres.h:348
tesseract::BoxWord * box_word
Definition: pageres.h:387
bool small_caps
Definition: pageres.h:420
WERD_CHOICE * best_choice
Definition: pageres.h:359
void WERD_RES::SetupBlamerBundle ( )

Definition at line 379 of file pageres.cpp.

379  {
380  if (blamer_bundle != NULL) {
381  blamer_bundle->norm_box_tolerance = kBlamerBoxTolerance * denorm.x_scale();
382  TPOINT topleft;
383  TPOINT botright;
384  TPOINT norm_topleft;
385  TPOINT norm_botright;
386  for (int b = 0; b < blamer_bundle->truth_word.length(); ++b) {
387  const TBOX &box = blamer_bundle->truth_word.BlobBox(b);
388  topleft.x = box.left();
389  topleft.y = box.top();
390  botright.x = box.right();
391  botright.y = box.bottom();
392  denorm.NormTransform(topleft, &norm_topleft);
393  denorm.NormTransform(botright, &norm_botright);
394  TBOX norm_box(norm_topleft.x, norm_botright.y,
395  norm_botright.x, norm_topleft.y);
396  blamer_bundle->norm_truth_word.InsertBox(b, norm_box);
397  }
398  }
399 }
const int length() const
Definition: boxword.h:99
float x_scale() const
Definition: normalis.h:264
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
void NormTransform(const TPOINT &pt, TPOINT *transformed) const
Definition: normalis.cpp:190
Definition: rect.h:29
inT16 right() const
Definition: rect.h:74
inT16 y
Definition: blobs.h:68
tesseract::BoxWord truth_word
Definition: pageres.h:167
inT16 x
Definition: blobs.h:67
tesseract::BoxWord norm_truth_word
Definition: pageres.h:170
Definition: blobs.h:53
inT16 top() const
Definition: rect.h:53
DENORM denorm
Definition: pageres.h:346
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:194
const TBOX & BlobBox(int index) const
Definition: boxword.h:102
int norm_box_tolerance
Definition: pageres.h:172
BlamerBundle * blamer_bundle
Definition: pageres.h:367
inT16 bottom() const
Definition: rect.h:60
void WERD_RES::SetupBoxWord ( )

Definition at line 495 of file pageres.cpp.

495  {
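496  if (box_word != NULL)
497  delete box_word;
498  rebuild_word->ComputeBoundingBoxes();
499  box_word = tesseract::BoxWord::CopyFromNormalized(&denorm, rebuild_word);
500  box_word->ClipToOriginalWord(denorm.block(), word);
501 }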
static BoxWord * CopyFromNormalized(const DENORM *denorm, TWERD *tessword)
Definition: boxword.cpp:67
TWERD * rebuild_word
Definition: pageres.h:381
void ClipToOriginalWord(const BLOCK *block, WERD *original_word)
Definition: boxword.cpp:138
#define NULL
Definition: host.h:144
WERD * word
Definition: pageres.h:334
DENORM denorm
Definition: pageres.h:346
tesseract::BoxWord * box_word
Definition: pageres.h:387
const BLOCK * block() const
Definition: normalis.h:276
void ComputeBoundingBoxes()
Definition: blobs.cpp:477
void WERD_RES::SetupFake ( const UNICHARSET &  uch)

Definition at line 340 of file pageres.cpp.

340  {
341  ClearResults();
342  SetupWordScript(unicharset_in);
343  chopped_word = new TWERD;
344  rebuild_word = new TWERD;
347  int blob_count = word->cblob_list()->length();
348  best_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
349  TOP_CHOICE_PERM, unicharset_in);
350  raw_choice = new WERD_CHOICE("", NULL, 10.0f, -1.0f,
351  TOP_CHOICE_PERM, unicharset_in);
352  if (blob_count > 0) {
353  BLOB_CHOICE** fake_choices = new BLOB_CHOICE*[blob_count];
354  // For non-text blocks, just pass any blobs through to the box_word
355  // and call the word failed with a fake classification.
356  C_BLOB_IT b_it(word->cblob_list());
357  int blob_id = 0;
358  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
359  TBOX box = b_it.data()->bounding_box();
360  box_word->InsertBox(box_word->length(), box);
361  fake_choices[blob_id++] = new BLOB_CHOICE(0, 10.0f, -1.0f,
362  -1, -1, -1, 0, 0, false);
363  }
364  FakeClassifyWord(blob_count, fake_choices);
365  delete [] fake_choices;
366  }
367  tess_failed = true;
368 }
void ClearResults()
Definition: pageres.cpp:799
TWERD * rebuild_word
Definition: pageres.h:381
const int length() const
Definition: boxword.h:99
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
#define NULL
Definition: host.h:144
Definition: blobs.h:233
Definition: rect.h:29
#define f(xc, yc)
Definition: imgscale.cpp:39
WERD * word
Definition: pageres.h:334
tesseract::BoxWord * bln_boxes
Definition: pageres.h:343
WERD_CHOICE * raw_choice
Definition: pageres.h:360
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:194
tesseract::BoxWord * box_word
Definition: pageres.h:387
TWERD * chopped_word
Definition: pageres.h:357
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:370
void FakeClassifyWord(int blob_count, BLOB_CHOICE **choices)
Definition: pageres.cpp:549
BOOL8 tess_failed
Definition: pageres.h:409
WERD_CHOICE * best_choice
Definition: pageres.h:359
bool WERD_RES::SetupForCubeRecognition ( const UNICHARSET &  unicharset_in,
tesseract::Tesseract *  tesseract,
const BLOCK *  block 
)

Definition at line 317 of file pageres.cpp.

319  {
320  tesseract = tess;
321  POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
322  if (pb != NULL && !pb->IsText()) {
323  // Ignore words in graphic regions.
324  SetupFake(unicharset_in);
325  word->set_flag(W_REP_CHAR, false);
326  return false;
327  }
328  ClearResults();
329  SetupWordScript(unicharset_in);
330  TBOX word_box = word->bounding_box();
332  word_box.left(), word_box.bottom(),
333  1.0f, 1.0f, 0.0f, 0.0f);
335  return true;
336 }
void ClearResults()
Definition: pageres.cpp:799
TBOX bounding_box()
Definition: werd.cpp:164
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
Definition: rect.h:29
WERD * word
Definition: pageres.h:334
POLY_BLOCK * poly_block() const
Definition: pdblock.h:62
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:123
DENORM denorm
Definition: pageres.h:346
void SetupNormalization(const BLOCK *block, const ROW *row, const FCOORD *rotation, const DENORM *predecessor, const DENORM_SEG *segs, int num_segs, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
Definition: normalis.cpp:143
void SetupBlamerBundle()
Definition: pageres.cpp:379
void SetupFake(const UNICHARSET &uch)
Definition: pageres.cpp:340
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:370
bool IsText() const
Definition: polyblk.h:54
inT16 bottom() const
Definition: rect.h:60
bool WERD_RES::SetupForTessRecognition ( const UNICHARSET &  unicharset_in,
tesseract::Tesseract *  tesseract,
Pix *  pix,
bool  numeric_mode,
bool  use_body_size,
ROW *  row,
BLOCK *  block 
)

Definition at line 272 of file pageres.cpp.

276  {
277  tesseract = tess;
278  POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
279  if (word->cblob_list()->empty() || (pb != NULL && !pb->IsText())) {
280  // Empty words occur when all the blobs have been moved to the rej_blobs
281  // list, which seems to occur frequently in junk.
282  SetupFake(unicharset_in);
283  word->set_flag(W_REP_CHAR, false);
284  return false;
285  }
286  ClearResults();
287  SetupWordScript(unicharset_in);
289  if (use_body_size && row->body_size() > 0.0f) {
290  chopped_word->SetupBLNormalize(block, row, row->body_size(),
291  numeric_mode, &denorm);
292  } else {
293  chopped_word->SetupBLNormalize(block, row, x_height, numeric_mode, &denorm);
294  }
295  // The image will be 8-bit grey if the input was grey or color. Note that in
296  // a grey image 0 is black and 255 is white. If the input was binary, then
297  // the pix will be binary and 0 is white, with 1 being black.
298  // To tell the difference pixGetDepth() will return 8 or 1.
299  denorm.set_pix(pix);
300  // The inverse flag will be true iff the word has been determined to be white
301  // on black, and is independent of whether the pix is 8 bit or 1 bit.
306  best_choice = new WERD_CHOICE(&unicharset_in);
308  raw_choice = new WERD_CHOICE(&unicharset_in);
309  raw_choice->make_bad();
311  return true;
312 }
static BoxWord * CopyFromNormalized(const DENORM *denorm, TWERD *tessword)
Definition: boxword.cpp:67
void ClearResults()
Definition: pageres.cpp:799
SEAMS start_seam_list(TBLOB *blobs)
Definition: seam.cpp:175
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
static TWERD * PolygonalCopy(WERD *src)
Definition: blobs.cpp:405
void set_pix(Pix *pix)
Definition: normalis.h:246
void SetupBLNormalize(const BLOCK *block, const ROW *row, float x_height, bool numeric_mode, DENORM *denorm) const
Definition: blobs.cpp:424
void make_bad()
Set the fields in this choice to be default (bad) values.
Definition: ratngs.h:321
#define NULL
Definition: host.h:144
TBLOB * blobs
Definition: blobs.h:274
void set_inverse(bool value)
Definition: normalis.h:252
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:122
SEAMS seam_array
Definition: pageres.h:358
WERD * word
Definition: pageres.h:334
Definition: werd.h:44
POLY_BLOCK * poly_block() const
Definition: pdblock.h:62
void Normalize(const DENORM &denorm)
Definition: blobs.cpp:447
float x_height
Definition: pageres.h:431
tesseract::BoxWord * bln_boxes
Definition: pageres.h:343
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:123
DENORM denorm
Definition: pageres.h:346
WERD_CHOICE * raw_choice
Definition: pageres.h:360
void SetupBlamerBundle()
Definition: pageres.cpp:379
void SetupFake(const UNICHARSET &uch)
Definition: pageres.cpp:340
float body_size() const
Definition: ocrrow.h:70
TWERD * chopped_word
Definition: pageres.h:357
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:370
bool IsText() const
Definition: polyblk.h:54
WERD_CHOICE * best_choice
Definition: pageres.h:359
void WERD_RES::SetupWordScript ( const UNICHARSET &  unicharset_in)

Definition at line 370 of file pageres.cpp.

370  {
371  uch_set = &uch;
372  int script = uch.default_sid();
373  word->set_script_id(script);
374  word->set_flag(W_SCRIPT_HAS_XHEIGHT, uch.script_has_xheight());
375  word->set_flag(W_SCRIPT_IS_LATIN, script == uch.latin_sid());
376 }
void set_script_id(int id)
Definition: werd.h:113
WERD * word
Definition: pageres.h:334
const UNICHARSET * uch_set
Definition: pageres.h:348
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:123
int default_sid() const
Definition: unicharset.h:760
UNICHARSET::Direction WERD_RES::SymbolDirection ( int  blob_index) const
inline

Definition at line 498 of file pageres.h.

498  {
499  if (best_choice == NULL ||
500  blob_index >= best_choice->length() ||
501  blob_index < 0)
503  return uch_set->get_direction(best_choice->unichar_id(blob_index));
504  }
int length() const
Definition: ratngs.h:214
#define NULL
Definition: host.h:144
const UNICHARSET * uch_set
Definition: pageres.h:348
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:579
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:220
WERD_CHOICE * best_choice
Definition: pageres.h:359
bool WERD_RES::UnicharsInReadingOrder ( ) const
inline

Definition at line 540 of file pageres.h.

540  {
541  return best_choice->unichars_in_script_order();
542  }
bool unichars_in_script_order() const
Definition: ratngs.h:389
WERD_CHOICE * best_choice
Definition: pageres.h:359
void WERD_RES::WithoutFootnoteSpan ( int *  start,
int *  end 
) const

Definition at line 510 of file pageres.cpp.

510  {
511  int end = best_choice->length();
512  while (end > 0 &&
513  uch_set->get_isdigit(best_choice->unichar_ids()[end - 1]) &&
515  end--;
516  }
517  int start = 0;
518  while (start < end &&
521  start++;
522  }
523  *pstart = start;
524  *pend = end;
525 }
int length() const
Definition: ratngs.h:214
const UNICHAR_ID * unichar_ids() const
Definition: ratngs.h:217
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:413
const UNICHARSET * uch_set
Definition: pageres.h:348
ScriptPos BlobPosition(int index) const
Definition: boxword.h:105
tesseract::BoxWord * box_word
Definition: pageres.h:387
WERD_CHOICE * best_choice
Definition: pageres.h:359
void WERD_RES::WithoutFootnoteSpan ( const WERD_CHOICE &  choice,
const GenericVector< int > &  state,
int *  start,
int *  end 
) const

Definition at line 527 of file pageres.cpp.

529  {
530  int len = word.length();
531  *pstart = 0;
532  *pend = len;
533  if (len < 2) return;
534  if (!word.unicharset()->get_isdigit(word.unichar_ids()[len - 1]) &&
535  !word.unicharset()->get_isdigit(word.unichar_ids()[0])) return;
536 
537  // ok, now that we know the word ends in digits, do the expensive bit of
538  // figuring out if they're superscript.
539  WERD_RES copy(*this);
540  copy.ReplaceBestChoice(word, state);
541  copy.WithoutFootnoteSpan(pstart, pend);
542 }
WERD * word
Definition: pageres.h:334

Member Data Documentation

GenericVector<WERD_CHOICE *> WERD_RES::alt_choices

Definition at line 363 of file pageres.h.

GenericVector<GenericVector<int> > WERD_RES::alt_states

Definition at line 364 of file pageres.h.

WERD_CHOICE* WERD_RES::best_choice

Definition at line 359 of file pageres.h.

GenericVector<inT8> WERD_RES::best_choice_fontinfo_ids

Definition at line 454 of file pageres.h.

GenericVector<int> WERD_RES::best_state

Definition at line 392 of file pageres.h.

BlamerBundle* WERD_RES::blamer_bundle

Definition at line 367 of file pageres.h.

tesseract::BoxWord* WERD_RES::bln_boxes

Definition at line 343 of file pageres.h.

inT8 WERD_RES::bold

Definition at line 422 of file pageres.h.

tesseract::BoxWord* WERD_RES::box_word

Definition at line 387 of file pageres.h.

float WERD_RES::caps_height

Definition at line 432 of file pageres.h.

TWERD* WERD_RES::chopped_word

Definition at line 357 of file pageres.h.

BOOL8 WERD_RES::combination

Definition at line 450 of file pageres.h.

GenericVector<STRING> WERD_RES::correct_text

Definition at line 396 of file pageres.h.

DENORM WERD_RES::denorm

Definition at line 346 of file pageres.h.

BOOL8 WERD_RES::done

Definition at line 419 of file pageres.h.

WERD_CHOICE* WERD_RES::ep_choice

Definition at line 407 of file pageres.h.

const FontInfo* WERD_RES::fontinfo

Definition at line 424 of file pageres.h.

const FontInfo* WERD_RES::fontinfo2

Definition at line 425 of file pageres.h.

inT8 WERD_RES::fontinfo_id2_count

Definition at line 427 of file pageres.h.

inT8 WERD_RES::fontinfo_id_count

Definition at line 426 of file pageres.h.

BOOL8 WERD_RES::guessed_caps_ht

Definition at line 429 of file pageres.h.

BOOL8 WERD_RES::guessed_x_ht

Definition at line 428 of file pageres.h.

inT8 WERD_RES::italic

Definition at line 421 of file pageres.h.

BOOL8 WERD_RES::part_of_combo

Definition at line 451 of file pageres.h.

WERD_CHOICE* WERD_RES::raw_choice

Definition at line 360 of file pageres.h.

TWERD* WERD_RES::rebuild_word

Definition at line 381 of file pageres.h.

REJMAP WERD_RES::reject_map

Definition at line 408 of file pageres.h.

BOOL8 WERD_RES::reject_spaces

Definition at line 452 of file pageres.h.

SEAMS WERD_RES::seam_array

Definition at line 358 of file pageres.h.

bool WERD_RES::small_caps

Definition at line 420 of file pageres.h.

BOOL8 WERD_RES::tess_accepted

Definition at line 417 of file pageres.h.

BOOL8 WERD_RES::tess_failed

Definition at line 409 of file pageres.h.

BOOL8 WERD_RES::tess_would_adapt

Definition at line 418 of file pageres.h.

tesseract::Tesseract* WERD_RES::tesseract

Definition at line 403 of file pageres.h.

const UNICHARSET* WERD_RES::uch_set

Definition at line 348 of file pageres.h.

CRUNCH_MODE WERD_RES::unlv_crunch_mode

Definition at line 430 of file pageres.h.

WERD* WERD_RES::word

Definition at line 334 of file pageres.h.

float WERD_RES::x_height

Definition at line 431 of file pageres.h.


The documentation for this class was generated from the following files: