72 (*debug) +=
" Choice ";
75 (*debug) += choice_str;
92 BLOCK_LIST *the_block_list,
95 BLOCK_IT block_it(the_block_list);
96 BLOCK_RES_IT block_res_it(&block_res_list);
97 for (block_it.mark_cycle_pt();
98 !block_it.cycled_list(); block_it.forward()) {
99 block_res_it.add_to_end(
new BLOCK_RES(block_it.data()));
101 prev_word_best_choice = prev_word_best_choice_ptr;
111 ROW_IT row_it (the_block->
row_list ());
125 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
126 row_res_it.add_to_end(
new ROW_RES(row_it.data()));
149 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
150 word_res =
new WERD_RES(word_it.data());
160 copy_word =
new WERD;
162 *copy_word = *(word_it.data());
166 word_res_it.add_to_end(combo);
172 word_res_it.add_to_end(word_res);
289 if (use_body_size && row->
body_size() > 0.0f) {
319 const BLOCK* block) {
333 1.0f, 1.0f, 0.0f, 0.0f);
349 TOP_CHOICE_PERM, unicharset_in);
351 TOP_CHOICE_PERM, unicharset_in);
352 if (blob_count > 0) {
358 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
359 TBOX box = b_it.data()->bounding_box();
362 -1, -1, -1, 0, 0,
false);
365 delete [] fake_choices;
388 topleft.
x = box.
left();
389 topleft.
y = box.
top();
394 TBOX norm_box(norm_topleft.
x, norm_botright.
y,
395 norm_botright.
x, norm_topleft.
y);
403 template<
class T>
static void MovePointerData(T** dest, T**src) {
465 for (
int i = 0; i < start; ++i)
468 if (prev_blob ==
NULL)
471 prev_blob->
next = copy_blob;
472 prev_blob = copy_blob;
488 for (
int i = 0; i < word_len; ++i) {
518 while (start < end &&
529 int *pstart,
int *pend)
const {
554 BLOB_CHOICE_LIST_CLIST* word_choices =
new BLOB_CHOICE_LIST_CLIST;
555 BLOB_CHOICE_LIST_C_IT bc_it(word_choices);
556 for (
int c = 0; c < blob_count; ++c) {
558 choices[c]->unichar_id(), 1,
559 choices[c]->rating(), choices[c]->certainty());
560 BLOB_CHOICE_LIST* choice_list =
new BLOB_CHOICE_LIST;
561 BLOB_CHOICE_IT choice_it(choice_list);
562 choice_it.add_after_then_move(choices[c]);
563 bc_it.add_after_then_move(choice_list);
591 BLOB_CHOICE_LIST_CLIST *blob_choices) {
592 bool modified =
false;
596 if (new_id != INVALID_UNICHAR_ID &&
613 BLOB_CHOICE_LIST_C_IT blob_choices_it(blob_choices);
614 for (
int j = 0; j < i; ++j)
615 blob_choices_it.forward();
616 BLOB_CHOICE_IT it1(blob_choices_it.data());
617 BLOB_CHOICE_LIST* target_choices = blob_choices_it.data_relative(1);
618 BLOB_CHOICE_IT it2(target_choices);
619 float certainty = it2.data()->certainty();
620 float rating = it2.data()->rating();
621 if (it1.data()->certainty() < certainty) {
622 certainty = it1.data()->certainty();
623 rating = it1.data()->rating();
624 target_choices = blob_choices_it.data();
625 blob_choices_it.forward();
627 delete blob_choices_it.extract();
644 static int is_simple_quote(
const char* signed_str,
int length) {
645 const unsigned char* str =
646 reinterpret_cast<const unsigned char*
>(signed_str);
648 return (length == 1 && (*str ==
'\'' || *str ==
'`')) ||
650 (length == 3 && ((*str == 0xe2 &&
651 *(str + 1) == 0x80 &&
652 *(str + 2) == 0x98) ||
654 *(str + 1) == 0x80 &&
655 *(str + 2) == 0x99)));
663 if (is_simple_quote(ch, strlen(ch)) &&
664 is_simple_quote(next_ch, strlen(next_ch)))
666 return INVALID_UNICHAR_ID;
686 if (strlen(ch) == 1 && strlen(next_ch) == 1 &&
687 (*ch ==
'-' || *ch ==
'~') && (*next_ch ==
'-' || *next_ch ==
'~'))
689 return INVALID_UNICHAR_ID;
717 return INVALID_UNICHAR_ID;
735 for (
int index = start; index < start + count - 1; ++index) {
846 return word_res == other.word_res &&
847 row_res == other.row_res &&
848 block_res == other.block_res;
853 if (other.block_res ==
NULL) {
855 if (block_res ==
NULL)
859 if (block_res ==
NULL) {
862 if (block_res == other.block_res) {
863 if (other.row_res ==
NULL || row_res ==
NULL) {
867 if (row_res == other.row_res) {
870 if (word_res == other.word_res) {
876 for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
877 word_res_it.forward()) {
878 if (word_res_it.data() == word_res) {
880 }
else if (word_res_it.data() == other.word_res) {
889 for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
890 row_res_it.forward()) {
891 if (row_res_it.data() == row_res) {
893 }
else if (row_res_it.data() == other.row_res) {
902 for (block_res_it.mark_cycle_pt();
903 !block_res_it.cycled_list(); block_res_it.forward()) {
904 if (block_res_it.data() == block_res) {
906 }
else if (block_res_it.data() == other.block_res) {
921 WERD_IT w_it(
row()->
row->word_list());
922 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
924 if (word == word_res->
word)
928 w_it.add_before_then_move(new_word);
933 WERD_RES_IT wr_it(&
row()->word_res_list);
934 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
936 if (word == word_res)
940 wr_it.add_before_then_move(new_res);
941 if (wr_it.at_first()) {
958 WERD_IT w_it(
row()->
row->word_list());
959 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
960 if (w_it.data() == word_res->
word) {
965 delete w_it.extract();
969 WERD_RES_IT wr_it(&
row()->word_res_list);
970 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
971 if (wr_it.data() == word_res) {
977 delete wr_it.extract();
989 block_res_it.mark_cycle_pt();
990 prev_block_res =
NULL;
992 prev_word_res =
NULL;
996 next_block_res =
NULL;
998 next_word_res =
NULL;
999 internal_forward(
true, empty_ok);
1000 return internal_forward(
false, empty_ok);
1010 void PAGE_RES_IT::ResetWordIterator() {
1011 if (row_res == next_row_res) {
1014 word_res_it.move_to_first();
1015 word_res_it.mark_cycle_pt();
1016 while (!word_res_it.cycled_list() && word_res_it.data() != next_word_res)
1017 word_res_it.forward();
1019 word_res_it.forward();
1038 WERD_RES *PAGE_RES_IT::internal_forward(
bool new_block,
bool empty_ok) {
1039 bool new_row =
false;
1041 prev_block_res = block_res;
1042 prev_row_res = row_res;
1043 prev_word_res = word_res;
1044 block_res = next_block_res;
1045 row_res = next_row_res;
1046 word_res = next_word_res;
1047 next_block_res =
NULL;
1048 next_row_res =
NULL;
1049 next_word_res =
NULL;
1051 while (!block_res_it.cycled_list()) {
1054 row_res_it.set_to_list(&block_res_it.data()->row_res_list);
1055 row_res_it.mark_cycle_pt();
1056 if (row_res_it.empty() && empty_ok) {
1057 next_block_res = block_res_it.data();
1062 while (!row_res_it.cycled_list()) {
1065 word_res_it.set_to_list(&row_res_it.data()->word_res_list);
1066 word_res_it.mark_cycle_pt();
1069 while (!word_res_it.cycled_list() && word_res_it.data()->part_of_combo)
1070 word_res_it.forward();
1071 if (!word_res_it.cycled_list()) {
1072 next_block_res = block_res_it.data();
1073 next_row_res = row_res_it.data();
1074 next_word_res = word_res_it.data();
1075 word_res_it.forward();
1079 row_res_it.forward();
1083 block_res_it.forward();
1102 if (!row)
return NULL;
1116 while (block_res == next_block_res &&
1117 (next_row_res !=
NULL && next_row_res->
row !=
NULL &&
1119 internal_forward(
false,
true);
1121 return internal_forward(
false,
true);
1131 while (block_res == next_block_res) {
1132 internal_forward(
false,
true);
1134 return internal_forward(
false,
true);
1138 inT16 chars_in_word;
1139 inT16 rejects_in_word = 0;
1149 block_res->
rej_count += rejects_in_word;
1151 if (chars_in_word == rejects_in_word)
static BoxWord * CopyFromNormalized(const DENORM *denorm, TWERD *tessword)
void delete_data_pointers()
bool SetupForTessRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, bool numeric_mode, bool use_body_size, ROW *row, BLOCK *block)
SEAMS start_seam_list(TBLOB *blobs)
const char * IncorrectReason() const
C_BLOB_LIST * cblob_list()
bool script_has_xheight() const
const char *const id_to_unichar(UNICHAR_ID id) const
static TWERD * PolygonalCopy(WERD *src)
IncorrectResultReason incorrect_result_reason
const UNICHAR_ID * unichar_ids() const
BLOB_CHOICE_LIST_CLIST * blob_choices()
WERD_RES * forward_block()
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2)
void set_script_id(int id)
WERD_RES * InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *new_word)
void ConsumeWordResults(WERD_RES *word)
BLOCK_RES_LIST block_res_list
const FontInfo * fontinfo
bool SetupForCubeRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, const BLOCK *block)
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb, BLOB_CHOICE_LIST_CLIST *blob_choices)
void SetupBLNormalize(const BLOCK *block, const ROW *row, float x_height, bool numeric_mode, DENORM *denorm) const
void append_unichar_id(UNICHAR_ID unichar_id, char fragment_length, float rating, float certainty)
const FontInfo * fontinfo2
GenericVector< WERD_CHOICE * > alt_choices
WERD_RES * start_page(bool empty_ok)
void break_pieces(TBLOB *blobs, SEAMS seams, inT16 start, inT16 end)
tesseract::Tesseract * tesseract
WERD_RES * restart_page()
WERD_RES * forward_paragraph()
void make_bad()
Set the fields in this choice to be default (bad) values.
void ClipToOriginalWord(const BLOCK *block, WERD *original_word)
void FillDebugString(const STRING &msg, const WERD_CHOICE *choice, STRING *debug)
void MergeBoxes(int start, int end)
WERD_RES_LIST word_res_list
UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2)
ROW_LIST * row_list()
get rows
GenericVector< int > best_state
void SetScriptPositions(const UNICHARSET &unicharset, bool small_caps, TWERD *tessword, WERD_CHOICE *best_choice)
void NormTransform(const TPOINT &pt, TPOINT *transformed) const
bool HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2)
bool get_isdigit(UNICHAR_ID unichar_id) const
void CopyTruth(const BlamerBundle &other)
void join_pieces(TBLOB *piece_blobs, SEAMS seams, inT16 start, inT16 end)
void set_inverse(bool value)
GenericVector< GenericVector< int > > alt_states
UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2)
void fix_hyphens(BLOB_CHOICE_LIST_CLIST *blob_choices)
BOOL8 flag(WERD_FLAGS mask) const
bool operator==(const PAGE_RES_IT &other) const
GenericVector< inT8 > best_choice_fontinfo_ids
tesseract::BoxWord truth_word
void CloneChoppedToRebuild()
void remove_pos(inT16 pos)
tesseract::BoxWord norm_truth_word
const UNICHARSET * uch_set
bool PiecesAllNatural(int start, int count) const
GenericVector< STRING > truth_text
POLY_BLOCK * poly_block() const
#define ELISTIZE(CLASSNAME)
void Normalize(const DENORM &denorm)
void WithoutFootnoteSpan(int *start, int *end) const
tesseract::BoxWord * bln_boxes
void set_flag(WERD_FLAGS mask, BOOL8 value)
ROW_RES_LIST row_res_list
void free_seam_list(SEAMS seam_list)
void CopyResults(const BlamerBundle &other)
void remove_unichar_id(int index)
bool contains_unichar(const char *const unichar_repr) const
WERD_CHOICE ** prev_word_best_choice
inT32 whole_word_rej_count
static const char * IncorrectReasonName(IncorrectResultReason irr)
ScriptPos BlobPosition(int index) const
void BestChoiceToCorrectText()
void move(GenericVector< T > *from)
void SetupNormalization(const BLOCK *block, const ROW *row, const FCOORD *rotation, const DENORM *predecessor, const DENORM_SEG *segs, int num_segs, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
void InsertBox(int index, const TBOX &box)
GenericVector< STRING > correct_text
bool truth_has_char_boxes
tesseract::BoxWord * box_word
const TBOX & BlobBox(int index) const
void set_unichar_id(UNICHAR_ID unichar_id, int index)
bool get_enabled(UNICHAR_ID unichar_id) const
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
virtual void remove(int index)
void CopySimpleFields(const WERD_RES &source)
void SetupFake(const UNICHARSET &uch)
const BLOCK * block() const
void SetupWordScript(const UNICHARSET &unicharset_in)
void set_blob_choices(BLOB_CHOICE_LIST_CLIST *blob_choices)
void SetScriptPositions()
void FakeClassifyWord(int blob_count, BLOB_CHOICE **choices)
void ComputeBoundingBoxes()
void MergeBlobs(int start, int end)
#define array_value(a, i)
void ReplaceBestChoice(const WERD_CHOICE &choice, const GenericVector< int > &segmentation_state)
int cmp(const PAGE_RES_IT &other) const
void fix_quotes(BLOB_CHOICE_LIST_CLIST *blob_choices)
void InitForRetryRecognition(const WERD_RES &source)
CRUNCH_MODE unlv_crunch_mode
void copy_on(WERD_RES *word_res)
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
const UNICHARSET * unicharset() const
const UNICHAR_ID unichar_id(int index) const
void operator=(const ELIST_LINK &)
BlamerBundle * blamer_bundle
void initialise(inT16 length)
WERD_CHOICE * best_choice
WERD_RES & operator=(const WERD_RES &source)