75 while (srcpt != start);
84 for (outline = srcline; outline !=
NULL; outline = outline->
next) {
104 if (srcpt->
flags[1] == 2)
108 while (srcpt != start);
111 if (srcpt->
flags[1] == 0) {
123 while (srcpt != real_start);
132 for (outline = srcline; outline !=
NULL; outline = outline->
next) {
147 bool italic_blob,
SEAMS seam_list) {
154 other_blob =
new TBLOB;
157 blob->
next = other_blob;
180 cprintf (
"\n** no seam picked *** \n");
183 apply_seam(blob, other_blob, italic_blob, seam);
186 if ((seam ==
NULL) ||
196 blob->
next = next_blob;
200 #ifndef GRAPHICS_DISABLED
204 cprintf (
"\n** seam being removed ** \n");
220 bool italic_blob,
SEAMS seam_list) {
225 for (x = 0; x < blob_number; x++)
229 italic_blob, seam_list);
235 bool italic_blob,
SEAMS seam_list) {
241 while (blob !=
NULL) {
248 TPOINT original_topleft, original_botright;
252 TBOX original_box =
TBOX(original_topleft.
x, original_botright.
y,
253 original_botright.
x, original_topleft.
y);
255 bool almost_equal_box =
false;
257 for (
int i = 0; i < boxes.
size(); i++) {
261 almost_equal_box =
true;
266 (!almost_equal_box && num_overlap > 1)) {
268 italic_blob, seam_list);
273 *blob_number = *blob_number + 1;
294 for (index = 0; index < length; index++)
311 edgept = outline->
loop;
315 edgept = edgept->
next;
317 while (edgept != outline->
loop);
337 bool split_next_to_fragment,
343 BLOB_CHOICE_LIST *answer;
344 BLOB_CHOICE_IT answer_it;
349 bool split_point_from_dict = (*blob_number != -1);
350 if (split_point_from_dict) {
354 split_next_to_fragment);
357 cprintf(
"blob_number = %d\n", *blob_number);
358 if (*blob_number == -1)
366 answer = char_choices->
get(*blob_number);
369 answer_it.set_to_list(answer);
370 if (!split_point_from_dict) {
372 rating_ceiling = answer_it.data()->rating();
376 for (blob = word->
blobs; x < *blob_number; x++) {
383 delete char_choices->
get(*blob_number);
387 char_choices->
insert(answer, *blob_number);
391 char_choices->
set(answer, *blob_number + 1);
405 char chop_index_string[2];
406 if (chop_index <= 9) {
407 snprintf(chop_index_string,
sizeof(chop_index_string),
"%d", chop_index);
409 chop_index_string[0] =
static_cast<char>(
'A' - 10 + chop_index);
410 chop_index_string[1] =
'\0';
413 if (unichar_id == INVALID_UNICHAR_ID) {
417 BLOB_CHOICE_IT answer_it(answer);
420 answer_it.data()->rating(),
421 answer_it.data()->certainty(),
422 answer_it.data()->fontinfo_id(),
423 answer_it.data()->fontinfo_id2(),
424 answer_it.data()->script_id(),
425 answer_it.data()->min_xheight(),
426 answer_it.data()->max_xheight(),
427 answer_it.data()->adapted());
429 answer_it.set_to_list(answer);
430 answer_it.add_after_then_move(modified_blob);
445 int *right_chop_index) {
449 BLOB_CHOICE_LIST *answer;
450 BLOB_CHOICE_IT answer_it;
453 int left_chop_index = 0;
458 cprintf(
"blob_number = %d\n", *blob_number);
459 if (*blob_number == -1)
465 answer = char_choices->
get(*blob_number);
468 answer_it.set_to_list(answer);
469 rating_ceiling = answer_it.data()->rating();
472 for (blob = word->
blobs; x < *blob_number; x++) {
481 *seam_list =
insert_seam(*seam_list, *blob_number, seam, blob, word->
blobs);
483 answer = char_choices->
get(*blob_number);
484 answer_it.set_to_list(answer);
485 unichar_id = answer_it.data()->unichar_id();
486 float rating = answer_it.data()->rating() / exp(1.0);
489 delete char_choices->
get(*blob_number);
493 char_choices->
insert(answer, *blob_number);
497 char_choices->
set(answer, *blob_number + 1);
525 *seam_list =
insert_seam(*seam_list, blob_number, seam, blob,
547 found_em[0] = found_em[1] = found_em[2] =
FALSE;
549 for (outline = blob->
outlines; outline; outline = outline->
next) {
565 last_outline = outline;
568 if (!found_em[0] || !found_em[1] || !found_em[2])
588 BLOB_CHOICE_LIST *match_result;
598 blob !=
NULL; blob = blob->
next, index++) {
601 if (match_result ==
NULL)
602 cprintf(
"Null classifier output!\n");
603 *char_choices += match_result;
605 bit_count = index - 1;
607 bool acceptable =
false;
608 bool replaced =
false;
609 bool best_choice_updated =
612 if (best_choice_updated &&
623 bool best_choice_acceptable =
false;
630 &best_choice_acceptable);
667 BLOB_CHOICE_IT blob_choice_it(best_char_choices->
get(i));
670 for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
671 blob_choice_it.forward()) {
672 if (!(
getDict().getUnicharset().get_fragment(
673 blob_choice_it.data()->unichar_id()))) {
674 first_choice = blob_choice_it.data();
687 debug =
"Best choice is: incorrect, top choice, dictionary word";
688 debug +=
" with permuter ";
691 debug =
"Classifier/Old LM tradeoff is to blame";
700 if (ratings ==
NULL) {
706 if (ratings !=
NULL) {
708 tprintf(
"Final Ratings Matrix:\n");
727 return best_char_choices;
746 bool *best_choice_acceptable) {
749 bool updated_best_choice =
false;
759 updated_best_choice =
779 bool replaced =
false;
780 if ((updated_best_choice &&
781 (*best_choice_acceptable =
789 if (updated_best_choice)
CopyCharChoices(*char_choices, best_char_choices);
802 float rating_ceiling,
803 bool split_next_to_fragment) {
804 BLOB_CHOICE_IT blob_choice_it;
806 BLOB_CHOICE_IT temp_it;
809 int worst_index = -1;
811 int worst_index_near_fragment = -1;
816 cprintf(
"rating_ceiling = %8.4f\n", rating_ceiling);
818 cprintf(
"rating_ceiling = No Limit\n");
821 if (split_next_to_fragment && char_choices.
length() > 0) {
823 if (char_choices.
get(0) !=
NULL) {
824 temp_it.set_to_list(char_choices.
get(0));
826 temp_it.data()->unichar_id());
832 for (x = 0; x < char_choices.
length(); ++x) {
833 if (char_choices.
get(x) ==
NULL) {
834 if (fragments !=
NULL) {
839 blob_choice_it.set_to_list(char_choices.
get(x));
840 blob_choice = blob_choice_it.data();
842 if (split_next_to_fragment && x+1 < char_choices.
length()) {
843 if (char_choices.
get(x+1) !=
NULL) {
844 temp_it.set_to_list(char_choices.
get(x+1));
846 temp_it.data()->unichar_id());
848 fragments[x+1] =
NULL;
851 if (blob_choice->
rating() < rating_ceiling &&
854 if (blob_choice->
rating() > worst) {
856 worst = blob_choice->
rating();
858 if (split_next_to_fragment) {
860 bool expand_following_fragment =
861 (x + 1 < char_choices.
length() &&
863 bool expand_preceding_fragment =
864 (x > 0 && fragments[x-1] !=
NULL && !fragments[x-1]->
is_ending());
865 if ((expand_following_fragment || expand_preceding_fragment) &&
866 blob_choice->
rating() > worst_near_fragment) {
867 worst_index_near_fragment = x;
868 worst_near_fragment = blob_choice->
rating();
870 cprintf(
"worst_index_near_fragment=%d"
871 " expand_following_fragment=%d"
872 " expand_preceding_fragment=%d\n",
873 worst_index_near_fragment,
874 expand_following_fragment,
875 expand_preceding_fragment);
882 if (fragments !=
NULL) {
887 return worst_index_near_fragment != -1 ?
888 worst_index_near_fragment : worst_index;
901 for (
int i = 0; i < fixpt->
size(); i++) {
902 if ((*fixpt)[i].begin == (*fixpt)[i].end &&
903 (*fixpt)[i].dangerous &&
904 (*fixpt)[i].correct_is_ngram) {
905 return (*fixpt)[i].begin;
919 assert(blamer_bundle !=
NULL);
925 bool missing_chop =
false;
929 while (b < blamer_bundle->truth_word.length() && curr_blob !=
NULL) {
933 curr_blob = curr_blob->
next;
940 curr_blob = curr_blob->
next;
944 if (missing_chop || b < blamer_bundle->norm_truth_word.length()) {
946 char debug_buffer[256];
948 sprintf(debug_buffer,
"Detected missing chop (tolerance=%d) at ",
950 debug += debug_buffer;
952 debug.
add_str_int(
"\nNo chop for truth at x=", truth_x);
956 debug +=
" truth box(es)";
958 debug +=
"\nMaximally chopped word boxes:\n";
960 curr_blob = curr_blob->
next) {
962 sprintf(debug_buffer,
"(%d,%d)->(%d,%d)\n",
964 debug += debug_buffer;
966 debug +=
"Truth bounding boxes:\n";
969 sprintf(debug_buffer,
"(%d,%d)->(%d,%d)\n",
971 debug += debug_buffer;
994 BLOB_CHOICE_IT blob_choice_it;
1000 chunks_record.
chunks = blobs;
1006 for (x = 0; x < num_chunks; x++) {
1011 blob_choice_it.set_to_list(choices);
1013 if (blob_choice_it.data()->certainty() == 0) {
1017 -(
inT16) (10 * blob_choice_it.data()->rating() /
1018 blob_choice_it.data()->certainty());
1021 chunks_record.
weights = blob_weights;
1026 if (!only_create_ratings_matrix) {
1033 state, fixpt, best_state);
void delete_data_pointers()
bool ChoiceIsCorrect(const UNICHARSET &uni_set, const WERD_CHOICE *choice, const GenericVector< STRING > &truth_text)
inT16 select_blob_to_split_from_fixpt(DANGERR *fixpt)
void preserve_outline(EDGEPT *start)
WIDTH_RECORD * char_widths
bool improve_one_blob(WERD_RES *word_res, BLOB_CHOICE_LIST_VECTOR *char_choices, inT32 *blob_number, SEAMS *seam_list, DANGERR *fixpt, bool split_next_to_fragment, BlamerBundle *blamer_bundle)
const char *const id_to_unichar(UNICHAR_ID id) const
IncorrectResultReason incorrect_result_reason
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
int shared_split_points(SEAM *seam1, SEAM *seam2)
void restore_outline_tree(TESSLINE *srcline)
void set_chopper_blame(WERD_RES *word)
void SegSearch(CHUNKS_RECORD *chunks_record, WERD_CHOICE *best_choice, BLOB_CHOICE_LIST_VECTOR *best_char_choices, WERD_CHOICE *raw_choice, STATE *output_best_state, BlamerBundle *blamer_bundle)
inT16 check_seam_order(TBLOB *blob, SEAM *seam)
void best_first_search(CHUNKS_RECORD *chunks_record, BLOB_CHOICE_LIST_VECTOR *best_char_choices, WERD_RES *word, STATE *state, DANGERR *fixpt, STATE *best_state)
bool chop_one_blob(TWERD *word, BLOB_CHOICE_LIST_VECTOR *char_choices, inT32 *blob_number, SEAMS *seam_list, int *right_chop_index)
void SaveAltChoices(const LIST &best_choices, WERD_RES *word)
bool best_choice_is_dict_and_top_choice
BLOB_CHOICE_LIST * get_piece_rating(MATRIX *ratings, TBLOB *blobs, const DENORM &denorm, SEAMS seams, inT16 start, inT16 end, BlamerBundle *blamer_bundle)
bool contains(const FCOORD pt) const
double tessedit_certainty_threshold
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location)
MATRIX * word_associator(bool only_create_ratings_matrtix, WERD_RES *word, STATE *state, BLOB_CHOICE_LIST_VECTOR *best_char_choices, DANGERR *fixpt, STATE *best_state)
const UNICHARSET & getUnicharset() const
void CopyCharChoices(const BLOB_CHOICE_LIST_VECTOR &from, BLOB_CHOICE_LIST_VECTOR *to)
void append_debug(STRING *str) const
SEAMS insert_seam(SEAMS seam_list, int index, SEAM *seam, TBLOB *left_blob, TBLOB *first_blob)
bool enable_new_segsearch
void print_seams(const char *label, SEAMS seams)
inT16 select_blob_to_split(const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_ceiling, bool split_next_to_fragment)
void print_state(const char *label, STATE *state, int num_joints)
void DenormTransform(const TPOINT &pt, TPOINT *original) const
WIDTH_RECORD * blobs_widths(TBLOB *blobs)
GenericVector< BLOB_CHOICE_LIST * > BLOB_CHOICE_LIST_VECTOR
MATRIX * record_piece_ratings(TBLOB *blobs)
void CallFillLattice(const MATRIX &ratings, const LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
bool fragments_guide_chopper
int check_blob(TBLOB *blob)
void insert(T t, int index)
void add_str_int(const char *str, int number)
inT16 BLOB_WEIGHTS[MAX_NUM_CHUNKS]
SEAM * chop_overlapping_blob(const GenericVector< TBOX > &boxes, WERD_RES *word_res, inT32 *blob_number, bool italic_blob, SEAMS seam_list)
void set_n_ones(STATE *state, int n)
BLOB_CHOICE_LIST_VECTOR * chop_word_main(WERD_RES *word)
tesseract::BoxWord norm_truth_word
const UNICHARSET * uch_set
void preserve_outline_tree(TESSLINE *srcline)
BLOB_CHOICE_LIST * fake_classify_blob(UNICHAR_ID class_id, float rating, float certainty)
#define is_split_outline(outline, split)
GenericVector< STRING > truth_text
bool permute_characters(const BLOB_CHOICE_LIST_VECTOR &char_choices, WERD_CHOICE *best_choice, WERD_CHOICE *raw_choice)
SEAM * chop_numbered_blob(TWERD *word, inT32 blob_number, bool italic_blob, SEAMS seam_list)
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
SEAM * attempt_blob_chop(TWERD *word, TBLOB *blob, inT32 blob_number, bool italic_blob, SEAMS seam_list)
inT16 total_containment(TBLOB *blob1, TBLOB *blob2)
void SetBlame(IncorrectResultReason irr, const STRING &msg, const WERD_CHOICE *choice, bool debug)
bool test_insert_seam(SEAMS seam_list, int index, TBLOB *left_blob, TBLOB *first_blob)
void delete_seam(void *arg)
void improve_by_chopping(WERD_RES *word, BLOB_CHOICE_LIST_VECTOR *char_choices, STATE *best_state, BLOB_CHOICE_LIST_VECTOR *best_char_choices, DANGERR *fixpt, bool *updated_best_choice)
DLLSYM void tprintf(const char *format,...)
bool chop_one_blob2(const GenericVector< TBOX > &boxes, WERD_RES *word_res, SEAMS *seam_list)
void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam)
void insert_new_chunk(register STATE *state, register int index, register int num_joints)
int count_blobs(TBLOB *blobs)
bool fragment_mark() const
void delete_matrix_pointers()
bool wordrec_debug_blamer
void LogNewSplit(int Blob)
void print(const UNICHARSET &unicharset) const
TBOX bounding_box() const
void apply_seam(TBLOB *blob, TBLOB *other_blob, bool italic_blob, SEAM *seam)
BLOB_CHOICE_LIST_VECTOR * rebuild_current_state(WERD_RES *word, STATE *state, BLOB_CHOICE_LIST_VECTOR *char_choices, MATRIX *ratings)
const char * permuter_name() const
double overlap_fraction(const TBOX &box) const
bool truth_has_char_boxes
const TBOX & BlobBox(int index) const
WIDTH_RECORD * chunk_widths
void display_blob(TBLOB *blob, C_COL color)
bool almost_equal(const TBOX &box, int tolerance) const
void update_blob_classifications(TWERD *word, const BLOB_CHOICE_LIST_VECTOR &choices)
bool wordrec_enable_assoc
EDGEPT * restore_outline(EDGEPT *start)
void modify_blob_choice(BLOB_CHOICE_LIST *answer, int chop_index)
SEAM * new_seam(PRIORITY priority, const TPOINT &location, SPLIT *split1, SPLIT *split2, SPLIT *split3)
int any_shared_split_points(SEAMS seam_list, SEAM *seam)
#define array_value(a, i)
bool is_beginning() const
SEAM * pick_good_seam(TBLOB *blob)
void cprintf(const char *format,...)
void print_seam(const char *label, SEAM *seam)
static bool valid_word_permuter(uinT8 perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
const UNICHAR_ID unichar_id(int index) const
BlamerBundle * blamer_bundle
WERD_CHOICE * best_choice
BLOB_CHOICE_LIST * classify_blob(TBLOB *blob, const DENORM &denorm, const char *string, C_COL color, BlamerBundle *blamer_bundle)
int repair_unchopped_blobs