49 STRING output_fname = fname;
50 const char *lastdot = strrchr(output_fname.
string(),
'.');
51 if (lastdot !=
NULL) output_fname[lastdot - output_fname.
string()] =
'\0';
52 output_fname +=
".txt";
73 if (tbox->
left() < 0) {
93 const char *lastdot = strrchr(box_fname.
string(),
'.');
94 if (lastdot !=
NULL) box_fname[lastdot - box_fname.
string()] =
'\0';
101 page_res_it.restart_page();
109 int examined_words = 0;
111 keep_going =
read_t(&page_res_it, &tbox);
118 read_t(&page_res_it, &tbox) :
123 keep_going = (bbox.
left() > tbox.
left()) ?
read_t(&page_res_it, &tbox) :
131 page_res_it.prev_row(),
132 page_res_it.prev_block(),
133 label.
string(), output_file);
136 }
while (keep_going);
143 for (page_res_it.restart_page(); page_res_it.block() !=
NULL;
144 page_res_it.forward()) {
145 if (page_res_it.word()) {
146 if (page_res_it.word()->uch_set ==
NULL)
151 if (examined_words < 0.85 * total_words) {
152 tprintf(
"TODO(antonova): clean up recog_training_segmented; "
153 " It examined only a small fraction of the ambigs image.\n");
155 tprintf(
"recog_training_segmented: examined %d / %d words.\n",
156 examined_words, total_words);
177 int label_num_unichars = 0;
179 for (offset = 0; label[offset] !=
'\0' && step > 0;
181 offset += step, ++label_num_unichars);
183 tprintf(
"Not outputting illegal unichar %s\n", label);
188 if (label_num_unichars == 1 && best_choice->
blob_choices()->length() == 1) {
189 BLOB_CHOICE_LIST_C_IT outer_blob_choice_it;
190 outer_blob_choice_it.set_to_list(best_choice->
blob_choices());
191 BLOB_CHOICE_IT blob_choice_it;
192 blob_choice_it.set_to_list(outer_blob_choice_it.data());
193 for (blob_choice_it.mark_cycle_pt();
194 !blob_choice_it.cycled_list();
195 blob_choice_it.forward()) {
197 if (blob_choice->
unichar_id() != INVALID_UNICHAR_ID) {
198 fprintf(output_file,
"%s\t%s\t%.4f\t%.4f\n",
const char *const id_to_unichar(UNICHAR_ID id) const
BLOB_CHOICE_LIST_CLIST * blob_choices()
int tessedit_tess_adaption_mode
FILE * open_file(const char *filename, const char *mode)
UNICHAR_ID unichar_id() const
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox)
const inT16 kMaxBoxEdgeDiff
BLOCK_RES * block() const
const UNICHARSET * uch_set
void classify_word_pass1(BLOCK *block, ROW *row, WERD_RES *word)
const char * string() const
bool ReadNextBox(int *line_number, FILE *box_file, STRING *utf8_str, TBOX *bounding_box)
FILE * init_recog_training(const STRING &fname)
void ambigs_classify_and_output(WERD_RES *werd_res, ROW_RES *row_res, BLOCK_RES *block_res, const char *label, FILE *output_file)
DLLSYM void tprintf(const char *format,...)
void PrintAmbigAlternatives(FILE *file, const char *label, int label_num_unichars)
Print all the choices in raw_choices_ list for non 1-1 ambiguities.
bool tessedit_ambigs_training
void rotate(const FCOORD &vec)
int step(const char *str) const
bool stopper_no_acceptable_choices
bool tessedit_enable_doc_dict
WERD_CHOICE * best_choice