21 #pragma warning(disable:4244) // Conversion warnings
44 #define EPAPER_EXT ".ep"  // NOTE(review): presumably an output-file extension; usage is not visible here - confirm
45 #define PAGE_YSIZE 3508  // NOTE(review): units not shown here; presumably a page height in pixels - confirm
// ASCII control-character codes, written as octal escapes. The role each one
// plays is given by its trailing comment.
46 #define CTRL_INSET '\024' // DC4 (octal 024, 0x14): text inset
47 #define CTRL_FONT '\016' // SO, shift out (octal 016, 0x0E): font change
48 #define CTRL_DEFAULT '\017' // SI, shift in (octal 017, 0x0F): back to default font
49 #define CTRL_SHIFT '\022' // DC2 (octal 022, 0x12): x shift
50 #define CTRL_TAB '\011' // HT, horizontal tab (octal 011, 0x09)
51 #define CTRL_NEWLINE '\012' // LF, line feed (octal 012, 0x0A): newline
52 #define CTRL_HARDLINE '\015' // CR, carriage return (octal 015, 0x0D)
67 pts = pixels * 72.0 / pix_res;
68 return (
inT32) (pts + 0.5);
74 const TBOX *target_word_box) {
82 block_of_last_word =
NULL;
83 while (page_res_it.
word () !=
NULL) {
90 FCOORD center_pt((current_word_box.
right()+current_word_box.
left())/2,(current_word_box.
bottom()+current_word_box.
top())/2);
91 if (!target_word_box->
contains(center_pt))
99 block_of_last_word != page_res_it.
block ()) {
100 block_of_last_word = page_res_it.
block ();
101 block_id = block_of_last_word->
block->
index();
120 nextword, nextblock), force_eol);
149 int ep_chars_index = 0;
169 txt_chs[txt_index] =
' ';
170 map_chs[txt_index++] =
'1';
171 ep_chars[ep_chars_index++] =
' ';
180 txt_chs[txt_index] = unrecognised;
182 map_chs[txt_index++] =
'1';
183 ep_chars[ep_chars_index++] = unrecognised;
186 map_chs[txt_index++] =
'0';
193 ep_chars[ep_chars_index++] = 1;
195 ep_chars[ep_chars_index++] = 1;
197 ep_chars[ep_chars_index++] = 2;
199 ep_chars[ep_chars_index++] = 1;
201 ep_chars[ep_chars_index++] = 1;
210 txt_chs[txt_index] =
'\n';
211 map_chs[txt_index++] =
'\n';
213 ep_chars[ep_chars_index++] = newline_type;
220 txt_chs[txt_index] =
'\0';
221 map_chs[txt_index] =
'\0';
222 ep_chars[ep_chars_index] =
'\0';
249 if (!blob_choices_it.empty())
delete blob_choices_it.extract();
274 tprintf (
"Dict word: \"%s\": %d\n",
279 repetition_code =
"|^~R";
280 wordstr_lengths =
"\001\001\001\001";
283 wordstr = &repetition_code;
289 word->
reject_map[i].setrej_minimal_rej_accept();
297 word->
reject_map[i].setrej_minimal_rej_accept();
325 if (next_word ==
NULL || next_block ==
NULL || block != next_block)
327 if (next_word->
space () > 0)
333 end_gap = block_box.
right () - word_box.
right ();
335 width = next_box.
right () - next_box.
left ();
354 if (i < word->reject_map.length()) {
379 for (i = 0; i < len; i++) {
381 word_res->
reject_map[i].setrej_minimal_rej_accept();
394 for (i = 0; i < len; ++i) {
397 word_res->
reject_map[i].setrej_minimal_rej_accept();
408 for (i = 0; i < len; ++i) {
411 word_res->
reject_map[i].setrej_minimal_rej_accept();
415 for (i = 0; i < len; i++) {
418 word_res->
reject_map[i].setrej_minimal_rej_accept();
420 word_res->
reject_map[i].setrej_minimal_rej_accept();
422 word_res->
reject_map[i].setrej_minimal_rej_accept();
431 for (i = 0; i < len; i++) {
435 word_res->
reject_map[i].setrej_minimal_rej_accept();
439 word_res->
reject_map[i].setrej_minimal_rej_accept();
451 for (i = 0; i < len; i++) {
457 word_res->
reject_map[i].setrej_minimal_rej_accept();
466 for (
int i = 0; i < word.
length(); ++i) {
476 for (
int i = 0; i < word.
length(); ++i) {
486 const char *lengths) {
489 if (*lengths == 1 && *s ==
'(')
493 ((*s ==
'$') || (*s ==
'.') || (*s ==
'+') || (*s ==
'-')))
496 for (; *s !=
'\0'; s += *(lengths++)) {
499 else if (prev_digit &&
500 (*lengths == 1 && ((*s ==
'.') || (*s ==
',') || (*s ==
'-'))))
502 else if (prev_digit && *lengths == 1 &&
503 (*(s + *lengths) ==
'\0') && ((*s ==
'%') || (*s ==
')')))
505 else if (prev_digit &&
506 *lengths == 1 && (*s ==
'%') &&
507 (*(lengths + 1) == 1 && *(s + *lengths) ==
')') &&
508 (*(s + *lengths + *(lengths + 1)) ==
'\0'))
const STRING & unichar_string() const
inT16 count_alphas(const WERD_CHOICE &word)
bool get_isalpha(UNICHAR_ID unichar_id) const
const char *const id_to_unichar(UNICHAR_ID id) const
BLOB_CHOICE_LIST_CLIST * blob_choices()
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
bool last_char_was_newline
bool suspect_constrain_1Il
const STRING debug_string() const
bool tessedit_rejection_debug
bool contains(const FCOORD pt) const
bool tessedit_write_block_separators
BLOCK_RES * next_block() const
WERD_RES * restart_page()
void output_pass(PAGE_RES_IT &page_res_it, const TBOX *target_word_box)
void write_results(PAGE_RES_IT &page_res_it, char newline_type, BOOL8 force_eol)
void DeleteBox(int index)
bool get_isdigit(UNICHAR_ID unichar_id) const
UNICHAR_ID get_rep_char(WERD_RES *word)
BOOL8 check_debug_pt(WERD_RES *word, int location)
inT32 pixels_to_pts(inT32 pixels, inT32 pix_res)
BOOL8 flag(WERD_FLAGS mask) const
void remove_pos(inT16 pos)
BLOCK_RES * block() const
bool write_results_empty_block
inT16 safe_dict_word(const WERD_RES *werd_res)
inT16 count_alphanums(const WERD_CHOICE &word)
const UNICHARSET * uch_set
WERD_RES * next_word() const
void set_unlv_suspects(WERD_RES *word)
const char * string() const
int dict_word(const WERD_CHOICE &word)
bool tessedit_zero_rejection
bool tessedit_word_for_word
bool unlv_tilde_crunching
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
void remove_unichar_id(int index)
DLLSYM void tprintf(const char *format,...)
bool tessedit_minimal_rejection
inT16 space() const
return spacing
const STRING & unichar_lengths() const
tesseract::BoxWord * box_word
bool tilde_crunch_written
BOOL8 acceptable_number_string(const char *s, const char *lengths)
double suspect_rating_per_ch
char determine_newline_type(WERD *word, BLOCK *block, WERD *next_word, BLOCK *next_block)
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
CRUNCH_MODE unlv_crunch_mode
ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET &char_set, const char *s, const char *lengths)
const UNICHARSET * unicharset() const
const UNICHAR_ID unichar_id(int index) const
bool tessedit_write_rep_codes
double suspect_accept_rating
WERD_CHOICE * best_choice
bool tessedit_zero_kelvin_rejection