Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
fixspace.cpp File Reference
#include "mfcpch.h"
#include <ctype.h>
#include "reject.h"
#include "statistc.h"
#include "control.h"
#include "fixspace.h"
#include "genblob.h"
#include "tessvars.h"
#include "tessbox.h"
#include "secname.h"
#include "globals.h"
#include "tesseractclass.h"

Go to the source code of this file.

Namespaces

 tesseract
 

Macros

#define PERFECT_WERDS   999
 
#define MAXSPACING   128 /*max expected spacing in pix */
 

Functions

void initialise_search (WERD_RES_LIST &src_list, WERD_RES_LIST &new_list)
 
transform_to_next_perm()

Examines the current word list to find the smallest word gap size. Then walks the word list closing any gaps of this size by either inserting new combination words or extending existing ones.

The routine COULD be limited to stop it building words longer than N blobs.

If there are no more gaps then it DELETES every entry in the list, leaving the list empty to signal termination to the caller.

void transform_to_next_perm (WERD_RES_LIST &words)
 
void fixspace_dbg (WERD_RES *word)
 

Macro Definition Documentation

#define MAXSPACING   128 /*max expected spacing in pix */

Definition at line 36 of file fixspace.cpp.

#define PERFECT_WERDS   999

Definition at line 35 of file fixspace.cpp.

Function Documentation

void fixspace_dbg ( WERD_RES * word)

Definition at line 879 of file fixspace.cpp.

// Debug dump for one WERD_RES: prints the word's bounding box, its best-choice
// string, three blob counts, the reject map, and the tess_accepted / done flags.
879  {
880  TBOX box = word->word->bounding_box();
881  BOOL8 show_map_detail = FALSE;  // hard-coded off, so the per-character dump below never runs
882  inT16 i;
883 
884  box.print();
885  tprintf(" \"%s\" ", word->best_choice->unichar_string().string());
// Counts come from three places: the WERD's cblob list, rebuild_word, and box_word.
886  tprintf("Blob count: %d (word); %d/%d (rebuild word)\n",
887  word->word->cblob_list()->length(),
888  word->rebuild_word->NumBlobs(),
889  word->box_word->length());
890  word->reject_map.print(debug_fp);  // written to debug_fp, not through tprintf
891  tprintf("\n");
892  if (show_map_detail) {
893  tprintf("\"%s\"\n", word->best_choice->unichar_string().string());
// Walk the best-choice string up to its NUL terminator, printing each
// character followed by the reject map entry at the same index.
894  for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
895  tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
896  word->reject_map[i].full_print(debug_fp);
897  }
898  }
899 
900  tprintf("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE");
901  tprintf("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");
902 }
TWERD * rebuild_word
Definition: pageres.h:381
void full_print(FILE *fp)
Definition: rejctmap.cpp:412
const STRING & unichar_string() const
Definition: ratngs.h:395
const int length() const
Definition: boxword.h:99
TBOX bounding_box()
Definition: werd.cpp:164
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
BOOL8 done
Definition: pageres.h:419
void print(FILE *fp)
Definition: rejctmap.cpp:400
unsigned char BOOL8
Definition: host.h:113
int NumBlobs() const
Definition: blobs.h:263
REJMAP reject_map
Definition: pageres.h:408
Definition: rect.h:29
#define FALSE
Definition: capi.h:28
WERD * word
Definition: pageres.h:334
const char * string() const
Definition: strngs.cpp:156
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
short inT16
Definition: host.h:100
tesseract::BoxWord * box_word
Definition: pageres.h:387
BOOL8 tess_accepted
Definition: pageres.h:417
void print() const
Definition: rect.h:263
FILE * debug_fp
Definition: tessvars.cpp:25
WERD_CHOICE * best_choice
Definition: pageres.h:359
void initialise_search ( WERD_RES_LIST &  src_list,
WERD_RES_LIST &  new_list 
)

Definition at line 178 of file fixspace.cpp.

// Fills new_list with copies of every word in src_list that is not flagged as
// a combination. Each copy has its combination and part_of_combo flags cleared.
178  {
179  WERD_RES_IT src_it(&src_list);
180  WERD_RES_IT new_it(&new_list);
181  WERD_RES *src_wd;
182  WERD_RES *new_wd;
183 
184  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
185  src_wd = src_it.data();
186  if (!src_wd->combination) {  // combination words are skipped, not copied
187  new_wd = new WERD_RES(*src_wd);  // heap copy; the pointer is handed to new_list below
188  new_wd->combination = FALSE;
189  new_wd->part_of_combo = FALSE;
190  new_it.add_after_then_move(new_wd);  // append and advance, so copies keep source order
191  }
192  }
193 }
BOOL8 part_of_combo
Definition: pageres.h:451
BOOL8 combination
Definition: pageres.h:450
#define FALSE
Definition: capi.h:28
void transform_to_next_perm ( WERD_RES_LIST &  words)

Definition at line 373 of file fixspace.cpp.

// Two passes over words. Pass 1 finds the smallest horizontal gap between
// consecutive words that are not part of a combo. Pass 2 joins every pair of
// such words whose gap is <= that minimum into a combination word. If pass 1
// finds no gap at all, the list is cleared instead.
373  {
374  WERD_RES_IT word_it(&words);
375  WERD_RES_IT prev_word_it(&words);
376  WERD_RES *word;
377  WERD_RES *prev_word;
378  WERD_RES *combo;
379  WERD *copy_word;
380  inT16 prev_right = -MAX_INT16;  // sentinel: no previous word seen yet
381  TBOX box;
382  inT16 gap;
383  inT16 min_gap = MAX_INT16;  // sentinel: no gap measured yet
384 
// Pass 1: measure the gap from the previous word's right edge to this word's
// left edge, ignoring words already marked part_of_combo.
385  for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
386  word = word_it.data();
387  if (!word->part_of_combo) {
388  box = word->word->bounding_box();
389  if (prev_right > -MAX_INT16) {
390  gap = box.left() - prev_right;
391  if (gap < min_gap)
392  min_gap = gap;
393  }
394  prev_right = box.right();
395  }
396  }
397  if (min_gap < MAX_INT16) {  // at least one gap was measured
398  prev_right = -MAX_INT16; // back to start
399  word_it.set_to_list(&words);
400  // Note: we can't use cycle_pt due to inserted combos at start of list.
// Pass 2: keep going while no eligible word has been processed yet
// (prev_right is still the sentinel) or the iterator has not wrapped
// back to the first element.
401  for (; (prev_right == -MAX_INT16) || !word_it.at_first();
402  word_it.forward()) {
403  word = word_it.data();
404  if (!word->part_of_combo) {
405  box = word->word->bounding_box();
406  if (prev_right > -MAX_INT16) {
407  gap = box.left() - prev_right;
408  if (gap <= min_gap) {
409  prev_word = prev_word_it.data();
410  if (prev_word->combination) {
411  combo = prev_word;  // extend the existing combination
412  } else {
413  /* Make a new combination and insert before
414  * the first word being joined. */
415  copy_word = new WERD;
416  *copy_word = *(prev_word->word);
417  // deep copy
418  combo = new WERD_RES(copy_word);
419  combo->combination = TRUE;
420  combo->x_height = prev_word->x_height;
421  prev_word->part_of_combo = TRUE;
422  prev_word_it.add_before_then_move(combo);  // prev_word_it now refers to the new combo
423  }
// The combo takes its W_EOL flag from the word being appended.
424  combo->word->set_flag(W_EOL, word->word->flag(W_EOL));
425  if (word->combination) {
426  combo->word->join_on(word->word);
427  // Move blobs to combo
428  // old combo no longer needed
429  delete word_it.extract();
430  } else {
431  // Copy current wd to combo
432  combo->copy_on(word);
433  word->part_of_combo = TRUE;
434  }
// The combo's content changed: mark it not done and clear its results.
435  combo->done = FALSE;
436  combo->ClearResults();
437  } else {
438  prev_word_it = word_it; // catch up
439  }
440  }
441  prev_right = box.right();
442  }
443  }
444  } else {
445  words.clear(); // signal termination
446  }
447 }
void ClearResults()
Definition: pageres.cpp:799
TBOX bounding_box()
Definition: werd.cpp:164
BOOL8 done
Definition: pageres.h:419
void join_on(WERD *other)
Definition: werd.cpp:201
BOOL8 part_of_combo
Definition: pageres.h:451
inT16 left() const
Definition: rect.h:67
BOOL8 combination
Definition: pageres.h:450
Definition: rect.h:29
#define FALSE
Definition: capi.h:28
inT16 right() const
Definition: rect.h:74
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:122
WERD * word
Definition: pageres.h:334
float x_height
Definition: pageres.h:431
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:123
Definition: werd.h:60
short inT16
Definition: host.h:100
#define MAX_INT16
Definition: host.h:119
Definition: werd.h:36
void copy_on(WERD_RES *word_res)
Definition: pageres.h:674
#define TRUE
Definition: capi.h:27