Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tordmain.cpp File Reference
#include "mfcpch.h"
#include "stderr.h"
#include "globaloc.h"
#include "blread.h"
#include "blobbox.h"
#include "ccstruct.h"
#include "edgblob.h"
#include "drawtord.h"
#include "makerow.h"
#include "wordseg.h"
#include "imgs.h"
#include "textord.h"
#include "tordmain.h"
#include "secname.h"
#include "allheaders.h"

Go to the source code of this file.

Namespaces

 tesseract
 

Macros

#define EXTERN
 
#define MAX_NEAREST_DIST   600
 

Functions

void SetBlobStrokeWidth (Pix *pix, BLOBNBOX *blob)
 
void assign_blobs_to_blocks2 (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
 
void tweak_row_baseline (ROW *row, double blshift_maxshift, double blshift_xfraction)
 
inT32 blob_y_order (void *item1, void *item2)
 

Variables

const ERRCODE BLOCKLESS_BLOBS = "Warning:some blobs assigned to no block"
 

Macro Definition Documentation

#define EXTERN

Definition at line 47 of file tordmain.cpp.

#define MAX_NEAREST_DIST   600

Definition at line 49 of file tordmain.cpp.

Function Documentation

void assign_blobs_to_blocks2 ( Pix *  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 156 of file tordmain.cpp.

158  { // output list
// Builds one TO_BLOCK for every BLOCK in `blocks` and appends it to
// `port_blocks`. Each C_BLOB is extracted from its source list, wrapped in a
// new BLOBNBOX, given stroke-width estimates via SetBlobStrokeWidth(pix, ...),
// and stored in the new TO_BLOCK.
159  BLOCK *block; // current block
160  BLOBNBOX *newblob; // created blob
161  C_BLOB *blob; // current blob
162  BLOCK_IT block_it = blocks;
163  C_BLOB_IT blob_it; // iterator over the source C_BLOB list
164  BLOBNBOX_IT port_box_it; // iterator over the destination BLOBNBOX list
165  // destination iterator
166  TO_BLOCK_IT port_block_it = port_blocks;
167  TO_BLOCK *port_block; // created block
168 
169  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
170  block = block_it.data();
171  port_block = new TO_BLOCK(block);
172 
173  // Move the good outlines out of block->blob_list() into port_block->blobs.
174  port_box_it.set_to_list(&port_block->blobs);
175  blob_it.set_to_list(block->blob_list());
176  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
177  blob = blob_it.extract(); // removes the blob from the source list
178  newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX.
179  SetBlobStrokeWidth(pix, newblob);
180  port_box_it.add_after_then_move(newblob);
181  }
182 
183  // Move the rejected outlines out of block->reject_blobs() into
184  // port_block->noise_blobs, which allows them to be reconsidered and sorted
185  // back into rows and recover outlines mistakenly rejected.
186  port_box_it.set_to_list(&port_block->noise_blobs);
187  blob_it.set_to_list(block->reject_blobs());
188  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
189  blob = blob_it.extract(); // removes the blob from the source list
190  newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX.
191  SetBlobStrokeWidth(pix, newblob);
192  port_box_it.add_after_then_move(newblob);
193  }
194 
195  port_block_it.add_after_then_move(port_block); // append the finished TO_BLOCK
196  }
197 }
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:737
BLOBNBOX_LIST blobs
Definition: blobbox.h:735
void SetBlobStrokeWidth(Pix *pix, BLOBNBOX *blob)
Definition: tordmain.cpp:56
C_BLOB_LIST * reject_blobs()
Definition: ocrblock.h:136
C_BLOB_LIST * blob_list()
get blobs
Definition: ocrblock.h:133
Definition: ocrblock.h:31
inT32 blob_y_order ( void *  item1,
void *  item2 
)

Definition at line 793 of file tordmain.cpp.

795  {
// Three-way comparison of two blobs by bounding box. item1 and item2 are each
// treated as a pointer to a BLOBNBOX* (see the casts below).
// Returns -1 when blob1 orders first, 1 when blob2 orders first, 0 on a tie:
// the blob with the larger bottom() comes first, and equal bottoms are
// ordered by smaller left() first.
// NOTE(review): presumably used as a sort callback - confirm against callers.
796  //converted ptr
797  BLOBNBOX *blob1 = *(BLOBNBOX **) item1;
798  //converted ptr
799  BLOBNBOX *blob2 = *(BLOBNBOX **) item2;
800 
801  if (blob1->bounding_box ().bottom () > blob2->bounding_box ().bottom ())
802  return -1;
803  else if (blob1->bounding_box ().bottom () <
804  blob2->bounding_box ().bottom ())
805  return 1;
806  else {
// Same bottom edge: fall back to the left edge.
807  if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())
808  return -1;
809  else if (blob1->bounding_box ().left () >
810  blob2->bounding_box ().left ())
811  return 1;
812  else
813  return 0;
814  }
815 }
const TBOX & bounding_box() const
Definition: blobbox.h:208
inT16 left() const
Definition: rect.h:67
inT16 bottom() const
Definition: rect.h:60
void SetBlobStrokeWidth ( Pix *  pix,
BLOBNBOX *  blob 
)

Definition at line 56 of file tordmain.cpp.

56  {
// Estimates the horizontal and vertical stroke width of `blob` from a
// distance transform of its bounding-box region in `pix`, and stores the
// results with blob->set_horz_stroke_width / blob->set_vert_stroke_width.
57  // Cut the blob rectangle into a Pix.
58  int pix_height = pixGetHeight(pix);
59  const TBOX& box = blob->bounding_box();
60  int width = box.width();
61  int height = box.height();
// The y origin is flipped (pix_height - top); presumably TBOX is bottom-up
// while the Pix is top-down - TODO confirm.
62  Box* blob_pix_box = boxCreate(box.left(), pix_height - box.top(),
63  width, height);
// NOTE(review): the result of pixClipRectangle is passed on unchecked -
// confirm it cannot fail for the boxes supplied here.
64  Pix* pix_blob = pixClipRectangle(pix, blob_pix_box, NULL);
65  boxDestroy(&blob_pix_box);
// dist_pix is read one byte per pixel below (GET_DATA_BYTE).
66  Pix* dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);
67  pixDestroy(&pix_blob);
68  // Compute the stroke widths.
69  uinT32* data = pixGetData(dist_pix);
70  int wpl = pixGetWpl(dist_pix);
71  // Horizontal width of stroke.
72  STATS h_stats(0, width + 1);
73  for (int y = 0; y < height; ++y) {
74  uinT32* pixels = data + y*wpl;
// Sliding window along the row: prev_pixel, pixel, next_pixel hold the
// values at x - 2, x - 1 and x, so `pixel` is the one being tested.
75  int prev_pixel = 0;
76  int pixel = GET_DATA_BYTE(pixels, 0);
77  for (int x = 1; x < width; ++x) {
78  int next_pixel = GET_DATA_BYTE(pixels, x);
79  // We are looking for a pixel that is equal to its vertical neighbours,
80  // yet greater than its left neighbour.
81  if (prev_pixel < pixel &&
82  (y == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
83  (y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl, x - 1))) {
84  if (pixel > next_pixel) {
85  // Single local max, so an odd width.
86  h_stats.add(pixel * 2 - 1, 1);
87  } else if (pixel == next_pixel && x + 1 < width &&
88  pixel > GET_DATA_BYTE(pixels, x + 1)) {
89  // Double local max, so an even width.
90  h_stats.add(pixel * 2, 1);
91  }
92  }
93  prev_pixel = pixel;
94  pixel = next_pixel;
95  }
96  }
97  // Vertical width of stroke.
98  STATS v_stats(0, height + 1);
99  for (int x = 0; x < width; ++x) {
// Same sliding window, walking down the column: `pixel` is the value at
// row y - 1, which is why its horizontal neighbours are read from
// `pixels - wpl`.
100  int prev_pixel = 0;
101  int pixel = GET_DATA_BYTE(data, x);
102  for (int y = 1; y < height; ++y) {
103  uinT32* pixels = data + y*wpl;
104  int next_pixel = GET_DATA_BYTE(pixels, x);
105  // We are looking for a pixel that is equal to its horizontal neighbours,
106  // yet greater than its upper neighbour.
107  if (prev_pixel < pixel &&
108  (x == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
109  (x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl, x + 1))) {
110  if (pixel > next_pixel) {
111  // Single local max, so an odd width.
112  v_stats.add(pixel * 2 - 1, 1);
113  } else if (pixel == next_pixel && y + 1 < height &&
114  pixel > GET_DATA_BYTE(pixels + wpl, x)) {
115  // Double local max, so an even width.
116  v_stats.add(pixel * 2, 1);
117  }
118  }
119  prev_pixel = pixel;
120  pixel = next_pixel;
121  }
122  }
123  pixDestroy(&dist_pix);
124  // Store the horizontal and vertical width in the blob, keeping both
125  // widths if there is enough information, otherwise only the one with
126  // the most samples.
127  // If there are insufficient samples, store zero, rather than using
128  // 2*area/perimeter, as the numbers that gives do not match the numbers
129  // from the distance method.
// "Enough" means at least (width + height) / 4 samples; the stored value is
// the median (ile(0.5f)) of the collected widths.
130  if (h_stats.get_total() >= (width + height) / 4) {
131  blob->set_horz_stroke_width(h_stats.ile(0.5f));
132  if (v_stats.get_total() >= (width + height) / 4)
133  blob->set_vert_stroke_width(v_stats.ile(0.5f));
134  else
135  blob->set_vert_stroke_width(0.0f);
136  } else {
137  if (v_stats.get_total() >= (width + height) / 4 ||
138  v_stats.get_total() > h_stats.get_total()) {
139  blob->set_horz_stroke_width(0.0f);
140  blob->set_vert_stroke_width(v_stats.ile(0.5f));
141  } else {
// Neither direction has enough samples and horizontal has at least as many
// as vertical: keep the horizontal median only if it has more than 2 samples.
142  blob->set_horz_stroke_width(h_stats.get_total() > 2 ? h_stats.ile(0.5f)
143  : 0.0f);
144  blob->set_vert_stroke_width(0.0f);
145  }
146  }
147 }
const TBOX & bounding_box() const
Definition: blobbox.h:208
void set_vert_stroke_width(float width)
Definition: blobbox.h:323
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
void set_horz_stroke_width(float width)
Definition: blobbox.h:317
inT16 width() const
Definition: rect.h:104
Definition: rect.h:29
#define f(xc, yc)
Definition: imgscale.cpp:39
inT16 top() const
Definition: rect.h:53
Definition: statistc.h:29
unsigned int uinT32
Definition: host.h:103
inT16 height() const
Definition: rect.h:97
void tweak_row_baseline ( ROW *  row,
double  blshift_maxshift,
double  blshift_xfraction 
)

Definition at line 680 of file tordmain.cpp.

682  {
// Rebuilds row->baseline as a new QSPLINE. A blob whose bottom lies within
// blshift_maxshift x-heights of the current baseline, and whose height is
// more than blshift_xfraction x-heights, contributes a constant segment
// (a = b = 0, c = blob bottom). Elsewhere the existing quadratic segments
// are copied across. Returns without changes if the row has no blobs.
683  TBOX blob_box; //bounding box
684  C_BLOB *blob; //current blob
685  WERD *word; //current word
686  inT32 blob_count; //no of blobs
687  inT32 src_index; //source segment
688  inT32 dest_index; //destination segment
689  inT32 *xstarts; //spline segments
690  double *coeffs; //spline coeffs
691  float ydiff; //baseline error
692  float x_centre; //centre of blob
693  //words of row
694  WERD_IT word_it = row->word_list ();
695  C_BLOB_IT blob_it; //blob iterator
696 
// First pass: count the blobs in the row so the output arrays can be sized.
697  blob_count = 0;
698  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
699  word = word_it.data (); //current word
700  //get total blobs
701  blob_count += word->cblob_list ()->length ();
702  }
703  if (blob_count == 0)
704  return;
// xstarts holds segment boundaries (one more than the segment count);
// coeffs holds three doubles (a, b, c) per segment.
705  xstarts =
706  (inT32 *) alloc_mem ((blob_count + row->baseline.segments + 1) *
707  sizeof (inT32));
708  coeffs =
709  (double *) alloc_mem ((blob_count + row->baseline.segments) * 3 *
710  sizeof (double));
711 
712  src_index = 0;
713  dest_index = 0;
714  xstarts[0] = row->baseline.xcoords[0];
// Second pass: walk the blobs, emitting destination segments as we go.
715  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
716  word = word_it.data (); //current word
717  //blobs in word
718  blob_it.set_to_list (word->cblob_list ());
719  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
720  blob_it.forward ()) {
721  blob = blob_it.data ();
722  blob_box = blob->bounding_box ();
723  x_centre = (blob_box.left () + blob_box.right ()) / 2.0;
// ydiff becomes |bottom - baseline(x_centre)| in units of x-height.
724  ydiff = blob_box.bottom () - row->base_line (x_centre);
725  if (ydiff < 0)
726  ydiff = -ydiff / row->x_height ();
727  else
728  ydiff = ydiff / row->x_height ();
729  if (ydiff < blshift_maxshift
730  && blob_box.height () / row->x_height () > blshift_xfraction) {
// Blob qualifies: emit a constant segment at the blob's bottom that ends
// just past the blob's right edge.
731  if (xstarts[dest_index] >= x_centre)
732  xstarts[dest_index] = blob_box.left ();
733  coeffs[dest_index * 3] = 0;
734  coeffs[dest_index * 3 + 1] = 0;
735  coeffs[dest_index * 3 + 2] = blob_box.bottom ();
736  //shift it
737  dest_index++;
738  xstarts[dest_index] = blob_box.right () + 1;
739  }
740  else {
// Blob does not qualify: copy the source segments that lie between the
// current destination start and the blob centre.
741  if (xstarts[dest_index] <= x_centre) {
742  while (row->baseline.xcoords[src_index + 1] <= x_centre
743  && src_index < row->baseline.segments - 1) {
744  if (row->baseline.xcoords[src_index + 1] >
745  xstarts[dest_index]) {
746  coeffs[dest_index * 3] =
747  row->baseline.quadratics[src_index].a;
748  coeffs[dest_index * 3 + 1] =
749  row->baseline.quadratics[src_index].b;
750  coeffs[dest_index * 3 + 2] =
751  row->baseline.quadratics[src_index].c;
752  dest_index++;
753  xstarts[dest_index] =
754  row->baseline.xcoords[src_index + 1];
755  }
756  src_index++;
757  }
// Copy the source segment that contains the blob centre.
758  coeffs[dest_index * 3] =
759  row->baseline.quadratics[src_index].a;
760  coeffs[dest_index * 3 + 1] =
761  row->baseline.quadratics[src_index].b;
762  coeffs[dest_index * 3 + 2] =
763  row->baseline.quadratics[src_index].c;
764  dest_index++;
765  xstarts[dest_index] = row->baseline.xcoords[src_index + 1];
766  }
767  }
768  }
769  }
// Skip source segments that end at or before the last destination boundary,
// then copy whatever source segments remain.
770  while (src_index < row->baseline.segments
771  && row->baseline.xcoords[src_index + 1] <= xstarts[dest_index])
772  src_index++;
773  while (src_index < row->baseline.segments) {
774  coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
775  coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
776  coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
777  dest_index++;
778  src_index++;
779  xstarts[dest_index] = row->baseline.xcoords[src_index];
780  }
781  //turn to spline
782  row->baseline = QSPLINE (dest_index, xstarts, coeffs);
// The temporary arrays are released once the new QSPLINE has been built.
783  free_mem(xstarts);
784  free_mem(coeffs);
785 }
float b
Definition: quadratc.h:59
float c
Definition: quadratc.h:60
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
void free_mem(void *oldchunk)
Definition: memry.cpp:56
inT16 left() const
Definition: rect.h:67
int inT32
Definition: host.h:102
Definition: rect.h:29
inT16 right() const
Definition: rect.h:74
void * alloc_mem(inT32 count)
Definition: memry.cpp:48
double a
Definition: quadratc.h:58
float base_line(float xpos) const
Definition: ocrrow.h:56
float x_height() const
Definition: ocrrow.h:61
Definition: werd.h:60
WERD_LIST * word_list()
Definition: ocrrow.h:52
TBOX bounding_box()
Definition: stepblob.cpp:192
inT16 height() const
Definition: rect.h:97
inT16 bottom() const
Definition: rect.h:60

Variable Documentation

const ERRCODE BLOCKLESS_BLOBS = "Warning:some blobs assigned to no block"

Definition at line 44 of file tordmain.cpp.