Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
underlin.cpp File Reference
#include "mfcpch.h"
#include "underlin.h"

Go to the source code of this file.

Macros

#define PROJECTION_MARGIN   10
 
#define EXTERN
 

Functions

void restore_underlined_blobs (TO_BLOCK *block)
 
TO_ROW * most_overlapping_row (TO_ROW_LIST *rows, BLOBNBOX *blob)
 
void find_underlined_blobs (BLOBNBOX *u_line, QSPLINE *baseline, float xheight, float baseline_offset, ICOORDELT_LIST *chop_cells)
 
void vertical_cunderline_projection (C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj)
 

Variables

EXTERN double textord_underline_offset = 0.1
 
EXTERN bool textord_restore_underlines = TRUE
 

Macro Definition Documentation

#define EXTERN

Definition at line 27 of file underlin.cpp.

#define PROJECTION_MARGIN   10

Definition at line 26 of file underlin.cpp.

Function Documentation

void find_underlined_blobs ( BLOBNBOX *  u_line,
QSPLINE *  baseline,
float  xheight,
float  baseline_offset,
ICOORDELT_LIST *  chop_cells 
)

Definition at line 179 of file underlin.cpp.

185  {
186  inT16 x, y; //sides of blob
187  ICOORD blob_chop; //sides of blob
188  TBOX blob_box = u_line->bounding_box ();
189  //cell iterator
190  ICOORDELT_IT cell_it = chop_cells;
191  STATS upper_proj (blob_box.left (), blob_box.right () + 1);
192  STATS middle_proj (blob_box.left (), blob_box.right () + 1);
193  STATS lower_proj (blob_box.left (), blob_box.right () + 1);
194  C_OUTLINE_IT out_it; //outlines of blob
195 
196  ASSERT_HOST (u_line->cblob () != NULL);
197 
198  out_it.set_to_list (u_line->cblob ()->out_list ());
199  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
200  vertical_cunderline_projection (out_it.data (),
201  baseline, xheight, baseline_offset,
202  &lower_proj, &middle_proj, &upper_proj);
203  }
204 
205  for (x = blob_box.left (); x < blob_box.right (); x++) {
206  if (middle_proj.pile_count (x) > 0) {
207  for (y = x + 1;
208  y < blob_box.right () && middle_proj.pile_count (y) > 0; y++);
209  blob_chop = ICOORD (x, y);
210  cell_it.add_after_then_move (new ICOORDELT (blob_chop));
211  x = y;
212  }
213  }
214 }
C_BLOB * cblob() const
Definition: blobbox.h:245
const TBOX & bounding_box() const
Definition: blobbox.h:208
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:42
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
Definition: rect.h:29
inT16 right() const
Definition: rect.h:74
short inT16
Definition: host.h:100
Definition: statistc.h:29
integer coordinate
Definition: points.h:30
void vertical_cunderline_projection(C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj)
Definition: underlin.cpp:224
#define ASSERT_HOST(x)
Definition: errcode.h:84
TO_ROW* most_overlapping_row ( TO_ROW_LIST *  rows,
BLOBNBOX *  blob 
)

Definition at line 122 of file underlin.cpp.

125  {
126  inT16 x = (blob->bounding_box ().left ()
127  + blob->bounding_box ().right ()) / 2;
128  TO_ROW_IT row_it = rows; //row iterator
129  TO_ROW *row; //current row
130  TO_ROW *best_row; //output row
131  float overlap; //of blob & row
132  float bestover; //best overlap
133 
134  best_row = NULL;
135  bestover = (float) -MAX_INT32;
136  if (row_it.empty ())
137  return NULL;
138  row = row_it.data ();
139  row_it.mark_cycle_pt ();
140  while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top ()
141  && !row_it.cycled_list ()) {
142  best_row = row;
143  bestover =
144  blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop;
145  row_it.forward ();
146  row = row_it.data ();
147  }
148  while (row->baseline.y (x) + row->xheight + row->ascrise
149  >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) {
150  overlap = row->baseline.y (x) + row->xheight + row->ascrise;
151  if (blob->bounding_box ().top () < overlap)
152  overlap = blob->bounding_box ().top ();
153  if (blob->bounding_box ().bottom () >
154  row->baseline.y (x) + row->descdrop)
155  overlap -= blob->bounding_box ().bottom ();
156  else
157  overlap -= row->baseline.y (x) + row->descdrop;
158  if (overlap > bestover) {
159  bestover = overlap;
160  best_row = row;
161  }
162  row_it.forward ();
163  row = row_it.data ();
164  }
165  if (bestover < 0
166  && row->baseline.y (x) + row->xheight + row->ascrise
167  - blob->bounding_box ().bottom () > bestover)
168  best_row = row;
169  return best_row;
170 }
QSPLINE baseline
Definition: blobbox.h:642
const TBOX & bounding_box() const
Definition: blobbox.h:208
float descdrop
Definition: blobbox.h:632
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
inT16 right() const
Definition: rect.h:74
#define MAX_INT32
Definition: host.h:120
float ascrise
Definition: blobbox.h:631
inT16 top() const
Definition: rect.h:53
short inT16
Definition: host.h:100
double y(double x) const
Definition: quspline.cpp:217
float xheight
Definition: blobbox.h:629
inT16 bottom() const
Definition: rect.h:60
void restore_underlined_blobs ( TO_BLOCK *  block)

Definition at line 39 of file underlin.cpp.

41  {
42  inT16 chop_coord; //chop boundary
43  TBOX blob_box; //of underline
44  BLOBNBOX *u_line; //underline bit
45  TO_ROW *row; //best row for blob
46  ICOORDELT_LIST chop_cells; //blobs to cut out
47  //real underlines
48  BLOBNBOX_LIST residual_underlines;
49  C_OUTLINE_LIST left_coutlines;
50  C_OUTLINE_LIST right_coutlines;
51  ICOORDELT_IT cell_it = &chop_cells;
52  //under lines
53  BLOBNBOX_IT under_it = &block->underlines;
54  BLOBNBOX_IT ru_it = &residual_underlines;
55 
56  if (block->get_rows()->empty())
57  return; // Don't crash if there are no rows.
58  for (under_it.mark_cycle_pt (); !under_it.cycled_list ();
59  under_it.forward ()) {
60  u_line = under_it.extract ();
61  blob_box = u_line->bounding_box ();
62  row = most_overlapping_row (block->get_rows (), u_line);
63  find_underlined_blobs (u_line, &row->baseline, row->xheight,
64  row->xheight * textord_underline_offset,
65  &chop_cells);
66  cell_it.set_to_list (&chop_cells);
67  for (cell_it.mark_cycle_pt (); !cell_it.cycled_list ();
68  cell_it.forward ()) {
69  chop_coord = cell_it.data ()->x ();
70  if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) {
71  split_to_blob (u_line, chop_coord,
72  textord_fp_chop_error + 0.5,
73  &left_coutlines,
74  &right_coutlines);
75  if (!left_coutlines.empty()) {
76  ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
77  }
78  chop_coord = cell_it.data ()->y ();
79  split_to_blob(NULL, chop_coord, textord_fp_chop_error + 0.5,
80  &left_coutlines, &right_coutlines);
81  if (!left_coutlines.empty()) {
82  row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
83  } else {
84  fprintf(stderr,
85  "Error:no outlines after chopping from %d to %d from (%d,%d)->(%d,%d)\n",
86  cell_it.data ()->x (), cell_it.data ()->y (),
87  blob_box.left (), blob_box.bottom (),
88  blob_box.right (), blob_box.top ());
89  ASSERT_HOST(FALSE);
90  }
91  u_line = NULL; //no more blobs to add
92  }
93  delete cell_it.extract();
94  }
95  if (!right_coutlines.empty ()) {
96  split_to_blob(NULL, blob_box.right(), textord_fp_chop_error + 0.5,
97  &left_coutlines, &right_coutlines);
98  if (!left_coutlines.empty())
99  ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
100  }
101  if (u_line != NULL) {
102  if (u_line->cblob() != NULL)
103  delete u_line->cblob();
104  delete u_line;
105  }
106  }
107  if (!ru_it.empty()) {
108  ru_it.move_to_first();
109  for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
110  under_it.add_after_then_move(ru_it.extract());
111  }
112  }
113 }
QSPLINE baseline
Definition: blobbox.h:642
EXTERN double textord_underline_offset
Definition: underlin.cpp:29
C_BLOB * cblob() const
Definition: blobbox.h:245
const TBOX & bounding_box() const
Definition: blobbox.h:208
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
Definition: rect.h:29
TO_ROW * most_overlapping_row(TO_ROW_LIST *rows, BLOBNBOX *blob)
Definition: underlin.cpp:122
#define FALSE
Definition: capi.h:28
inT16 right() const
Definition: rect.h:74
EXTERN int textord_fp_chop_error
Definition: fpchop.cpp:41
inT16 top() const
Definition: rect.h:53
short inT16
Definition: host.h:100
void split_to_blob(BLOBNBOX *blob, inT16 chop_coord, float pitch_error, C_OUTLINE_LIST *left_coutlines, C_OUTLINE_LIST *right_coutlines)
Definition: fpchop.cpp:249
void find_underlined_blobs(BLOBNBOX *u_line, QSPLINE *baseline, float xheight, float baseline_offset, ICOORDELT_LIST *chop_cells)
Definition: underlin.cpp:179
void insert_blob(BLOBNBOX *blob)
Definition: blobbox.cpp:709
float xheight
Definition: blobbox.h:629
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
BLOBNBOX_LIST underlines
Definition: blobbox.h:736
#define ASSERT_HOST(x)
Definition: errcode.h:84
inT16 bottom() const
Definition: rect.h:60
void vertical_cunderline_projection ( C_OUTLINE *  outline,
QSPLINE *  baseline,
float  xheight,
float  baseline_offset,
STATS *  lower_proj,
STATS *  middle_proj,
STATS *  upper_proj 
)
)

Definition at line 224 of file underlin.cpp.

232  {
233  ICOORD pos; //current point
234  ICOORD step; //edge step
235  inT16 lower_y, upper_y; //region limits
236  inT32 length; //of outline
237  inT16 stepindex; //current step
238  C_OUTLINE_IT out_it = outline->child ();
239 
240  pos = outline->start_pos ();
241  length = outline->pathlength ();
242  for (stepindex = 0; stepindex < length; stepindex++) {
243  step = outline->step (stepindex);
244  if (step.x () > 0) {
245  lower_y =
246  (inT16) floor (baseline->y (pos.x ()) + baseline_offset + 0.5);
247  upper_y =
248  (inT16) floor (baseline->y (pos.x ()) + baseline_offset +
249  xheight + 0.5);
250  if (pos.y () >= lower_y) {
251  lower_proj->add (pos.x (), -lower_y);
252  if (pos.y () >= upper_y) {
253  middle_proj->add (pos.x (), lower_y - upper_y);
254  upper_proj->add (pos.x (), upper_y - pos.y ());
255  }
256  else
257  middle_proj->add (pos.x (), lower_y - pos.y ());
258  }
259  else
260  lower_proj->add (pos.x (), -pos.y ());
261  }
262  else if (step.x () < 0) {
263  lower_y =
264  (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset +
265  0.5);
266  upper_y =
267  (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset +
268  xheight + 0.5);
269  if (pos.y () >= lower_y) {
270  lower_proj->add (pos.x () - 1, lower_y);
271  if (pos.y () >= upper_y) {
272  middle_proj->add (pos.x () - 1, upper_y - lower_y);
273  upper_proj->add (pos.x () - 1, pos.y () - upper_y);
274  }
275  else
276  middle_proj->add (pos.x () - 1, pos.y () - lower_y);
277  }
278  else
279  lower_proj->add (pos.x () - 1, pos.y ());
280  }
281  pos += step;
282  }
283 
284  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
285  vertical_cunderline_projection (out_it.data (),
286  baseline, xheight, baseline_offset,
287  lower_proj, middle_proj, upper_proj);
288  }
289 }
inT16 x() const
access function
Definition: points.h:52
const ICOORD & start_pos() const
Definition: coutln.h:108
ICOORD step(inT16 index) const
Definition: coutln.h:120
int inT32
Definition: host.h:102
void add(inT32 value, inT32 count)
Definition: statistc.cpp:103
inT16 y() const
access_function
Definition: points.h:56
inT32 pathlength() const
Definition: coutln.h:111
short inT16
Definition: host.h:100
integer coordinate
Definition: points.h:30
void vertical_cunderline_projection(C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj)
Definition: underlin.cpp:224
C_OUTLINE_LIST * child()
Definition: coutln.h:80
double y(double x) const
Definition: quspline.cpp:217

Variable Documentation

EXTERN bool textord_restore_underlines = TRUE

"Chop underlines & put back"

Definition at line 31 of file underlin.cpp.

EXTERN double textord_underline_offset = 0.1

"Fraction of x to ignore"

Definition at line 29 of file underlin.cpp.