Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::TabFind Class Reference

#include <tabfind.h>

Inheritance diagram for tesseract::TabFind:
tesseract::AlignedBlob tesseract::BlobGrid tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > tesseract::GridBase tesseract::ColumnFinder

Public Member Functions

 TabFind (int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
 
virtual ~TabFind ()
 
void InsertBlobsToGrid (bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
bool InsertBlob (bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
void SetBlockRuleEdges (TO_BLOCK *block)
 
void SetBlobRuleEdges (BLOBNBOX_LIST *blobs)
 
int GutterWidth (int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
 
void GutterWidthAndNeighbourGap (int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
 
int RightEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
int LeftEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * RightTabForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * LeftTabForBox (const TBOX &box, bool crossing, bool extended)
 
bool CommonWidth (int width)
 
WidthCallback * WidthCB ()
 
const ICOORD & image_origin () const
 
- Public Member Functions inherited from tesseract::AlignedBlob
 AlignedBlob (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~AlignedBlob ()
 
ScrollView * DisplayTabs (const char *window_name, ScrollView *tab_win)
 
TabVector * FindVerticalAlignment (AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
 
- Public Member Functions inherited from tesseract::BlobGrid
 BlobGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~BlobGrid ()
 
void InsertBlobList (BLOBNBOX_LIST *blobs)
 
- Public Member Functions inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~BBGrid ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(BLOBNBOX *))
 
void InsertBBox (bool h_spread, bool v_spread, BLOBNBOX *bbox)
 
void InsertPixPtBBox (int left, int bottom, Pix *pix, BLOBNBOX *bbox)
 
void RemoveBBox (BLOBNBOX *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Static Public Member Functions

static bool DifferentSizes (int size1, int size2)
 
static bool VeryDifferentSizes (int size1, int size2)
 
- Static Public Member Functions inherited from tesseract::AlignedBlob
static bool WithinTestRegion (int detail_level, int x, int y)
 
static void IncrementDebugPix ()
 
static const STRING & textord_debug_pix ()
 

Protected Member Functions

TabVector_LIST * vectors ()
 
TabVector_LIST * dead_vectors ()
 
bool FindTabVectors (TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
 
void DontFindTabVectors (BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
 
void TidyBlobs (TO_BLOCK *block)
 
void SetupTabSearch (int x, int y, int *min_key, int *max_key)
 
ScrollView * DisplayTabVectors (ScrollView *tab_win)
 
ScrollView * FindInitialTabVectors (BLOBNBOX_LIST *image_blobs, int min_gutter_width, TO_BLOCK *block)
 
void ResetForVerticalText (const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
 
void Reset ()
 
void ReflectInYAxis ()
 

Static Protected Member Functions

static void RotateBlobList (const FCOORD &rotation, BLOBNBOX_LIST *blobs)
 

Protected Attributes

ICOORD vertical_skew_
 
int resolution_
 
- Protected Attributes inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
BLOBNBOX_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

The TabFind class contains code to find tab-stops and maintain the vectors_ list of tab vectors. Also provides an interface to find neighbouring blobs in the grid of BLOBNBOXes that is used by multiple subclasses. Searching is a complex operation because of the need to enforce rule/separator lines, and tabstop boundaries, (when available), so as the holder of the list of TabVectors this class provides the functions.

Definition at line 62 of file tabfind.h.

Constructor & Destructor Documentation

tesseract::TabFind::TabFind ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright,
TabVector_LIST *  vlines,
int  vertical_x,
int  vertical_y,
int  resolution 
)

Definition at line 89 of file tabfind.cpp.

92  : AlignedBlob(gridsize, bleft, tright),
93  resolution_(resolution),
94  image_origin_(0, tright.y() - 1) {
95  width_cb_ = NULL;
96  v_it_.set_to_list(&vectors_);
97  v_it_.add_list_after(vlines);
98  SetVerticalSkewAndParellelize(vertical_x, vertical_y);
99  width_cb_ = NewPermanentTessCallback(this, &TabFind::CommonWidth);
100 }
bool CommonWidth(int width)
Definition: tabfind.cpp:419
int gridsize() const
Definition: bbgrid.h:68
#define NULL
Definition: host.h:144
inT16 y() const
access_function
Definition: points.h:56
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
AlignedBlob(int gridsize, const ICOORD &bleft, const ICOORD &tright)
tesseract::TabFind::~TabFind ( )
virtual

Definition at line 102 of file tabfind.cpp.

102  {
103  if (width_cb_ != NULL)
104  delete width_cb_;
105 }
#define NULL
Definition: host.h:144

Member Function Documentation

bool tesseract::TabFind::CommonWidth ( int  width)

Return true if the given width is close to one of the common widths in column_widths_.

Definition at line 419 of file tabfind.cpp.

419  {
420  width /= kColumnWidthFactor;
421  ICOORDELT_IT it(&column_widths_);
422  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
423  ICOORDELT* w = it.data();
424  if (NearlyEqual<int>(width, w->x(), 1))
425  return true;
426  }
427  return false;
428 }
inT16 x() const
access function
Definition: points.h:52
const int kColumnWidthFactor
Definition: tabfind.h:51
TabVector_LIST* tesseract::TabFind::dead_vectors ( )
inlineprotected

Definition at line 185 of file tabfind.h.

185  {
186  return &dead_vectors_;
187  }
bool tesseract::TabFind::DifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 2 different.

Definition at line 432 of file tabfind.cpp.

432  {
433  return size1 > size2 * 2 || size2 > size1 * 2;
434 }
ScrollView * tesseract::TabFind::DisplayTabVectors ( ScrollView *  tab_win)
protected

Display the tab vectors found in this grid.

Definition at line 525 of file tabfind.cpp.

525  {
526 #ifndef GRAPHICS_DISABLED
527  // For every vector, display it.
528  TabVector_IT it(&vectors_);
529  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
530  TabVector* vector = it.data();
531  vector->Display(tab_win);
532  }
533  tab_win->Update();
534 #endif
535  return tab_win;
536 }
static void Update()
Definition: scrollview.cpp:710
void tesseract::TabFind::DontFindTabVectors ( BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
FCOORD *  deskew,
FCOORD *  reskew 
)
protected

Definition at line 480 of file tabfind.cpp.

481  {
482  InsertBlobsToGrid(false, false, image_blobs, this);
483  InsertBlobsToGrid(true, false, &block->blobs, this);
484  deskew->set_x(1.0f);
485  deskew->set_y(0.0f);
486  reskew->set_x(1.0f);
487  reskew->set_y(0.0f);
488 }
BLOBNBOX_LIST blobs
Definition: blobbox.h:735
void set_x(float xin)
rewrite function
Definition: points.h:216
#define f(xc, yc)
Definition: imgscale.cpp:39
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:116
void set_y(float yin)
rewrite function
Definition: points.h:220
ScrollView * tesseract::TabFind::FindInitialTabVectors ( BLOBNBOX_LIST *  image_blobs,
int  min_gutter_width,
TO_BLOCK *  block 
)
protected

Definition at line 542 of file tabfind.cpp.

544  {
545  if (textord_tabfind_show_initialtabs) {
546  ScrollView* line_win = MakeWindow(0, 0, "VerticalLines");
547  line_win = DisplayTabVectors(line_win);
548  }
549  // Prepare the grid.
550  if (image_blobs != NULL)
551  InsertBlobsToGrid(true, false, image_blobs, this);
552  InsertBlobsToGrid(true, false, &block->blobs, this);
553  ScrollView* initial_win = FindTabBoxes(min_gutter_width);
554  FindAllTabVectors(min_gutter_width);
555 
557  SortVectors();
558  EvaluateTabs();
559  if (textord_tabfind_show_initialtabs && initial_win != NULL)
560  initial_win = DisplayTabVectors(initial_win);
561  MarkVerticalText();
562  return initial_win;
563 }
BLOBNBOX_LIST blobs
Definition: blobbox.h:735
#define NULL
Definition: host.h:144
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:362
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:116
bool textord_tabfind_show_initialtabs
Definition: tabfind.cpp:84
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:525
ICOORD vertical_skew_
Definition: tabfind.h:359
ScrollView * MakeWindow(int x, int y, const char *window_name)
bool tesseract::TabFind::FindTabVectors ( TabVector_LIST *  hlines,
BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
int  min_gutter_width,
ColPartitionGrid *  part_grid,
FCOORD *  deskew,
FCOORD *  reskew 
)
protected

Top-level function to find TabVectors in an input page block. Returns false if the detected skew angle is impossible. Applies the detected skew angle to deskew the tabs, blobs and part_grid.

Definition at line 447 of file tabfind.cpp.

451  {
452  ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width,
453  block);
454  ComputeColumnWidths(tab_win, part_grid);
456  SortVectors();
457  CleanupTabs();
458  if (!Deskew(hlines, image_blobs, block, deskew, reskew))
459  return false; // Skew angle is too large.
460  part_grid->Deskew(*deskew);
461  ApplyTabConstraints();
462  #ifndef GRAPHICS_DISABLED
463  if (textord_tabfind_show_finaltabs) {
464  tab_win = MakeWindow(640, 50, "FinalTabs");
465  if (textord_debug_images) {
466  tab_win->Image(AlignedBlob::textord_debug_pix().string(),
467  image_origin_.x(), image_origin_.y());
468  } else {
469  DisplayBoxes(tab_win);
470  DisplayTabs("FinalTabs", tab_win);
471  }
472  tab_win = DisplayTabVectors(tab_win);
473  }
474  #endif // GRAPHICS_DISABLED
475  return true;
476 }
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, TO_BLOCK *block)
Definition: tabfind.cpp:542
inT16 x() const
access function
Definition: points.h:52
bool textord_debug_images
Definition: alignedblob.cpp:34
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:362
bool textord_tabfind_show_finaltabs
Definition: tabfind.cpp:85
inT16 y() const
access_function
Definition: points.h:56
void Image(struct Pix *image, int x_pos, int y_pos)
Definition: scrollview.cpp:768
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
static const STRING & textord_debug_pix()
Definition: alignedblob.h:112
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:525
ICOORD vertical_skew_
Definition: tabfind.h:359
ScrollView * MakeWindow(int x, int y, const char *window_name)
int tesseract::TabFind::GutterWidth ( int  bottom_y,
int  top_y,
const TabVector &  v,
bool  ignore_unmergeables,
int  max_gutter_width,
int *  required_shift 
)

Definition at line 186 of file tabfind.cpp.

188  {
189  bool right_to_left = v.IsLeftTab();
190  int bottom_x = v.XAtY(bottom_y);
191  int top_x = v.XAtY(top_y);
192  int start_x = right_to_left ? MAX(top_x, bottom_x) : MIN(top_x, bottom_x);
193  BlobGridSearch sidesearch(this);
194  sidesearch.StartSideSearch(start_x, bottom_y, top_y);
195  int min_gap = max_gutter_width;
196  *required_shift = 0;
197  BLOBNBOX* blob = NULL;
198  while ((blob = sidesearch.NextSideSearch(right_to_left)) != NULL) {
199  const TBOX& box = blob->bounding_box();
200  if (box.bottom() >= top_y || box.top() <= bottom_y)
201  continue; // Doesn't overlap enough.
202  if (box.height() >= gridsize() * 2 &&
203  box.height() > box.width() * kLineFragmentAspectRatio) {
204  // Skip likely separator line residue.
205  continue;
206  }
207  if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type()))
208  continue; // Skip non-text if required.
209  int mid_y = (box.bottom() + box.top()) / 2;
210  // We use the x at the mid-y so that the required_shift guarantees
211  // to clear all the blobs on the tab-stop. If we use the min/max
212  // of x at top/bottom of the blob, then exactness would be required,
213  // which is not a good thing.
214  int tab_x = v.XAtY(mid_y);
215  int gap;
216  if (right_to_left) {
217  gap = tab_x - box.right();
218  if (gap < 0 && box.left() - tab_x < *required_shift)
219  *required_shift = box.left() - tab_x;
220  } else {
221  gap = box.left() - tab_x;
222  if (gap < 0 && box.right() - tab_x > *required_shift)
223  *required_shift = box.right() - tab_x;
224  }
225  if (gap > 0 && gap < min_gap)
226  min_gap = gap;
227  }
228  // Result may be negative, in which case, this is a really bad tabstop.
229  return min_gap - abs(*required_shift);
230 }
const double kLineFragmentAspectRatio
Definition: tabfind.cpp:57
const TBOX & bounding_box() const
Definition: blobbox.h:208
BlobRegionType region_type() const
Definition: blobbox.h:260
int gridsize() const
Definition: bbgrid.h:68
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:31
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:403
inT16 width() const
Definition: rect.h:104
Definition: rect.h:29
inT16 right() const
Definition: rect.h:74
inT16 top() const
Definition: rect.h:53
#define MIN(x, y)
Definition: ndminx.h:28
#define MAX(x, y)
Definition: ndminx.h:24
inT16 height() const
Definition: rect.h:97
inT16 bottom() const
Definition: rect.h:60
void tesseract::TabFind::GutterWidthAndNeighbourGap ( int  tab_x,
int  mean_height,
int  max_gutter,
bool  left,
BLOBNBOX *  bbox,
int *  gutter_width,
int *  neighbour_gap 
)

Find the gutter width and distance to inner neighbour for the given blob.

Definition at line 233 of file tabfind.cpp.

236  {
237  const TBOX& box = bbox->bounding_box();
238  // The gutter and internal sides of the box.
239  int gutter_x = left ? box.left() : box.right();
240  int internal_x = left ? box.right() : box.left();
241  // On ragged edges, the gutter side of the box is away from the tabstop.
242  int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
243  *gutter_width = max_gutter;
244  // If the box is away from the tabstop, we need to increase
245  // the allowed gutter width.
246  if (tab_gap > 0)
247  *gutter_width += tab_gap;
248  bool debug = WithinTestRegion(2, box.left(), box.bottom());
249  if (debug)
250  tprintf("Looking in gutter\n");
251  // Find the nearest blob on the outside of the column.
252  BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left,
253  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
254  *gutter_width, box.top(), box.bottom());
255  if (gutter_bbox != NULL) {
256  TBOX gutter_box = gutter_bbox->bounding_box();
257  *gutter_width = left ? tab_x - gutter_box.right()
258  : gutter_box.left() - tab_x;
259  }
260  if (*gutter_width >= max_gutter) {
261  // If there is no box because a tab was in the way, get the tab coord.
262  TBOX gutter_box(box);
263  if (left) {
264  gutter_box.set_left(tab_x - max_gutter - 1);
265  gutter_box.set_right(tab_x - max_gutter);
266  int tab_gutter = RightEdgeForBox(gutter_box, true, false);
267  if (tab_gutter < tab_x - 1)
268  *gutter_width = tab_x - tab_gutter;
269  } else {
270  gutter_box.set_left(tab_x + max_gutter);
271  gutter_box.set_right(tab_x + max_gutter + 1);
272  int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
273  if (tab_gutter > tab_x + 1)
274  *gutter_width = tab_gutter - tab_x;
275  }
276  }
277  if (*gutter_width > max_gutter)
278  *gutter_width = max_gutter;
279  // Now look for a neighbour on the inside.
280  if (debug)
281  tprintf("Looking for neighbour\n");
282  BLOBNBOX* neighbour = AdjacentBlob(bbox, !left,
283  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
284  *gutter_width, box.top(), box.bottom());
285  int neighbour_edge = left ? RightEdgeForBox(box, true, false)
286  : LeftEdgeForBox(box, true, false);
287  if (neighbour != NULL) {
288  TBOX n_box = neighbour->bounding_box();
289  if (debug) {
290  tprintf("Found neighbour:");
291  n_box.print();
292  }
293  if (left && n_box.left() < neighbour_edge)
294  neighbour_edge = n_box.left();
295  else if (!left && n_box.right() > neighbour_edge)
296  neighbour_edge = n_box.right();
297  }
298  *neighbour_gap = left ? neighbour_edge - internal_x
299  : internal_x - neighbour_edge;
300 }
void set_right(int x)
Definition: rect.h:77
const TBOX & bounding_box() const
Definition: blobbox.h:208
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
Definition: rect.h:29
inT16 right() const
Definition: rect.h:74
inT16 top() const
Definition: rect.h:53
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:306
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:311
BlobTextFlowType flow() const
Definition: blobbox.h:272
static bool WithinTestRegion(int detail_level, int x, int y)
void set_left(int x)
Definition: rect.h:70
void print() const
Definition: rect.h:263
inT16 bottom() const
Definition: rect.h:60
const ICOORD& tesseract::TabFind::image_origin ( ) const
inline

Return the coords at which to draw the image backdrop.

Definition at line 174 of file tabfind.h.

174  {
175  return image_origin_;
176  }
bool tesseract::TabFind::InsertBlob ( bool  h_spread,
bool  v_spread,
BLOBNBOX *  blob,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a single blob into the given grid (not necessarily this). If h_spread, then all cells covered horizontally by the box are used, otherwise, just the bottom-left. Similarly for v_spread. A side effect is that the left and right rule edges of the blob are set according to the tab vectors in this (not grid).

Definition at line 143 of file tabfind.cpp.

145  {
146  TBOX box = blob->bounding_box();
147  blob->set_left_rule(LeftEdgeForBox(box, false, false));
148  blob->set_right_rule(RightEdgeForBox(box, false, false));
149  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
150  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
151  if (blob->joined_to_prev())
152  return false;
153  grid->InsertBBox(h_spread, v_spread, blob);
154  return true;
155 }
const TBOX & bounding_box() const
Definition: blobbox.h:208
void set_right_rule(int new_right)
Definition: blobbox.h:299
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:311
Definition: rect.h:29
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:305
bool joined_to_prev() const
Definition: blobbox.h:233
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:486
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:306
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:311
void set_left_rule(int new_left)
Definition: blobbox.h:293
void tesseract::TabFind::InsertBlobsToGrid ( bool  h_spread,
bool  v_spread,
BLOBNBOX_LIST *  blobs,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a list of blobs into the given grid (not necessarily this). See InsertBlob for the other arguments. It would seem to make more sense to swap this and grid, but this way around allows grid to not be derived from TabFind, eg a ColPartitionGrid, while the grid that provides the tab stops(this) has to be derived from TabFind.

Definition at line 116 of file tabfind.cpp.

119  {
120  BLOBNBOX_IT blob_it(blobs);
121  int b_count = 0;
122  int reject_count = 0;
123  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
124  BLOBNBOX* blob = blob_it.data();
125 // if (InsertBlob(true, true, blob, grid)) {
126  if (InsertBlob(h_spread, v_spread, blob, grid)) {
127  ++b_count;
128  } else {
129  ++reject_count;
130  }
131  }
132  if (textord_debug_tabfind) {
133  tprintf("Inserted %d blobs into grid, %d rejected.\n",
134  b_count, reject_count);
135  }
136 }
int textord_debug_tabfind
Definition: alignedblob.cpp:28
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:143
int tesseract::TabFind::LeftEdgeForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

As RightEdgeForBox, but finds the left Edge instead.

Definition at line 311 of file tabfind.cpp.

311  {
312  TabVector* v = LeftTabForBox(box, crossing, extended);
313  return v == NULL ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
314 }
inT16 x() const
access function
Definition: points.h:52
#define NULL
Definition: host.h:144
inT16 top() const
Definition: rect.h:53
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:373
inT16 bottom() const
Definition: rect.h:60
TabVector * tesseract::TabFind::LeftTabForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

As RightTabForBox, but finds the left TabVector instead.

Definition at line 373 of file tabfind.cpp.

374  {
375  if (v_it_.empty())
376  return NULL;
377  int top_y = box.top();
378  int bottom_y = box.bottom();
379  int mid_y = (top_y + bottom_y) / 2;
380  int left = crossing ? (box.left() + box.right()) / 2 : box.left();
381  int min_key, max_key;
382  SetupTabSearch(left, mid_y, &min_key, &max_key);
383  // Position the iterator at the last TabVector with sort_key <= max_key.
384  while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
385  v_it_.forward();
386  while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
387  v_it_.backward();
388  }
389  // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
390  TabVector* best_v = NULL;
391  int best_x = -1;
392  int key_limit = -1;
393  do {
394  TabVector* v = v_it_.data();
395  int x = v->XAtY(mid_y);
396  if (x <= left &&
397  (v->VOverlap(top_y, bottom_y) > 0 ||
398  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
399  if (best_v == NULL || x > best_x) {
400  best_v = v;
401  best_x = x;
402  // We can guarantee that no better vector can be found if the
403  // sort key is less than that of the best by max_key - min_key.
404  key_limit = v->sort_key() - (max_key - min_key);
405  }
406  }
407  // Break when the search is done to avoid wrapping the iterator and
408  // thereby potentially slowing the next search.
409  if (v_it_.at_first() ||
410  (best_v != NULL && v->sort_key() < key_limit))
411  break; // Prevent restarting list for next call.
412  v_it_.backward();
413  } while (!v_it_.at_last());
414  return best_v;
415 }
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:518
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
inT16 right() const
Definition: rect.h:74
inT16 top() const
Definition: rect.h:53
inT16 bottom() const
Definition: rect.h:60
void tesseract::TabFind::ReflectInYAxis ( )
protected

Definition at line 1385 of file tabfind.cpp.

1385  {
1386  TabVector_LIST temp_list;
1387  TabVector_IT temp_it(&temp_list);
1388  v_it_.move_to_first();
1389  // The TabVector list only contains vertical lines, but they need to be
1390  // reflected and the list needs to be reversed, so they are still in
1391  // sort_key order.
1392  while (!v_it_.empty()) {
1393  TabVector* v = v_it_.extract();
1394  v_it_.forward();
1395  v->ReflectInYAxis();
1396  temp_it.add_before_then_move(v);
1397  }
1398  v_it_.add_list_after(&temp_list);
1399  v_it_.move_to_first();
1400  // Reset this grid with reflected bounding boxes.
1401  TBOX grid_box(bleft(), tright());
1402  int tmp = grid_box.left();
1403  grid_box.set_left(-grid_box.right());
1404  grid_box.set_right(-tmp);
1405  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1406 }
int gridsize() const
Definition: bbgrid.h:68
Definition: rect.h:29
const ICOORD & tright() const
Definition: bbgrid.h:80
const ICOORD & bleft() const
Definition: bbgrid.h:77
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void tesseract::TabFind::Reset ( )
protected

Definition at line 1374 of file tabfind.cpp.

1374  {
1375  v_it_.move_to_first();
1376  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1377  if (!v_it_.data()->IsSeparator())
1378  delete v_it_.extract();
1379  }
1380  Clear();
1381 }
void tesseract::TabFind::ResetForVerticalText ( const FCOORD &  rotate,
const FCOORD &  rerotate,
TabVector_LIST *  horizontal_lines,
int *  min_gutter_width 
)
protected

Definition at line 1329 of file tabfind.cpp.

1331  {
1332  // Rotate the horizontal and vertical vectors and swap them over.
1333  // Only the separators are kept and rotated; other tabs are used
1334  // to estimate the gutter width then thrown away.
1335  TabVector_LIST ex_verticals;
1336  TabVector_IT ex_v_it(&ex_verticals);
1337  TabVector_LIST vlines;
1338  TabVector_IT v_it(&vlines);
1339  while (!v_it_.empty()) {
1340  TabVector* v = v_it_.extract();
1341  if (v->IsSeparator()) {
1342  v->Rotate(rotate);
1343  ex_v_it.add_after_then_move(v);
1344  } else {
1345  v_it.add_after_then_move(v);
1346  }
1347  v_it_.forward();
1348  }
1349 
1350  // Adjust the min gutter width for better tabbox selection
1351  // in 2nd call to FindInitialTabVectors().
1352  int median_gutter = FindMedianGutterWidth(&vlines);
1353  if (median_gutter > *min_gutter_width)
1354  *min_gutter_width = median_gutter;
1355 
1356  TabVector_IT h_it(horizontal_lines);
1357  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1358  TabVector* h = h_it.data();
1359  h->Rotate(rotate);
1360  }
1361  v_it_.add_list_after(horizontal_lines);
1362  v_it_.move_to_first();
1363  h_it.set_to_list(horizontal_lines);
1364  h_it.add_list_after(&ex_verticals);
1365 
1366  // Rebuild the grid to the new size.
1367  TBOX grid_box(bleft(), tright());
1368  grid_box.rotate_large(rotate);
1369  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1370 }
int gridsize() const
Definition: bbgrid.h:68
Definition: rect.h:29
const ICOORD & tright() const
Definition: bbgrid.h:80
const ICOORD & bleft() const
Definition: bbgrid.h:77
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
int tesseract::TabFind::RightEdgeForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

Return the x-coord that corresponds to the right edge for the given box. If there is a rule line to the right that vertically overlaps it, then return the x-coord of the rule line, otherwise return the right edge of the page. For details see RightTabForBox below.

Definition at line 306 of file tabfind.cpp.

306  {
307  TabVector* v = RightTabForBox(box, crossing, extended);
308  return v == NULL ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
309 }
inT16 x() const
access function
Definition: points.h:52
#define NULL
Definition: host.h:144
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:329
inT16 top() const
Definition: rect.h:53
ICOORD tright_
Definition: bbgrid.h:96
inT16 bottom() const
Definition: rect.h:60
TabVector * tesseract::TabFind::RightTabForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

Return the TabVector that corresponds to the right edge for the given box. If there is a TabVector to the right that vertically overlaps it, then return it, otherwise return NULL. Note that Right and Left refer to the position of the TabVector, not its type, ie RightTabForBox returns the nearest TabVector to the right of the box, regardless of its type. If a TabVector crosses right through the box (as opposed to grazing one edge or missing entirely), then crossing false will ignore such a line. Crossing true will return the line for BOTH left and right edges. If extended is true, then TabVectors are considered to extend to their extended_start/end_y, otherwise, just the startpt_ and endpt_. These functions make use of an internal iterator to the vectors_ list for speed when used repeatedly on neighbouring boxes. The caveat is that the iterator must be updated whenever the list is modified.

Definition at line 329 of file tabfind.cpp.

330  {
331  if (v_it_.empty())
332  return NULL;
333  int top_y = box.top();
334  int bottom_y = box.bottom();
335  int mid_y = (top_y + bottom_y) / 2;
336  int right = crossing ? (box.left() + box.right()) / 2 : box.right();
337  int min_key, max_key;
338  SetupTabSearch(right, mid_y, &min_key, &max_key);
339  // Position the iterator at the first TabVector with sort_key >= min_key.
340  while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
341  v_it_.backward();
342  while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
343  v_it_.forward();
344  // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
345  TabVector* best_v = NULL;
346  int best_x = -1;
347  int key_limit = -1;
348  do {
349  TabVector* v = v_it_.data();
350  int x = v->XAtY(mid_y);
351  if (x >= right &&
352  (v->VOverlap(top_y, bottom_y) > 0 ||
353  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
354  if (best_v == NULL || x < best_x) {
355  best_v = v;
356  best_x = x;
357  // We can guarantee that no better vector can be found if the
358  // sort key exceeds that of the best by max_key - min_key.
359  key_limit = v->sort_key() + max_key - min_key;
360  }
361  }
362  // Break when the search is done to avoid wrapping the iterator and
363  // thereby potentially slowing the next search.
364  if (v_it_.at_last() ||
365  (best_v != NULL && v->sort_key() > key_limit))
366  break; // Prevent restarting list for next call.
367  v_it_.forward();
368  } while (!v_it_.at_first());
369  return best_v;
370 }
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:518
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
inT16 right() const
Definition: rect.h:74
inT16 top() const
Definition: rect.h:53
inT16 bottom() const
Definition: rect.h:60
void tesseract::TabFind::RotateBlobList ( const FCOORD &  rotation,
BLOBNBOX_LIST *  blobs 
)
staticprotected

Definition at line 1259 of file tabfind.cpp.

1259  {
1260  BLOBNBOX_IT it(blobs);
1261  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1262  it.data()->rotate_box(rotation);
1263  }
1264 }
void tesseract::TabFind::SetBlobRuleEdges ( BLOBNBOX_LIST *  blobs)

Definition at line 167 of file tabfind.cpp.

167  {
168  BLOBNBOX_IT blob_it(blobs);
169  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
170  BLOBNBOX* blob = blob_it.data();
171  TBOX box = blob->bounding_box();
172  blob->set_left_rule(LeftEdgeForBox(box, false, false));
173  blob->set_right_rule(RightEdgeForBox(box, false, false));
174  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
175  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
176  }
177 }
const TBOX & bounding_box() const
Definition: blobbox.h:208
void set_right_rule(int new_right)
Definition: blobbox.h:299
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:311
Definition: rect.h:29
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:305
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:306
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:311
void set_left_rule(int new_left)
Definition: blobbox.h:293
void tesseract::TabFind::SetBlockRuleEdges ( TO_BLOCK * block)

Definition at line 158 of file tabfind.cpp.

158  {
159  SetBlobRuleEdges(&block->blobs);
160  SetBlobRuleEdges(&block->small_blobs);
161  SetBlobRuleEdges(&block->noise_blobs);
162  SetBlobRuleEdges(&block->large_blobs);
163 }
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:737
BLOBNBOX_LIST blobs
Definition: blobbox.h:735
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:167
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:738
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:739
void tesseract::TabFind::SetupTabSearch ( int  x,
int  y,
int *  min_key,
int *  max_key 
)
protected

Definition at line 518 of file tabfind.cpp.

518  {
519  int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
520  int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
521  *min_key = MIN(key1, key2);
522  *max_key = MAX(key1, key2);
523 }
inT16 y() const
access_function
Definition: points.h:56
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:280
ICOORD tright_
Definition: bbgrid.h:96
#define MIN(x, y)
Definition: ndminx.h:28
ICOORD vertical_skew_
Definition: tabfind.h:359
#define MAX(x, y)
Definition: ndminx.h:24
void tesseract::TabFind::TidyBlobs ( TO_BLOCK * block)
protected

Definition at line 493 of file tabfind.cpp.

493  {
494  BLOBNBOX_IT large_it = &block->large_blobs;
495  BLOBNBOX_IT blob_it = &block->blobs;
496  int b_count = 0;
497  for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
498  BLOBNBOX* large_blob = large_it.data();
499  if (large_blob->owner() != NULL) {
500  blob_it.add_to_end(large_it.extract());
501  ++b_count;
502  }
503  }
504  if (textord_debug_tabfind) {
505  tprintf("Moved %d large blobs to normal list\n",
506  b_count);
507  #ifndef GRAPHICS_DISABLED
508  ScrollView* rej_win = MakeWindow(500, 300, "Image blobs");
509  block->plot_graded_blobs(rej_win);
510  block->plot_noise_blobs(rej_win);
511  rej_win->Update();
512  #endif // GRAPHICS_DISABLED
513  }
514  block->DeleteUnownedNoise();
515 }
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:999
BLOBNBOX_LIST blobs
Definition: blobbox.h:735
#define NULL
Definition: host.h:144
int textord_debug_tabfind
Definition: alignedblob.cpp:28
void plot_noise_blobs(ScrollView *to_win)
Definition: blobbox.cpp:991
tesseract::ColPartition * owner() const
Definition: blobbox.h:329
static void Update()
Definition: scrollview.cpp:710
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:739
void DeleteUnownedNoise()
Definition: blobbox.cpp:978
ScrollView * MakeWindow(int x, int y, const char *window_name)
TabVector_LIST* tesseract::TabFind::vectors ( )
inline protected

Accessors

Definition at line 182 of file tabfind.h.

182  {
183  return &vectors_;
184  }
bool tesseract::TabFind::VeryDifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 5 different.

Definition at line 438 of file tabfind.cpp.

438  {
439  return size1 > size2 * 5 || size2 > size1 * 5;
440 }
WidthCallback* tesseract::TabFind::WidthCB ( )
inline

Return a callback for testing CommonWidth.

Definition at line 167 of file tabfind.h.

167  {
168  return width_cb_;
169  }

Member Data Documentation

int tesseract::TabFind::resolution_
protected

Definition at line 360 of file tabfind.h.

ICOORD tesseract::TabFind::vertical_skew_
protected

Definition at line 359 of file tabfind.h.


The documentation for this class was generated from the following files: