Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
edgblob.cpp File Reference
#include "mfcpch.h"
#include "scanedg.h"
#include "drawedg.h"
#include "edgloop.h"
#include "edgblob.h"

Go to the source code of this file.

Macros

#define EXTERN
 

Functions

extract_edges

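Run the edge detector over the block and turn the detected outlines into blobs. The function returns nothing; the outlines are passed to outlines_to_blobs together with the block.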

void extract_edges (Pix *pix, BLOCK *block)
 
outlines_to_blobs

Gather together outlines into blobs using the usual bucket sort.

void outlines_to_blobs (BLOCK *block, ICOORD bleft, ICOORD tright, C_OUTLINE_LIST *outlines)
 
fill_buckets

Move every outline off the input list into the bucket selected by the left and bottom coordinates of its bounding box.

void fill_buckets (C_OUTLINE_LIST *outlines, OL_BUCKETS *buckets)
 
empty_buckets

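Empty the buckets: repeatedly extract an outermost outline, capture its children, and add the resulting blob to the block's blob list if it is good or to its reject list otherwise.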

void empty_buckets (BLOCK *block, OL_BUCKETS *buckets)
 
capture_children

Find all neighbouring outlines that are children of this outline and either move them to the output list or declare this outline illegal and return FALSE.

BOOL8 capture_children (OL_BUCKETS *buckets, C_BLOB_IT *reject_it, C_OUTLINE_IT *blob_it)
 

Variables

EXTERN bool edges_use_new_outline_complexity = FALSE
 
EXTERN int edges_max_children_per_outline = 10
 
EXTERN int edges_max_children_layers = 5
 
EXTERN bool edges_debug = FALSE
 
EXTERN int edges_children_per_grandchild = 10
 
EXTERN int edges_children_count_limit = 45
 
EXTERN bool edges_children_fix = FALSE
 
EXTERN int edges_min_nonhole = 12
 
EXTERN int edges_patharea_ratio = 40
 
EXTERN double edges_childarea = 0.5
 
EXTERN double edges_boxarea = 0.875
 

Macro Definition Documentation

#define EXTERN

Definition at line 31 of file edgblob.cpp.

Function Documentation

BOOL8 capture_children ( OL_BUCKETS *  buckets,
C_BLOB_IT *  reject_it,
C_OUTLINE_IT *  blob_it 
)

Definition at line 449 of file edgblob.cpp.

453  {
454  C_OUTLINE *outline; // master outline
455  inT32 child_count; // no of children
456 
457  outline = blob_it->data();
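458  if (edges_use_new_outline_complexity)
459  child_count = buckets->outline_complexity(outline,
460  edges_children_count_limit,
461  0);
462  else
463  child_count = buckets->count_children(outline,
464  edges_children_count_limit);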
465  if (child_count > edges_children_count_limit)
466  return FALSE;
467 
468  if (child_count > 0)
469  buckets->extract_children(outline, blob_it);
470  return TRUE;
471 }
EXTERN int edges_children_count_limit
Definition: edgblob.cpp:51
inT32 outline_complexity(C_OUTLINE *outline, inT32 max_count, inT16 depth)
Definition: edgblob.cpp:115
inT32 count_children(C_OUTLINE *outline, inT32 max_count)
Definition: edgblob.cpp:184
int inT32
Definition: host.h:102
#define FALSE
Definition: capi.h:28
EXTERN bool edges_use_new_outline_complexity
Definition: edgblob.cpp:39
void extract_children(C_OUTLINE *outline, C_OUTLINE_IT *it)
Definition: edgblob.cpp:300
#define TRUE
Definition: capi.h:27
void empty_buckets ( BLOCK *  block,
OL_BUCKETS *  buckets 
)

Definition at line 403 of file edgblob.cpp.

406  {
407  BOOL8 good_blob; // healthy blob
408  C_OUTLINE_LIST outlines; // outlines in block
409  // iterator
410  C_OUTLINE_IT out_it = &outlines;
411  C_OUTLINE_IT bucket_it = buckets->start_scan();
412  C_OUTLINE_IT parent_it; // parent outline
413  C_BLOB *blob; // new blob
414  C_BLOB_IT good_blobs = block->blob_list();
415  C_BLOB_IT junk_blobs = block->reject_blobs();
416 
417  while (!bucket_it.empty()) {
418  out_it.set_to_list(&outlines);
419  do {
420  parent_it = bucket_it; // find outermost
421  do {
422  bucket_it.forward();
423  } while (!bucket_it.at_first() &&
424  !(*parent_it.data() < *bucket_it.data()));
425  } while (!bucket_it.at_first());
426 
427  // move to new list
428  out_it.add_after_then_move(parent_it.extract());
429  good_blob = capture_children(buckets, &junk_blobs, &out_it);
430  blob = new C_BLOB(&outlines);
431  if (good_blob)
432  good_blobs.add_after_then_move(blob);
433  else
434  junk_blobs.add_after_then_move(blob);
435 
436  bucket_it.set_to_list(buckets->scan_next());
437  }
438 }
C_OUTLINE_LIST * scan_next()
Definition: edgblob.h:53
unsigned char BOOL8
Definition: host.h:113
C_BLOB_LIST * reject_blobs()
Definition: ocrblock.h:136
C_BLOB_LIST * blob_list()
get blobs
Definition: ocrblock.h:133
BOOL8 capture_children(OL_BUCKETS *buckets, C_BLOB_IT *reject_it, C_OUTLINE_IT *blob_it)
Definition: edgblob.cpp:449
C_OUTLINE_LIST * start_scan()
Definition: edgblob.h:47
void extract_edges ( Pix *  pix,
BLOCK *  block 
)

Definition at line 335 of file edgblob.cpp.

336  { // block to scan
337  C_OUTLINE_LIST outlines; // outlines in block
338  C_OUTLINE_IT out_it = &outlines;
339 
340  // TODO(rays) move the pix all the way down to the bottom.
341  IMAGE image;
342  image.FromPix(pix);
343 
344  block_edges(&image, block, &out_it);
345  ICOORD bleft; // block box
346  ICOORD tright;
347  block->bounding_box(bleft, tright);
348  // make blobs
349  outlines_to_blobs(block, bleft, tright, &outlines);
350 }
void outlines_to_blobs(BLOCK *block, ICOORD bleft, ICOORD tright, C_OUTLINE_LIST *outlines)
Definition: edgblob.cpp:359
void FromPix(const Pix *src_pix)
Definition: imgs.cpp:1561
ICOORD
integer coordinate
Definition: points.h:30
void block_edges(IMAGE *t_image, PDBLK *block, C_OUTLINE_IT *outline_it)
Definition: scanedg.cpp:35
IMAGE
Definition: img.h:51
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:70
void fill_buckets ( C_OUTLINE_LIST *  outlines,
OL_BUCKETS *  buckets 
)

Definition at line 378 of file edgblob.cpp.

381  {
382  TBOX ol_box; // outline box
383  C_OUTLINE_IT out_it = outlines; // iterator
384  C_OUTLINE_IT bucket_it; // iterator in bucket
385  C_OUTLINE *outline; // current outline
386 
387  for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
388  outline = out_it.extract(); // take off list
389  // get box
390  ol_box = outline->bounding_box();
391  bucket_it.set_to_list((*buckets) (ol_box.left(), ol_box.bottom()));
392  bucket_it.add_to_end(outline);
393  }
394 }
const TBOX & bounding_box() const
Definition: coutln.h:85
inT16 left() const
Definition: rect.h:67
TBOX
Definition: rect.h:29
inT16 bottom() const
Definition: rect.h:60
void outlines_to_blobs ( BLOCK *  block,
ICOORD  bleft,
ICOORD  tright,
C_OUTLINE_LIST *  outlines 
)

Definition at line 359 of file edgblob.cpp.

363  {
364  // make buckets
365  OL_BUCKETS buckets(bleft, tright);
366 
367  fill_buckets(outlines, &buckets);
368  empty_buckets(block, &buckets);
369 }
void empty_buckets(BLOCK *block, OL_BUCKETS *buckets)
Definition: edgblob.cpp:403
void fill_buckets(C_OUTLINE_LIST *outlines, OL_BUCKETS *buckets)
Definition: edgblob.cpp:378

Variable Documentation

EXTERN double edges_boxarea = 0.875

"Min area fraction of grandchild for box"

Definition at line 61 of file edgblob.cpp.

EXTERN double edges_childarea = 0.5

"Min area fraction of child outline"

Definition at line 59 of file edgblob.cpp.

EXTERN int edges_children_count_limit = 45

"Max holes allowed in blob"

Definition at line 51 of file edgblob.cpp.

EXTERN bool edges_children_fix = FALSE

"Remove boxy parents of char-like children"

Definition at line 53 of file edgblob.cpp.

EXTERN int edges_children_per_grandchild = 10

"Importance ratio for chucking outlines"

Definition at line 49 of file edgblob.cpp.

EXTERN bool edges_debug = FALSE

"turn on debugging for this module"

Definition at line 45 of file edgblob.cpp.

EXTERN int edges_max_children_layers = 5

"Max layers of nested children inside a character outline"

Definition at line 43 of file edgblob.cpp.

EXTERN int edges_max_children_per_outline = 10

"Max number of children inside a character outline"

Definition at line 41 of file edgblob.cpp.

EXTERN int edges_min_nonhole = 12

"Min pixels for potential char in box"

Definition at line 55 of file edgblob.cpp.

EXTERN int edges_patharea_ratio = 40

"Max lensq/area for acceptable child outline"

Definition at line 57 of file edgblob.cpp.

EXTERN bool edges_use_new_outline_complexity = FALSE

"Use the new outline complexity module"

Definition at line 39 of file edgblob.cpp.