Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::ColPartition Class Reference

#include <colpartition.h>

Inheritance diagram for tesseract::ColPartition:
ELIST2_LINK

Public Member Functions

 ColPartition ()
 
 ColPartition (BlobRegionType blob_type, const ICOORD &vertical)
 
 ~ColPartition ()
 
const TBOX & bounding_box () const
 
int left_margin () const
 
void set_left_margin (int margin)
 
int right_margin () const
 
void set_right_margin (int margin)
 
int median_top () const
 
int median_bottom () const
 
int median_left () const
 
int median_right () const
 
int median_size () const
 
void set_median_size (int size)
 
int median_width () const
 
void set_median_width (int width)
 
BlobRegionType blob_type () const
 
void set_blob_type (BlobRegionType t)
 
BlobTextFlowType flow () const
 
void set_flow (BlobTextFlowType f)
 
int good_blob_score () const
 
bool good_width () const
 
bool good_column () const
 
bool left_key_tab () const
 
int left_key () const
 
bool right_key_tab () const
 
int right_key () const
 
PolyBlockType type () const
 
void set_type (PolyBlockType t)
 
BLOBNBOX_CLIST * boxes ()
 
int boxes_count () const
 
void set_vertical (const ICOORD &v)
 
ColPartition_CLIST * upper_partners ()
 
ColPartition_CLIST * lower_partners ()
 
void set_working_set (WorkingPartSet *working_set)
 
bool block_owned () const
 
void set_block_owned (bool owned)
 
bool desperately_merged () const
 
ColPartitionSet * column_set () const
 
void set_side_step (int step)
 
int bottom_spacing () const
 
void set_bottom_spacing (int spacing)
 
int top_spacing () const
 
void set_top_spacing (int spacing)
 
void set_table_type ()
 
void clear_table_type ()
 
bool inside_table_column ()
 
void set_inside_table_column (bool val)
 
ColPartition * nearest_neighbor_above () const
 
void set_nearest_neighbor_above (ColPartition *part)
 
ColPartition * nearest_neighbor_below () const
 
void set_nearest_neighbor_below (ColPartition *part)
 
int space_above () const
 
void set_space_above (int space)
 
int space_below () const
 
void set_space_below (int space)
 
int space_to_left () const
 
void set_space_to_left (int space)
 
int space_to_right () const
 
void set_space_to_right (int space)
 
uinT8 * color1 ()
 
uinT8 * color2 ()
 
bool owns_blobs () const
 
void set_owns_blobs (bool owns_blobs)
 
int MidY () const
 
int MedianY () const
 
int MidX () const
 
int SortKey (int x, int y) const
 
int XAtY (int sort_key, int y) const
 
int KeyWidth (int left_key, int right_key) const
 
int ColumnWidth () const
 
int BoxLeftKey () const
 
int BoxRightKey () const
 
int LeftAtY (int y) const
 
int RightAtY (int y) const
 
bool IsLeftOf (const ColPartition &other) const
 
bool ColumnContains (int x, int y) const
 
bool IsEmpty () const
 
bool IsSingleton () const
 
bool HOverlaps (const ColPartition &other) const
 
bool VOverlaps (const ColPartition &other) const
 
int VCoreOverlap (const ColPartition &other) const
 
int HCoreOverlap (const ColPartition &other) const
 
bool VSignificantCoreOverlap (const ColPartition &other) const
 
bool WithinSameMargins (const ColPartition &other) const
 
bool TypesMatch (const ColPartition &other) const
 
bool IsLineType () const
 
bool IsImageType () const
 
bool IsTextType () const
 
bool IsVerticalType () const
 
bool IsHorizontalType () const
 
bool IsUnMergeableType () const
 
bool IsVerticalLine () const
 
bool IsHorizontalLine () const
 
void AddBox (BLOBNBOX *box)
 
void RemoveBox (BLOBNBOX *box)
 
BLOBNBOX * BiggestBox ()
 
TBOX BoundsWithoutBox (BLOBNBOX *box)
 
void ClaimBoxes ()
 
void DisownBoxes ()
 
void DeleteBoxes ()
 
void ReflectInYAxis ()
 
bool IsLegal ()
 
bool MatchingColumns (const ColPartition &other) const
 
bool MatchingTextColor (const ColPartition &other) const
 
bool MatchingSizes (const ColPartition &other) const
 
bool ConfirmNoTabViolation (const ColPartition &other) const
 
bool MatchingStrokeWidth (const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
 
bool OKDiacriticMerge (const ColPartition &candidate, bool debug) const
 
void SetLeftTab (const TabVector *tab_vector)
 
void SetRightTab (const TabVector *tab_vector)
 
void CopyLeftTab (const ColPartition &src, bool take_box)
 
void CopyRightTab (const ColPartition &src, bool take_box)
 
int LeftBlobRule () const
 
int RightBlobRule () const
 
float SpecialBlobsDensity (const BlobSpecialTextType type) const
 
int SpecialBlobsCount (const BlobSpecialTextType type)
 
void SetSpecialBlobsDensity (const BlobSpecialTextType type, const float density)
 
void ComputeSpecialBlobsDensity ()
 
void AddPartner (bool upper, ColPartition *partner)
 
void RemovePartner (bool upper, ColPartition *partner)
 
ColPartition * SingletonPartner (bool upper)
 
void Absorb (ColPartition *other, WidthCallback *cb)
 
bool OKMergeOverlap (const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
 
BLOBNBOX * OverlapSplitBlob (const TBOX &box)
 
ColPartition * SplitAtBlob (BLOBNBOX *split_blob)
 
ColPartition * SplitAt (int split_x)
 
void ComputeLimits ()
 
int CountOverlappingBoxes (const TBOX &box)
 
void SetPartitionType (int resolution, ColPartitionSet *columns)
 
PolyBlockType PartitionType (ColumnSpanningType flow) const
 
void ColumnRange (int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
 
void SetColumnGoodness (WidthCallback *cb)
 
bool MarkAsLeaderIfMonospaced ()
 
void SetRegionAndFlowTypesFromProjectionValue (int value)
 
void SetBlobTypes ()
 
bool HasGoodBaseline ()
 
void AddToWorkingSet (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
 
ColPartition * ShallowCopy () const
 
ColPartition * CopyButDontOwnBlobs ()
 
ScrollView::Color BoxColor () const
 
void Print () const
 
void PrintColors ()
 
void SmoothPartnerRun (int working_set_count)
 
void RefinePartners (PolyBlockType type, bool get_desparate, ColPartitionGrid *grid)
 
bool IsInSameColumnAs (const ColPartition &part) const
 
void set_first_column (int column)
 
void set_last_column (int column)
 
- Public Member Functions inherited from ELIST2_LINK
 ELIST2_LINK ()
 
 ELIST2_LINK (const ELIST2_LINK &)
 
void operator= (const ELIST2_LINK &)
 

Static Public Member Functions

static ColPartition * MakeLinePartition (BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
 
static ColPartition * FakePartition (const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
 
static ColPartition * MakeBigPartition (BLOBNBOX *box, ColPartition_LIST *big_part_list)
 
static bool TypesMatch (BlobRegionType type1, BlobRegionType type2)
 
static bool TypesSimilar (PolyBlockType type1, PolyBlockType type2)
 
static void LineSpacingBlocks (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
 
static TO_BLOCK * MakeBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static TO_BLOCK * MakeVerticalTextBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 

Detailed Description

ColPartition is a partition of a horizontal slice of the page. It starts out as a collection of blobs at a particular y-coord in the grid, but ends up (after merging and uniquing) as an approximate text line. ColPartitions are also used to hold a partitioning of the page into columns, each representing one column. Although a ColPartition applies to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions emerges, which represents the columns over a wide y-coordinate range.

Definition at line 67 of file colpartition.h.

Constructor & Destructor Documentation

tesseract::ColPartition::ColPartition ( )
inline

Definition at line 69 of file colpartition.h.

69  {
70  // This empty constructor is here only so that the class can be ELISTIZED.
71  // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
72  // and eliminate CLASSNAME##_copier.
73  }
tesseract::ColPartition::ColPartition ( BlobRegionType  blob_type,
const ICOORD & vertical
)
Parameters
blob_type is the blob_region_type_ of the blobs in this partition.
vertical is the direction of logical vertical on the possibly skewed image.

Definition at line 84 of file colpartition.cpp.

85  : left_margin_(-MAX_INT32), right_margin_(MAX_INT32),
86  median_bottom_(MAX_INT32), median_top_(-MAX_INT32), median_size_(0),
87  median_left_(MAX_INT32), median_right_(-MAX_INT32), median_width_(0),
88  blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0),
89  good_width_(false), good_column_(false),
90  left_key_tab_(false), right_key_tab_(false),
91  left_key_(0), right_key_(0), type_(PT_UNKNOWN), vertical_(vertical),
92  working_set_(NULL), last_add_was_vertical_(false), block_owned_(false),
93  desperately_merged_(false),
94  first_column_(-1), last_column_(-1), column_set_(NULL),
95  side_step_(0), top_spacing_(0), bottom_spacing_(0),
96  type_before_table_(PT_UNKNOWN), inside_table_column_(false),
97  nearest_neighbor_above_(NULL), nearest_neighbor_below_(NULL),
98  space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0),
99  owns_blobs_(true) {
100  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
101 }
#define NULL
Definition: host.h:144
#define MAX_INT32
Definition: host.h:120
BlobRegionType blob_type() const
Definition: colpartition.h:148
tesseract::ColPartition::~ColPartition ( )

Definition at line 146 of file colpartition.cpp.

146  {
147  // Remove this as a partner of all partners, as we don't want them
148  // referring to a deleted object.
149  ColPartition_C_IT it(&upper_partners_);
150  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
151  it.data()->RemovePartner(false, this);
152  }
153  it.set_to_list(&lower_partners_);
154  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
155  it.data()->RemovePartner(true, this);
156  }
157 }

Member Function Documentation

void tesseract::ColPartition::Absorb ( ColPartition * other,
WidthCallback * cb
)

Definition at line 617 of file colpartition.cpp.

617  {
618  // The result has to either own all of the blobs or none of them.
619  // Verify the flag is consistent.
620  ASSERT_HOST(owns_blobs() == other->owns_blobs());
621  // TODO(nbeato): check owns_blobs better. Right now owns_blobs
622  // should always be true when this is called. So there are no issues.
623  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
624  bounding_box_.bottom()) ||
625  TabFind::WithinTestRegion(2, other->bounding_box_.left(),
626  other->bounding_box_.bottom())) {
627  tprintf("Merging:");
628  Print();
629  other->Print();
630  }
631 
632  // Update the special_blobs_densities_.
633  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
634  for (int type = 0; type < BSTT_COUNT; ++type) {
635  int w1 = boxes_.length(), w2 = other->boxes_.length();
636  float new_val = special_blobs_densities_[type] * w1 +
637  other->special_blobs_densities_[type] * w2;
638  if (!w1 || !w2) {
639  special_blobs_densities_[type] = new_val / (w1 + w2);
640  }
641  }
642 
643  // Merge the two sorted lists.
644  BLOBNBOX_C_IT it(&boxes_);
645  BLOBNBOX_C_IT it2(&other->boxes_);
646  for (; !it2.empty(); it2.forward()) {
647  BLOBNBOX* bbox2 = it2.extract();
648  ColPartition* prev_owner = bbox2->owner();
649  if (prev_owner != other && prev_owner != NULL) {
650  // A blob on other's list is owned by someone else; let them have it.
651  continue;
652  }
653  ASSERT_HOST(prev_owner == other || prev_owner == NULL);
654  if (prev_owner == other)
655  bbox2->set_owner(this);
656  it.add_to_end(bbox2);
657  }
658  left_margin_ = MIN(left_margin_, other->left_margin_);
659  right_margin_ = MAX(right_margin_, other->right_margin_);
660  if (other->left_key_ < left_key_) {
661  left_key_ = other->left_key_;
662  left_key_tab_ = other->left_key_tab_;
663  }
664  if (other->right_key_ > right_key_) {
665  right_key_ = other->right_key_;
666  right_key_tab_ = other->right_key_tab_;
667  }
668  // Combine the flow and blob_type in a sensible way.
669  // Dominant flows stay.
670  if (!DominatesInMerge(flow_, other->flow_)) {
671  flow_ = other->flow_;
672  blob_type_ = other->blob_type_;
673  }
674  SetBlobTypes();
675  if (IsVerticalType()) {
676  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
677  last_add_was_vertical_ = true;
678  } else {
679  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
680  last_add_was_vertical_ = false;
681  }
682  ComputeLimits();
683  // Fix partner lists. other is going away, so remove it as a
684  // partner of all its partners and add this in its place.
685  for (int upper = 0; upper < 2; ++upper) {
686  ColPartition_CLIST partners;
687  ColPartition_C_IT part_it(&partners);
688  part_it.add_list_after(upper ? &other->upper_partners_
689  : &other->lower_partners_);
690  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
691  ColPartition* partner = part_it.extract();
692  partner->RemovePartner(!upper, other);
693  partner->RemovePartner(!upper, this);
694  partner->AddPartner(!upper, this);
695  }
696  }
697  delete other;
698  if (cb != NULL) {
699  SetColumnGoodness(cb);
700  }
701 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:332
bool owns_blobs() const
Definition: colpartition.h:291
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
tesseract::ColPartition * owner() const
Definition: blobbox.h:329
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
Definition: blobbox.h:114
#define MIN(x, y)
Definition: ndminx.h:28
#define MAX(x, y)
Definition: ndminx.h:24
PolyBlockType type() const
Definition: colpartition.h:181
bool IsVerticalType() const
Definition: colpartition.h:431
#define ASSERT_HOST(x)
Definition: errcode.h:84
static bool WithinTestRegion(int detail_level, int x, int y)
void SetColumnGoodness(WidthCallback *cb)
inT16 bottom() const
Definition: rect.h:60
void tesseract::ColPartition::AddBox ( BLOBNBOX * box)

Definition at line 180 of file colpartition.cpp.

180  {
181  TBOX box = bbox->bounding_box();
182  // Update the partition limits.
183  if (boxes_.length() == 0) {
184  bounding_box_ = box;
185  } else {
186  bounding_box_ += box;
187  }
188 
189  if (IsVerticalType()) {
190  if (!last_add_was_vertical_) {
191  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
192  last_add_was_vertical_ = true;
193  }
194  boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
195  } else {
196  if (last_add_was_vertical_) {
197  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
198  last_add_was_vertical_ = false;
199  }
200  boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
201  }
202  if (!left_key_tab_)
203  left_key_ = BoxLeftKey();
204  if (!right_key_tab_)
205  right_key_ = BoxRightKey();
206  if (TabFind::WithinTestRegion(2, box.left(), box.bottom()))
207  tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
208  box.left(), box.bottom(), box.right(), box.top(),
209  bounding_box_.left(), bounding_box_.right());
210 }
const TBOX & bounding_box() const
Definition: blobbox.h:208
inT16 left() const
Definition: rect.h:67
TBOX
Definition: rect.h:29
inT16 right() const
Definition: rect.h:74
inT16 top() const
Definition: rect.h:53
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
bool IsVerticalType() const
Definition: colpartition.h:431
static bool WithinTestRegion(int detail_level, int x, int y)
int BoxRightKey() const
Definition: colpartition.h:336
inT16 bottom() const
Definition: rect.h:60
void tesseract::ColPartition::AddPartner ( bool  upper,
ColPartition * partner
)

Definition at line 582 of file colpartition.cpp.

582  {
583  if (upper) {
584  partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
585  true, this);
586  upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
587  } else {
588  partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
589  true, this);
590  lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
591  }
592 }
void tesseract::ColPartition::AddToWorkingSet ( const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
ColPartition_LIST *  used_parts,
WorkingPartSet_LIST *  working_set 
)

Definition at line 1313 of file colpartition.cpp.

1316  {
1317  if (block_owned_)
1318  return; // Done it already.
1319  block_owned_ = true;
1320  WorkingPartSet_IT it(working_sets);
1321  // If there is an upper partner use its working_set_ directly.
1322  ColPartition* partner = SingletonPartner(true);
1323  if (partner != NULL && partner->working_set_ != NULL) {
1324  working_set_ = partner->working_set_;
1325  working_set_->AddPartition(this);
1326  return;
1327  }
1328  if (partner != NULL && textord_debug_bugs) {
1329  tprintf("Partition with partner has no working set!:");
1330  Print();
1331  partner->Print();
1332  }
1333  // Search for the column that the left edge fits in.
1334  WorkingPartSet* work_set = NULL;
1335  it.move_to_first();
1336  int col_index = 0;
1337  for (it.mark_cycle_pt(); !it.cycled_list() &&
1338  col_index != first_column_;
1339  it.forward(), ++col_index);
1340  if (textord_debug_tabfind >= 2) {
1341  tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");
1342  Print();
1343  }
1344  if (it.cycled_list() && textord_debug_bugs) {
1345  tprintf("Target column=%d, only had %d\n", first_column_, col_index);
1346  }
1347  ASSERT_HOST(!it.cycled_list());
1348  work_set = it.data();
1349  // If last_column_ != first_column, then we need to scoop up all blocks
1350  // between here and the last_column_ and put back in work_set.
1351  if (!it.cycled_list() && last_column_ != first_column_) {
1352  // Find the column that the right edge falls in.
1353  BLOCK_LIST completed_blocks;
1354  TO_BLOCK_LIST to_blocks;
1355  for (; !it.cycled_list() && col_index <= last_column_;
1356  it.forward(), ++col_index) {
1357  WorkingPartSet* end_set = it.data();
1358  end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
1359  &completed_blocks, &to_blocks);
1360  }
1361  work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1362  }
1363  working_set_ = work_set;
1364  work_set->AddPartition(this);
1365 }
ColPartition * SingletonPartner(bool upper)
void AddPartition(ColPartition *part)
#define NULL
Definition: host.h:144
int textord_debug_tabfind
Definition: alignedblob.cpp:28
int textord_debug_bugs
Definition: alignedblob.cpp:29
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
#define ASSERT_HOST(x)
Definition: errcode.h:84
BLOBNBOX * tesseract::ColPartition::BiggestBox ( )

Definition at line 226 of file colpartition.cpp.

226  {
227  BLOBNBOX* biggest = NULL;
228  BLOBNBOX_C_IT bb_it(&boxes_);
229  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
230  BLOBNBOX* bbox = bb_it.data();
231  if (IsVerticalType()) {
232  if (biggest == NULL ||
233  bbox->bounding_box().width() > biggest->bounding_box().width())
234  biggest = bbox;
235  } else {
236  if (biggest == NULL ||
237  bbox->bounding_box().height() > biggest->bounding_box().height())
238  biggest = bbox;
239  }
240  }
241  return biggest;
242 }
const TBOX & bounding_box() const
Definition: blobbox.h:208
#define NULL
Definition: host.h:144
inT16 width() const
Definition: rect.h:104
bool IsVerticalType() const
Definition: colpartition.h:431
inT16 height() const
Definition: rect.h:97
BlobRegionType tesseract::ColPartition::blob_type ( ) const
inline

Definition at line 148 of file colpartition.h.

148  {
149  return blob_type_;
150  }
bool tesseract::ColPartition::block_owned ( ) const
inline

Definition at line 205 of file colpartition.h.

205  {
206  return block_owned_;
207  }
int tesseract::ColPartition::bottom_spacing ( ) const
inline

Definition at line 220 of file colpartition.h.

220  {
221  return bottom_spacing_;
222  }
const TBOX& tesseract::ColPartition::bounding_box ( ) const
inline

Definition at line 109 of file colpartition.h.

109  {
110  return bounding_box_;
111  }
TBOX tesseract::ColPartition::BoundsWithoutBox ( BLOBNBOX * box)

Definition at line 245 of file colpartition.cpp.

245  {
246  TBOX result;
247  BLOBNBOX_C_IT bb_it(&boxes_);
248  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
249  if (box != bb_it.data()) {
250  result += bb_it.data()->bounding_box();
251  }
252  }
253  return result;
254 }
TBOX
Definition: rect.h:29
ScrollView::Color tesseract::ColPartition::BoxColor ( ) const

Definition at line 1724 of file colpartition.cpp.

1724  {
1725  if (type_ == PT_UNKNOWN)
1726  return BLOBNBOX::TextlineColor(blob_type_, flow_);
1727  return POLY_BLOCK::ColorForPolyBlockType(type_);
1728 }
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
Definition: blobbox.cpp:393
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
Definition: polyblk.cpp:398
BLOBNBOX_CLIST* tesseract::ColPartition::boxes ( )
inline

Definition at line 187 of file colpartition.h.

187  {
188  return &boxes_;
189  }
int tesseract::ColPartition::boxes_count ( ) const
inline

Definition at line 190 of file colpartition.h.

190  {
191  return boxes_.length();
192  }
int tesseract::ColPartition::BoxLeftKey ( ) const
inline

Definition at line 332 of file colpartition.h.

332  {
333  return SortKey(bounding_box_.left(), MidY());
334  }
inT16 left() const
Definition: rect.h:67
int SortKey(int x, int y) const
Definition: colpartition.h:316
int tesseract::ColPartition::BoxRightKey ( ) const
inline

Definition at line 336 of file colpartition.h.

336  {
337  return SortKey(bounding_box_.right(), MidY());
338  }
inT16 right() const
Definition: rect.h:74
int SortKey(int x, int y) const
Definition: colpartition.h:316
void tesseract::ColPartition::ClaimBoxes ( )

Definition at line 258 of file colpartition.cpp.

258  {
259  BLOBNBOX_C_IT bb_it(&boxes_);
260  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
261  BLOBNBOX* bblob = bb_it.data();
262  ColPartition* other = bblob->owner();
263  if (other == NULL) {
264  // Normal case: ownership is available.
265  bblob->set_owner(this);
266  } else {
267  ASSERT_HOST(other == this);
268  }
269  }
270 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:332
#define NULL
Definition: host.h:144
tesseract::ColPartition * owner() const
Definition: blobbox.h:329
#define ASSERT_HOST(x)
Definition: errcode.h:84
void tesseract::ColPartition::clear_table_type ( )
inline

Definition at line 239 of file colpartition.h.

239  {
240  if (type_ == PT_TABLE)
241  type_ = type_before_table_;
242  }
PT_TABLE
Definition: capi.h:62
uinT8* tesseract::ColPartition::color1 ( )
inline

Definition at line 285 of file colpartition.h.

285  {
286  return color1_;
287  }
uinT8* tesseract::ColPartition::color2 ( )
inline

Definition at line 288 of file colpartition.h.

288  {
289  return color2_;
290  }
ColPartitionSet* tesseract::ColPartition::column_set ( ) const
inline

Definition at line 214 of file colpartition.h.

214  {
215  return column_set_;
216  }
bool tesseract::ColPartition::ColumnContains ( int  x,
int  y 
) const
inline

Definition at line 353 of file colpartition.h.

353  {
354  return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
355  }
int RightAtY(int y) const
Definition: colpartition.h:344
int LeftAtY(int y) const
Definition: colpartition.h:340
void tesseract::ColPartition::ColumnRange ( int  resolution,
ColPartitionSet * columns,
int *  first_col,
int *  last_col 
)

Definition at line 1028 of file colpartition.cpp.

1029  {
1030  int first_spanned_col = -1;
1031  ColumnSpanningType span_type =
1032  columns->SpanningType(resolution,
1033  bounding_box_.left(), bounding_box_.right(),
1034  MidY(), left_margin_, right_margin_,
1035  first_col, last_col,
1036  &first_spanned_col);
1037  type_ = PartitionType(span_type);
1038 }
inT16 left() const
Definition: rect.h:67
inT16 right() const
Definition: rect.h:74
PolyBlockType PartitionType(ColumnSpanningType flow) const
int tesseract::ColPartition::ColumnWidth ( ) const
inline

Definition at line 328 of file colpartition.h.

328  {
329  return KeyWidth(left_key_, right_key_);
330  }
int KeyWidth(int left_key, int right_key) const
Definition: colpartition.h:324
void tesseract::ColPartition::ComputeLimits ( )

Definition at line 834 of file colpartition.cpp.

834  {
835  bounding_box_ = TBOX(); // Clear it
836  BLOBNBOX_C_IT it(&boxes_);
837  BLOBNBOX* bbox = NULL;
838  int non_leader_count = 0;
839  if (it.empty()) {
840  bounding_box_.set_left(left_margin_);
841  bounding_box_.set_right(right_margin_);
842  bounding_box_.set_bottom(0);
843  bounding_box_.set_top(0);
844  } else {
845  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
846  bbox = it.data();
847  bounding_box_ += bbox->bounding_box();
848  if (bbox->flow() != BTFT_LEADER)
849  ++non_leader_count;
850  }
851  }
852  if (!left_key_tab_)
853  left_key_ = BoxLeftKey();
854  if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
855  // TODO(rays) investigate the causes of these error messages, to find
856  // out if they are genuinely harmful, or just indicative of junk input.
857  tprintf("Computed left-illegal partition\n");
858  Print();
859  }
860  if (!right_key_tab_)
861  right_key_ = BoxRightKey();
862  if (right_key_ < BoxRightKey() && textord_debug_bugs) {
863  tprintf("Computed right-illegal partition\n");
864  Print();
865  }
866  if (it.empty())
867  return;
868  if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
869  blob_type() == BRT_POLYIMAGE) {
870  median_top_ = bounding_box_.top();
871  median_bottom_ = bounding_box_.bottom();
872  median_size_ = bounding_box_.height();
873  median_left_ = bounding_box_.left();
874  median_right_ = bounding_box_.right();
875  median_width_ = bounding_box_.width();
876  } else {
877  STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
878  STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
879  STATS size_stats(0, bounding_box_.height() + 1);
880  STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
881  STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
882  STATS width_stats(0, bounding_box_.width() + 1);
883  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
884  bbox = it.data();
885  if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
886  TBOX box = bbox->bounding_box();
887  int area = box.area();
888  top_stats.add(box.top(), area);
889  bottom_stats.add(box.bottom(), area);
890  size_stats.add(box.height(), area);
891  left_stats.add(box.left(), area);
892  right_stats.add(box.right(), area);
893  width_stats.add(box.width(), area);
894  }
895  }
896  median_top_ = static_cast<int>(top_stats.median() + 0.5);
897  median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
898  median_size_ = static_cast<int>(size_stats.median() + 0.5);
899  median_left_ = static_cast<int>(left_stats.median() + 0.5);
900  median_right_ = static_cast<int>(right_stats.median() + 0.5);
901  median_width_ = static_cast<int>(width_stats.median() + 0.5);
902  }
903 
904  if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
905  tprintf("Made partition with bad right coords");
906  Print();
907  }
908  if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
909  tprintf("Made partition with bad left coords");
910  Print();
911  }
912  // Fix partner lists. The bounding box has changed and partners are stored
913  // in bounding box order, so remove and reinsert this as a partner
914  // of all its partners.
915  for (int upper = 0; upper < 2; ++upper) {
916  ColPartition_CLIST partners;
917  ColPartition_C_IT part_it(&partners);
918  part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
919  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
920  ColPartition* partner = part_it.extract();
921  partner->RemovePartner(!upper, this);
922  partner->AddPartner(!upper, this);
923  }
924  }
925  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
926  bounding_box_.bottom())) {
927  tprintf("Recomputed box for partition %p\n", this);
928  Print();
929  }
930 }
void set_right(int x)
Definition: rect.h:77
inT32 area() const
Definition: rect.h:111
const TBOX & bounding_box() const
Definition: blobbox.h:208
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
inT16 width() const
Definition: rect.h:104
TBOX
Definition: rect.h:29
inT16 right() const
Definition: rect.h:74
int textord_debug_bugs
Definition: alignedblob.cpp:29
inT16 top() const
Definition: rect.h:53
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
STATS
Definition: statistc.h:29
bool IsImageType() const
Definition: colpartition.h:423
BlobTextFlowType flow() const
Definition: blobbox.h:272
void set_bottom(int y)
Definition: rect.h:63
BlobRegionType blob_type() const
Definition: colpartition.h:148
static bool WithinTestRegion(int detail_level, int x, int y)
void set_top(int y)
Definition: rect.h:56
void set_left(int x)
Definition: rect.h:70
inT16 height() const
Definition: rect.h:97
int BoxRightKey() const
Definition: colpartition.h:336
inT16 bottom() const
Definition: rect.h:60
void tesseract::ColPartition::ComputeSpecialBlobsDensity ( )

Definition at line 561 of file colpartition.cpp.

561  {
562  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
563  if (boxes_.empty()) {
564  return;
565  }
566 
567  BLOBNBOX_C_IT blob_it(&boxes_);
568  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
569  BLOBNBOX* blob = blob_it.data();
570  BlobSpecialTextType type = blob->special_text_type();
571  special_blobs_densities_[type]++;
572  }
573 
574  for (int type = 0; type < BSTT_COUNT; ++type) {
575  special_blobs_densities_[type] /= boxes_.length();
576  }
577 }
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:266
BlobSpecialTextType
Definition: blobbox.h:81
PolyBlockType type() const
Definition: colpartition.h:181
bool tesseract::ColPartition::ConfirmNoTabViolation ( const ColPartition & other) const

Definition at line 392 of file colpartition.cpp.

392  {
393  if (bounding_box_.right() < other.bounding_box_.left() &&
394  bounding_box_.right() < other.LeftBlobRule())
395  return false;
396  if (other.bounding_box_.right() < bounding_box_.left() &&
397  other.bounding_box_.right() < LeftBlobRule())
398  return false;
399  if (bounding_box_.left() > other.bounding_box_.right() &&
400  bounding_box_.left() > other.RightBlobRule())
401  return false;
402  if (other.bounding_box_.left() > bounding_box_.right() &&
403  other.bounding_box_.left() > RightBlobRule())
404  return false;
405  return true;
406 }
inT16 left() const
Definition: rect.h:67
inT16 right() const
Definition: rect.h:74
ColPartition * tesseract::ColPartition::CopyButDontOwnBlobs ( )

Definition at line 1711 of file colpartition.cpp.

1711  {
1712  ColPartition* copy = ShallowCopy();
1713  copy->set_owns_blobs(false);
1714  BLOBNBOX_C_IT inserter(copy->boxes());
1715  BLOBNBOX_C_IT traverser(boxes());
1716  for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward())
1717  inserter.add_after_then_move(traverser.data());
1718  return copy;
1719 }
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:187
ColPartition * ShallowCopy() const
void tesseract::ColPartition::CopyLeftTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 498 of file colpartition.cpp.

498  {
499  left_key_tab_ = take_box ? false : src.left_key_tab_;
500  if (left_key_tab_) {
501  left_key_ = src.left_key_;
502  } else {
503  bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY()));
504  left_key_ = BoxLeftKey();
505  }
506  if (left_margin_ > bounding_box_.left())
507  left_margin_ = src.left_margin_;
508 }
inT16 left() const
Definition: rect.h:67
void set_left(int x)
Definition: rect.h:70
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320
void tesseract::ColPartition::CopyRightTab ( const ColPartition &  src,
bool  take_box 
)

Definition at line 511 of file colpartition.cpp.

511  {
512  right_key_tab_ = take_box ? false : src.right_key_tab_;
513  if (right_key_tab_) {
514  right_key_ = src.right_key_;
515  } else {
516  bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY()));
517  right_key_ = BoxRightKey();
518  }
519  if (right_margin_ < bounding_box_.right())
520  right_margin_ = src.right_margin_;
521 }
void set_right(int x)
Definition: rect.h:77
inT16 right() const
Definition: rect.h:74
int BoxRightKey() const
Definition: colpartition.h:336
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320
int tesseract::ColPartition::CountOverlappingBoxes ( const TBOX &  box)

Definition at line 933 of file colpartition.cpp.

933  {
934  BLOBNBOX_C_IT it(&boxes_);
935  int overlap_count = 0;
936  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
937  BLOBNBOX* bbox = it.data();
938  if (box.overlap(bbox->bounding_box()))
939  ++overlap_count;
940  }
941  return overlap_count;
942 }
const TBOX & bounding_box() const
Definition: blobbox.h:208
bool overlap(const TBOX &box) const
Definition: rect.h:345
void tesseract::ColPartition::DeleteBoxes ( )

Definition at line 284 of file colpartition.cpp.

284  {
285  // Although the boxes_ list is a C_LIST, in some cases it owns the
286  // BLOBNBOXes, as the ColPartition takes ownership from the grid,
287  // and the BLOBNBOXes own the underlying C_BLOBs.
288  for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
289  BLOBNBOX* bblob = bb_it.extract();
290  delete bblob->cblob();
291  delete bblob;
292  }
293 }
C_BLOB * cblob() const
Definition: blobbox.h:245
bool tesseract::ColPartition::desperately_merged ( ) const
inline

Definition at line 211 of file colpartition.h.

211  {
212  return desperately_merged_;
213  }
void tesseract::ColPartition::DisownBoxes ( )

Definition at line 274 of file colpartition.cpp.

274  {
275  BLOBNBOX_C_IT bb_it(&boxes_);
276  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
277  BLOBNBOX* bblob = bb_it.data();
278  ASSERT_HOST(bblob->owner() == this || bblob->owner() == NULL);
279  bblob->set_owner(NULL);
280  }
281 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:332
#define NULL
Definition: host.h:144
tesseract::ColPartition * owner() const
Definition: blobbox.h:329
#define ASSERT_HOST(x)
Definition: errcode.h:84
ColPartition * tesseract::ColPartition::FakePartition ( const TBOX &  box,
PolyBlockType  block_type,
BlobRegionType  blob_type,
BlobTextFlowType  flow 
)
static

Definition at line 108 of file colpartition.cpp.

111  {
112  ColPartition* part = new ColPartition(blob_type, ICOORD(0, 1));
113  part->set_type(block_type);
114  part->set_flow(flow);
115  part->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(box)));
116  part->set_left_margin(box.left());
117  part->set_right_margin(box.right());
118  part->SetBlobTypes();
119  part->ComputeLimits();
120  part->ClaimBoxes();
121  return part;
122 }
inT16 left() const
Definition: rect.h:67
inT16 right() const
Definition: rect.h:74
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:180
integer coordinate
Definition: points.h:30
BlobTextFlowType flow() const
Definition: colpartition.h:154
BlobRegionType blob_type() const
Definition: colpartition.h:148
BlobTextFlowType tesseract::ColPartition::flow ( ) const
inline

Definition at line 154 of file colpartition.h.

154  {
155  return flow_;
156  }
int tesseract::ColPartition::good_blob_score ( ) const
inline

Definition at line 160 of file colpartition.h.

160  {
161  return good_blob_score_;
162  }
bool tesseract::ColPartition::good_column ( ) const
inline

Definition at line 166 of file colpartition.h.

166  {
167  return good_column_;
168  }
bool tesseract::ColPartition::good_width ( ) const
inline

Definition at line 163 of file colpartition.h.

163  {
164  return good_width_;
165  }
bool tesseract::ColPartition::HasGoodBaseline ( )

Definition at line 1248 of file colpartition.cpp.

1248  {
1249  // Approximation of the baseline.
1250  DetLineFit linepoints;
1251  // Calculation of the mean height on this line segment. Note that these
1252  // variable names apply to the context of a horizontal line, and work
1253  // analogously, rather than literally in the case of a vertical line.
1254  int total_height = 0;
1255  int coverage = 0;
1256  int height_count = 0;
1257  int width = 0;
1258  BLOBNBOX_C_IT it(&boxes_);
1259  TBOX box(it.data()->bounding_box());
1260  // Accumulate points representing the baseline at the middle of each blob,
1261  // but add an additional point for each end of the line. This makes it
1262  // harder to fit a severe skew angle, as it is most likely not right.
1263  if (IsVerticalType()) {
1264  // For a vertical line, use the right side as the baseline.
1265  ICOORD first_pt(box.right(), box.bottom());
1266  // Use the bottom-right of the first (bottom) box, the top-right of the
1267  // last, and the middle-right of all others.
1268  linepoints.Add(first_pt);
1269  for (it.forward(); !it.at_last(); it.forward()) {
1270  BLOBNBOX* blob = it.data();
1271  box = blob->bounding_box();
1272  ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1273  linepoints.Add(box_pt);
1274  total_height += box.width();
1275  coverage += box.height();
1276  ++height_count;
1277  }
1278  box = it.data()->bounding_box();
1279  ICOORD last_pt(box.right(), box.top());
1280  linepoints.Add(last_pt);
1281  width = last_pt.y() - first_pt.y();
1282 
1283  } else {
1284  // Horizontal lines use the bottom as the baseline.
1285  TBOX box(it.data()->bounding_box());
1286  // Use the bottom-left of the first box, the bottom-right of the last,
1287  // and the middle of all others.
1288  ICOORD first_pt(box.left(), box.bottom());
1289  linepoints.Add(first_pt);
1290  for (it.forward(); !it.at_last(); it.forward()) {
1291  BLOBNBOX* blob = it.data();
1292  box = blob->bounding_box();
1293  ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1294  linepoints.Add(box_pt);
1295  total_height += box.height();
1296  coverage += box.width();
1297  ++height_count;
1298  }
1299  box = it.data()->bounding_box();
1300  ICOORD last_pt(box.right(), box.bottom());
1301  linepoints.Add(last_pt);
1302  width = last_pt.x() - first_pt.x();
1303  }
1304  // Maximum median error allowed to be a good text line.
1305  double max_error = kMaxBaselineError * total_height / height_count;
1306  ICOORD start_pt, end_pt;
1307  double error = linepoints.Fit(&start_pt, &end_pt);
1308  return error < max_error && coverage >= kMinBaselineCoverage * width;
1309 }
inT16 x() const
access function
Definition: points.h:52
const TBOX & bounding_box() const
Definition: blobbox.h:208
const double kMinBaselineCoverage
Definition: rect.h:29
inT16 y() const
access_function
Definition: points.h:56
integer coordinate
Definition: points.h:30
bool IsVerticalType() const
Definition: colpartition.h:431
const double kMaxBaselineError
int tesseract::ColPartition::HCoreOverlap ( const ColPartition &  other) const
inline

Definition at line 381 of file colpartition.h.

381  {
382  return MIN(median_right_, other.median_right_) -
383  MAX(median_left_, other.median_left_);
384  }
#define MIN(x, y)
Definition: ndminx.h:28
#define MAX(x, y)
Definition: ndminx.h:24
bool tesseract::ColPartition::HOverlaps ( const ColPartition &  other) const
inline

Definition at line 365 of file colpartition.h.

365  {
366  return bounding_box_.x_overlap(other.bounding_box_);
367  }
bool x_overlap(const TBOX &box) const
Definition: rect.h:391
bool tesseract::ColPartition::inside_table_column ( )
inline

Definition at line 243 of file colpartition.h.

243  {
244  return inside_table_column_;
245  }
bool tesseract::ColPartition::IsEmpty ( ) const
inline

Definition at line 357 of file colpartition.h.

357  {
358  return boxes_.empty();
359  }
bool tesseract::ColPartition::IsHorizontalLine ( ) const
inline

Definition at line 449 of file colpartition.h.

449  {
450  return IsHorizontalType() && IsLineType();
451  }
bool IsLineType() const
Definition: colpartition.h:419
bool IsHorizontalType() const
Definition: colpartition.h:435
bool tesseract::ColPartition::IsHorizontalType ( ) const
inline

Definition at line 435 of file colpartition.h.

435  {
436  return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE;
437  }
bool tesseract::ColPartition::IsImageType ( ) const
inline

Definition at line 423 of file colpartition.h.

423  {
424  return PTIsImageType(type_);
425  }
bool PTIsImageType(PolyBlockType type)
Definition: publictypes.h:65
bool tesseract::ColPartition::IsInSameColumnAs ( const ColPartition &  part) const

Definition at line 2128 of file colpartition.cpp.

2128  {
2129  // Overlap does not occur when last < part.first or first > part.last.
2130  // In other words, one is completely to the side of the other.
2131  // This is just DeMorgan's law applied to that so the function returns true.
2132  return (last_column_ >= part.first_column_) &&
2133  (first_column_ <= part.last_column_);
2134 }
bool tesseract::ColPartition::IsLeftOf ( const ColPartition &  other) const
inline

Definition at line 349 of file colpartition.h.

349  {
350  return bounding_box_.right() < other.bounding_box_.right();
351  }
inT16 right() const
Definition: rect.h:74
bool tesseract::ColPartition::IsLegal ( )

Definition at line 321 of file colpartition.cpp.

321  {
322  if (bounding_box_.left() > bounding_box_.right()) {
323  if (textord_debug_bugs) {
324  tprintf("Bounding box invalid\n");
325  Print();
326  }
327  return false; // Bounding box invalid.
328  }
329  if (left_margin_ > bounding_box_.left() ||
330  right_margin_ < bounding_box_.right()) {
331  if (textord_debug_bugs) {
332  tprintf("Margins invalid\n");
333  Print();
334  }
335  return false; // Margins invalid.
336  }
337  if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) {
338  if (textord_debug_bugs) {
339  tprintf("Key inside box: %d v %d or %d v %d\n",
340  left_key_, BoxLeftKey(), right_key_, BoxRightKey());
341  Print();
342  }
343  return false; // Keys inside the box.
344  }
345  return true;
346 }
inT16 left() const
Definition: rect.h:67
inT16 right() const
Definition: rect.h:74
int textord_debug_bugs
Definition: alignedblob.cpp:29
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int BoxRightKey() const
Definition: colpartition.h:336
bool tesseract::ColPartition::IsLineType ( ) const
inline

Definition at line 419 of file colpartition.h.

419  {
420  return PTIsLineType(type_);
421  }
bool PTIsLineType(PolyBlockType type)
Definition: publictypes.h:61
bool tesseract::ColPartition::IsSingleton ( ) const
inline

Definition at line 361 of file colpartition.h.

361  {
362  return boxes_.singleton();
363  }
bool tesseract::ColPartition::IsTextType ( ) const
inline

Definition at line 427 of file colpartition.h.

427  {
428  return PTIsTextType(type_);
429  }
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:70
bool tesseract::ColPartition::IsUnMergeableType ( ) const
inline

Definition at line 439 of file colpartition.h.

439  {
440  return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;
441  }
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:403
Definition: capi.h:64
bool tesseract::ColPartition::IsVerticalLine ( ) const
inline

Definition at line 444 of file colpartition.h.

444  {
445  return IsVerticalType() && IsLineType();
446  }
bool IsLineType() const
Definition: colpartition.h:419
bool IsVerticalType() const
Definition: colpartition.h:431
bool tesseract::ColPartition::IsVerticalType ( ) const
inline

Definition at line 431 of file colpartition.h.

431  {
432  return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
433  }
int tesseract::ColPartition::KeyWidth ( int  left_key,
int  right_key 
) const
inline

Definition at line 324 of file colpartition.h.

324  {
325  return (right_key - left_key) / vertical_.y();
326  }
inT16 y() const
access_function
Definition: points.h:56
int tesseract::ColPartition::left_key ( ) const
inline

Definition at line 172 of file colpartition.h.

172  {
173  return left_key_;
174  }
bool tesseract::ColPartition::left_key_tab ( ) const
inline

Definition at line 169 of file colpartition.h.

169  {
170  return left_key_tab_;
171  }
int tesseract::ColPartition::left_margin ( ) const
inline

Definition at line 112 of file colpartition.h.

112  {
113  return left_margin_;
114  }
int tesseract::ColPartition::LeftAtY ( int  y) const
inline

Definition at line 340 of file colpartition.h.

340  {
341  return XAtY(left_key_, y);
342  }
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320
int tesseract::ColPartition::LeftBlobRule ( ) const

Definition at line 524 of file colpartition.cpp.

524  {
525  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
526  return it.data()->left_rule();
527 }
void tesseract::ColPartition::LineSpacingBlocks ( const ICOORD &  bleft,
const ICOORD &  tright,
int  resolution,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts,
BLOCK_LIST *  completed_blocks,
TO_BLOCK_LIST *  to_blocks 
)
static

Definition at line 1373 of file colpartition.cpp.

1378  {
1379  int page_height = tright.y() - bleft.y();
1380  // Compute the initial spacing stats.
1381  ColPartition_IT it(block_parts);
1382  int part_count = 0;
1383  int max_line_height = 0;
1384 
1385  // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
1386  // because their line spacing with their neighbors maybe smaller and their
1387  // height may be slightly larger.
1388 
1389  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1390  ColPartition* part = it.data();
1391  ASSERT_HOST(!part->boxes()->empty());
1392  STATS side_steps(0, part->bounding_box().height());
1393  if (part->bounding_box().height() > max_line_height)
1394  max_line_height = part->bounding_box().height();
1395  BLOBNBOX_C_IT blob_it(part->boxes());
1396  int prev_bottom = blob_it.data()->bounding_box().bottom();
1397  for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1398  BLOBNBOX* blob = blob_it.data();
1399  int bottom = blob->bounding_box().bottom();
1400  int step = bottom - prev_bottom;
1401  if (step < 0)
1402  step = -step;
1403  side_steps.add(step, 1);
1404  prev_bottom = bottom;
1405  }
1406  part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
1407  if (!it.at_last()) {
1408  ColPartition* next_part = it.data_relative(1);
1409  part->set_bottom_spacing(part->median_bottom() -
1410  next_part->median_bottom());
1411  part->set_top_spacing(part->median_top() - next_part->median_top());
1412  } else {
1413  part->set_bottom_spacing(page_height);
1414  part->set_top_spacing(page_height);
1415  }
1416  if (textord_debug_tabfind) {
1417  part->Print();
1418  tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1419  side_steps.median(), part->top_spacing(), part->bottom_spacing());
1420  }
1421  ++part_count;
1422  }
1423  if (part_count == 0)
1424  return;
1425 
1426  SmoothSpacings(resolution, page_height, block_parts);
1427 
1428  // Move the partitions into individual block lists and make the blocks.
1429  BLOCK_IT block_it(completed_blocks);
1430  TO_BLOCK_IT to_block_it(to_blocks);
1431  ColPartition_LIST spacing_parts;
1432  ColPartition_IT sp_block_it(&spacing_parts);
1433  int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
1434  for (it.mark_cycle_pt(); !it.empty();) {
1435  ColPartition* part = it.extract();
1436  sp_block_it.add_to_end(part);
1437  it.forward();
1438  if (it.empty() || part->bottom_spacing() > same_block_threshold ||
1439  !part->SpacingsEqual(*it.data(), resolution)) {
1440  // There is a spacing boundary. Check to see if it.data() belongs
1441  // better in the current block or the next one.
1442  if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
1443  ColPartition* next_part = it.data();
1444  // If there is a size match one-way, then the middle line goes with
1445  // its matched size, otherwise it goes with the smallest spacing.
1446  ColPartition* third_part = it.at_last() ? NULL : it.data_relative(1);
1447  if (textord_debug_tabfind) {
1448  tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d,"
1449  " sizes %d %d %d\n",
1450  part->top_spacing(), part->bottom_spacing(),
1451  next_part->top_spacing(), next_part->bottom_spacing(),
1452  part->median_size(), next_part->median_size(),
1453  third_part != NULL ? third_part->median_size() : 0);
1454  }
1455  // We can only consider adding the next line to the block if the sizes
1456  // match and the lines are close enough for their size.
1457  if (part->SizesSimilar(*next_part) &&
1458  next_part->median_size() * kMaxSameBlockLineSpacing >
1459  part->bottom_spacing() &&
1460  part->median_size() * kMaxSameBlockLineSpacing >
1461  part->top_spacing()) {
1462  // Even now, we can only add it as long as the third line doesn't
1463  // match in the same way and have a smaller bottom spacing.
1464  if (third_part == NULL ||
1465  !next_part->SizesSimilar(*third_part) ||
1466  third_part->median_size() * kMaxSameBlockLineSpacing <=
1467  next_part->bottom_spacing() ||
1468  next_part->median_size() * kMaxSameBlockLineSpacing <=
1469  next_part->top_spacing() ||
1470  next_part->bottom_spacing() > part->bottom_spacing()) {
1471  // Add to the current block.
1472  sp_block_it.add_to_end(it.extract());
1473  it.forward();
1474  if (textord_debug_tabfind) {
1475  tprintf("Added line to current block.\n");
1476  }
1477  }
1478  }
1479  }
1480  TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
1481  if (to_block != NULL) {
1482  to_block_it.add_to_end(to_block);
1483  block_it.add_to_end(to_block->block);
1484  }
1485  sp_block_it.set_to_list(&spacing_parts);
1486  } else {
1487  if (textord_debug_tabfind && !it.empty()) {
1488  ColPartition* next_part = it.data();
1489  tprintf("Spacings equal: upper:%d/%d, lower:%d/%d\n",
1490  part->top_spacing(), part->bottom_spacing(),
1491  next_part->top_spacing(), next_part->bottom_spacing(),
1492  part->median_size(), next_part->median_size());
1493  }
1494  }
1495  }
1496 }
const double kMaxSameBlockLineSpacing
const TBOX & bounding_box() const
Definition: blobbox.h:208
#define NULL
Definition: host.h:144
int textord_debug_tabfind
Definition: alignedblob.cpp:28
inT16 y() const
access_function
Definition: points.h:56
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
Definition: statistc.h:29
#define ASSERT_HOST(x)
Definition: errcode.h:84
BLOCK * block
Definition: blobbox.h:740
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
inT16 bottom() const
Definition: rect.h:60
ColPartition_CLIST* tesseract::ColPartition::lower_partners ( )
inline

Definition at line 199 of file colpartition.h.

199  {
200  return &lower_partners_;
201  }
ColPartition * tesseract::ColPartition::MakeBigPartition ( BLOBNBOX *  box,
ColPartition_LIST *  big_part_list 
)
static

Definition at line 129 of file colpartition.cpp.

130  {
131  box->set_owner(NULL);
132  ColPartition* single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
133  single->set_flow(BTFT_NONE);
134  single->AddBox(box);
135  single->ComputeLimits();
136  single->ClaimBoxes();
137  single->SetBlobTypes();
138  single->set_block_owned(true);
139  if (big_part_list != NULL) {
140  ColPartition_IT part_it(big_part_list);
141  part_it.add_to_end(single);
142  }
143  return single;
144 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:332
#define NULL
Definition: host.h:144
integer coordinate
Definition: points.h:30
TO_BLOCK * tesseract::ColPartition::MakeBlock ( const ICOORD &  bleft,
const ICOORD &  tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1605 of file colpartition.cpp.

1607  {
1608  if (block_parts->empty())
1609  return NULL; // Nothing to do.
1610  ColPartition_IT it(block_parts);
1611  ColPartition* part = it.data();
1612  PolyBlockType type = part->type();
1613  if (type == PT_VERTICAL_TEXT)
1614  return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);
1615  // LineSpacingBlocks has handed us a collection of evenly spaced lines and
1616  // put the average spacing in each partition, so we can just take the
1617  // linespacing from the first partition.
1618  int line_spacing = part->bottom_spacing();
1619  if (line_spacing < part->median_size())
1620  line_spacing = part->bounding_box().height();
1621  ICOORDELT_LIST vertices;
1622  ICOORDELT_IT vert_it(&vertices);
1623  ICOORD start, end;
1624  int min_x = MAX_INT32;
1625  int max_x = -MAX_INT32;
1626  int min_y = MAX_INT32;
1627  int max_y = -MAX_INT32;
1628  int iteration = 0;
1629  do {
1630  if (iteration == 0)
1631  ColPartition::LeftEdgeRun(&it, &start, &end);
1632  else
1633  ColPartition::RightEdgeRun(&it, &start, &end);
1634  ClipCoord(bleft, tright, &start);
1635  ClipCoord(bleft, tright, &end);
1636  vert_it.add_after_then_move(new ICOORDELT(start));
1637  vert_it.add_after_then_move(new ICOORDELT(end));
1638  UpdateRange(start.x(), &min_x, &max_x);
1639  UpdateRange(end.x(), &min_x, &max_x);
1640  UpdateRange(start.y(), &min_y, &max_y);
1641  UpdateRange(end.y(), &min_y, &max_y);
1642  if ((iteration == 0 && it.at_first()) ||
1643  (iteration == 1 && it.at_last())) {
1644  ++iteration;
1645  it.move_to_last();
1646  }
1647  } while (iteration < 2);
1648  if (textord_debug_tabfind)
1649  tprintf("Making block at (%d,%d)->(%d,%d)\n",
1650  min_x, min_y, max_x, max_y);
1651  BLOCK* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y);
1652  block->set_poly_block(new POLY_BLOCK(&vertices, type));
1653  return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts);
1654 }
int median_size() const
Definition: colpartition.h:136
inT16 x() const
access function
Definition: points.h:52
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:66
#define NULL
Definition: host.h:144
int textord_debug_tabfind
Definition: alignedblob.cpp:28
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
inT16 y() const
access_function
Definition: points.h:56
PolyBlockType
Definition: publictypes.h:41
#define MAX_INT32
Definition: host.h:120
Definition: ocrblock.h:31
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
integer coordinate
Definition: points.h:30
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:74
PolyBlockType type() const
Definition: colpartition.h:181
ColPartition * tesseract::ColPartition::MakeLinePartition ( BlobRegionType  blob_type,
const ICOORD &  vertical,
int  left,
int  bottom,
int  right,
int  top 
)
static

Constructs a fake ColPartition with no BLOBNBOXes to represent a horizontal or vertical line, given a type and a bounding box.

Definition at line 161 of file colpartition.cpp.

164  {
165  ColPartition* part = new ColPartition(blob_type, vertical);
166  part->bounding_box_ = TBOX(left, bottom, right, top);
167  part->median_bottom_ = bottom;
168  part->median_top_ = top;
169  part->median_size_ = top - bottom;
170  part->median_width_ = right - left;
171  part->left_key_ = part->BoxLeftKey();
172  part->right_key_ = part->BoxRightKey();
173  return part;
174 }
Definition: rect.h:29
BlobRegionType blob_type() const
Definition: colpartition.h:148
TO_BLOCK * tesseract::ColPartition::MakeVerticalTextBlock ( const ICOORD &  bleft,
const ICOORD &  tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1658 of file colpartition.cpp.

1661  {
1662  if (block_parts->empty())
1663  return NULL; // Nothing to do.
1664  ColPartition_IT it(block_parts);
1665  ColPartition* part = it.data();
1666  TBOX block_box = part->bounding_box();
1667  int line_spacing = block_box.width();
1668  PolyBlockType type = it.data()->type();
1669  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1670  block_box += it.data()->bounding_box();
1671  }
1672  if (textord_debug_tabfind) {
1673  tprintf("Making block at:");
1674  block_box.print();
1675  }
1676  BLOCK* block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(),
1677  block_box.right(), block_box.top());
1678  block->set_poly_block(new POLY_BLOCK(block_box, type));
1679  return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
1680 }
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:66
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
inT16 width() const
Definition: rect.h:104
Definition: rect.h:29
int textord_debug_tabfind
Definition: alignedblob.cpp:28
inT16 right() const
Definition: rect.h:74
PolyBlockType
Definition: publictypes.h:41
Definition: ocrblock.h:31
inT16 top() const
Definition: rect.h:53
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
PolyBlockType type() const
Definition: colpartition.h:181
void print() const
Definition: rect.h:263
inT16 bottom() const
Definition: rect.h:60
bool tesseract::ColPartition::MarkAsLeaderIfMonospaced ( )

Definition at line 1054 of file colpartition.cpp.

1054  {
1055  bool result = false;
1056  // Gather statistics on the gaps between blobs and the widths of the blobs.
1057  int part_width = bounding_box_.width();
1058  STATS gap_stats(0, part_width);
1059  STATS width_stats(0, part_width);
1060  BLOBNBOX_C_IT it(&boxes_);
1061  BLOBNBOX* prev_blob = it.data();
1062  prev_blob->set_flow(BTFT_NEIGHBOURS);
1063  width_stats.add(prev_blob->bounding_box().width(), 1);
1064  int blob_count = 1;
1065  for (it.forward(); !it.at_first(); it.forward()) {
1066  BLOBNBOX* blob = it.data();
1067  int left = blob->bounding_box().left();
1068  int right = blob->bounding_box().right();
1069  gap_stats.add(left - prev_blob->bounding_box().right(), 1);
1070  width_stats.add(right - left, 1);
1071  blob->set_flow(BTFT_NEIGHBOURS);
1072  prev_blob = blob;
1073  ++blob_count;
1074  }
1075  double median_gap = gap_stats.median();
1076  double median_width = width_stats.median();
1077  double max_width = MAX(median_gap, median_width);
1078  double min_width = MIN(median_gap, median_width);
1079  double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
1080  if (textord_debug_tabfind >= 4) {
1081  tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n",
1082  gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax,
1083  min_width * kMaxLeaderGapFractionOfMin);
1084  }
1085  if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
1086  gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
1087  blob_count >= kMinLeaderCount) {
1088  // This is stable enough to be called a leader, so check the widths.
1089  // Since leader dashes can join, run a dp cutting algorithm and go
1090  // on the cost.
1091  int offset = static_cast<int>(ceil(gap_iqr * 2));
1092  int min_step = static_cast<int>(median_gap + median_width + 0.5);
1093  int max_step = min_step + offset;
1094  min_step -= offset;
1095  // Pad the buffer with min_step/2 on each end.
1096  int part_left = bounding_box_.left() - min_step / 2;
1097  part_width += min_step;
1098  DPPoint* projection = new DPPoint[part_width];
1099  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1100  BLOBNBOX* blob = it.data();
1101  int left = blob->bounding_box().left();
1102  int right = blob->bounding_box().right();
1103  int height = blob->bounding_box().height();
1104  for (int x = left; x < right; ++x) {
1105  projection[left - part_left].AddLocalCost(height);
1106  }
1107  }
1108  DPPoint* best_end = DPPoint::Solve(min_step, max_step, false,
1109  &DPPoint::CostWithVariance,
1110  part_width, projection);
1111  if (best_end != NULL && best_end->total_cost() < blob_count) {
1112  // Good enough. Call it a leader.
1113  result = true;
1114  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1115  BLOBNBOX* blob = it.data();
1116  TBOX box = blob->bounding_box();
1117  // If the first or last blob is spaced too much, don't mark it.
1118  if (it.at_first()) {
1119  int gap = it.data_relative(1)->bounding_box().left() -
1120  blob->bounding_box().right();
1121  if (blob->bounding_box().width() + gap > max_step) {
1122  it.extract();
1123  continue;
1124  }
1125  }
1126  if (it.at_last()) {
1127  int gap = blob->bounding_box().left() -
1128  it.data_relative(-1)->bounding_box().right();
1129  if (blob->bounding_box().width() + gap > max_step) {
1130  it.extract();
1131  break;
1132  }
1133  }
1134  blob->set_region_type(BRT_TEXT);
1135  blob->set_flow(BTFT_LEADER);
1136  }
1137  blob_type_ = BRT_TEXT;
1138  flow_ = BTFT_LEADER;
1139  } else if (textord_debug_tabfind) {
1140  if (best_end == NULL) {
1141  tprintf("No path\n");
1142  } else {
1143  tprintf("Total cost = %d vs allowed %d\n",
1144  best_end->total_cost() < blob_count);
1145  }
1146  }
1147  delete [] projection;
1148  }
1149  return result;
1150 }
const int kMinLeaderCount
const TBOX & bounding_box() const
Definition: blobbox.h:208
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
int median_width() const
Definition: colpartition.h:142
inT64 CostWithVariance(const DPPoint *prev)
Definition: dppoint.cpp:68
inT16 width() const
Definition: rect.h:104
Definition: rect.h:29
#define f(xc, yc)
Definition: imgscale.cpp:39
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
Definition: dppoint.cpp:30
int textord_debug_tabfind
Definition: alignedblob.cpp:28
inT16 right() const
Definition: rect.h:74
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:275
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
const double kMaxLeaderGapFractionOfMin
Definition: statistc.h:29
#define MIN(x, y)
Definition: ndminx.h:28
const double kMaxLeaderGapFractionOfMax
#define MAX(x, y)
Definition: ndminx.h:24
inT16 height() const
Definition: rect.h:97
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:263
bool tesseract::ColPartition::MatchingColumns ( const ColPartition &  other) const

Definition at line 349 of file colpartition.cpp.

349  {
350  int y = (MidY() + other.MidY()) / 2;
351  if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor,
352  LeftAtY(y) / kColumnWidthFactor, 1))
353  return false;
354  if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor,
355  RightAtY(y) / kColumnWidthFactor, 1))
356  return false;
357  return true;
358 }
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:148
int RightAtY(int y) const
Definition: colpartition.h:344
int LeftAtY(int y) const
Definition: colpartition.h:340
const int kColumnWidthFactor
Definition: tabfind.h:51
bool tesseract::ColPartition::MatchingSizes ( const ColPartition &  other) const

Definition at line 384 of file colpartition.cpp.

384  {
385  if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT)
386  return !TabFind::DifferentSizes(median_width_, other.median_width_);
387  else
388  return !TabFind::DifferentSizes(median_size_, other.median_size_);
389 }
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:432
bool tesseract::ColPartition::MatchingStrokeWidth ( const ColPartition &  other,
double  fractional_tolerance,
double  constant_tolerance 
) const

Definition at line 409 of file colpartition.cpp.

411  {
412  int match_count = 0;
413  int nonmatch_count = 0;
414  BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
415  BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
416  box_it.mark_cycle_pt();
417  other_it.mark_cycle_pt();
418  while (!box_it.cycled_list() && !other_it.cycled_list()) {
419  if (box_it.data()->MatchingStrokeWidth(*other_it.data(),
420  fractional_tolerance,
421  constant_tolerance))
422  ++match_count;
423  else
424  ++nonmatch_count;
425  box_it.forward();
426  other_it.forward();
427  }
428  return match_count > nonmatch_count;
429 }
bool tesseract::ColPartition::MatchingTextColor ( const ColPartition & other) const

Definition at line 361 of file colpartition.cpp.

361  {
362  if (color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise &&
363  other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise)
364  return false; // Too noisy.
365 
366  // Colors must match for other to count.
367  double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_,
368  other.color2_,
369  color1_);
370  double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_,
371  other.color2_,
372  color2_);
373  double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
374  other.color1_);
375  double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
376  other.color2_);
377 // All 4 distances must be small enough.
378  return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance &&
379  d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance;
380 }
static double ColorDistanceFromLine(const uinT8 *line1, const uinT8 *line2, const uinT8 *point)
Definition: imagefind.cpp:332
const int kMaxColorDistance
const int kMaxRMSColorNoise
int tesseract::ColPartition::median_bottom ( ) const
inline

Definition at line 127 of file colpartition.h.

127  {
128  return median_bottom_;
129  }
int tesseract::ColPartition::median_left ( ) const
inline

Definition at line 130 of file colpartition.h.

130  {
131  return median_left_;
132  }
int tesseract::ColPartition::median_right ( ) const
inline

Definition at line 133 of file colpartition.h.

133  {
134  return median_right_;
135  }
int tesseract::ColPartition::median_size ( ) const
inline

Definition at line 136 of file colpartition.h.

136  {
137  return median_size_;
138  }
int tesseract::ColPartition::median_top ( ) const
inline

Definition at line 124 of file colpartition.h.

124  {
125  return median_top_;
126  }
int tesseract::ColPartition::median_width ( ) const
inline

Definition at line 142 of file colpartition.h.

142  {
143  return median_width_;
144  }
int tesseract::ColPartition::MedianY ( ) const
inline

Definition at line 308 of file colpartition.h.

308  {
309  return (median_top_ + median_bottom_) / 2;
310  }
int tesseract::ColPartition::MidX ( ) const
inline

Definition at line 312 of file colpartition.h.

312  {
313  return (bounding_box_.left() + bounding_box_.right()) / 2;
314  }
inT16 left() const
Definition: rect.h:67
inT16 right() const
Definition: rect.h:74
int tesseract::ColPartition::MidY ( ) const
inline

Definition at line 304 of file colpartition.h.

304  {
305  return (bounding_box_.top() + bounding_box_.bottom()) / 2;
306  }
inT16 top() const
Definition: rect.h:53
inT16 bottom() const
Definition: rect.h:60
ColPartition* tesseract::ColPartition::nearest_neighbor_above ( ) const
inline

Definition at line 249 of file colpartition.h.

249  {
250  return nearest_neighbor_above_;
251  }
ColPartition* tesseract::ColPartition::nearest_neighbor_below ( ) const
inline

Definition at line 255 of file colpartition.h.

255  {
256  return nearest_neighbor_below_;
257  }
bool tesseract::ColPartition::OKDiacriticMerge ( const ColPartition & candidate,
bool  debug 
) const

Definition at line 437 of file colpartition.cpp.

438  {
439  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
440  int min_top = MAX_INT32;
441  int max_bottom = -MAX_INT32;
442  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
443  BLOBNBOX* blob = it.data();
444  if (!blob->IsDiacritic()) {
445  if (debug) {
446  tprintf("Blob is not a diacritic:");
447  blob->bounding_box().print();
448  }
449  return false; // All blobs must have diacritic bases.
450  }
451  if (blob->base_char_top() < min_top)
452  min_top = blob->base_char_top();
453  if (blob->base_char_bottom() > max_bottom)
454  max_bottom = blob->base_char_bottom();
455  }
456  // If the intersection of all vertical ranges of all base characters
457  // overlaps the median range of this, then it is OK.
458  bool result = min_top > candidate.median_bottom_ &&
459  max_bottom < candidate.median_top_;
460  if (debug) {
461  if (result)
462  tprintf("OKDiacritic!\n");
463  else
464  tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n",
465  max_bottom, min_top, median_bottom_, median_top_);
466  }
467  return result;
468 }
const TBOX & bounding_box() const
Definition: blobbox.h:208
bool IsDiacritic() const
Definition: blobbox.h:357
int base_char_bottom() const
Definition: blobbox.h:363
#define MAX_INT32
Definition: host.h:120
int base_char_top() const
Definition: blobbox.h:360
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void print() const
Definition: rect.h:263
bool tesseract::ColPartition::OKMergeOverlap ( const ColPartition & merge1,
const ColPartition & merge2,
int  ok_box_overlap,
bool  debug 
)

Definition at line 713 of file colpartition.cpp.

715  {
716  // Vertical partitions are not allowed to be involved.
717  if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) {
718  if (debug)
719  tprintf("Vertical partition\n");
720  return false;
721  }
722  // The merging partitions must strongly overlap each other.
723  if (!merge1.VSignificantCoreOverlap(merge2)) {
724  if (debug)
725  tprintf("Voverlap %d (%d)\n",
726  merge1.VCoreOverlap(merge2),
727  merge1.VSignificantCoreOverlap(merge2));
728  return false;
729  }
730  // The merged box must not overlap the median bounds of this.
731  TBOX merged_box(merge1.bounding_box());
732  merged_box += merge2.bounding_box();
733  if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
734  merged_box.bottom() < bounding_box_.top() - ok_box_overlap &&
735  merged_box.top() > bounding_box_.bottom() + ok_box_overlap) {
736  if (debug)
737  tprintf("Excessive box overlap\n");
738  return false;
739  }
740  // Looks OK!
741  return true;
742 }
Definition: rect.h:29
inT16 top() const
Definition: rect.h:53
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
bool IsVerticalType() const
Definition: colpartition.h:431
inT16 bottom() const
Definition: rect.h:60
BLOBNBOX * tesseract::ColPartition::OverlapSplitBlob ( const TBOX & box)

Definition at line 746 of file colpartition.cpp.

746  {
747  if (boxes_.empty() || boxes_.singleton())
748  return NULL;
749  BLOBNBOX_C_IT it(&boxes_);
750  TBOX left_box(it.data()->bounding_box());
751  for (it.forward(); !it.at_first(); it.forward()) {
752  BLOBNBOX* bbox = it.data();
753  left_box += bbox->bounding_box();
754  if (left_box.overlap(box))
755  return bbox;
756  }
757  return NULL;
758 }
const TBOX & bounding_box() const
Definition: blobbox.h:208
#define NULL
Definition: host.h:144
Definition: rect.h:29
bool tesseract::ColPartition::owns_blobs ( ) const
inline

Definition at line 291 of file colpartition.h.

291  {
292  return owns_blobs_;
293  }
PolyBlockType tesseract::ColPartition::PartitionType ( ColumnSpanningType  flow) const

Definition at line 978 of file colpartition.cpp.

978  {
979  if (flow == CST_NOISE) {
980  if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE &&
981  blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT)
982  return PT_NOISE;
983  flow = CST_FLOWING;
984  }
985 
986  switch (blob_type_) {
987  case BRT_NOISE:
988  return PT_NOISE;
989  case BRT_HLINE:
990  return PT_HORZ_LINE;
991  case BRT_VLINE:
992  return PT_VERT_LINE;
993  case BRT_RECTIMAGE:
994  case BRT_POLYIMAGE:
995  switch (flow) {
996  case CST_FLOWING:
997  return PT_FLOWING_IMAGE;
998  case CST_HEADING:
999  return PT_HEADING_IMAGE;
1000  case CST_PULLOUT:
1001  return PT_PULLOUT_IMAGE;
1002  default:
1003  ASSERT_HOST(!"Undefined flow type for image!");
1004  }
1005  break;
1006  case BRT_VERT_TEXT:
1007  return PT_VERTICAL_TEXT;
1008  case BRT_TEXT:
1009  case BRT_UNKNOWN:
1010  default:
1011  switch (flow) {
1012  case CST_FLOWING:
1013  return PT_FLOWING_TEXT;
1014  case CST_HEADING:
1015  return PT_HEADING_TEXT;
1016  case CST_PULLOUT:
1017  return PT_PULLOUT_TEXT;
1018  default:
1019  ASSERT_HOST(!"Undefined flow type for text!");
1020  }
1021  }
1022  ASSERT_HOST(!"Should never get here!");
1023  return PT_NOISE;
1024 }
Definition: capi.h:64
BlobTextFlowType flow() const
Definition: colpartition.h:154
#define ASSERT_HOST(x)
Definition: errcode.h:84
void tesseract::ColPartition::Print ( ) const

Definition at line 1735 of file colpartition.cpp.

1735  {
1736  int y = MidY();
1737  tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
1738  " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
1739  " ts=%d bs=%d ls=%d rs=%d\n",
1740  boxes_.empty() ? 'E' : ' ',
1741  left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y),
1742  bounding_box_.left(), median_left_,
1743  bounding_box_.bottom(), median_bottom_,
1744  bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B',
1745  right_margin_, median_right_, bounding_box_.top(), median_top_,
1746  good_width_, good_column_, type_,
1747  kBlobTypes[blob_type_], flow_,
1748  first_column_, last_column_, boxes_.length(),
1749  space_above_, space_below_, space_to_left_, space_to_right_);
1750 }
inT16 left() const
Definition: rect.h:67
inT16 right() const
Definition: rect.h:74
inT16 top() const
Definition: rect.h:53
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int RightAtY(int y) const
Definition: colpartition.h:344
int LeftAtY(int y) const
Definition: colpartition.h:340
inT16 bottom() const
Definition: rect.h:60
void tesseract::ColPartition::PrintColors ( )

Definition at line 1753 of file colpartition.cpp.

1753  {
1754  tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
1755  color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
1756  color1_[L_ALPHA_CHANNEL],
1757  color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1758 }
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void tesseract::ColPartition::RefinePartners ( PolyBlockType  type,
bool  get_desperate,
ColPartitionGrid * grid
)

Definition at line 1830 of file colpartition.cpp.

1831  {
1832  if (TypesSimilar(type_, type)) {
1833  RefinePartnersInternal(true, get_desperate, grid);
1834  RefinePartnersInternal(false, get_desperate, grid);
1835  } else if (type == PT_COUNT) {
1836  // This is the final pass. Make sure only the correctly typed
1837  // partners survive, however many there are.
1838  RefinePartnersByType(true, &upper_partners_);
1839  RefinePartnersByType(false, &lower_partners_);
1840  // It is possible for a merge to have given a partition multiple
1841  // partners again, so the last resort is to use overlap which is
1842  // guaranteed to leave at most one partner left.
1843  if (!upper_partners_.empty() && !upper_partners_.singleton())
1844  RefinePartnersByOverlap(true, &upper_partners_);
1845  if (!lower_partners_.empty() && !lower_partners_.singleton())
1846  RefinePartnersByOverlap(false, &lower_partners_);
1847  }
1848 }
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
Definition: colpartition.h:412
PolyBlockType type() const
Definition: colpartition.h:181
Definition: capi.h:64
void tesseract::ColPartition::ReflectInYAxis ( )

Definition at line 299 of file colpartition.cpp.

299  {
300  ColPartition_CLIST reversed_boxes;
301  ColPartition_C_IT reversed_it(&reversed_boxes);
302  // Reverse the order of the boxes_.
303  BLOBNBOX_C_IT bb_it(&boxes_);
304  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
305  reversed_it.add_before_then_move(bb_it.extract());
306  }
307  bb_it.add_list_after(&reversed_boxes);
308  ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
309  int tmp = left_margin_;
310  left_margin_ = -right_margin_;
311  right_margin_ = -tmp;
312  ComputeLimits();
313 }
#define ASSERT_HOST(x)
Definition: errcode.h:84
void tesseract::ColPartition::RemoveBox ( BLOBNBOX * box)

Definition at line 213 of file colpartition.cpp.

213  {
214  BLOBNBOX_C_IT bb_it(&boxes_);
215  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
216  if (box == bb_it.data()) {
217  bb_it.extract();
218  ComputeLimits();
219  return;
220  }
221  }
222 }
void tesseract::ColPartition::RemovePartner ( bool  upper,
ColPartition * partner
)

Definition at line 597 of file colpartition.cpp.

597  {
598  ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
599  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
600  if (it.data() == partner) {
601  it.extract();
602  break;
603  }
604  }
605 }
int tesseract::ColPartition::right_key ( ) const
inline

Definition at line 178 of file colpartition.h.

178  {
179  return right_key_;
180  }
bool tesseract::ColPartition::right_key_tab ( ) const
inline

Definition at line 175 of file colpartition.h.

175  {
176  return right_key_tab_;
177  }
int tesseract::ColPartition::right_margin ( ) const
inline

Definition at line 118 of file colpartition.h.

118  {
119  return right_margin_;
120  }
int tesseract::ColPartition::RightAtY ( int  y) const
inline

Definition at line 344 of file colpartition.h.

344  {
345  return XAtY(right_key_, y);
346  }
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320
int tesseract::ColPartition::RightBlobRule ( ) const

Definition at line 529 of file colpartition.cpp.

529  {
530  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
531  it.move_to_last();
532  return it.data()->right_rule();
533 }
void tesseract::ColPartition::set_blob_type ( BlobRegionType  t)
inline

Definition at line 151 of file colpartition.h.

151  {
152  blob_type_ = t;
153  }
void tesseract::ColPartition::set_block_owned ( bool  owned)
inline

Definition at line 208 of file colpartition.h.

208  {
209  block_owned_ = owned;
210  }
void tesseract::ColPartition::set_bottom_spacing ( int  spacing)
inline

Definition at line 223 of file colpartition.h.

223  {
224  bottom_spacing_ = spacing;
225  }
void tesseract::ColPartition::set_first_column ( int  column)
inline

Definition at line 690 of file colpartition.h.

690  {
691  first_column_ = column;
692  }
void tesseract::ColPartition::set_flow ( BlobTextFlowType  f)
inline

Definition at line 157 of file colpartition.h.

157  {
158  flow_ = f;
159  }
#define f(xc, yc)
Definition: imgscale.cpp:39
void tesseract::ColPartition::set_inside_table_column ( bool  val)
inline

Definition at line 246 of file colpartition.h.

246  {
247  inside_table_column_ = val;
248  }
void tesseract::ColPartition::set_last_column ( int  column)
inline

Definition at line 693 of file colpartition.h.

693  {
694  last_column_ = column;
695  }
void tesseract::ColPartition::set_left_margin ( int  margin)
inline

Definition at line 115 of file colpartition.h.

115  {
116  left_margin_ = margin;
117  }
void tesseract::ColPartition::set_median_size ( int  size)
inline

Definition at line 139 of file colpartition.h.

139  {
140  median_size_ = size;
141  }
void tesseract::ColPartition::set_median_width ( int  width)
inline

Definition at line 145 of file colpartition.h.

145  {
146  median_width_ = width;
147  }
void tesseract::ColPartition::set_nearest_neighbor_above ( ColPartition * part)
inline

Definition at line 252 of file colpartition.h.

252  {
253  nearest_neighbor_above_ = part;
254  }
void tesseract::ColPartition::set_nearest_neighbor_below ( ColPartition * part)
inline

Definition at line 258 of file colpartition.h.

258  {
259  nearest_neighbor_below_ = part;
260  }
void tesseract::ColPartition::set_owns_blobs ( bool  owns_blobs)
inline

Definition at line 294 of file colpartition.h.

294  {
295  // Do NOT change ownership flag when there are blobs in the list.
296  // Immediately set the ownership flag when creating copies.
297  ASSERT_HOST(boxes_.empty());
298  owns_blobs_ = owns_blobs;
299  }
bool owns_blobs() const
Definition: colpartition.h:291
#define ASSERT_HOST(x)
Definition: errcode.h:84
void tesseract::ColPartition::set_right_margin ( int  margin)
inline

Definition at line 121 of file colpartition.h.

121  {
122  right_margin_ = margin;
123  }
void tesseract::ColPartition::set_side_step ( int  step)
inline

Definition at line 217 of file colpartition.h.

217  {
218  side_step_ = step;
219  }
void tesseract::ColPartition::set_space_above ( int  space)
inline

Definition at line 264 of file colpartition.h.

264  {
265  space_above_ = space;
266  }
void tesseract::ColPartition::set_space_below ( int  space)
inline

Definition at line 270 of file colpartition.h.

270  {
271  space_below_ = space;
272  }
void tesseract::ColPartition::set_space_to_left ( int  space)
inline

Definition at line 276 of file colpartition.h.

276  {
277  space_to_left_ = space;
278  }
void tesseract::ColPartition::set_space_to_right ( int  space)
inline

Definition at line 282 of file colpartition.h.

282  {
283  space_to_right_ = space;
284  }
void tesseract::ColPartition::set_table_type ( )
inline

Definition at line 233 of file colpartition.h.

233  {
234  if (type_ != PT_TABLE) {
235  type_before_table_ = type_;
236  type_ = PT_TABLE;
237  }
238  }
Definition: capi.h:62
void tesseract::ColPartition::set_top_spacing ( int  spacing)
inline

Definition at line 229 of file colpartition.h.

229  {
230  top_spacing_ = spacing;
231  }
void tesseract::ColPartition::set_type ( PolyBlockType  t)
inline

Definition at line 184 of file colpartition.h.

184  {
185  type_ = t;
186  }
void tesseract::ColPartition::set_vertical ( const ICOORD & v)
inline

Definition at line 193 of file colpartition.h.

193  {
194  vertical_ = v;
195  }
void tesseract::ColPartition::set_working_set ( WorkingPartSet * working_set)
inline

Definition at line 202 of file colpartition.h.

202  {
203  working_set_ = working_set;
204  }
void tesseract::ColPartition::SetBlobTypes ( )

Definition at line 1233 of file colpartition.cpp.

1233  {
1234  if (!owns_blobs())
1235  return;
1236  BLOBNBOX_C_IT it(&boxes_);
1237  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1238  BLOBNBOX* blob = it.data();
1239  if (blob->flow() != BTFT_LEADER)
1240  blob->set_flow(flow_);
1241  blob->set_region_type(blob_type_);
1242  ASSERT_HOST(blob->owner() == NULL || blob->owner() == this);
1243  }
1244 }
bool owns_blobs() const
Definition: colpartition.h:291
#define NULL
Definition: host.h:144
tesseract::ColPartition * owner() const
Definition: blobbox.h:329
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:275
BlobTextFlowType flow() const
Definition: blobbox.h:272
#define ASSERT_HOST(x)
Definition: errcode.h:84
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:263
void tesseract::ColPartition::SetColumnGoodness ( WidthCallback * cb)

Definition at line 1041 of file colpartition.cpp.

1041  {
1042  int y = MidY();
1043  int width = RightAtY(y) - LeftAtY(y);
1044  good_width_ = cb->Run(width);
1045  good_column_ = blob_type_ == BRT_TEXT && left_key_tab_ && right_key_tab_;
1046 }
int RightAtY(int y) const
Definition: colpartition.h:344
int LeftAtY(int y) const
Definition: colpartition.h:340
void tesseract::ColPartition::SetLeftTab ( const TabVector * tab_vector)

Definition at line 473 of file colpartition.cpp.

473  {
474  if (tab_vector != NULL) {
475  left_key_ = tab_vector->sort_key();
476  left_key_tab_ = left_key_ <= BoxLeftKey();
477  } else {
478  left_key_tab_ = false;
479  }
480  if (!left_key_tab_)
481  left_key_ = BoxLeftKey();
482 }
#define NULL
Definition: host.h:144
void tesseract::ColPartition::SetPartitionType ( int  resolution,
ColPartitionSet * columns
)

Definition at line 946 of file colpartition.cpp.

946  {
947  int first_spanned_col = -1;
948  ColumnSpanningType span_type =
949  columns->SpanningType(resolution,
950  bounding_box_.left(), bounding_box_.right(),
951  MidY(), left_margin_, right_margin_,
952  &first_column_, &last_column_,
953  &first_spanned_col);
954  column_set_ = columns;
955  if (first_column_ < last_column_ && span_type == CST_PULLOUT &&
956  !IsLineType()) {
957  // Unequal columns may indicate that the pullout spans one of the columns
958  // it lies in, so force it to be allocated to just that column.
959  if (first_spanned_col >= 0) {
960  first_column_ = first_spanned_col;
961  last_column_ = first_spanned_col;
962  } else {
963  if ((first_column_ & 1) == 0)
964  last_column_ = first_column_;
965  else if ((last_column_ & 1) == 0)
966  first_column_ = last_column_;
967  else
968  first_column_ = last_column_ = (first_column_ + last_column_) / 2;
969  }
970  }
971  type_ = PartitionType(span_type);
972 }
bool IsLineType() const
Definition: colpartition.h:419
inT16 left() const
Definition: rect.h:67
inT16 right() const
Definition: rect.h:74
PolyBlockType PartitionType(ColumnSpanningType flow) const
void tesseract::ColPartition::SetRegionAndFlowTypesFromProjectionValue ( int  value)

Definition at line 1159 of file colpartition.cpp.

1159  {
1160  int blob_count = 0; // Total # blobs.
1161  int good_blob_score_ = 0; // Total # good strokewidth neighbours.
1162  int noisy_count = 0; // Total # neighbours marked as noise.
1163  int hline_count = 0;
1164  int vline_count = 0;
1165  BLOBNBOX_C_IT it(&boxes_);
1166  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1167  BLOBNBOX* blob = it.data();
1168  ++blob_count;
1169  noisy_count += blob->NoisyNeighbours();
1170  good_blob_score_ += blob->GoodTextBlob();
1171  if (blob->region_type() == BRT_HLINE) ++hline_count;
1172  if (blob->region_type() == BRT_VLINE) ++vline_count;
1173  }
1174  flow_ = BTFT_NEIGHBOURS;
1175  blob_type_ = BRT_UNKNOWN;
1176  if (hline_count > vline_count) {
1177  flow_ = BTFT_NONE;
1178  blob_type_ = BRT_HLINE;
1179  } else if (vline_count > hline_count) {
1180  flow_ = BTFT_NONE;
1181  blob_type_ = BRT_VLINE;
1182  } else if (value < -1 || 1 < value) {
1183  int long_side;
1184  int short_side;
1185  if (value > 0) {
1186  long_side = bounding_box_.width();
1187  short_side = bounding_box_.height();
1188  blob_type_ = BRT_TEXT;
1189  } else {
1190  long_side = bounding_box_.height();
1191  short_side = bounding_box_.width();
1192  blob_type_ = BRT_VERT_TEXT;
1193  }
1194  // We will combine the old metrics using aspect ratio and blob counts
1195  // with the input value by allowing a strong indication to flip the
1196  // STRONG_CHAIN/CHAIN flow values.
1197  int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0;
1198  if (short_side > kHorzStrongTextlineHeight) ++strong_score;
1199  if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score;
1200  if (abs(value) >= kMinStrongTextValue)
1201  flow_ = BTFT_STRONG_CHAIN;
1202  else if (abs(value) >= kMinChainTextValue)
1203  flow_ = BTFT_CHAIN;
1204  else
1205  flow_ = BTFT_NEIGHBOURS;
1206  // Upgrade chain to strong chain if the other indicators are good
1207  if (flow_ == BTFT_CHAIN && strong_score == 3)
1208  flow_ = BTFT_STRONG_CHAIN;
1209  // Downgrade strong vertical text to chain if the indicators are bad.
1210  if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2)
1211  flow_ = BTFT_CHAIN;
1212  }
1213  if (flow_ == BTFT_NEIGHBOURS) {
1214  // Check for noisy neighbours.
1215  if (noisy_count >= blob_count) {
1216  flow_ = BTFT_NONTEXT;
1217  blob_type_= BRT_NOISE;
1218  }
1219  }
1220  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
1221  bounding_box_.bottom())) {
1222  tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1223  blob_count, noisy_count, good_blob_score_);
1224  tprintf(" Projection value=%d, flow=%d, blob_type=%d\n",
1225  value, flow_, blob_type_);
1226  Print();
1227  }
1228  SetBlobTypes();
1229 }
int NoisyNeighbours() const
Definition: blobbox.cpp:228
BlobRegionType region_type() const
Definition: blobbox.h:260
int GoodTextBlob() const
Definition: blobbox.cpp:217
inT16 left() const
Definition: rect.h:67
const int kMinStrongTextValue
const int kHorzStrongTextlineAspect
inT16 width() const
Definition: rect.h:104
const int kMinChainTextValue
const int kHorzStrongTextlineCount
const int kHorzStrongTextlineHeight
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
static bool WithinTestRegion(int detail_level, int x, int y)
inT16 height() const
Definition: rect.h:97
inT16 bottom() const
Definition: rect.h:60
void tesseract::ColPartition::SetRightTab ( const TabVector * tab_vector)

Definition at line 485 of file colpartition.cpp.

485  {
486  if (tab_vector != NULL) {
487  right_key_ = tab_vector->sort_key();
488  right_key_tab_ = right_key_ >= BoxRightKey();
489  } else {
490  right_key_tab_ = false;
491  }
492  if (!right_key_tab_)
493  right_key_ = BoxRightKey();
494 }
#define NULL
Definition: host.h:144
int BoxRightKey() const
Definition: colpartition.h:336
void tesseract::ColPartition::SetSpecialBlobsDensity ( const BlobSpecialTextType  type,
const float  density 
)

Definition at line 555 of file colpartition.cpp.

556  {
557  ASSERT_HOST(type < BSTT_COUNT);
558  special_blobs_densities_[type] = density;
559 }
PolyBlockType type() const
Definition: colpartition.h:181
#define ASSERT_HOST(x)
Definition: errcode.h:84
ColPartition * tesseract::ColPartition::ShallowCopy ( ) const

Definition at line 1684 of file colpartition.cpp.

1684  {
1685  ColPartition* part = new ColPartition(blob_type_, vertical_);
1686  part->left_margin_ = left_margin_;
1687  part->right_margin_ = right_margin_;
1688  part->bounding_box_ = bounding_box_;
1689  memcpy(part->special_blobs_densities_, special_blobs_densities_,
1690  sizeof(special_blobs_densities_));
1691  part->median_bottom_ = median_bottom_;
1692  part->median_top_ = median_top_;
1693  part->median_size_ = median_size_;
1694  part->median_left_ = median_left_;
1695  part->median_right_ = median_right_;
1696  part->median_width_ = median_width_;
1697  part->good_width_ = good_width_;
1698  part->good_column_ = good_column_;
1699  part->left_key_tab_ = left_key_tab_;
1700  part->right_key_tab_ = right_key_tab_;
1701  part->type_ = type_;
1702  part->flow_ = flow_;
1703  part->left_key_ = left_key_;
1704  part->right_key_ = right_key_;
1705  part->first_column_ = first_column_;
1706  part->last_column_ = last_column_;
1707  part->owns_blobs_ = false;
1708  return part;
1709 }
ColPartition * tesseract::ColPartition::SingletonPartner ( bool  upper)

Definition at line 608 of file colpartition.cpp.

608  {
609  ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
610  if (!partners->singleton())
611  return NULL;
612  ColPartition_C_IT it(partners);
613  return it.data();
614 }
#define NULL
Definition: host.h:144
void tesseract::ColPartition::SmoothPartnerRun ( int  working_set_count)

Definition at line 1761 of file colpartition.cpp.

1761  {
1762  STATS left_stats(0, working_set_count);
1763  STATS right_stats(0, working_set_count);
1764  PolyBlockType max_type = type_;
1765  ColPartition* partner;
1766  for (partner = SingletonPartner(false); partner != NULL;
1767  partner = partner->SingletonPartner(false)) {
1768  if (partner->type_ > max_type)
1769  max_type = partner->type_;
1770  if (column_set_ == partner->column_set_) {
1771  left_stats.add(partner->first_column_, 1);
1772  right_stats.add(partner->last_column_, 1);
1773  }
1774  }
1775  type_ = max_type;
1776  // TODO(rays) Either establish that it isn't necessary to set the columns,
1777  // or find a way to do it that does not cause an assert failure in
1778  // AddToWorkingSet.
1779 #if 0
1780  first_column_ = left_stats.mode();
1781  last_column_ = right_stats.mode();
1782  if (last_column_ < first_column_)
1783  last_column_ = first_column_;
1784 #endif
1785 
1786  for (partner = SingletonPartner(false); partner != NULL;
1787  partner = partner->SingletonPartner(false)) {
1788  partner->type_ = max_type;
1789 #if 0 // See TODO above
1790  if (column_set_ == partner->column_set_) {
1791  partner->first_column_ = first_column_;
1792  partner->last_column_ = last_column_;
1793  }
1794 #endif
1795  }
1796 }
ColPartition * SingletonPartner(bool upper)
#define NULL
Definition: host.h:144
PolyBlockType
Definition: publictypes.h:41
Definition: statistc.h:29
int tesseract::ColPartition::SortKey ( int  x,
int  y 
) const
inline

Definition at line 316 of file colpartition.h.

316  {
317  return TabVector::SortKey(vertical_, x, y);
318  }
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:280
int tesseract::ColPartition::space_above ( ) const
inline

Definition at line 261 of file colpartition.h.

261  {
262  return space_above_;
263  }
int tesseract::ColPartition::space_below ( ) const
inline

Definition at line 267 of file colpartition.h.

267  {
268  return space_below_;
269  }
int tesseract::ColPartition::space_to_left ( ) const
inline

Definition at line 273 of file colpartition.h.

273  {
274  return space_to_left_;
275  }
int tesseract::ColPartition::space_to_right ( ) const
inline

Definition at line 279 of file colpartition.h.

279  {
280  return space_to_right_;
281  }
int tesseract::ColPartition::SpecialBlobsCount ( const BlobSpecialTextType  type)

Definition at line 540 of file colpartition.cpp.

540  {
541  ASSERT_HOST(type < BSTT_COUNT);
542  BLOBNBOX_C_IT blob_it(&boxes_);
543  int count = 0;
544  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
545  BLOBNBOX* blob = blob_it.data();
546  BlobSpecialTextType blob_type = blob->special_text_type();
547  if (blob_type == type) {
548  count++;
549  }
550  }
551 
552  return count;
553 }
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:266
BlobSpecialTextType
Definition: blobbox.h:81
PolyBlockType type() const
Definition: colpartition.h:181
BlobRegionType blob_type() const
Definition: colpartition.h:148
#define ASSERT_HOST(x)
Definition: errcode.h:84
int count(LIST var_list)
Definition: oldlist.cpp:108
float tesseract::ColPartition::SpecialBlobsDensity ( const BlobSpecialTextType  type) const

Definition at line 535 of file colpartition.cpp.

535  {
536  ASSERT_HOST(type < BSTT_COUNT);
537  return special_blobs_densities_[type];
538 }
PolyBlockType type() const
Definition: colpartition.h:181
#define ASSERT_HOST(x)
Definition: errcode.h:84
ColPartition * tesseract::ColPartition::SplitAt ( int  split_x)

Definition at line 800 of file colpartition.cpp.

800  {
801  if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right())
802  return NULL; // There will be no change.
803  ColPartition* split_part = ShallowCopy();
804  split_part->set_owns_blobs(owns_blobs());
805  BLOBNBOX_C_IT it(&boxes_);
806  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
807  BLOBNBOX* bbox = it.data();
808  ColPartition* prev_owner = bbox->owner();
809  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == NULL);
810  const TBOX& box = bbox->bounding_box();
811  if (box.left() >= split_x) {
812  split_part->AddBox(it.extract());
813  if (owns_blobs() && prev_owner != NULL)
814  bbox->set_owner(split_part);
815  }
816  }
817  ASSERT_HOST(!it.empty());
818  if (split_part->IsEmpty()) {
819  // Split part ended up with nothing. Possible if split_x passes
820  // through the last blob.
821  delete split_part;
822  return NULL;
823  }
824  right_key_tab_ = false;
825  split_part->left_key_tab_ = false;
826  right_margin_ = split_x;
827  split_part->left_margin_ = split_x;
828  ComputeLimits();
829  split_part->ComputeLimits();
830  return split_part;
831 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:332
bool owns_blobs() const
Definition: colpartition.h:291
const TBOX & bounding_box() const
Definition: blobbox.h:208
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
Definition: rect.h:29
ColPartition * ShallowCopy() const
inT16 right() const
Definition: rect.h:74
tesseract::ColPartition * owner() const
Definition: blobbox.h:329
#define ASSERT_HOST(x)
Definition: errcode.h:84
ColPartition * tesseract::ColPartition::SplitAtBlob ( BLOBNBOX * split_blob)

Definition at line 764 of file colpartition.cpp.

764  {
765  ColPartition* split_part = ShallowCopy();
766  split_part->set_owns_blobs(owns_blobs());
767  BLOBNBOX_C_IT it(&boxes_);
768  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
769  BLOBNBOX* bbox = it.data();
770  ColPartition* prev_owner = bbox->owner();
771  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == NULL);
772  if (bbox == split_blob || !split_part->boxes_.empty()) {
773  split_part->AddBox(it.extract());
774  if (owns_blobs() && prev_owner != NULL)
775  bbox->set_owner(split_part);
776  }
777  }
778  ASSERT_HOST(!it.empty());
779  if (split_part->IsEmpty()) {
780  // Split part ended up with nothing. Possible if split_blob is not
781  // in the list of blobs.
782  delete split_part;
783  return NULL;
784  }
785  right_key_tab_ = false;
786  split_part->left_key_tab_ = false;
787  ComputeLimits();
788  // TODO(nbeato) Merge Ray's CL like this:
789  // if (owns_blobs())
790  // SetBlobTextlineGoodness();
791  split_part->ComputeLimits();
792  // TODO(nbeato) Merge Ray's CL like this:
793  // if (split_part->owns_blobs())
794  // split_part->SetBlobTextlineGoodness();
795  return split_part;
796 }
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:332
bool owns_blobs() const
Definition: colpartition.h:291
#define NULL
Definition: host.h:144
ColPartition * ShallowCopy() const
tesseract::ColPartition * owner() const
Definition: blobbox.h:329
#define ASSERT_HOST(x)
Definition: errcode.h:84
int tesseract::ColPartition::top_spacing ( ) const
inline

Definition at line 226 of file colpartition.h.

226  {
227  return top_spacing_;
228  }
PolyBlockType tesseract::ColPartition::type ( ) const
inline

Definition at line 181 of file colpartition.h.

181  {
182  return type_;
183  }
bool tesseract::ColPartition::TypesMatch ( const ColPartition & other) const
inline

Definition at line 403 of file colpartition.h.

403  {
404  return TypesMatch(blob_type_, other.blob_type_);
405  }
bool TypesMatch(const ColPartition &other) const
Definition: colpartition.h:403
static bool tesseract::ColPartition::TypesMatch ( BlobRegionType  type1,
BlobRegionType  type2 
)
inline static

Definition at line 406 of file colpartition.h.

406  {
407  return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) &&
408  !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
409  }
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:399
static bool tesseract::ColPartition::TypesSimilar ( PolyBlockType  type1,
PolyBlockType  type2 
)
inline static

Definition at line 412 of file colpartition.h.

412  {
413  return (type1 == type2 ||
414  (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
415  (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
416  }
ColPartition_CLIST* tesseract::ColPartition::upper_partners ( )
inline

Definition at line 196 of file colpartition.h.

196  {
197  return &upper_partners_;
198  }
int tesseract::ColPartition::VCoreOverlap ( const ColPartition & other) const
inline

Definition at line 375 of file colpartition.h.

375  {
376  return MIN(median_top_, other.median_top_) -
377  MAX(median_bottom_, other.median_bottom_);
378  }
#define MIN(x, y)
Definition: ndminx.h:28
#define MAX(x, y)
Definition: ndminx.h:24
bool tesseract::ColPartition::VOverlaps ( const ColPartition & other) const
inline

Definition at line 370 of file colpartition.h.

370  {
371  return bounding_box_.y_gap(other.bounding_box_) < 0;
372  }
int y_gap(const TBOX &box) const
Definition: rect.h:218
bool tesseract::ColPartition::VSignificantCoreOverlap ( const ColPartition & other) const
inline

Definition at line 387 of file colpartition.h.

387  {
388  int overlap = VCoreOverlap(other);
389  int height = MIN(median_top_ - median_bottom_,
390  other.median_top_ - other.median_bottom_);
391  return overlap * 3 > height;
392  }
int VCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:375
#define MIN(x, y)
Definition: ndminx.h:28
bool tesseract::ColPartition::WithinSameMargins ( const ColPartition & other) const
inline

Definition at line 395 of file colpartition.h.

395  {
396  return left_margin_ <= other.bounding_box_.left() &&
397  bounding_box_.left() >= other.left_margin_ &&
398  bounding_box_.right() <= other.right_margin_ &&
399  right_margin_ >= other.bounding_box_.right();
400  }
inT16 left() const
Definition: rect.h:67
inT16 right() const
Definition: rect.h:74
int tesseract::ColPartition::XAtY ( int  sort_key,
int  y 
) const
inline

Definition at line 320 of file colpartition.h.

320  {
321  return TabVector::XAtY(vertical_, sort_key, y);
322  }
int XAtY(int y) const
Definition: tabvector.h:189

The documentation for this class was generated from the following files: