Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::BoxWord Class Reference

#include <boxword.h>

Public Member Functions

 BoxWord ()
 
 BoxWord (const BoxWord &src)
 
 ~BoxWord ()
 
BoxWord & operator= (const BoxWord &src)
 
void CopyFrom (const BoxWord &src)
 
void SetScriptPositions (const UNICHARSET &unicharset, bool small_caps, TWERD *tessword, WERD_CHOICE *best_choice)
 
void ClipToOriginalWord (const BLOCK *block, WERD *original_word)
 
void MergeBoxes (int start, int end)
 
void InsertBox (int index, const TBOX &box)
 
void DeleteBox (int index)
 
void DeleteAllBoxes ()
 
void ProcessMatchedBlobs (const TWERD &other, TessCallback1< int > *cb) const
 
const TBOX & bounding_box () const
 
const int length () const
 
const TBOX & BlobBox (int index) const
 
ScriptPos BlobPosition (int index) const
 

Static Public Member Functions

static BoxWord * CopyFromNormalized (const DENORM *denorm, TWERD *tessword)
 

Detailed Description

Definition at line 47 of file boxword.h.

Constructor & Destructor Documentation

tesseract::BoxWord::BoxWord ( )

Definition at line 39 of file boxword.cpp.

39  : length_(0) {
40 }
tesseract::BoxWord::BoxWord ( const BoxWord & src)
explicit

Definition at line 42 of file boxword.cpp.

42  {
43  CopyFrom(src);
44 }
void CopyFrom(const BoxWord &src)
Definition: boxword.cpp:54
tesseract::BoxWord::~BoxWord ( )

Definition at line 46 of file boxword.cpp.

46  {
47 }

Member Function Documentation

const TBOX& tesseract::BoxWord::BlobBox ( int  index) const
inline

Definition at line 102 of file boxword.h.

102  {
103  return boxes_[index];
104  }
ScriptPos tesseract::BoxWord::BlobPosition ( int  index) const
inline

Definition at line 105 of file boxword.h.

105  {
106  if (index < 0 || index >= script_pos_.size())
107  return SP_NORMAL;
108  return script_pos_[index];
109  }
int size() const
Definition: genericvector.h:59
const TBOX& tesseract::BoxWord::bounding_box ( ) const
inline

Definition at line 96 of file boxword.h.

96  {
97  return bbox_;
98  }
void tesseract::BoxWord::ClipToOriginalWord ( const BLOCK * block,
WERD * original_word 
)

Definition at line 138 of file boxword.cpp.

138  {
139  for (int i = 0; i < length_; ++i) {
140  TBOX box = boxes_[i];
141  // Expand by a single pixel, as the poly approximation error is 1 pixel.
142  box = TBOX(box.left() - 1, box.bottom() - 1,
143  box.right() + 1, box.top() + 1);
144  // Now find the original box that matches.
145  TBOX original_box;
146  C_BLOB_IT b_it(original_word->cblob_list());
147  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
148  TBOX blob_box = b_it.data()->bounding_box();
149  if (block != NULL)
150  blob_box.rotate(block->re_rotation());
151  if (blob_box.major_overlap(box)) {
152  original_box += blob_box;
153  }
154  }
155  if (!original_box.null_box()) {
156  if (NearlyEqual<int>(original_box.left(), box.left(), kBoxClipTolerance))
157  box.set_left(original_box.left());
158  if (NearlyEqual<int>(original_box.right(), box.right(),
159  kBoxClipTolerance))
160  box.set_right(original_box.right());
161  if (NearlyEqual<int>(original_box.top(), box.top(), kBoxClipTolerance))
162  box.set_top(original_box.top());
163  if (NearlyEqual<int>(original_box.bottom(), box.bottom(),
164  kBoxClipTolerance))
165  box.set_bottom(original_box.bottom());
166  }
167  original_box = original_word->bounding_box();
168  if (block != NULL)
169  original_box.rotate(block->re_rotation());
170  boxes_[i] = box.intersection(original_box);
171  }
172  ComputeBoundingBox();
173 }
TBOX bounding_box()
Definition: werd.cpp:164
void set_right(int x)
Definition: rect.h:77
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
const int kBoxClipTolerance
Definition: boxword.cpp:31
Definition: rect.h:29
inT16 right() const
Definition: rect.h:74
inT16 top() const
Definition: rect.h:53
bool major_overlap(const TBOX &box) const
Definition: rect.h:358
void rotate(const FCOORD &vec)
Definition: rect.h:182
TBOX intersection(const TBOX &box) const
Definition: rect.cpp:88
void set_bottom(int y)
Definition: rect.h:63
FCOORD re_rotation() const
Definition: ocrblock.h:139
void set_top(int y)
Definition: rect.h:56
void set_left(int x)
Definition: rect.h:70
inT16 bottom() const
Definition: rect.h:60
void tesseract::BoxWord::CopyFrom ( const BoxWord & src)

Definition at line 54 of file boxword.cpp.

54  {
55  bbox_ = src.bbox_;
56  length_ = src.length_;
57  boxes_.clear();
58  boxes_.reserve(length_);
59  for (int i = 0; i < length_; ++i)
60  boxes_.push_back(src.boxes_[i]);
61 }
virtual void clear()
int push_back(T object)
void reserve(int size)
BoxWord * tesseract::BoxWord::CopyFromNormalized ( const DENORM * denorm,
TWERD * tessword 
)
static

Definition at line 67 of file boxword.cpp.

68  {
69  BoxWord* boxword = new BoxWord();
70  // Count the blobs.
71  boxword->length_ = 0;
72  for (TBLOB* tblob = tessword->blobs; tblob != NULL; tblob = tblob->next)
73  ++boxword->length_;
74  // Allocate memory.
75  boxword->boxes_.reserve(boxword->length_);
76 
77  for (TBLOB* tblob = tessword->blobs; tblob != NULL; tblob = tblob->next) {
78  TBOX blob_box;
79  for (TESSLINE* outline = tblob->outlines; outline != NULL;
80  outline = outline->next) {
81  EDGEPT* edgept = outline->loop;
82  // Iterate over the edges.
83  do {
84  if (!edgept->IsHidden() || !edgept->prev->IsHidden()) {
85  ICOORD pos(edgept->pos.x, edgept->pos.y);
86  if (denorm != NULL) {
87  TPOINT denormed;
88  denorm->DenormTransform(edgept->pos, &denormed);
89  pos.set_x(denormed.x);
90  pos.set_y(denormed.y);
91  }
92  TBOX pt_box(pos, pos);
93  blob_box += pt_box;
94  }
95  edgept = edgept->next;
96  } while (edgept != outline->loop);
97  }
98  boxword->boxes_.push_back(blob_box);
99  }
100  boxword->ComputeBoundingBox();
101  return boxword;
102 }
EDGEPT * next
Definition: blobs.h:106
EDGEPT * prev
Definition: blobs.h:107
#define NULL
Definition: host.h:144
Definition: rect.h:29
TBLOB * blobs
Definition: blobs.h:274
void DenormTransform(const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:233
inT16 y
Definition: blobs.h:68
bool IsHidden() const
Definition: blobs.h:96
inT16 x
Definition: blobs.h:67
Definition: blobs.h:53
Definition: blobs.h:174
Definition: blobs.h:72
integer coordinate
Definition: points.h:30
TPOINT pos
Definition: blobs.h:100
void tesseract::BoxWord::DeleteAllBoxes ( )

Definition at line 213 of file boxword.cpp.

213  {
214  length_ = 0;
215  boxes_.clear();
216  bbox_ = TBOX();
217 }
virtual void clear()
Definition: rect.h:29
void tesseract::BoxWord::DeleteBox ( int  index)

Definition at line 205 of file boxword.cpp.

205  {
206  ASSERT_HOST(0 <= index && index < length_);
207  boxes_.remove(index);
208  --length_;
209  ComputeBoundingBox();
210 }
virtual void remove(int index)
#define ASSERT_HOST(x)
Definition: errcode.h:84
void tesseract::BoxWord::InsertBox ( int  index,
const TBOX & box 
)

Definition at line 194 of file boxword.cpp.

194  {
195  if (index < length_)
196  boxes_.insert(box, index);
197  else
198  boxes_.push_back(box);
199  length_ = boxes_.size();
200  ComputeBoundingBox();
201 }
int push_back(T object)
void insert(T t, int index)
int size() const
Definition: genericvector.h:59
const int tesseract::BoxWord::length ( ) const
inline

Definition at line 99 of file boxword.h.

99  {
100  return length_;
101  }
void tesseract::BoxWord::MergeBoxes ( int  start,
int  end 
)

Definition at line 177 of file boxword.cpp.

177  {
178  start = ClipToRange(start, 0, length_);
179  end = ClipToRange(end, 0, length_);
180  if (end <= start + 1)
181  return;
182  for (int i = start + 1; i < end; ++i) {
183  boxes_[start] += boxes_[i];
184  }
185  int shrinkage = end - 1 - start;
186  length_ -= shrinkage;
187  for (int i = start + 1; i < length_; ++i)
188  boxes_[i] = boxes_[i + shrinkage];
189  boxes_.truncate(length_);
190 }
virtual void truncate(int size)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:64
BoxWord & tesseract::BoxWord::operator= ( const BoxWord & src)

Definition at line 49 of file boxword.cpp.

49  {
50  CopyFrom(src);
51  return *this;
52 }
void CopyFrom(const BoxWord &src)
Definition: boxword.cpp:54
void tesseract::BoxWord::ProcessMatchedBlobs ( const TWERD & other,
TessCallback1< int > *  cb 
) const

Definition at line 229 of file boxword.cpp.

230  {
231  TBLOB* blob = other.blobs;
232  for (int i = 0; i < length_ && blob != NULL; ++i, blob = blob->next) {
233  TBOX blob_box = blob->bounding_box();
234  if (blob_box == boxes_[i])
235  cb->Run(i);
236  }
237  delete cb;
238 }
#define NULL
Definition: host.h:144
Definition: rect.h:29
TBLOB * blobs
Definition: blobs.h:274
Definition: blobs.h:174
virtual void Run(A1)=0
TBLOB * next
Definition: blobs.h:228
void tesseract::BoxWord::SetScriptPositions ( const UNICHARSET & unicharset,
bool  small_caps,
TWERD * tessword,
WERD_CHOICE * best_choice 
)

Definition at line 108 of file boxword.cpp.

109  {
110  // Allocate memory.
111  script_pos_.init_to_size(length_, SP_NORMAL);
112 
113  int blob_index = 0;
114  for (TBLOB* tblob = tessword->blobs; tblob != NULL; tblob = tblob->next,
115  ++blob_index) {
116  int class_id = best_choice->unichar_id(blob_index);
117  TBOX blob_box = tblob->bounding_box();
118  int top = blob_box.top();
119  int bottom = blob_box.bottom();
120  int min_bottom, max_bottom, min_top, max_top;
121  unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom,
122  &min_top, &max_top);
123  if (bottom <= kMaxDropCapBottom) {
124  script_pos_[blob_index] = SP_DROPCAP;
125  } else if (!small_caps) {
126  if (top + kMinSubscriptOffset < min_top) {
127  script_pos_[blob_index] = SP_SUBSCRIPT;
128  } else if (bottom - kMinSuperscriptOffset > max_bottom) {
129  script_pos_[blob_index] = SP_SUPERSCRIPT;
130  }
131  }
132  }
133 }
const int kMaxDropCapBottom
Definition: boxword.cpp:37
#define NULL
Definition: host.h:144
Definition: rect.h:29
TBLOB * blobs
Definition: blobs.h:274
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
Definition: unicharset.h:459
Definition: blobs.h:174
inT16 top() const
Definition: rect.h:53
const int kMinSuperscriptOffset
Definition: boxword.cpp:35
const int kMinSubscriptOffset
Definition: boxword.cpp:33
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:220
void init_to_size(int size, T t)
inT16 bottom() const
Definition: rect.h:60

The documentation for this class was generated from the following files: