Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::ShapeTable Class Reference

#include <shapetable.h>

Public Member Functions

 ShapeTable ()
 
 ShapeTable (const UNICHARSET &unicharset)
 
bool Serialize (FILE *fp) const
 
bool DeSerialize (bool swap, FILE *fp)
 
int NumShapes () const
 
const UNICHARSET & unicharset () const
 
void set_unicharset (const UNICHARSET &unicharset)
 
STRING DebugStr (int shape_id) const
 
STRING SummaryStr () const
 
int AddShape (int unichar_id, int font_id)
 
int AddShape (const Shape &other)
 
void DeleteShape (int shape_id)
 
void AddToShape (int shape_id, int unichar_id, int font_id)
 
void AddShapeToShape (int shape_id, const Shape &other)
 
int FindShape (int unichar_id, int font_id) const
 
void GetFirstUnicharAndFont (int shape_id, int *unichar_id, int *font_id) const
 
const Shape & GetShape (int shape_id) const
 
Shape * MutableShape (int shape_id)
 
int BuildFromShape (const Shape &shape, const ShapeTable &master_shapes)
 
bool AlreadyMerged (int shape_id1, int shape_id2) const
 
bool AnyMultipleUnichars () const
 
int MaxNumUnichars () const
 
void ForceFontMerges (int start, int end)
 
int MasterUnicharCount (int shape_id) const
 
int MasterFontCount (int shape_id) const
 
int MergedUnicharCount (int shape_id1, int shape_id2) const
 
void MergeShapes (int shape_id1, int shape_id2)
 
void AppendMasterShapes (const ShapeTable &other)
 
int NumMasterShapes () const
 
int MasterDestinationIndex (int shape_id) const
 

Detailed Description

Definition at line 126 of file shapetable.h.

Constructor & Destructor Documentation

tesseract::ShapeTable::ShapeTable ( )

Definition at line 175 of file shapetable.cpp.

175  : unicharset_(NULL) {
176 }
#define NULL
Definition: host.h:144
tesseract::ShapeTable::ShapeTable ( const UNICHARSET & unicharset)
explicit

Definition at line 177 of file shapetable.cpp.

178  : unicharset_(&unicharset) {
179 }

Member Function Documentation

int tesseract::ShapeTable::AddShape ( int  unichar_id,
int  font_id 
)

Definition at line 249 of file shapetable.cpp.

249  {
250  int index = shape_table_.size();
251  Shape* shape = new Shape;
252  shape->AddToShape(unichar_id, font_id);
253  shape_table_.push_back(shape);
254  return index;
255 }
int tesseract::ShapeTable::AddShape ( const Shape & other)

Definition at line 259 of file shapetable.cpp.

259  {
260  int index = shape_table_.size();
261  Shape* shape = new Shape(other);
262  shape_table_.push_back(shape);
263  return index;
264 }
void tesseract::ShapeTable::AddShapeToShape ( int  shape_id,
const Shape & other 
)

Definition at line 281 of file shapetable.cpp.

281  {
282  Shape& shape = *shape_table_[shape_id];
283  shape.AddShape(other);
284 }
void tesseract::ShapeTable::AddToShape ( int  shape_id,
int  unichar_id,
int  font_id 
)

Definition at line 275 of file shapetable.cpp.

275  {
276  Shape& shape = *shape_table_[shape_id];
277  shape.AddToShape(unichar_id, font_id);
278 }
bool tesseract::ShapeTable::AlreadyMerged ( int  shape_id1,
int  shape_id2 
) const

Definition at line 342 of file shapetable.cpp.

342  {
343  return MasterDestinationIndex(shape_id1) == MasterDestinationIndex(shape_id2);
344 }
int MasterDestinationIndex(int shape_id) const
Definition: shapetable.cpp:427
bool tesseract::ShapeTable::AnyMultipleUnichars ( ) const

Definition at line 347 of file shapetable.cpp.

347  {
348  int num_shapes = NumShapes();
349  for (int s1 = 0; s1 < num_shapes; ++s1) {
350  if (MasterDestinationIndex(s1) != s1) continue;
351  if (GetShape(s1).size() > 1)
352  return true;
353  }
354  return false;
355 }
int NumShapes() const
Definition: shapetable.h:140
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:179
int MasterDestinationIndex(int shape_id) const
Definition: shapetable.cpp:427
void tesseract::ShapeTable::AppendMasterShapes ( const ShapeTable & other)

Definition at line 439 of file shapetable.cpp.

439  {
440  for (int s = 0; s < other.shape_table_.size(); ++s) {
441  if (other.shape_table_[s]->destination_index() < 0) {
442  AddShape(*other.shape_table_[s]);
443  }
444  }
445 }
int AddShape(int unichar_id, int font_id)
Definition: shapetable.cpp:249
int tesseract::ShapeTable::BuildFromShape ( const Shape & shape,
const ShapeTable & master_shapes 
)

Definition at line 317 of file shapetable.cpp.

318  {
319  int num_masters = 0;
320  for (int u_ind = 0; u_ind < shape.size(); ++u_ind) {
321  for (int f_ind = 0; f_ind < shape[u_ind].font_ids.size(); ++f_ind) {
322  int c = shape[u_ind].unichar_id;
323  int f = shape[u_ind].font_ids[f_ind];
324  if (FindShape(c, f) < 0) {
325  int shape_id = AddShape(c, f);
326  int master_id = master_shapes.FindShape(c, f);
327  if (master_id >= 0 && shape.size() > 1) {
328  const Shape& master = master_shapes.GetShape(master_id);
329  if (master.IsSubsetOf(shape) && !shape.IsSubsetOf(master)) {
330  // Add everything else from the master shape.
331  shape_table_[shape_id]->AddShape(master);
332  ++num_masters;
333  }
334  }
335  }
336  }
337  }
338  return num_masters;
339 }
int AddShape(int unichar_id, int font_id)
Definition: shapetable.cpp:249
#define f(xc, yc)
Definition: imgscale.cpp:39
int FindShape(int unichar_id, int font_id) const
Definition: shapetable.cpp:290
STRING tesseract::ShapeTable::DebugStr ( int  shape_id) const

Definition at line 194 of file shapetable.cpp.

194  {
195  if (shape_id < 0 || shape_id >= shape_table_.size())
196  return STRING("INVALID_UNICHAR_ID");
197  const Shape& shape = GetShape(shape_id);
198  STRING result;
199  result.add_str_int("Shape", shape_id);
200  if (shape.size() > 100) {
201  result.add_str_int(" Num unichars=", shape.size());
202  return result;
203  }
204  for (int c = 0; c < shape.size(); ++c) {
205  result.add_str_int(" c_id=", shape[c].unichar_id);
206  result += "=";
207  result += unicharset_->id_to_unichar(shape[c].unichar_id);
208  if (shape.size() < 10) {
209  result.add_str_int(", ", shape[c].font_ids.size());
210  result += " fonts =";
211  int num_fonts = shape[c].font_ids.size();
212  if (num_fonts > 10) {
213  result.add_str_int(" ", shape[c].font_ids[0]);
214  result.add_str_int(" ... ", shape[c].font_ids[num_fonts - 1]);
215  } else {
216  for (int f = 0; f < num_fonts; ++f) {
217  result.add_str_int(" ", shape[c].font_ids[f]);
218  }
219  }
220  }
221  }
222  return result;
223 }
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:179
#define f(xc, yc)
Definition: imgscale.cpp:39
void add_str_int(const char *str, int number)
Definition: strngs.cpp:334
inT32 size() const
Definition: strngs.h:56
Definition: strngs.h:40
void tesseract::ShapeTable::DeleteShape ( int  shape_id)

Definition at line 267 of file shapetable.cpp.

267  {
268  delete shape_table_[shape_id];
269  shape_table_[shape_id] = NULL;
270  shape_table_.remove(shape_id);
271 }
#define NULL
Definition: host.h:144
bool tesseract::ShapeTable::DeSerialize ( bool  swap,
FILE *  fp 
)

Definition at line 188 of file shapetable.cpp.

188  {
189  if (!shape_table_.DeSerialize(swap, fp)) return false;
190  return true;
191 }
int tesseract::ShapeTable::FindShape ( int  unichar_id,
int  font_id 
) const

Definition at line 290 of file shapetable.cpp.

290  {
291  for (int s = 0; s < shape_table_.size(); ++s) {
292  const Shape& shape = GetShape(s);
293  for (int c = 0; c < shape.size(); ++c) {
294  if (shape[c].unichar_id == unichar_id) {
295  if (font_id < 0)
296  return s; // We don't care about the font.
297  for (int f = 0; f < shape[c].font_ids.size(); ++f) {
298  if (shape[c].font_ids[f] == font_id)
299  return s;
300  }
301  }
302  }
303  }
304  return -1;
305 }
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:179
#define f(xc, yc)
Definition: imgscale.cpp:39
void tesseract::ShapeTable::ForceFontMerges ( int  start,
int  end 
)

Definition at line 371 of file shapetable.cpp.

371  {
372  for (int s1 = start; s1 < end; ++s1) {
373  if (MasterDestinationIndex(s1) == s1 && GetShape(s1).size() == 1) {
374  int unichar_id = GetShape(s1)[0].unichar_id;
375  for (int s2 = s1 + 1; s2 < end; ++s2) {
376  if (MasterDestinationIndex(s2) == s2 && GetShape(s2).size() == 1 &&
377  unichar_id == GetShape(s2)[0].unichar_id) {
378  MergeShapes(s1, s2);
379  }
380  }
381  }
382  }
383  ShapeTable compacted(*unicharset_);
384  compacted.AppendMasterShapes(*this);
385  *this = compacted;
386 }
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:179
int MasterDestinationIndex(int shape_id) const
Definition: shapetable.cpp:427
void MergeShapes(int shape_id1, int shape_id2)
Definition: shapetable.cpp:416
void tesseract::ShapeTable::GetFirstUnicharAndFont ( int  shape_id,
int *  unichar_id,
int *  font_id 
) const

Definition at line 308 of file shapetable.cpp.

309  {
310  const UnicharAndFonts& unichar_and_fonts = (*shape_table_[shape_id])[0];
311  *unichar_id = unichar_and_fonts.unichar_id;
312  *font_id = unichar_and_fonts.font_ids[0];
313 }
const Shape& tesseract::ShapeTable::GetShape ( int  shape_id) const
inline

Definition at line 179 of file shapetable.h.

179  {
180  return *shape_table_[shape_id];
181  }
int tesseract::ShapeTable::MasterDestinationIndex ( int  shape_id) const

Definition at line 427 of file shapetable.cpp.

427  {
428  int dest_id = shape_table_[shape_id]->destination_index();
429  if (dest_id == shape_id || dest_id < 0)
430  return shape_id; // Is master already.
431  int master_id = shape_table_[dest_id]->destination_index();
432  if (master_id == dest_id || master_id < 0)
433  return dest_id; // Dest is the master and shape_id points to it.
434  master_id = MasterDestinationIndex(master_id);
435  return master_id;
436 }
int MasterDestinationIndex(int shape_id) const
Definition: shapetable.cpp:427
int tesseract::ShapeTable::MasterFontCount ( int  shape_id) const

Definition at line 395 of file shapetable.cpp.

395  {
396  int master_id = MasterDestinationIndex(shape_id);
397  const Shape& shape = GetShape(master_id);
398  int font_count = 0;
399  for (int c = 0; c < shape.size(); ++c) {
400  font_count += shape[c].font_ids.size();
401  }
402  return font_count;
403 }
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:179
int MasterDestinationIndex(int shape_id) const
Definition: shapetable.cpp:427
int tesseract::ShapeTable::MasterUnicharCount ( int  shape_id) const

Definition at line 389 of file shapetable.cpp.

389  {
390  int master_id = MasterDestinationIndex(shape_id);
391  return GetShape(master_id).size();
392 }
int size() const
Definition: shapetable.h:78
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:179
int MasterDestinationIndex(int shape_id) const
Definition: shapetable.cpp:427
int tesseract::ShapeTable::MaxNumUnichars ( ) const

Definition at line 358 of file shapetable.cpp.

358  {
359  int max_num_unichars = 0;
360  int num_shapes = NumShapes();
361  for (int s = 0; s < num_shapes; ++s) {
362  if (GetShape(s).size() > max_num_unichars)
363  max_num_unichars = GetShape(s).size();
364  }
365  return max_num_unichars;
366 }
int NumShapes() const
Definition: shapetable.h:140
int size() const
Definition: shapetable.h:78
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:179
int tesseract::ShapeTable::MergedUnicharCount ( int  shape_id1,
int  shape_id2 
) const

Definition at line 406 of file shapetable.cpp.

406  {
407  // Do it the easy way for now.
408  int master_id1 = MasterDestinationIndex(shape_id1);
409  int master_id2 = MasterDestinationIndex(shape_id2);
410  Shape combined_shape(*shape_table_[master_id1]);
411  combined_shape.AddShape(*shape_table_[master_id2]);
412  return combined_shape.size();
413 }
int MasterDestinationIndex(int shape_id) const
Definition: shapetable.cpp:427
void tesseract::ShapeTable::MergeShapes ( int  shape_id1,
int  shape_id2 
)

Definition at line 416 of file shapetable.cpp.

416  {
417  int master_id1 = MasterDestinationIndex(shape_id1);
418  int master_id2 = MasterDestinationIndex(shape_id2);
419  // Point master_id2 (and all merged shapes) to master_id1.
420  shape_table_[master_id2]->set_destination_index(master_id1);
421  // Add all the shapes of master_id2 to master_id1.
422  shape_table_[master_id1]->AddShape(*shape_table_[master_id2]);
423 }
int MasterDestinationIndex(int shape_id) const
Definition: shapetable.cpp:427
Shape* tesseract::ShapeTable::MutableShape ( int  shape_id)
inline

Definition at line 182 of file shapetable.h.

182  {
183  return shape_table_[shape_id];
184  }
int tesseract::ShapeTable::NumMasterShapes ( ) const

Definition at line 448 of file shapetable.cpp.

448  {
449  int num_shapes = 0;
450  for (int s = 0; s < shape_table_.size(); ++s) {
451  if (shape_table_[s]->destination_index() < 0)
452  ++num_shapes;
453  }
454  return num_shapes;
455 }
int tesseract::ShapeTable::NumShapes ( ) const
inline

Definition at line 140 of file shapetable.h.

140  {
141  return shape_table_.size();
142  }
bool tesseract::ShapeTable::Serialize ( FILE *  fp) const

Definition at line 182 of file shapetable.cpp.

182  {
183  if (!shape_table_.Serialize(fp)) return false;
184  return true;
185 }
void tesseract::ShapeTable::set_unicharset ( const UNICHARSET & unicharset)
inline

Definition at line 148 of file shapetable.h.

148  {
149  unicharset_ = &unicharset;
150  }
const UNICHARSET & unicharset() const
Definition: shapetable.h:143
STRING tesseract::ShapeTable::SummaryStr ( ) const

Definition at line 226 of file shapetable.cpp.

226  {
227  int max_unichars = 0;
228  int num_multi_shapes = 0;
229  int num_master_shapes = 0;
230  for (int s = 0; s < shape_table_.size(); ++s) {
231  if (MasterDestinationIndex(s) != s) continue;
232  ++num_master_shapes;
233  int shape_size = GetShape(s).size();
234  if (shape_size > 1)
235  ++num_multi_shapes;
236  if (shape_size > max_unichars)
237  max_unichars = shape_size;
238  }
239  STRING result;
240  result.add_str_int("Number of shapes = ", num_master_shapes);
241  result.add_str_int(" max unichars = ", max_unichars);
242  result.add_str_int(" number with multiple unichars = ", num_multi_shapes);
243  return result;
244 }
int size() const
Definition: shapetable.h:78
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:179
int MasterDestinationIndex(int shape_id) const
Definition: shapetable.cpp:427
void add_str_int(const char *str, int number)
Definition: strngs.cpp:334
Definition: strngs.h:40
const UNICHARSET& tesseract::ShapeTable::unicharset ( ) const
inline

Definition at line 143 of file shapetable.h.

143  {
144  return *unicharset_;
145  }

The documentation for this class was generated from the following files: