Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::CharSamp Class Reference

#include <char_samp.h>

Inheritance diagram for tesseract::CharSamp:
tesseract::Bmp8

Public Member Functions

 CharSamp ()
 
 CharSamp (int wid, int hgt)
 
 CharSamp (int left, int top, int wid, int hgt)
 
 ~CharSamp ()
 
unsigned short Left () const
 
unsigned short Right () const
 
unsigned short Top () const
 
unsigned short Bottom () const
 
unsigned short Page () const
 
unsigned short NormTop () const
 
unsigned short NormBottom () const
 
unsigned short NormAspectRatio () const
 
unsigned short FirstChar () const
 
unsigned short LastChar () const
 
char_32 Label () const
 
char_32 * StrLabel () const
 
string stringLabel () const
 
void SetLeft (unsigned short left)
 
void SetTop (unsigned short top)
 
void SetPage (unsigned short page)
 
void SetLabel (char_32 label)
 
void SetLabel (const char_32 *label32)
 
void SetLabel (string str)
 
void SetNormTop (unsigned short norm_top)
 
void SetNormBottom (unsigned short norm_bottom)
 
void SetNormAspectRatio (unsigned short norm_aspect_ratio)
 
void SetFirstChar (unsigned short first_char)
 
void SetLastChar (unsigned short last_char)
 
bool Save2CharDumpFile (FILE *fp) const
 
CharSamp * Crop ()
 
ConComp ** Segment (int *seg_cnt, bool right_2_left, int max_hist_wnd, int min_con_comp_size) const
 
CharSamp * Scale (int wid, int hgt, bool isotropic=true)
 
CharSamp * Clone () const
 
bool ComputeFeatures (int conv_grid_size, float *features)
 
int LabelLen () const
 
- Public Member Functions inherited from tesseract::Bmp8
 Bmp8 (unsigned short wid, unsigned short hgt)
 
 ~Bmp8 ()
 
bool Clear ()
 
unsigned short Width () const
 
unsigned short Stride () const
 
unsigned short Height () const
 
unsigned char * RawData () const
 
bool ScaleFrom (Bmp8 *bmp, bool isotropic=true)
 
bool Deslant ()
 
bool HorizontalDeslant (double *deslant_angle)
 
bool IsIdentical (Bmp8 *pBmp) const
 
ConComp ** FindConComps (int *concomp_cnt, int min_size) const
 
float ForegroundRatio () const
 
float MeanHorizontalHistogramEntropy () const
 
int * HorizontalHistogram () const
 

Static Public Member Functions

static CharSamp * FromCharDumpFile (CachedFile *fp)
 
static CharSamp * FromCharDumpFile (FILE *fp)
 
static CharSamp * FromCharDumpFile (unsigned char **raw_data)
 
static CharSamp * FromRawData (int left, int top, int wid, int hgt, unsigned char *data)
 
static CharSamp * FromConComps (ConComp **concomp_array, int strt_concomp, int seg_flags_size, int *seg_flags, bool *left_most, bool *right_most, int word_hgt)
 
static int AuxFeatureCnt ()
 
static int LabelLen (const char_32 *label32)
 
- Static Public Member Functions inherited from tesseract::Bmp8
static Bmp8 * FromCharDumpFile (CachedFile *fp)
 
static Bmp8 * FromCharDumpFile (FILE *fp)
 

Additional Inherited Members

- Protected Member Functions inherited from tesseract::Bmp8
bool LoadFromCharDumpFile (CachedFile *fp)
 
bool LoadFromCharDumpFile (FILE *fp)
 
bool LoadFromCharDumpFile (unsigned char **raw_data)
 
bool LoadFromRawData (unsigned char *data)
 
bool SaveBmp2CharDumpFile (FILE *fp) const
 
bool IsBlankColumn (int x) const
 
bool IsBlankRow (int y) const
 
void Crop (int *xst_src, int *yst_src, int *wid, int *hgt)
 
void Copy (int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const
 
- Protected Attributes inherited from tesseract::Bmp8
unsigned short wid_
 
unsigned short hgt_
 
unsigned char ** line_buff_
 
- Static Protected Attributes inherited from tesseract::Bmp8
static const int kConCompAllocChunk = 16
 
static const int kDeslantAngleCount
 

Detailed Description

Definition at line 39 of file char_samp.h.

Constructor & Destructor Documentation

tesseract::CharSamp::CharSamp ( )

Definition at line 29 of file char_samp.cpp.

30  : Bmp8(0, 0) {
31  left_ = 0;
32  top_ = 0;
33  label32_ = NULL;
34  page_ = -1;
35 }
#define NULL
Definition: host.h:144
Bmp8(unsigned short wid, unsigned short hgt)
Definition: bmp_8.cpp:43
tesseract::CharSamp::CharSamp ( int  wid,
int  hgt 
)

Definition at line 37 of file char_samp.cpp.

38  : Bmp8(wid, hgt) {
39  left_ = 0;
40  top_ = 0;
41  label32_ = NULL;
42  page_ = -1;
43 }
#define NULL
Definition: host.h:144
Bmp8(unsigned short wid, unsigned short hgt)
Definition: bmp_8.cpp:43
tesseract::CharSamp::CharSamp ( int  left,
int  top,
int  wid,
int  hgt 
)

Definition at line 45 of file char_samp.cpp.

46  : Bmp8(wid, hgt)
47  , left_(left)
48  , top_(top) {
49  label32_ = NULL;
50  page_ = -1;
51 }
#define NULL
Definition: host.h:144
Bmp8(unsigned short wid, unsigned short hgt)
Definition: bmp_8.cpp:43
tesseract::CharSamp::~CharSamp ( )

Definition at line 53 of file char_samp.cpp.

53  {
54  if (label32_ != NULL) {
55  delete []label32_;
56  label32_ = NULL;
57  }
58 }
#define NULL
Definition: host.h:144

Member Function Documentation

static int tesseract::CharSamp::AuxFeatureCnt ( )
inlinestatic

Definition at line 138 of file char_samp.h.

138 { return (5); }
unsigned short tesseract::CharSamp::Bottom ( ) const
inline

Definition at line 49 of file char_samp.h.

49 { return top_ + hgt_; }
unsigned short hgt_
Definition: bmp_8.h:96
CharSamp * tesseract::CharSamp::Clone ( ) const

Definition at line 575 of file char_samp.cpp.

575  {
576  // create the cropped char samp
577  CharSamp *samp = new CharSamp(left_, top_, wid_, hgt_);
578  samp->SetLabel(label32_);
579  samp->SetFirstChar(first_char_);
580  samp->SetLastChar(last_char_);
581  samp->SetNormTop(norm_top_);
582  samp->SetNormBottom(norm_bottom_);
583  samp->SetNormAspectRatio(norm_aspect_ratio_);
584  // copy the bitmap to the cropped img
585  Copy(0, 0, wid_, hgt_, samp);
586  return samp;
587 }
unsigned short hgt_
Definition: bmp_8.h:96
unsigned short wid_
Definition: bmp_8.h:95
void Copy(int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const
Definition: bmp_8.cpp:584
bool tesseract::CharSamp::ComputeFeatures ( int  conv_grid_size,
float *  features 
)

Definition at line 656 of file char_samp.cpp.

656  {
657  // Create a scaled BMP
658  CharSamp *scaled_bmp = Scale(conv_grid_size, conv_grid_size);
659  if (!scaled_bmp) {
660  return false;
661  }
662  // prepare input
663  unsigned char *buff = scaled_bmp->RawData();
664  // bitmap features
665  int input;
666  int bmp_size = conv_grid_size * conv_grid_size;
667  for (input = 0; input < bmp_size; input++) {
668  features[input] = 255.0f - (1.0f * buff[input]);
669  }
670  // word context features
671  features[input++] = FirstChar();
672  features[input++] = LastChar();
673  features[input++] = NormTop();
674  features[input++] = NormBottom();
675  features[input++] = NormAspectRatio();
676  delete scaled_bmp;
677  return true;
678 }
CharSamp * Scale(int wid, int hgt, bool isotropic=true)
Definition: char_samp.cpp:261
unsigned short FirstChar() const
Definition: char_samp.h:54
unsigned short LastChar() const
Definition: char_samp.h:55
unsigned short NormBottom() const
Definition: char_samp.h:52
unsigned short NormAspectRatio() const
Definition: char_samp.h:53
unsigned short NormTop() const
Definition: char_samp.h:51
CharSamp * tesseract::CharSamp::Crop ( )

Definition at line 358 of file char_samp.cpp.

358  {
359  // get the dimensions of the cropped img
360  int cropped_left = 0;
361  int cropped_top = 0;
362  int cropped_wid = wid_;
363  int cropped_hgt = hgt_;
364  Bmp8::Crop(&cropped_left, &cropped_top,
365  &cropped_wid, &cropped_hgt);
366 
367  if (cropped_wid == 0 || cropped_hgt == 0) {
368  return NULL;
369  }
370  // create the cropped char samp
371  CharSamp *cropped_samp = new CharSamp(left_ + cropped_left,
372  top_ + cropped_top,
373  cropped_wid, cropped_hgt);
374  cropped_samp->SetLabel(label32_);
375  cropped_samp->SetFirstChar(first_char_);
376  cropped_samp->SetLastChar(last_char_);
377  // the following 3 fields may/should be reset by the calling function
378  // using context information, i.e., location of character box
379  // w.r.t. the word bounding box
380  cropped_samp->SetNormAspectRatio(255 *
381  cropped_wid / (cropped_wid + cropped_hgt));
382  cropped_samp->SetNormTop(0);
383  cropped_samp->SetNormBottom(255);
384 
385  // copy the bitmap to the cropped img
386  Copy(cropped_left, cropped_top, cropped_wid, cropped_hgt, cropped_samp);
387  return cropped_samp;
388 }
unsigned short hgt_
Definition: bmp_8.h:96
#define NULL
Definition: host.h:144
unsigned short wid_
Definition: bmp_8.h:95
void Crop(int *xst_src, int *yst_src, int *wid, int *hgt)
Definition: bmp_8.cpp:354
void Copy(int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const
Definition: bmp_8.cpp:584
unsigned short tesseract::CharSamp::FirstChar ( ) const
inline

Definition at line 54 of file char_samp.h.

54 { return first_char_; }
CharSamp * tesseract::CharSamp::FromCharDumpFile ( CachedFile *  fp)
static

Definition at line 82 of file char_samp.cpp.

82  {
83  unsigned short left;
84  unsigned short top;
85  unsigned short page;
86  unsigned short first_char;
87  unsigned short last_char;
88  unsigned short norm_top;
89  unsigned short norm_bottom;
90  unsigned short norm_aspect_ratio;
91  unsigned int val32;
92 
93  char_32 *label32;
94 
95  // read and check 32 bit marker
96  if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
97  return NULL;
98  }
99  if (val32 != 0xabd0fefe) {
100  return NULL;
101  }
102  // read label length,
103  if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
104  return NULL;
105  }
106  // the label is not null terminated in the file
107  if (val32 > 0) {
108  label32 = new char_32[val32 + 1];
109  if (label32 == NULL) {
110  return NULL;
111  }
112  // read label
113  if (fp->Read(label32, val32 * sizeof(*label32)) !=
114  (val32 * sizeof(*label32))) {
115  return NULL;
116  }
117  // null terminate
118  label32[val32] = 0;
119  } else {
120  label32 = NULL;
121  }
122  // read coordinates
123  if (fp->Read(&page, sizeof(page)) != sizeof(page)) {
124  return NULL;
125  }
126  if (fp->Read(&left, sizeof(left)) != sizeof(left)) {
127  return NULL;
128  }
129  if (fp->Read(&top, sizeof(top)) != sizeof(top)) {
130  return NULL;
131  }
132  if (fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char)) {
133  return NULL;
134  }
135  if (fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char)) {
136  return NULL;
137  }
138  if (fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top)) {
139  return NULL;
140  }
141  if (fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom)) {
142  return NULL;
143  }
144  if (fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) !=
145  sizeof(norm_aspect_ratio)) {
146  return NULL;
147  }
148  // create the object
149  CharSamp *char_samp = new CharSamp();
150  if (char_samp == NULL) {
151  return NULL;
152  }
153  // init
154  char_samp->label32_ = label32;
155  char_samp->page_ = page;
156  char_samp->left_ = left;
157  char_samp->top_ = top;
158  char_samp->first_char_ = first_char;
159  char_samp->last_char_ = last_char;
160  char_samp->norm_top_ = norm_top;
161  char_samp->norm_bottom_ = norm_bottom;
162  char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
163  // load the Bmp8 part
164  if (char_samp->LoadFromCharDumpFile(fp) == false) {
165  delete char_samp;
166  return NULL;
167  }
168  return char_samp;
169 }
#define NULL
Definition: host.h:144
signed int char_32
Definition: string_32.h:40
CharSamp * tesseract::CharSamp::FromCharDumpFile ( FILE *  fp)
static

Definition at line 172 of file char_samp.cpp.

172  {
173  unsigned short left;
174  unsigned short top;
175  unsigned short page;
176  unsigned short first_char;
177  unsigned short last_char;
178  unsigned short norm_top;
179  unsigned short norm_bottom;
180  unsigned short norm_aspect_ratio;
181  unsigned int val32;
182  char_32 *label32;
183 
184  // read and check 32 bit marker
185  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
186  return NULL;
187  }
188  if (val32 != 0xabd0fefe) {
189  return NULL;
190  }
191  // read label length,
192  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
193  return NULL;
194  }
195  // the label is not null terminated in the file
196  if (val32 > 0) {
197  label32 = new char_32[val32 + 1];
198  if (label32 == NULL) {
199  return NULL;
200  }
201  // read label
202  if (fread(label32, 1, val32 * sizeof(*label32), fp) !=
203  (val32 * sizeof(*label32))) {
204  return NULL;
205  }
206  // null terminate
207  label32[val32] = 0;
208  } else {
209  label32 = NULL;
210  }
211  // read coordinates
212  if (fread(&page, 1, sizeof(page), fp) != sizeof(page)) {
213  return NULL;
214  }
215  if (fread(&left, 1, sizeof(left), fp) != sizeof(left)) {
216  return NULL;
217  }
218  if (fread(&top, 1, sizeof(top), fp) != sizeof(top)) {
219  return NULL;
220  }
221  if (fread(&first_char, 1, sizeof(first_char), fp) != sizeof(first_char)) {
222  return NULL;
223  }
224  if (fread(&last_char, 1, sizeof(last_char), fp) != sizeof(last_char)) {
225  return NULL;
226  }
227  if (fread(&norm_top, 1, sizeof(norm_top), fp) != sizeof(norm_top)) {
228  return NULL;
229  }
230  if (fread(&norm_bottom, 1, sizeof(norm_bottom), fp) != sizeof(norm_bottom)) {
231  return NULL;
232  }
233  if (fread(&norm_aspect_ratio, 1, sizeof(norm_aspect_ratio), fp) !=
234  sizeof(norm_aspect_ratio)) {
235  return NULL;
236  }
237  // create the object
238  CharSamp *char_samp = new CharSamp();
239  if (char_samp == NULL) {
240  return NULL;
241  }
242  // init
243  char_samp->label32_ = label32;
244  char_samp->page_ = page;
245  char_samp->left_ = left;
246  char_samp->top_ = top;
247  char_samp->first_char_ = first_char;
248  char_samp->last_char_ = last_char;
249  char_samp->norm_top_ = norm_top;
250  char_samp->norm_bottom_ = norm_bottom;
251  char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
252  // load the Bmp8 part
253  if (char_samp->LoadFromCharDumpFile(fp) == false) {
254  return NULL;
255  }
256  return char_samp;
257 }
#define NULL
Definition: host.h:144
signed int char_32
Definition: string_32.h:40
CharSamp * tesseract::CharSamp::FromCharDumpFile ( unsigned char **  raw_data)
static

Definition at line 590 of file char_samp.cpp.

590  {
591  unsigned int val32;
592  char_32 *label32;
593  unsigned char *raw_data = *raw_data_ptr;
594 
595  // read and check 32 bit marker
596  memcpy(&val32, raw_data, sizeof(val32));
597  raw_data += sizeof(val32);
598  if (val32 != 0xabd0fefe) {
599  return NULL;
600  }
601  // read label length,
602  memcpy(&val32, raw_data, sizeof(val32));
603  raw_data += sizeof(val32);
604  // the label is not null terminated in the file
605  if (val32 > 0) {
606  label32 = new char_32[val32 + 1];
607  if (label32 == NULL) {
608  return NULL;
609  }
610  // read label
611  memcpy(label32, raw_data, val32 * sizeof(*label32));
612  raw_data += (val32 * sizeof(*label32));
613  // null terminate
614  label32[val32] = 0;
615  } else {
616  label32 = NULL;
617  }
618 
619  // create the object
620  CharSamp *char_samp = new CharSamp();
621  if (char_samp == NULL) {
622  return NULL;
623  }
624 
625  // read coordinates
626  char_samp->label32_ = label32;
627  memcpy(&char_samp->page_, raw_data, sizeof(char_samp->page_));
628  raw_data += sizeof(char_samp->page_);
629  memcpy(&char_samp->left_, raw_data, sizeof(char_samp->left_));
630  raw_data += sizeof(char_samp->left_);
631  memcpy(&char_samp->top_, raw_data, sizeof(char_samp->top_));
632  raw_data += sizeof(char_samp->top_);
633  memcpy(&char_samp->first_char_, raw_data, sizeof(char_samp->first_char_));
634  raw_data += sizeof(char_samp->first_char_);
635  memcpy(&char_samp->last_char_, raw_data, sizeof(char_samp->last_char_));
636  raw_data += sizeof(char_samp->last_char_);
637  memcpy(&char_samp->norm_top_, raw_data, sizeof(char_samp->norm_top_));
638  raw_data += sizeof(char_samp->norm_top_);
639  memcpy(&char_samp->norm_bottom_, raw_data, sizeof(char_samp->norm_bottom_));
640  raw_data += sizeof(char_samp->norm_bottom_);
641  memcpy(&char_samp->norm_aspect_ratio_, raw_data,
642  sizeof(char_samp->norm_aspect_ratio_));
643  raw_data += sizeof(char_samp->norm_aspect_ratio_);
644 
645  // load the Bmp8 part
646  if (char_samp->LoadFromCharDumpFile(&raw_data) == false) {
647  delete char_samp;
648  return NULL;
649  }
650 
651  (*raw_data_ptr) = raw_data;
652  return char_samp;
653 }
#define NULL
Definition: host.h:144
signed int char_32
Definition: string_32.h:40
CharSamp * tesseract::CharSamp::FromConComps ( ConComp **  concomp_array,
int  strt_concomp,
int  seg_flags_size,
int *  seg_flags,
bool *  left_most,
bool *  right_most,
int  word_hgt 
)
static

Definition at line 467 of file char_samp.cpp.

470  {
471  int concomp;
472  int end_concomp;
473  int concomp_cnt = 0;
474  end_concomp = strt_concomp + seg_flags_size;
475  // determine ID range
476  bool once = false;
477  int min_id = -1;
478  int max_id = -1;
479  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
480  if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
481  if (!once) {
482  min_id = concomp_array[concomp]->ID();
483  max_id = concomp_array[concomp]->ID();
484  once = true;
485  } else {
486  UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id);
487  }
488  concomp_cnt++;
489  }
490  }
491  if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) {
492  return NULL;
493  }
494  // alloc memo for computing leftmost and right most attributes
495  int id_cnt = max_id - min_id + 1;
496  bool *id_exist = new bool[id_cnt];
497  bool *left_most_exist = new bool[id_cnt];
498  bool *right_most_exist = new bool[id_cnt];
499  if (!id_exist || !left_most_exist || !right_most_exist)
500  return NULL;
501  memset(id_exist, 0, id_cnt * sizeof(*id_exist));
502  memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist));
503  memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist));
504  // find the dimensions of the charsamp
505  once = false;
506  int left = -1;
507  int right = -1;
508  int top = -1;
509  int bottom = -1;
510  int unq_ids = 0;
511  int unq_left_most = 0;
512  int unq_right_most = 0;
513  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
514  if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
515  if (!once) {
516  left = concomp_array[concomp]->Left();
517  right = concomp_array[concomp]->Right();
518  top = concomp_array[concomp]->Top();
519  bottom = concomp_array[concomp]->Bottom();
520  once = true;
521  } else {
522  UpdateRange(concomp_array[concomp]->Left(),
523  concomp_array[concomp]->Right(), &left, &right);
524  UpdateRange(concomp_array[concomp]->Top(),
525  concomp_array[concomp]->Bottom(), &top, &bottom);
526  }
527  // count unq ids, unq left most and right mosts ids
528  int concomp_id = concomp_array[concomp]->ID() - min_id;
529  if (!id_exist[concomp_id]) {
530  id_exist[concomp_id] = true;
531  unq_ids++;
532  }
533  if (concomp_array[concomp]->LeftMost()) {
534  if (left_most_exist[concomp_id] == false) {
535  left_most_exist[concomp_id] = true;
536  unq_left_most++;
537  }
538  }
539  if (concomp_array[concomp]->RightMost()) {
540  if (right_most_exist[concomp_id] == false) {
541  right_most_exist[concomp_id] = true;
542  unq_right_most++;
543  }
544  }
545  }
546  }
547  delete []id_exist;
548  delete []left_most_exist;
549  delete []right_most_exist;
550  if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) {
551  return NULL;
552  }
553  (*left_most) = (unq_left_most >= unq_ids);
554  (*right_most) = (unq_right_most >= unq_ids);
555  // create the char sample object
556  CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1);
557  if (!samp) {
558  return NULL;
559  }
560 
561  // set the foreground pixels
562  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
563  if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
564  ConCompPt *pt_ptr = concomp_array[concomp]->Head();
565  while (pt_ptr) {
566  samp->line_buff_[pt_ptr->y() - top][pt_ptr->x() - left] = 0;
567  pt_ptr = pt_ptr->Next();
568  }
569  }
570  }
571  return samp;
572 }
unsigned short Left() const
Definition: char_samp.h:46
unsigned short Bottom() const
Definition: char_samp.h:49
#define NULL
Definition: host.h:144
unsigned short Right() const
Definition: char_samp.h:47
unsigned short Top() const
Definition: char_samp.h:48
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:74
CharSamp * tesseract::CharSamp::FromRawData ( int  left,
int  top,
int  wid,
int  hgt,
unsigned char *  data 
)
static

Definition at line 283 of file char_samp.cpp.

284  {
285  // create the object
286  CharSamp *char_samp = new CharSamp(left, top, wid, hgt);
287  if (char_samp == NULL) {
288  return NULL;
289  }
290  if (char_samp->LoadFromRawData(data) == false) {
291  delete char_samp;
292  return NULL;
293  }
294  return char_samp;
295 }
#define NULL
Definition: host.h:144
char_32 tesseract::CharSamp::Label ( ) const
inline

Definition at line 56 of file char_samp.h.

56  {
57  if (label32_ == NULL || LabelLen() != 1) {
58  return 0;
59  }
60  return label32_[0];
61  }
#define NULL
Definition: host.h:144
int LabelLen() const
Definition: char_samp.h:140
int tesseract::CharSamp::LabelLen ( ) const
inline

Definition at line 140 of file char_samp.h.

140 { return LabelLen(label32_); }
int LabelLen() const
Definition: char_samp.h:140
static int tesseract::CharSamp::LabelLen ( const char_32 *  label32)
inlinestatic

Definition at line 141 of file char_samp.h.

141  {
142  if (label32 == NULL) {
143  return 0;
144  }
145  int len = 0;
146  while (label32[++len] != 0);
147  return len;
148  }
#define NULL
Definition: host.h:144
unsigned short tesseract::CharSamp::LastChar ( ) const
inline

Definition at line 55 of file char_samp.h.

55 { return last_char_; }
unsigned short tesseract::CharSamp::Left ( ) const
inline

Definition at line 46 of file char_samp.h.

46 { return left_; }
unsigned short tesseract::CharSamp::NormAspectRatio ( ) const
inline

Definition at line 53 of file char_samp.h.

53 { return norm_aspect_ratio_; }
unsigned short tesseract::CharSamp::NormBottom ( ) const
inline

Definition at line 52 of file char_samp.h.

52 { return norm_bottom_; }
unsigned short tesseract::CharSamp::NormTop ( ) const
inline

Definition at line 51 of file char_samp.h.

51 { return norm_top_; }
unsigned short tesseract::CharSamp::Page ( ) const
inline

Definition at line 50 of file char_samp.h.

50 { return page_; }
unsigned short tesseract::CharSamp::Right ( ) const
inline

Definition at line 47 of file char_samp.h.

47 { return left_ + wid_; }
unsigned short wid_
Definition: bmp_8.h:95
bool tesseract::CharSamp::Save2CharDumpFile ( FILE *  fp) const

Definition at line 298 of file char_samp.cpp.

298  {
299  unsigned int val32;
300  // write and check 32 bit marker
301  val32 = 0xabd0fefe;
302  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
303  return false;
304  }
305  // write label length
306  val32 = (label32_ == NULL) ? 0 : LabelLen(label32_);
307  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
308  return false;
309  }
310  // write label
311  if (label32_ != NULL) {
312  if (fwrite(label32_, 1, val32 * sizeof(*label32_), fp) !=
313  (val32 * sizeof(*label32_))) {
314  return false;
315  }
316  }
317  // write coordinates
318  if (fwrite(&page_, 1, sizeof(page_), fp) != sizeof(page_)) {
319  return false;
320  }
321  if (fwrite(&left_, 1, sizeof(left_), fp) != sizeof(left_)) {
322  return false;
323  }
324  if (fwrite(&top_, 1, sizeof(top_), fp) != sizeof(top_)) {
325  return false;
326  }
327  if (fwrite(&first_char_, 1, sizeof(first_char_), fp) !=
328  sizeof(first_char_)) {
329  return false;
330  }
331  if (fwrite(&last_char_, 1, sizeof(last_char_), fp) != sizeof(last_char_)) {
332  return false;
333  }
334  if (fwrite(&norm_top_, 1, sizeof(norm_top_), fp) != sizeof(norm_top_)) {
335  return false;
336  }
337  if (fwrite(&norm_bottom_, 1, sizeof(norm_bottom_), fp) !=
338  sizeof(norm_bottom_)) {
339  return false;
340  }
341  if (fwrite(&norm_aspect_ratio_, 1, sizeof(norm_aspect_ratio_), fp) !=
342  sizeof(norm_aspect_ratio_)) {
343  return false;
344  }
345  if (SaveBmp2CharDumpFile(fp) == false) {
346  return false;
347  }
348  return true;
349 }
#define NULL
Definition: host.h:144
bool SaveBmp2CharDumpFile(FILE *fp) const
Definition: bmp_8.cpp:521
int LabelLen() const
Definition: char_samp.h:140
CharSamp * tesseract::CharSamp::Scale ( int  wid,
int  hgt,
bool  isotropic = true 
)

Definition at line 261 of file char_samp.cpp.

261  {
262  CharSamp *scaled_samp = new CharSamp(wid, hgt);
263  if (scaled_samp == NULL) {
264  return NULL;
265  }
266  if (scaled_samp->ScaleFrom(this, isotropic) == false) {
267  delete scaled_samp;
268  return NULL;
269  }
270  scaled_samp->left_ = left_;
271  scaled_samp->top_ = top_;
272  scaled_samp->page_ = page_;
273  scaled_samp->SetLabel(label32_);
274  scaled_samp->first_char_ = first_char_;
275  scaled_samp->last_char_ = last_char_;
276  scaled_samp->norm_top_ = norm_top_;
277  scaled_samp->norm_bottom_ = norm_bottom_;
278  scaled_samp->norm_aspect_ratio_ = norm_aspect_ratio_;
279  return scaled_samp;
280 }
#define NULL
Definition: host.h:144
ConComp ** tesseract::CharSamp::Segment ( int *  seg_cnt,
bool  right_2_left,
int  max_hist_wnd,
int  min_con_comp_size 
) const

Definition at line 392 of file char_samp.cpp.

393  {
394  // init
395  (*segment_cnt) = 0;
396  int concomp_cnt = 0;
397  int seg_cnt = 0;
398  // find the concomps of the image
399  ConComp **concomp_array = FindConComps(&concomp_cnt, min_con_comp_size);
400  if (concomp_cnt <= 0 || !concomp_array) {
401  if (concomp_array)
402  delete []concomp_array;
403  return NULL;
404  }
405  ConComp **seg_array = NULL;
406  // segment each concomp further using vertical histogram
407  for (int concomp = 0; concomp < concomp_cnt; concomp++) {
408  int concomp_seg_cnt = 0;
409  // segment the concomp
410  ConComp **concomp_seg_array = NULL;
411  ConComp **concomp_alloc_seg =
412  concomp_array[concomp]->Segment(max_hist_wnd, &concomp_seg_cnt);
413  // no segments, add the whole concomp
414  if (concomp_alloc_seg == NULL) {
415  concomp_seg_cnt = 1;
416  concomp_seg_array = concomp_array + concomp;
417  } else {
418  // delete the original concomp, we no longer need it
419  concomp_seg_array = concomp_alloc_seg;
420  delete concomp_array[concomp];
421  }
422  // add the resulting segments
423  for (int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) {
424  // too small of a segment: ignore
425  if (concomp_seg_array[seg_idx]->Width() < 2 &&
426  concomp_seg_array[seg_idx]->Height() < 2) {
427  delete concomp_seg_array[seg_idx];
428  } else {
429  // add the new segment
430  // extend the segment array
431  if ((seg_cnt % kConCompAllocChunk) == 0) {
432  ConComp **temp_segm_array =
433  new ConComp *[seg_cnt + kConCompAllocChunk];
434  if (temp_segm_array == NULL) {
435  fprintf(stderr, "Cube ERROR (CharSamp::Segment): could not "
436  "allocate additional connected components\n");
437  delete []concomp_seg_array;
438  delete []concomp_array;
439  delete []seg_array;
440  return NULL;
441  }
442  if (seg_cnt > 0) {
443  memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array));
444  delete []seg_array;
445  }
446  seg_array = temp_segm_array;
447  }
448  seg_array[seg_cnt++] = concomp_seg_array[seg_idx];
449  }
450  } // segment
451  if (concomp_alloc_seg != NULL) {
452  delete []concomp_alloc_seg;
453  }
454  } // concomp
455  delete []concomp_array;
456 
457  // sort the concomps from Left2Right or Right2Left, based on the reading order
458  if (seg_cnt > 0 && seg_array != NULL) {
459  qsort(seg_array, seg_cnt, sizeof(*seg_array), right_2_left ?
460  ConComp::Right2LeftComparer : ConComp::Left2RightComparer);
461  }
462  (*segment_cnt) = seg_cnt;
463  return seg_array;
464 }
#define NULL
Definition: host.h:144
unsigned short Width() const
Definition: bmp_8.h:48
unsigned short Height() const
Definition: bmp_8.h:50
static int Right2LeftComparer(const void *comp1, const void *comp2)
Definition: con_comp.h:82
ConComp ** FindConComps(int *concomp_cnt, int min_size) const
Definition: bmp_8.cpp:611
static const int kConCompAllocChunk
Definition: bmp_8.h:100
static int Left2RightComparer(const void *comp1, const void *comp2)
Definition: con_comp.h:73
void tesseract::CharSamp::SetFirstChar ( unsigned short  first_char)
inline

Definition at line 104 of file char_samp.h.

104  {
105  first_char_ = first_char;
106  }
void tesseract::CharSamp::SetLabel ( char_32  label)
inline

Definition at line 68 of file char_samp.h.

68  {
69  if (label32_ != NULL) {
70  delete []label32_;
71  }
72  label32_ = new char_32[2];
73  if (label32_ != NULL) {
74  label32_[0] = label;
75  label32_[1] = 0;
76  }
77  }
#define NULL
Definition: host.h:144
signed int char_32
Definition: string_32.h:40
void tesseract::CharSamp::SetLabel ( const char_32 *  label32)
inline

Definition at line 78 of file char_samp.h.

78  {
79  if (label32_ != NULL) {
80  delete []label32_;
81  label32_ = NULL;
82  }
83  if (label32 != NULL) {
84  // remove any byte order marks if any
85  if (label32[0] == 0xfeff) {
86  label32++;
87  }
88  int len = LabelLen(label32);
89  label32_ = new char_32[len + 1];
90  if (label32_ != NULL) {
91  memcpy(label32_, label32, len * sizeof(*label32));
92  label32_[len] = 0;
93  }
94  }
95  }
#define NULL
Definition: host.h:144
int LabelLen() const
Definition: char_samp.h:140
signed int char_32
Definition: string_32.h:40
void tesseract::CharSamp::SetLabel ( string  str)

Definition at line 71 of file char_samp.cpp.

71  {
72  if (label32_ != NULL) {
73  delete []label32_;
74  label32_ = NULL;
75  }
76  string_32 str32;
77  CubeUtils::UTF8ToUTF32(str.c_str(), &str32);
78  SetLabel(reinterpret_cast<const char_32 *>(str32.c_str()));
79 }
basic_string< char_32 > string_32
Definition: string_32.h:41
#define NULL
Definition: host.h:144
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)
Definition: cube_utils.cpp:335
void SetLabel(char_32 label)
Definition: char_samp.h:68
void tesseract::CharSamp::SetLastChar ( unsigned short  last_char)
inline

Definition at line 107 of file char_samp.h.

107  {
108  last_char_ = last_char;
109  }
void tesseract::CharSamp::SetLeft ( unsigned short  left)
inline

Definition at line 65 of file char_samp.h.

65 { left_ = left; }
void tesseract::CharSamp::SetNormAspectRatio ( unsigned short  norm_aspect_ratio)
inline

Definition at line 101 of file char_samp.h.

101  {
102  norm_aspect_ratio_ = norm_aspect_ratio;
103  }
void tesseract::CharSamp::SetNormBottom ( unsigned short  norm_bottom)
inline

Definition at line 98 of file char_samp.h.

98  {
99  norm_bottom_ = norm_bottom;
100  }
void tesseract::CharSamp::SetNormTop ( unsigned short  norm_top)
inline

Definition at line 97 of file char_samp.h.

97 { norm_top_ = norm_top; }
void tesseract::CharSamp::SetPage ( unsigned short  page)
inline

Definition at line 67 of file char_samp.h.

67 { page_ = page; }
void tesseract::CharSamp::SetTop ( unsigned short  top)
inline

Definition at line 66 of file char_samp.h.

66 { top_ = top; }
string tesseract::CharSamp::stringLabel ( ) const

Definition at line 61 of file char_samp.cpp.

61  {
62  string str = "";
63  if (label32_ != NULL) {
64  string_32 str32(label32_);
65  CubeUtils::UTF32ToUTF8(str32.c_str(), &str);
66  }
67  return str;
68 }
basic_string< char_32 > string_32
Definition: string_32.h:41
#define NULL
Definition: host.h:144
static void UTF32ToUTF8(const char_32 *utf32_str, string *str)
Definition: cube_utils.cpp:349
char_32* tesseract::CharSamp::StrLabel ( ) const
inline

Definition at line 62 of file char_samp.h.

62 { return label32_; }
unsigned short tesseract::CharSamp::Top ( ) const
inline

Definition at line 48 of file char_samp.h.

48 { return top_; }

The documentation for this class was generated from the following files: