Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::CubeUtils Class Reference

#include <cube_utils.h>

Public Member Functions

 CubeUtils ()
 
 ~CubeUtils ()
 

Static Public Member Functions

static int Prob2Cost (double prob_val)
 
static double Cost2Prob (int cost)
 
static int StrLen (const char_32 *str)
 
static int StrCmp (const char_32 *str1, const char_32 *str2)
 
static char_32 * StrDup (const char_32 *str)
 
static CharSamp * CharSampleFromImg (IMAGE *img, int left, int top, int wid, int hgt)
 
static CharSamp * CharSampleFromPix (Pix *pix, int left, int top, int wid, int hgt)
 
static IMAGE * ImageFromCharSample (CharSamp *char_samp)
 
static Pix * PixFromCharSample (CharSamp *char_samp)
 
static bool ReadFileToString (const string &file_name, string *str)
 
static void SplitStringUsing (const string &str, const string &delims, vector< string > *str_vec)
 
static void UTF8ToUTF32 (const char *utf8_str, string_32 *str32)
 
static void UTF32ToUTF8 (const char_32 *utf32_str, string *str)
 
static bool IsCaseInvariant (const char_32 *str32, CharSet *char_set)
 
static char_32 * ToLower (const char_32 *str32, CharSet *char_set)
 
static char_32 * ToUpper (const char_32 *str32, CharSet *char_set)
 

Detailed Description

Definition at line 36 of file cube_utils.h.

Constructor & Destructor Documentation

tesseract::CubeUtils::CubeUtils ( )

Definition at line 28 of file cube_utils.cpp.

28  {
29 }
tesseract::CubeUtils::~CubeUtils ( )

Definition at line 31 of file cube_utils.cpp.

31  {
32 }

Member Function Documentation

CharSamp * tesseract::CubeUtils::CharSampleFromImg ( IMAGE *  img,
int  left,
int  top,
int  wid,
int  hgt 
)
static

Definition at line 121 of file cube_utils.cpp.

123  {
124  // get the raw img data from the image
125  unsigned char *temp_buff = GetImageData(img, left, top, wid, hgt);
126  if (temp_buff == NULL) {
127  return NULL;
128  }
129 
130  // create a char samp from temp buffer
131  CharSamp *char_samp = CharSamp::FromRawData(left, top, wid, hgt, temp_buff);
132  // clean up temp buffer
133  delete []temp_buff;
134  return char_samp;
135 }
#define NULL
Definition: host.h:144
static CharSamp * FromRawData(int left, int top, int wid, int hgt, unsigned char *data)
Definition: char_samp.cpp:283
CharSamp * tesseract::CubeUtils::CharSampleFromPix ( Pix *  pix,
int  left,
int  top,
int  wid,
int  hgt 
)
static

Definition at line 138 of file cube_utils.cpp.

139  {
140  // get the raw img data from the image
141  unsigned char *temp_buff = GetImageData(pix, left, top, wid, hgt);
142  if (temp_buff == NULL) {
143  return NULL;
144  }
145 
146  // create a char samp from temp buffer
147  CharSamp *char_samp = CharSamp::FromRawData(left, top, wid, hgt, temp_buff);
148 
149  // clean up temp buffer
150  delete []temp_buff;
151  return char_samp;
152 }
#define NULL
Definition: host.h:144
static CharSamp * FromRawData(int left, int top, int wid, int hgt, unsigned char *data)
Definition: char_samp.cpp:283
double tesseract::CubeUtils::Cost2Prob ( int  cost)
static

Definition at line 43 of file cube_utils.cpp.

43  {
44  return exp(-cost / PROB2COST_SCALE);
45 }
#define PROB2COST_SCALE
Definition: cube_const.h:24
IMAGE * tesseract::CubeUtils::ImageFromCharSample ( CharSamp *  char_samp)
static

Definition at line 155 of file cube_utils.cpp.

155  {
156  // parameter check
157  if (char_samp == NULL) {
158  return NULL;
159  }
160 
161  // get the raw data
162  int stride = char_samp->Stride(),
163  wid = char_samp->Width(),
164  hgt = char_samp->Height();
165 
166  unsigned char *buff = char_samp->RawData();
167  if (buff == NULL) {
168  return NULL;
169  }
170 
171  // create a new image object
172  IMAGE *img = new IMAGE();
173  if (img == NULL) {
174  return NULL;
175  }
176 
177  // create a blank B/W image
178  if (img->create(wid, hgt, 1) == -1) {
179  delete img;
180  return NULL;
181  }
182 
183  // copy the contents
184  IMAGELINE line;
185  line.init(wid);
186 
187  for (int y = 0, off = 0; y < hgt ; y++, off += stride) {
188  for (int x = 0; x < wid; x++) {
189  line.pixels[x] = (buff[off + x] == 0) ? 0 : 1;
190  }
191 
192  img->fast_put_line(0, hgt - 1 - y, wid, &line);
193  }
194 
195  return img;
196 }
class IMAGELINE
Definition: img.h:325
#define NULL
Definition: host.h:144
void init(inT32 width)
Definition: img.h:343
uinT8 * pixels
image pixels
Definition: img.h:328
void fast_put_line(inT32 x, inT32 y, inT32 width, IMAGELINE *linebuf)
Definition: imgs.cpp:1203
class IMAGE
Definition: img.h:51
inT8 create(inT32 x, inT32 y, inT8 bits_per_pixel)
Definition: imgs.cpp:121
bool tesseract::CubeUtils::IsCaseInvariant ( const char_32 *  str32,
CharSet *  char_set 
)
static

Definition at line 361 of file cube_utils.cpp.

361  {
362  bool all_one_case = true;
363  bool capitalized;
364  bool prev_upper;
365  bool prev_lower;
366  bool first_upper;
367  bool first_lower;
368  bool cur_upper;
369  bool cur_lower;
370 
371  string str8;
372  if (!char_set) {
373  // If cube char_set is missing, use C-locale-dependent functions
374  // on UTF8 characters to determine case properties.
375  first_upper = isupper(str32[0]);
376  first_lower = islower(str32[0]);
377  if (first_upper)
378  capitalized = true;
379  prev_upper = first_upper;
380  prev_lower = islower(str32[0]);
381  for (int c = 1; str32[c] != 0; ++c) {
382  cur_upper = isupper(str32[c]);
383  cur_lower = islower(str32[c]);
384  if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
385  all_one_case = false;
386  if (cur_upper)
387  capitalized = false;
388  prev_upper = cur_upper;
389  prev_lower = cur_lower;
390  }
391  } else {
392  UNICHARSET *unicharset = char_set->InternalUnicharset();
393  // Use UNICHARSET functions to determine case properties
394  first_upper = unicharset->get_isupper(char_set->ClassID(str32[0]));
395  first_lower = unicharset->get_islower(char_set->ClassID(str32[0]));
396  if (first_upper)
397  capitalized = true;
398  prev_upper = first_upper;
399  prev_lower = unicharset->get_islower(char_set->ClassID(str32[0]));
400 
401  for (int c = 1; c < StrLen(str32); ++c) {
402  cur_upper = unicharset->get_isupper(char_set->ClassID(str32[c]));
403  cur_lower = unicharset->get_islower(char_set->ClassID(str32[c]));
404  if ((prev_upper && cur_lower) || (prev_lower && cur_upper))
405  all_one_case = false;
406  if (cur_upper)
407  capitalized = false;
408  prev_upper = cur_upper;
409  prev_lower = cur_lower;
410  }
411  }
412  return all_one_case || capitalized;
413 }
static int StrLen(const char_32 *str)
Definition: cube_utils.cpp:48
bool get_islower(UNICHAR_ID unichar_id) const
Definition: unicharset.h:399
bool get_isupper(UNICHAR_ID unichar_id) const
Definition: unicharset.h:406
Pix * tesseract::CubeUtils::PixFromCharSample ( CharSamp *  char_samp)
static

Definition at line 199 of file cube_utils.cpp.

199  {
200  // parameter check
201  if (char_samp == NULL) {
202  return NULL;
203  }
204 
205  // get the raw data
206  int stride = char_samp->Stride();
207  int wid = char_samp->Width();
208  int hgt = char_samp->Height();
209 
210  Pix *pix = pixCreate(wid, hgt, 1);
211  if (pix == NULL) {
212  return NULL;
213  }
214 
215  // copy the contents
216  unsigned char *line = char_samp->RawData();
217  for (int y = 0; y < hgt ; y++, line += stride) {
218  for (int x = 0; x < wid; x++) {
219  if (line[x] != 0) {
220  pixSetPixel(pix, x, y, 0);
221  } else {
222  pixSetPixel(pix, x, y, 255);
223  }
224  }
225  }
226 
227  return pix;
228 }
#define NULL
Definition: host.h:144
int tesseract::CubeUtils::Prob2Cost ( double  prob_val)
static

Definition at line 35 of file cube_utils.cpp.

35  {
36  if (prob_val < MIN_PROB) {
37  return MIN_PROB_COST;
38  }
39  return static_cast<int>(-log(prob_val) * PROB2COST_SCALE);
40 }
#define MIN_PROB
Definition: cube_const.h:28
#define PROB2COST_SCALE
Definition: cube_const.h:24
#define MIN_PROB_COST
Definition: cube_const.h:26
bool tesseract::CubeUtils::ReadFileToString ( const string &  file_name,
string *  str 
)
static

Definition at line 268 of file cube_utils.cpp.

268  {
269  str->clear();
270  FILE *fp = fopen(file_name.c_str(), "rb");
271  if (fp == NULL) {
272  return false;
273  }
274 
275  // get the size of the size
276  fseek(fp, 0, SEEK_END);
277  int file_size = ftell(fp);
278  if (file_size < 1) {
279  fclose(fp);
280  return false;
281  }
282  // adjust string size
283  str->reserve(file_size);
284  // read the contents
285  rewind(fp);
286  char *buff = new char[file_size];
287  if (buff == NULL) {
288  fclose(fp);
289  return false;
290  }
291  int read_bytes = fread(buff, 1, static_cast<int>(file_size), fp);
292  if (read_bytes == file_size) {
293  str->append(buff, file_size);
294  }
295  delete []buff;
296  fclose(fp);
297  return (read_bytes == file_size);
298 }
#define NULL
Definition: host.h:144
void tesseract::CubeUtils::SplitStringUsing ( const string &  str,
const string &  delims,
vector< string > *  str_vec 
)
static

Definition at line 301 of file cube_utils.cpp.

303  {
304  // Optimize the common case where delims is a single character.
305  if (delims[0] != '\0' && delims[1] == '\0') {
306  char c = delims[0];
307  const char* p = str.data();
308  const char* end = p + str.size();
309  while (p != end) {
310  if (*p == c) {
311  ++p;
312  } else {
313  const char* start = p;
314  while (++p != end && *p != c);
315  str_vec->push_back(string(start, p - start));
316  }
317  }
318  return;
319  }
320 
321  string::size_type begin_index, end_index;
322  begin_index = str.find_first_not_of(delims);
323  while (begin_index != string::npos) {
324  end_index = str.find_first_of(delims, begin_index);
325  if (end_index == string::npos) {
326  str_vec->push_back(str.substr(begin_index));
327  return;
328  }
329  str_vec->push_back(str.substr(begin_index, (end_index - begin_index)));
330  begin_index = str.find_first_not_of(delims, end_index);
331  }
332 }
int tesseract::CubeUtils::StrCmp ( const char_32 *  str1,
const char_32 *  str2 
)
static

Definition at line 58 of file cube_utils.cpp.

58  {
59  const char_32 *pch1 = str1;
60  const char_32 *pch2 = str2;
61 
62  for (; (*pch1) != 0 && (*pch2) != 0; pch1++, pch2++) {
63  if ((*pch1) != (*pch2)) {
64  return (*pch1) - (*pch2);
65  }
66  }
67 
68  if ((*pch1) == 0) {
69  if ((*pch2) == 0) {
70  return 0;
71  } else {
72  return -1;
73  }
74  } else {
75  return 1;
76  }
77 }
signed int char_32
Definition: string_32.h:40
char_32 * tesseract::CubeUtils::StrDup ( const char_32 *  str)
static

Definition at line 80 of file cube_utils.cpp.

80  {
81  int len = StrLen(str32);
82  char_32 *new_str = new char_32[len + 1];
83  if (new_str == NULL) {
84  return NULL;
85  }
86  memcpy(new_str, str32, len * sizeof(*str32));
87  new_str[len] = 0;
88  return new_str;
89 }
static int StrLen(const char_32 *str)
Definition: cube_utils.cpp:48
#define NULL
Definition: host.h:144
signed int char_32
Definition: string_32.h:40
int tesseract::CubeUtils::StrLen ( const char_32 *  str)
static

Definition at line 48 of file cube_utils.cpp.

48  {
49  if (char_32_ptr == NULL) {
50  return 0;
51  }
52  int len = -1;
53  while (char_32_ptr[++len]);
54  return len;
55 }
#define NULL
Definition: host.h:144
char_32 * tesseract::CubeUtils::ToLower ( const char_32 *  str32,
CharSet *  char_set 
)
static

Definition at line 415 of file cube_utils.cpp.

415  {
416  if (!char_set) {
417  return NULL;
418  }
419  UNICHARSET *unicharset = char_set->InternalUnicharset();
420  int len = StrLen(str32);
421  char_32 *lower = new char_32[len + 1];
422  if (!lower)
423  return NULL;
424  for (int i = 0; i < len; ++i) {
425  char_32 ch = str32[i];
426  if (ch == INVALID_UNICHAR_ID) {
427  delete [] lower;
428  return NULL;
429  }
430  // convert upper-case characters to lower-case
431  if (unicharset->get_isupper(char_set->ClassID(ch))) {
432  UNICHAR_ID uid_lower = unicharset->get_other_case(char_set->ClassID(ch));
433  const char_32 *str32_lower = char_set->ClassString(uid_lower);
434  // expect lower-case version of character to be a single character
435  if (!str32_lower || StrLen(str32_lower) != 1) {
436  delete [] lower;
437  return NULL;
438  }
439  lower[i] = str32_lower[0];
440  } else {
441  lower[i] = ch;
442  }
443  }
444  lower[len] = 0;
445  return lower;
446 }
int UNICHAR_ID
Definition: unichar.h:31
static int StrLen(const char_32 *str)
Definition: cube_utils.cpp:48
#define NULL
Definition: host.h:144
bool get_isupper(UNICHAR_ID unichar_id) const
Definition: unicharset.h:406
UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const
Definition: unicharset.h:572
signed int char_32
Definition: string_32.h:40
char_32 * tesseract::CubeUtils::ToUpper ( const char_32 *  str32,
CharSet *  char_set 
)
static

Definition at line 448 of file cube_utils.cpp.

448  {
449  if (!char_set) {
450  return NULL;
451  }
452  UNICHARSET *unicharset = char_set->InternalUnicharset();
453  int len = StrLen(str32);
454  char_32 *upper = new char_32[len + 1];
455  if (!upper)
456  return NULL;
457  for (int i = 0; i < len; ++i) {
458  char_32 ch = str32[i];
459  if (ch == INVALID_UNICHAR_ID) {
460  delete [] upper;
461  return NULL;
462  }
463  // convert lower-case characters to upper-case
464  if (unicharset->get_islower(char_set->ClassID(ch))) {
465  UNICHAR_ID uid_upper = unicharset->get_other_case(char_set->ClassID(ch));
466  const char_32 *str32_upper = char_set->ClassString(uid_upper);
467  // expect upper-case version of character to be a single character
468  if (!str32_upper || StrLen(str32_upper) != 1) {
469  delete [] upper;
470  return NULL;
471  }
472  upper[i] = str32_upper[0];
473  } else {
474  upper[i] = ch;
475  }
476  }
477  upper[len] = 0;
478  return upper;
479 }
int UNICHAR_ID
Definition: unichar.h:31
static int StrLen(const char_32 *str)
Definition: cube_utils.cpp:48
#define NULL
Definition: host.h:144
bool get_islower(UNICHAR_ID unichar_id) const
Definition: unicharset.h:399
UNICHAR_ID get_other_case(UNICHAR_ID unichar_id) const
Definition: unicharset.h:572
signed int char_32
Definition: string_32.h:40
void tesseract::CubeUtils::UTF32ToUTF8 ( const char_32 *  utf32_str,
string *  str 
)
static

Definition at line 349 of file cube_utils.cpp.

349  {
350  str->clear();
351  for (const char_32 *ch_32 = utf32_str; (*ch_32) != 0; ch_32++) {
352  UNICHAR uni_ch((*ch_32));
353  char *utf8 = uni_ch.utf8_str();
354  if (utf8 != NULL) {
355  (*str) += utf8;
356  delete []utf8;
357  }
358  }
359 }
#define NULL
Definition: host.h:144
signed int char_32
Definition: string_32.h:40
void tesseract::CubeUtils::UTF8ToUTF32 ( const char *  utf8_str,
string_32 *  str32 
)
static

Definition at line 335 of file cube_utils.cpp.

335  {
336  str32->clear();
337  int len = strlen(utf8_str);
338  int step = 0;
339  for (int ch = 0; ch < len; ch += step) {
340  step = UNICHAR::utf8_step(utf8_str + ch);
341  if (step > 0) {
342  UNICHAR uni_ch(utf8_str + ch, step);
343  (*str32) += uni_ch.first_uni();
344  }
345  }
346 }
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:131

The documentation for this class was generated from the following files: