Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CHAR_FRAGMENT Class Reference

#include <unicharset.h>

Public Member Functions

void set_all (const char *unichar, int pos, int total, bool natural)
 
void set_unichar (const char *uch)
 
void set_pos (int p)
 
void set_total (int t)
 
const char * get_unichar () const
 
int get_pos () const
 
int get_total () const
 
STRING to_string () const
 
bool equals (const char *other_unichar, int other_pos, int other_total) const
 
bool equals (const CHAR_FRAGMENT *other) const
 
bool is_continuation_of (const CHAR_FRAGMENT *fragment) const
 
bool is_beginning () const
 
bool is_ending () const
 
bool is_natural () const
 
void set_natural (bool value)
 

Static Public Member Functions

static STRING to_string (const char *unichar, int pos, int total, bool natural)
 
static CHAR_FRAGMENT * parse_from_string (const char *str)
 

Static Public Attributes

static const int kMinLen = 6
 
static const int kMaxLen = 3 + UNICHAR_LEN + 2
 
static const int kMaxChunks = 5
 

Detailed Description

Definition at line 30 of file unicharset.h.

Member Function Documentation

bool CHAR_FRAGMENT::equals ( const char *  other_unichar,
int  other_pos,
int  other_total 
) const
inline

Definition at line 67 of file unicharset.h.

68  {
69  return (strcmp(this->unichar, other_unichar) == 0 &&
70  this->pos == other_pos && this->total == other_total);
71  }
bool CHAR_FRAGMENT::equals ( const CHAR_FRAGMENT *  other) const
inline

Definition at line 72 of file unicharset.h.

72  {
73  return this->equals(other->get_unichar(),
74  other->get_pos(),
75  other->get_total());
76  }
bool equals(const char *other_unichar, int other_pos, int other_total) const
Definition: unicharset.h:67
int get_total() const
Definition: unicharset.h:54
int get_pos() const
Definition: unicharset.h:53
const char * get_unichar() const
Definition: unicharset.h:52
int CHAR_FRAGMENT::get_pos ( ) const
inline

Definition at line 53 of file unicharset.h.

53 { return this->pos; }
int CHAR_FRAGMENT::get_total ( ) const
inline

Definition at line 54 of file unicharset.h.

54 { return this->total; }
const char* CHAR_FRAGMENT::get_unichar ( ) const
inline

Definition at line 52 of file unicharset.h.

52 { return this->unichar; }
bool CHAR_FRAGMENT::is_beginning ( ) const
inline

Definition at line 87 of file unicharset.h.

87 { return this->pos == 0; }
bool CHAR_FRAGMENT::is_continuation_of ( const CHAR_FRAGMENT *  fragment) const
inline

Definition at line 80 of file unicharset.h.

80  {
81  return (strcmp(this->unichar, fragment->get_unichar()) == 0 &&
82  this->total == fragment->get_total() &&
83  this->pos == fragment->get_pos() + 1);
84  }
int get_total() const
Definition: unicharset.h:54
int get_pos() const
Definition: unicharset.h:53
const char * get_unichar() const
Definition: unicharset.h:52
bool CHAR_FRAGMENT::is_ending ( ) const
inline

Definition at line 90 of file unicharset.h.

90 { return this->pos == this->total-1; }
bool CHAR_FRAGMENT::is_natural ( ) const
inline

Definition at line 95 of file unicharset.h.

95 { return natural; }
CHAR_FRAGMENT * CHAR_FRAGMENT::parse_from_string ( const char *  str)
static

Definition at line 902 of file unicharset.cpp.

902  {
903  const char *ptr = string;
904  int len = strlen(string);
905  if (len < kMinLen || *ptr != kSeparator) {
906  return NULL; // this string can not represent a fragment
907  }
908  ptr++; // move to the next character
909  int step = 0;
910  while ((ptr + step) < (string + len) && *(ptr + step) != kSeparator) {
911  step += UNICHAR::utf8_step(ptr + step);
912  }
913  if (step == 0 || step > UNICHAR_LEN) {
914  return NULL; // no character for unichar or the character is too long
915  }
916  char unichar[UNICHAR_LEN + 1];
917  strncpy(unichar, ptr, step);
918  unichar[step] = '\0'; // null terminate unichar
919  ptr += step; // move to the next fragment separator
920  int pos = 0;
921  int total = 0;
922  bool natural = false;
923  char *end_ptr = NULL;
924  for (int i = 0; i < 2; i++) {
925  if (ptr > string + len || *ptr != kSeparator) {
926  if (i == 1 && *ptr == kNaturalFlag)
927  natural = true;
928  else
929  return NULL; // Failed to parse fragment representation.
930  }
931  ptr++; // move to the next character
932  i == 0 ? pos = static_cast<int>(strtol(ptr, &end_ptr, 10))
933  : total = static_cast<int>(strtol(ptr, &end_ptr, 10));
934  ptr = end_ptr;
935  }
936  if (ptr != string + len) {
937  return NULL; // malformed fragment representation
938  }
939  CHAR_FRAGMENT *fragment = new CHAR_FRAGMENT();
940  fragment->set_all(unichar, pos, total, natural);
941  return fragment;
942 }
static int utf8_step(const char *utf8_str)
Definition: unichar.cpp:131
void set_all(const char *unichar, int pos, int total, bool natural)
Definition: unicharset.h:40
#define NULL
Definition: host.h:144
#define UNICHAR_LEN
Definition: unichar.h:28
static const int kMinLen
Definition: unicharset.h:33
void CHAR_FRAGMENT::set_all ( const char *  unichar,
int  pos,
int  total,
bool  natural 
)
inline

Definition at line 40 of file unicharset.h.

40  {
41  set_unichar(unichar);
42  set_pos(pos);
43  set_total(total);
44  set_natural(natural);
45  }
void set_natural(bool value)
Definition: unicharset.h:96
void set_total(int t)
Definition: unicharset.h:51
void set_unichar(const char *uch)
Definition: unicharset.h:46
void set_pos(int p)
Definition: unicharset.h:50
void CHAR_FRAGMENT::set_natural ( bool  value)
inline

Definition at line 96 of file unicharset.h.

96 { natural = value; }
void CHAR_FRAGMENT::set_pos ( int  p)
inline

Definition at line 50 of file unicharset.h.

50 { this->pos = p; }
void CHAR_FRAGMENT::set_total ( int  t)
inline

Definition at line 51 of file unicharset.h.

51 { this->total = t; }
void CHAR_FRAGMENT::set_unichar ( const char *  uch)
inline

Definition at line 46 of file unicharset.h.

46  {
47  strncpy(this->unichar, uch, UNICHAR_LEN);
48  this->unichar[UNICHAR_LEN] = '\0';
49  }
#define UNICHAR_LEN
Definition: unichar.h:28
STRING CHAR_FRAGMENT::to_string ( const char *  unichar,
int  pos,
int  total,
bool  natural 
)
static

Definition at line 889 of file unicharset.cpp.

890  {
891  if (total == 1) return STRING(unichar);
892  STRING result = "";
893  result += kSeparator;
894  result += unichar;
895  char buffer[kMaxLen];
896  snprintf(buffer, kMaxLen, "%c%d%c%d", kSeparator, pos,
897  natural ? kNaturalFlag : kSeparator, total);
898  result += buffer;
899  return result;
900 }
static const int kMaxLen
Definition: unicharset.h:35
STRING
Definition: strngs.h:40
STRING CHAR_FRAGMENT::to_string ( ) const
inline

Definition at line 61 of file unicharset.h.

61  {
62  return to_string(unichar, pos, total, natural);
63  }
STRING to_string() const
Definition: unicharset.h:61

Member Data Documentation

const int CHAR_FRAGMENT::kMaxChunks = 5
static

Definition at line 37 of file unicharset.h.

const int CHAR_FRAGMENT::kMaxLen = 3 + UNICHAR_LEN + 2
static

Definition at line 35 of file unicharset.h.

const int CHAR_FRAGMENT::kMinLen = 6
static

Definition at line 33 of file unicharset.h.


The documentation for this class was generated from the following files: