Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::UnicodeSpanSkipper Class Reference

Public Member Functions

 UnicodeSpanSkipper (const UNICHARSET *unicharset, const WERD_CHOICE *word)
 
int SkipPunc (int pos)
 
int SkipDigits (int pos)
 
int SkipRomans (int pos)
 
int SkipAlpha (int pos)
 

Detailed Description

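Helper that advances a position within a WERD_CHOICE past a run of characters of one kind (punctuation, digits or digit-like characters, Roman-numeral letters, or alphabetic characters), stopping at the first non-matching character or at the end of the word.

Definition at line 294 of file paragraphs.cpp.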

Constructor & Destructor Documentation

tesseract::UnicodeSpanSkipper::UnicodeSpanSkipper ( const UNICHARSET *unicharset,
const WERD_CHOICE *word 
)
inline

Definition at line 296 of file paragraphs.cpp.

297  : u_(unicharset), word_(word) { wordlen_ = word->length(); }
int length() const
Definition: ratngs.h:214

Member Function Documentation

int tesseract::UnicodeSpanSkipper::SkipAlpha ( int  pos)

Definition at line 335 of file paragraphs.cpp.

335  {
336  while (pos < wordlen_ && u_->get_isalpha(word_->unichar_id(pos))) pos++;
337  return pos;
338 }
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:220
int tesseract::UnicodeSpanSkipper::SkipDigits ( int  pos)

Definition at line 319 of file paragraphs.cpp.

319  {
320  while (pos < wordlen_ && (u_->get_isdigit(word_->unichar_id(pos)) ||
321  IsDigitLike(UnicodeFor(u_, word_, pos)))) pos++;
322  return pos;
323 }
int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos)
Definition: paragraphs.cpp:286
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:413
bool IsDigitLike(int ch)
Definition: paragraphs.cpp:209
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:220
int tesseract::UnicodeSpanSkipper::SkipPunc ( int  pos)

Definition at line 314 of file paragraphs.cpp.

314  {
315  while (pos < wordlen_ && u_->get_ispunctuation(word_->unichar_id(pos))) pos++;
316  return pos;
317 }
const UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:220
int tesseract::UnicodeSpanSkipper::SkipRomans ( int  pos)

Definition at line 325 of file paragraphs.cpp.

325  {
326  const char *kRomans = "ivxlmdIVXLMD";
327  while (pos < wordlen_) {
328  int ch = UnicodeFor(u_, word_, pos);
329  if (ch >= 0xF0 || strchr(kRomans, ch) == 0) break;
330  pos++;
331  }
332  return pos;
333 }
int UnicodeFor(const UNICHARSET *u, const WERD_CHOICE *werd, int pos)
Definition: paragraphs.cpp:286

The documentation for this class was generated from the following file: