Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ocropus add-ons

Functions

TESS_LOCAL void tesseract::TessBaseAPI::AdaptToCharacter (const char *unichar_repr, int length, float baseline, float xheight, float descender, float ascender)
 
TESS_LOCAL PAGE_RES * tesseract::TessBaseAPI::RecognitionPass1 (BLOCK_LIST *block_list)
 
TESS_LOCAL PAGE_RES * tesseract::TessBaseAPI::RecognitionPass2 (BLOCK_LIST *block_list, PAGE_RES *pass1_result)
 
TESS_LOCAL void tesseract::TessBaseAPI::DetectParagraphs (bool after_text_recognition)
 
static TESS_LOCAL int tesseract::TessBaseAPI::TesseractExtractResult (char **text, int **lengths, float **costs, int **x0, int **y0, int **x1, int **y1, PAGE_RES *page_res)
 
TESS_LOCAL const PAGE_RES * tesseract::TessBaseAPI::GetPageRes () const
 

Detailed Description

Function Documentation

void tesseract::TessBaseAPI::AdaptToCharacter ( const char *  unichar_repr,
int  length,
float  baseline,
float  xheight,
float  descender,
float  ascender 
)
protected

Adapt to recognize the current image as the given character. The image must be preloaded and be just an image of a single character.

Adapt to recognize the current image as the given character. The image must be preloaded into pix_binary_ and be just an image of a single character.

Definition at line 1934 of file baseapi.cpp.

1939  {
1940  UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length);
1941  TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender,
1942  tesseract_->classify_bln_numeric_mode,
1943  tesseract_->pix_binary());
1944  float threshold;
1945  UNICHAR_ID best_class = 0;
1946  float best_rating = -100;
1947 
1948 
1949  // Classify to get a raw choice.
1950  BLOB_CHOICE_LIST choices;
1951  DENORM denorm;
1952  tesseract_->AdaptiveClassifier(blob, denorm, &choices, NULL);
1953  BLOB_CHOICE_IT choice_it;
1954  choice_it.set_to_list(&choices);
1955  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
1956  choice_it.forward()) {
1957  if (choice_it.data()->rating() > best_rating) {
1958  best_rating = choice_it.data()->rating();
1959  best_class = choice_it.data()->unichar_id();
1960  }
1961  }
1962 
1963  threshold = tesseract_->matcher_good_threshold;
1964 
1965  if (blob->outlines)
1966  tesseract_->AdaptToChar(blob, denorm, id, kUnknownFontinfoId, threshold);
1967  delete blob;
1968 }
int UNICHAR_ID
Definition: unichar.h:31
void AdaptiveClassifier(TBLOB *Blob, const DENORM &denorm, BLOB_CHOICE_LIST *Choices, CLASS_PRUNER_RESULTS cp_results)
Definition: adaptmatch.cpp:178
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:176
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:757
#define NULL
Definition: host.h:144
TESSLINE * outlines
Definition: blobs.h:227
double matcher_good_threshold
Definition: classify.h:381
TBLOB * make_tesseract_blob(float baseline, float xheight, float descender, float ascender, bool numeric_mode, Pix *pix)
Definition: baseapi.cpp:1916
bool classify_bln_numeric_mode
Definition: classify.h:455
Definition: blobs.h:174
void AdaptToChar(TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold)
Definition: adaptmatch.cpp:933
UNICHARSET unicharset
Definition: ccutil.h:72
Pix * pix_binary() const
void tesseract::TessBaseAPI::DetectParagraphs ( bool  after_text_recognition)
protected

Definition at line 1987 of file baseapi.cpp.

1987  {
1988  int debug_level = 0;
1989  GetIntVariable("paragraph_debug_level", &debug_level);
1990  if (paragraph_models_ == NULL)
1991  paragraph_models_ = new GenericVector<ParagraphModel*>;
1992  MutableIterator *result_it = GetMutableIterator();
1993  do { // Detect paragraphs for this block
1994  GenericVector<ParagraphModel *> models;
1995  ::tesseract::DetectParagraphs(debug_level, after_text_recognition,
1996  result_it, &models);
1997  *paragraph_models_ += models;
1998  } while (result_it->Next(RIL_BLOCK));
1999  delete result_it;
2000 }
#define NULL
Definition: host.h:144
bool GetIntVariable(const char *name, int *value) const
Definition: baseapi.cpp:165
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:765
MutableIterator * GetMutableIterator()
Definition: baseapi.cpp:1007
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
TESS_LOCAL const PAGE_RES* tesseract::TessBaseAPI::GetPageRes ( ) const
inline protected

Definition at line 755 of file baseapi.h.

755  {
756  return page_res_;
757  };
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:767
PAGE_RES * tesseract::TessBaseAPI::RecognitionPass1 ( BLOCK_LIST *  block_list)
protected

Recognize text doing one pass only, using settings for a given pass.

Definition at line 1971 of file baseapi.cpp.

1971  {
1972  PAGE_RES *page_res = new PAGE_RES(block_list,
1973  &(tesseract_->prev_word_best_choice_));
1974  tesseract_->recog_all_words(page_res, NULL, NULL, NULL, 1);
1975  return page_res;
1976 }
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:178
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:757
#define NULL
Definition: host.h:144
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:506
PAGE_RES * tesseract::TessBaseAPI::RecognitionPass2 ( BLOCK_LIST *  block_list,
PAGE_RES *  pass1_result 
)
protected

Definition at line 1978 of file baseapi.cpp.

1979  {
1980  if (!pass1_result)
1981  pass1_result = new PAGE_RES(block_list,
1982  &(tesseract_->prev_word_best_choice_));
1983  tesseract_->recog_all_words(pass1_result, NULL, NULL, NULL, 2);
1984  return pass1_result;
1985 }
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
Definition: control.cpp:178
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:757
#define NULL
Definition: host.h:144
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:506
int tesseract::TessBaseAPI::TesseractExtractResult ( char **  text,
int **  lengths,
float **  costs,
int **  x0,
int **  y0,
int **  x1,
int **  y1,
PAGE_RES *  page_res 
)
static protected

Extract the OCR results, costs (penalty points for uncertainty), and the bounding boxes of the characters.

Definition at line 2073 of file baseapi.cpp.

2080  {
2081  TESS_CHAR_LIST tess_chars;
2082  TESS_CHAR_IT tess_chars_it(&tess_chars);
2083  extract_result(&tess_chars_it, page_res);
2084  tess_chars_it.move_to_first();
2085  int n = tess_chars.length();
2086  int text_len = 0;
2087  *lengths = new int[n];
2088  *costs = new float[n];
2089  *x0 = new int[n];
2090  *y0 = new int[n];
2091  *x1 = new int[n];
2092  *y1 = new int[n];
2093  int i = 0;
2094  for (tess_chars_it.mark_cycle_pt();
2095  !tess_chars_it.cycled_list();
2096  tess_chars_it.forward(), i++) {
2097  TESS_CHAR *tc = tess_chars_it.data();
2098  text_len += (*lengths)[i] = tc->length;
2099  (*costs)[i] = tc->cost;
2100  (*x0)[i] = tc->box.left();
2101  (*y0)[i] = tc->box.bottom();
2102  (*x1)[i] = tc->box.right();
2103  (*y1)[i] = tc->box.top();
2104  }
2105  char *p = *text = new char[text_len];
2106 
2107  tess_chars_it.move_to_first();
2108  for (tess_chars_it.mark_cycle_pt();
2109  !tess_chars_it.cycled_list();
2110  tess_chars_it.forward()) {
2111  TESS_CHAR *tc = tess_chars_it.data();
2112  strncpy(p, tc->unicode_repr, tc->length);
2113  p += tc->length;
2114  }
2115  return n;
2116 }