Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
reject.h File Reference
#include "params.h"
#include "pageres.h"
#include "notdll.h"

Go to the source code of this file.

Functions

void reject_blanks (WERD_RES *word)
 
void reject_poor_matches (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices)
 
float compute_reject_threshold (BLOB_CHOICE_LIST_CLIST *blob_choices)
 
BOOL8 word_contains_non_1_digit (const char *word, const char *word_lengths)
 
void dont_allow_1Il (WERD_RES *word)
 
void flip_hyphens (WERD_RES *word)
 
void flip_0O (WERD_RES *word)
 
BOOL8 non_0_digit (const char *str, int length)
 

Function Documentation

float compute_reject_threshold ( BLOB_CHOICE_LIST_CLIST *  blob_choices)

Definition at line 370 of file reject.cpp.

371  {
372  inT16 index; //to ratings
373  inT16 blob_count; //no of blobs in word
374  inT16 ok_blob_count = 0; //non TESS rej blobs in word
375  float *ratings; //array of confidences
376  float threshold; //rejection threshold
377  float bestgap; //biggest gap
378  float gapstart; //bottom of gap
379  //super iterator
380  BLOB_CHOICE_LIST_C_IT list_it = blob_choices;
381  BLOB_CHOICE_IT choice_it; //real iterator
382 
383  blob_count = blob_choices->length ();
384  ratings = (float *) alloc_mem (blob_count * sizeof (float));
385  for (list_it.mark_cycle_pt (), index = 0;
386  !list_it.cycled_list (); list_it.forward (), index++) {
387  choice_it.set_to_list (list_it.data ());
388  if (choice_it.length () > 0) {
389  ratings[ok_blob_count] = choice_it.data ()->certainty ();
390  //get in an array
391  // tprintf("Rating[%d]=%c %g %g\n",
392  // index,choice_it.data()->char_class(),
393  // choice_it.data()->rating(),choice_it.data()->certainty());
394  ok_blob_count++;
395  }
396  }
397  ASSERT_HOST (index == blob_count);
398  qsort (ratings, ok_blob_count, sizeof (float), sort_floats);
399  //sort them
400  bestgap = 0;
401  gapstart = ratings[0] - 1; //all reject if none better
402  if (ok_blob_count >= 3) {
403  for (index = 0; index < ok_blob_count - 1; index++) {
404  if (ratings[index + 1] - ratings[index] > bestgap) {
405  bestgap = ratings[index + 1] - ratings[index];
406  //find biggest
407  gapstart = ratings[index];
408  }
409  }
410  }
411  threshold = gapstart + bestgap / 2;
412  // tprintf("First=%g, last=%g, gap=%g, threshold=%g\n",
413  // ratings[0],ratings[index],bestgap,threshold);
414 
415  free_mem(ratings);
416  return threshold;
417 }
int sort_floats(const void *arg1, const void *arg2)
Definition: helpers.h:46
void free_mem(void *oldchunk)
Definition: memry.cpp:56
void * alloc_mem(inT32 count)
Definition: memry.cpp:48
short inT16
Definition: host.h:100
#define ASSERT_HOST(x)
Definition: errcode.h:84
void dont_allow_1Il ( WERD_RES * word)
void flip_0O ( WERD_RES * word)
void flip_hyphens ( WERD_RES * word)
BOOL8 non_0_digit ( const char *  str,
int  length 
)
void reject_blanks ( WERD_RES * word)

Definition at line 290 of file reject.cpp.

290  {
291  inT16 i;
292  inT16 offset;
293 
294  for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0';
295  offset += word->best_choice->unichar_lengths()[i], i += 1) {
296  if (word->best_choice->unichar_string()[offset] == ' ')
297  //rej unrecognised blobs
298  word->reject_map[i].setrej_tess_failure ();
299  }
300 }
const STRING & unichar_string() const
Definition: ratngs.h:395
REJMAP reject_map
Definition: pageres.h:408
short inT16
Definition: host.h:100
const STRING & unichar_lengths() const
Definition: ratngs.h:402
WERD_CHOICE * best_choice
Definition: pageres.h:359
void reject_poor_matches ( WERD_RES * word,
BLOB_CHOICE_LIST_CLIST *  blob_choices 
)

Definition at line 319 of file reject.cpp.

321  {
322  float threshold;
323  inT16 i = 0;
324  inT16 offset = 0;
325  //super iterator
326  BLOB_CHOICE_LIST_C_IT list_it = blob_choices;
327  BLOB_CHOICE_IT choice_it; //real iterator
328 
329  #ifndef SECURE_NAMES
330  if (strlen(word->best_choice->unichar_lengths().string()) !=
331  list_it.length()) {
332  tprintf
333  ("ASSERT FAIL string:\"%s\"; strlen=%d; choices len=%d; blob len=%d\n",
334  word->best_choice->unichar_string().string(),
335  strlen (word->best_choice->unichar_lengths().string()), list_it.length(),
336  word->box_word->length());
337  }
338  #endif
339  ASSERT_HOST (strlen (word->best_choice->unichar_lengths().string ()) ==
340  list_it.length ());
341  ASSERT_HOST(word->box_word->length() == list_it.length());
342  threshold = compute_reject_threshold (blob_choices);
343 
344  for (list_it.mark_cycle_pt ();
345  !list_it.cycled_list (); list_it.forward (), i++,
346  offset += word->best_choice->unichar_lengths()[i]) {
347  /* NB - only compares the threshold against the TOP choice char in the
348  choices list for a blob !! - the selected one may be below the threshold
349  */
350  choice_it.set_to_list (list_it.data ());
351  if ((word->best_choice->unichar_string()[offset] == ' ') ||
352  (choice_it.length () == 0))
353  //rej unrecognised blobs
354  word->reject_map[i].setrej_tess_failure ();
355  else if (choice_it.data ()->certainty () < threshold)
356  //rej poor score blob
357  word->reject_map[i].setrej_poor_match ();
358  }
359 }
const STRING & unichar_string() const
Definition: ratngs.h:395
const int length() const
Definition: boxword.h:99
float compute_reject_threshold(BLOB_CHOICE_LIST_CLIST *blob_choices)
Definition: reject.cpp:370
REJMAP reject_map
Definition: pageres.h:408
const char * string() const
Definition: strngs.cpp:156
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
short inT16
Definition: host.h:100
const STRING & unichar_lengths() const
Definition: ratngs.h:402
tesseract::BoxWord * box_word
Definition: pageres.h:387
#define ASSERT_HOST(x)
Definition: errcode.h:84
WERD_CHOICE * best_choice
Definition: pageres.h:359
BOOL8 word_contains_non_1_digit ( const char *  word,
const char *  word_lengths 
)