Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::Classify Class Reference

#include <classify.h>

Inheritance diagram for tesseract::Classify:
tesseract::CCStruct tesseract::CUtil tesseract::CCUtil tesseract::Wordrec tesseract::Tesseract

Public Member Functions

 Classify ()
 
virtual ~Classify ()
 
Dict & getDict ()
 
const ShapeTable * shape_table () const
 
ADAPT_TEMPLATES NewAdaptedTemplates (bool InitFromUnicharset)
 
int GetFontinfoId (ADAPT_CLASS Class, uinT8 ConfigId)
 
int PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, CP_RESULT_STRUCT *results)
 
void ReadNewCutoffs (FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs)
 
void PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
void WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
ADAPT_TEMPLATES ReadAdaptedTemplates (FILE *File)
 
FLOAT32 ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch)
 
void FreeNormProtos ()
 
NORM_PROTOS * ReadNormProtos (FILE *File, inT64 end_offset)
 
void ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class)
 
INT_TEMPLATES CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset)
 
void LearnWord (const char *filename, const char *rejmap, WERD_RES *word)
 
void LearnPieces (const char *filename, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
 
void InitAdaptiveClassifier (bool load_pre_trained_templates)
 
void InitAdaptedClass (TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
 
void AdaptToPunc (TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold)
 
void AmbigClassifier (TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, ADAPT_CLASS *Classes, UNICHAR_ID *Ambiguities, ADAPT_RESULTS *Results)
 
void MasterMatcher (INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int num_classes, const TBOX &blob_box, CLASS_PRUNER_RESULTS results, ADAPT_RESULTS *final_results)
 
void ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, const uinT8 *cn_factors, INT_RESULT_STRUCT &int_result, ADAPT_RESULTS *final_results)
 
double ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, const uinT8 *cn_factors)
 
void ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
 
void AddNewResult (ADAPT_RESULTS *results, CLASS_ID class_id, int shape_id, FLOAT32 rating, bool adapted, int config, int fontinfo_id, int fontinfo_id2)
 
int GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
 
void DebugAdaptiveClassifier (TBLOB *Blob, const DENORM &denorm, ADAPT_RESULTS *Results)
 
void GetAdaptThresholds (TWERD *Word, const DENORM &denorm, const WERD_CHOICE &BestChoice, const WERD_CHOICE &BestRawChoice, FLOAT32 Thresholds[])
 
PROTO_ID MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
 
int MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
 
void MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, const DENORM &denorm, TBLOB *Blob)
 
void PrintAdaptiveMatchResults (FILE *File, ADAPT_RESULTS *Results)
 
void RemoveExtraPuncs (ADAPT_RESULTS *Results)
 
void RemoveBadMatches (ADAPT_RESULTS *Results)
 
void SetAdaptiveThreshold (FLOAT32 Threshold)
 
void ShowBestMatchFor (TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int shape_id, BOOL8 AdaptiveOn, BOOL8 PreTrainedOn, ADAPT_RESULTS *Results)
 
STRING ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
 
int ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const
 
int ShapeIDToClassID (int shape_id) const
 
UNICHAR_ID * BaselineClassifier (TBLOB *Blob, const DENORM &denorm, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
 
int CharNormClassifier (TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, ADAPT_RESULTS *Results)
 
int CharNormTrainingSample (bool pruner_only, const TrainingSample &sample, GenericVector< ShapeRating > *results)
 
UNICHAR_ID * GetAmbiguities (TBLOB *Blob, const DENORM &denorm, CLASS_ID CorrectClass)
 
void DoAdaptiveMatch (TBLOB *Blob, const DENORM &denorm, ADAPT_RESULTS *Results)
 
void AdaptToChar (TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold)
 
void DisplayAdaptedChar (TBLOB *blob, const DENORM &denorm, INT_CLASS_STRUCT *int_class)
 
int AdaptableWord (TWERD *Word, const WERD_CHOICE &BestChoiceWord, const WERD_CHOICE &RawChoiceWord)
 
void EndAdaptiveClassifier ()
 
void PrintAdaptiveStatistics (FILE *File)
 
void SettupPass1 ()
 
void SettupPass2 ()
 
void AdaptiveClassifier (TBLOB *Blob, const DENORM &denorm, BLOB_CHOICE_LIST *Choices, CLASS_PRUNER_RESULTS cp_results)
 
void ClassifyAsNoise (ADAPT_RESULTS *Results)
 
void ResetAdaptiveClassifierInternal ()
 
int GetBaselineFeatures (TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *CharNormArray, inT32 *BlobLength)
 
int GetCharNormFeatures (TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *PrunerNormArray, uinT8 *CharNormArray, inT32 *BlobLength, inT32 *FeatureOutlineIndex)
 
void ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
 
bool TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config)
 
void UpdateAmbigsGroup (CLASS_ID class_id, const DENORM &denorm, TBLOB *Blob)
 
void ResetFeaturesHaveBeenExtracted ()
 
bool AdaptiveClassifierIsFull ()
 
bool LooksLikeGarbage (const DENORM &denorm, TBLOB *blob)
 
void RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
 
void ClearCharNormArray (uinT8 *char_norm_array)
 
void ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
 
void ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
 
INT_TEMPLATES ReadIntTemplates (FILE *File)
 
void WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
 
CLASS_ID GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
 
void ShowMatchDisplay ()
 
UnicityTable< FontInfo > & get_fontinfo_table ()
 
UnicityTable< FontSet > & get_fontset_table ()
 
void NormalizeOutlines (LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
 
FEATURE_SET ExtractOutlineFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractPicoFeatures (TBLOB *Blob)
 
ReadClassFile

Read in the training data from a file. All of the classes are read in. The results are stored in the global variable, 'TrainingData'.

void ReadClassFile ()
 
- Public Member Functions inherited from tesseract::CCStruct
 CCStruct ()
 
 ~CCStruct ()
 
- Public Member Functions inherited from tesseract::CUtil
 CUtil ()
 
 ~CUtil ()
 
void read_variables (const char *filename, bool global_only)
 
- Public Member Functions inherited from tesseract::CCUtil
 CCUtil ()
 
virtual ~CCUtil ()
 
void main_setup (const char *argv0, const char *basename)
 
ParamsVectors * params ()
 

Public Attributes

bool prioritize_division = FALSE
 
int tessedit_single_match = FALSE
 
bool classify_enable_learning = true
 
int classify_debug_level = 0
 
int classify_norm_method = character
 
double classify_char_norm_range = 0.2
 
double classify_min_norm_scale_x = 0.0
 
double classify_max_norm_scale_x = 0.325
 
double classify_min_norm_scale_y = 0.0
 
double classify_max_norm_scale_y = 0.325
 
bool tess_cn_matching = 0
 
bool tess_bn_matching = 0
 
bool classify_enable_adaptive_matcher = 1
 
bool classify_use_pre_adapted_templates = 0
 
bool classify_save_adapted_templates = 0
 
bool classify_enable_adaptive_debugger = 0
 
int matcher_debug_level = 0
 
int matcher_debug_flags = 0
 
int classify_learning_debug_level = 0
 
double matcher_good_threshold = 0.125
 
double matcher_great_threshold = 0.0
 
double matcher_perfect_threshold = 0.02
 
double matcher_bad_match_pad = 0.15
 
double matcher_rating_margin = 0.1
 
double matcher_avg_noise_size = 12.0
 
int matcher_permanent_classes_min = 1
 
int matcher_min_examples_for_prototyping = 3
 
int matcher_sufficient_examples_for_prototyping = 5
 
double matcher_clustering_max_angle_delta = 0.015
 
double classify_misfit_junk_penalty = 0.0
 
double rating_scale = 1.5
 
double certainty_scale = 20.0
 
double tessedit_class_miss_scale = 0.00390625
 
int classify_adapt_proto_threshold = 230
 
int classify_adapt_feature_threshold = 230
 
bool disable_character_fragments = TRUE
 
double classify_character_fragments_garbage_certainty_threshold = -3.0
 
bool classify_debug_character_fragments = FALSE
 
bool matcher_debug_separate_windows = FALSE
 
char * classify_learn_debug_str = ""
 
int classify_class_pruner_threshold = 229
 
int classify_class_pruner_multiplier = 30
 
int classify_cp_cutoff_strength = 7
 
int classify_integer_matcher_multiplier = 14
 
INT_TEMPLATES PreTrainedTemplates
 
ADAPT_TEMPLATES AdaptedTemplates
 
BIT_VECTOR AllProtosOn
 
BIT_VECTOR PrunedProtos
 
BIT_VECTOR AllConfigsOn
 
BIT_VECTOR AllProtosOff
 
BIT_VECTOR AllConfigsOff
 
BIT_VECTOR TempProtoMask
 
bool EnableLearning
 
NORM_PROTOS * NormProtos
 
UnicityTable< FontInfo > fontinfo_table_
 
UnicityTable< FontSet > fontset_table_
 
int il1_adaption_test = 0
 
bool classify_bln_numeric_mode = 0
 
- Public Attributes inherited from tesseract::CCUtil
STRING datadir
 
STRING imagebasename
 
STRING lang
 
STRING language_data_path_prefix
 
TessdataManager tessdata_manager
 
UNICHARSET unicharset
 
UnicharAmbigs unichar_ambigs
 
STRING imagefile
 
STRING directory
 
char * m_data_sub_dir = "tessdata/"
 
int ambigs_debug_level = 0
 
bool use_definite_ambigs_for_classifier = 0
 
bool use_ambigs_for_adaption = 0
 

Protected Attributes

IntegerMatcher im_
 
FEATURE_DEFS_STRUCT feature_defs_
 
ShapeTable * shape_table_
 
- Protected Attributes inherited from tesseract::CCStruct
Image image_
 

Additional Inherited Members

- Static Public Attributes inherited from tesseract::CCStruct
static const double kDescenderFraction = 0.25
 
static const double kXHeightFraction = 0.5
 
static const double kAscenderFraction = 0.25
 
static const double kXHeightCapRatio
 

Detailed Description

Definition at line 58 of file classify.h.

Constructor & Destructor Documentation

tesseract::Classify::Classify ( )

Definition at line 34 of file classify.cpp.

36  "Prioritize blob division over chopping", this->params()),
38  "Top choice only from CP", this->params()),
40  "Enable adaptive classifier", this->params()),
41  INT_MEMBER(classify_debug_level, 0, "Classify debug level",
42  this->params()),
43  INT_MEMBER(classify_norm_method, character, "Normalization Method ...",
44  this->params()),
46  "Character Normalization Range ...", this->params()),
47  double_MEMBER(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...",
48  this->params()), /* PREV DEFAULT 0.1 */
49  double_MEMBER(classify_max_norm_scale_x, 0.325, "Max char x-norm scale ...",
50  this->params()), /* PREV DEFAULT 0.3 */
51  double_MEMBER(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...",
52  this->params()), /* PREV DEFAULT 0.1 */
53  double_MEMBER(classify_max_norm_scale_y, 0.325, "Max char y-norm scale ...",
54  this->params()), /* PREV DEFAULT 0.3 */
55  BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching",
56  this->params()),
57  BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching",
58  this->params()),
60  "Enable adaptive classifier",
61  this->params()),
63  "Use pre-adapted classifier templates", this->params()),
65  "Save adapted templates to a file", this->params()),
66  BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger",
67  this->params()),
68  INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()),
69  INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()),
70  INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ",
71  this->params()),
72  double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)",
73  this->params()),
74  double_MEMBER(matcher_great_threshold, 0.0, "Great Match (0-1)",
75  this->params()),
76  double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)",
77  this->params()),
78  double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)",
79  this->params()),
80  double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)",
81  this->params()),
82  double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. noise blob length",
83  this->params()),
84  INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes",
85  this->params()),
87  "Reliable Config Threshold", this->params()),
89  "Enable adaption even if the ambiguities have not been seen",
90  this->params()),
92  "Maximum angle delta for prototype clustering",
93  this->params()),
95  "Penalty to apply when a non-alnum is vertically out of "
96  "its expected textline position",
97  this->params()),
98  double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()),
99  double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor",
100  this->params()),
102  "Scale factor for features not used", this->params()),
104  "Threshold for good protos during adaptive 0-255",
105  this->params()),
107  "Threshold for good features during adaptive 0-255",
108  this->params()),
110  "Do not include character fragments in the"
111  " results of the classifier", this->params()),
113  -3.0, "Exclude fragments that do not look like whole"
114  " characters from training and adaption", this->params()),
116  "Bring up graphical debugging windows for fragments training",
117  this->params()),
119  "Use two different windows for debugging the matching: "
120  "One for the protos and one for the features.", this->params()),
121  STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning",
122  this->params()),
124  "Class Pruner Threshold 0-255", this->params()),
126  "Class Pruner Multiplier 0-255: ", this->params()),
128  "Class Pruner CutoffStrength: ", this->params()),
130  "Integer Matcher Multiplier 0-255: ", this->params()),
131  EnableLearning(true),
132  INT_MEMBER(il1_adaption_test, 0, "Dont adapt to i/I at beginning of word",
133  this->params()),
135  "Assume the input is numbers [0-9].", this->params()),
137  dict_(&image_) {
138  fontinfo_table_.set_compare_callback(
140  fontinfo_table_.set_clear_callback(
142  fontset_table_.set_compare_callback(
144  fontset_table_.set_clear_callback(
148  AllProtosOn = NULL;
149  PrunedProtos = NULL;
150  AllConfigsOn = NULL;
151  AllProtosOff = NULL;
154  NormProtos = NULL;
155 
156  AdaptiveMatcherCalls = 0;
157  BaselineClassifierCalls = 0;
158  CharNormClassifierCalls = 0;
159  AmbigClassifierCalls = 0;
160  NumWordsAdaptedTo = 0;
161  NumCharsAdaptedTo = 0;
162  NumBaselineClassesTried = 0;
163  NumCharNormClassesTried = 0;
164  NumAmbigClassesTried = 0;
165  NumClassesOutput = 0;
166  NumAdaptationsFailed = 0;
167 
168  FeaturesHaveBeenExtracted = false;
169  FeaturesOK = true;
170  learn_debug_win_ = NULL;
171  learn_fragmented_word_debug_win_ = NULL;
172  learn_fragments_debug_win_ = NULL;
173 
174  CharNormCutoffs = new uinT16[MAX_NUM_CLASSES];
175  BaselineCutoffs = new uinT16[MAX_NUM_CLASSES];
176 }
int classify_class_pruner_threshold
Definition: classify.h:420
BIT_VECTOR AllProtosOn
Definition: classify.h:433
double matcher_rating_margin
Definition: classify.h:385
int classify_adapt_feature_threshold
Definition: classify.h:404
UnicityTable< FontSet > fontset_table_
Definition: classify.h:451
BIT_VECTOR AllProtosOff
Definition: classify.h:436
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
Definition: fontinfo.cpp:25
int classify_integer_matcher_multiplier
Definition: classify.h:426
double classify_max_norm_scale_y
Definition: classify.h:367
bool CompareFontSet(const FontSet &fs1, const FontSet &fs2)
Definition: fontinfo.cpp:33
char * classify_learn_debug_str
Definition: classify.h:416
bool classify_enable_adaptive_debugger
Definition: classify.h:377
int classify_adapt_proto_threshold
Definition: classify.h:402
#define NULL
Definition: host.h:144
double matcher_clustering_max_angle_delta
Definition: classify.h:393
bool classify_enable_adaptive_matcher
Definition: classify.h:372
#define FALSE
Definition: capi.h:28
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:272
ShapeTable * shape_table_
Definition: classify.h:464
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:430
int matcher_permanent_classes_min
Definition: classify.h:387
double classify_char_norm_range
Definition: classify.h:363
double matcher_avg_noise_size
Definition: classify.h:386
double matcher_good_threshold
Definition: classify.h:381
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:426
void FontSetDeleteCallback(FontSet fs)
Definition: fontinfo.cpp:51
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:410
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
bool disable_character_fragments
Definition: classify.h:407
void FontInfoDeleteCallback(FontInfo f)
Definition: fontinfo.cpp:44
BIT_VECTOR AllConfigsOff
Definition: classify.h:437
bool classify_bln_numeric_mode
Definition: classify.h:455
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:443
int matcher_min_examples_for_prototyping
Definition: classify.h:389
bool classify_enable_learning
Definition: classify.h:356
double classify_misfit_junk_penalty
Definition: classify.h:396
unsigned short uinT16
Definition: host.h:101
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:391
int classify_learning_debug_level
Definition: classify.h:380
#define STRING_MEMBER(name, val, comment, vec)
Definition: params.h:278
bool matcher_debug_separate_windows
Definition: classify.h:415
double certainty_scale
Definition: classify.h:398
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool classify_use_pre_adapted_templates
Definition: classify.h:374
bool classify_save_adapted_templates
Definition: classify.h:376
int classify_cp_cutoff_strength
Definition: classify.h:424
double classify_min_norm_scale_y
Definition: classify.h:366
double matcher_perfect_threshold
Definition: classify.h:383
BIT_VECTOR TempProtoMask
Definition: classify.h:438
double classify_min_norm_scale_x
Definition: classify.h:364
double tessedit_class_miss_scale
Definition: classify.h:400
int classify_class_pruner_multiplier
Definition: classify.h:422
bool prioritize_division
Definition: classify.h:354
double matcher_bad_match_pad
Definition: classify.h:384
BIT_VECTOR PrunedProtos
Definition: classify.h:434
NORM_PROTOS * NormProtos
Definition: classify.h:441
ParamsVectors * params()
Definition: ccutil.h:65
double matcher_great_threshold
Definition: classify.h:382
BIT_VECTOR AllConfigsOn
Definition: classify.h:435
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:275
double classify_max_norm_scale_x
Definition: classify.h:365
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:281
bool classify_debug_character_fragments
Definition: classify.h:412
#define TRUE
Definition: capi.h:27
tesseract::Classify::~Classify ( )
virtual

Definition at line 178 of file classify.cpp.

178  {
180  delete learn_debug_win_;
181  delete learn_fragmented_word_debug_win_;
182  delete learn_fragments_debug_win_;
183  delete[] CharNormCutoffs;
184  delete[] BaselineCutoffs;
185 }
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:476

Member Function Documentation

int tesseract::Classify::AdaptableWord ( TWERD *  Word,
const WERD_CHOICE &  BestChoiceWord,
const WERD_CHOICE &  RawChoiceWord 
)

Return TRUE if the specified word is acceptable for adaptation.

Globals: none

Parameters
Word: current word
BestChoiceWord: best overall choice for word with context
RawChoiceWord: best choice for word without context
Returns
TRUE or FALSE
Note
Exceptions: none
History: Thu May 30 14:25:06 1991, DSJ, Created.

Definition at line 894 of file adaptmatch.cpp.

896  {
897  int BestChoiceLength = BestChoiceWord.length();
898  float adaptable_score =
900  return // rules that apply in general - simplest to compute first
901  BestChoiceLength > 0 &&
902  BestChoiceLength == Word->NumBlobs() &&
903  BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE &&
904  // This basically ensures that the word is at least a dictionary match
905  // (freq word, user word, system dawg word, etc).
906  // Since all the other adjustments will make adjust factor higher
907  // than adaptable_score=1.1+0.05=1.15
908  // Since these are other flags that ensure that the word is dict word,
909  // this check could be at times redundant.
910  getDict().CurrentBestChoiceAdjustFactor() <= adaptable_score &&
911  // Make sure that alternative choices are not dictionary words.
912  getDict().AlternativeChoicesWorseThan(adaptable_score) &&
913  getDict().CurrentBestChoiceIs(BestChoiceWord);
914 }
int length() const
Definition: ratngs.h:214
int NumBlobs() const
Definition: blobs.h:263
#define MAX_ADAPTABLE_WERD_SIZE
Definition: adaptmatch.cpp:73
Dict & getDict()
Definition: classify.h:62
FLOAT32 CurrentBestChoiceAdjustFactor()
Returns the adjustment factor for the best choice for the current word.
Definition: stopper.cpp:316
#define ADAPTABLE_WERD_ADJUSTMENT
Definition: adaptmatch.cpp:75
bool AlternativeChoicesWorseThan(FLOAT32 Threshold)
Definition: stopper.cpp:299
bool CurrentBestChoiceIs(const WERD_CHOICE &WordChoice)
Returns true if WordChoice is the same as the current best choice.
Definition: stopper.cpp:311
double segment_penalty_dict_case_ok
Definition: dict.h:818
void tesseract::Classify::AdaptiveClassifier ( TBLOB *  Blob,
const DENORM &  denorm,
BLOB_CHOICE_LIST *  Choices,
CLASS_PRUNER_RESULTS  CPResults 
)

This routine calls the adaptive matcher which returns (in an array) the class id of each class matched.

It also returns the number of classes matched. For each class matched it places the best rating found for that class into the Ratings array.

Bad matches are then removed so that they don't need to be sorted. The remaining good matches are then sorted and converted to choices.

This routine also performs some simple speckle filtering.

Note
Exceptions: none
History: Mon Mar 11 10:00:58 1991, DSJ, Created.
Parameters
Blob: blob to be classified
denorm: normalization/denormalization parameters
[out] Choices: List of choices found by adaptive matcher.
[out] CPResults: Array of CPResultStruct of size MAX_NUM_CLASSES is filled on return with the choices found by the class pruner and the ratings therefrom. Also contains the detailed results of the integer matcher.

Definition at line 178 of file adaptmatch.cpp.

181  {
182  assert(Choices != NULL);
183  ADAPT_RESULTS *Results = new ADAPT_RESULTS();
184  Results->Initialize();
185 
186  if (AdaptedTemplates == NULL)
188  DoAdaptiveMatch(Blob, denorm, Results);
189  if (CPResults != NULL)
190  memcpy(CPResults, Results->CPResults,
191  sizeof(CPResults[0]) * Results->NumMatches);
192 
193  RemoveBadMatches(Results);
194  qsort((void *)Results->match, Results->NumMatches,
195  sizeof(ScoredClass), CompareByRating);
196  RemoveExtraPuncs(Results);
197  ConvertMatchesToChoices(denorm, Blob->bounding_box(), Results, Choices);
198 
199  if (matcher_debug_level >= 1) {
200  cprintf ("AD Matches = ");
201  PrintAdaptiveMatchResults(stdout, Results);
202  }
203 
204  if (LargeSpeckle(Blob))
205  AddLargeSpeckleTo(Choices);
206 
207 #ifndef GRAPHICS_DISABLED
209  DebugAdaptiveClassifier(Blob, denorm, Results);
210 #endif
211 
212  NumClassesOutput += Choices->length();
213  if (Choices->length() == 0) {
215  tprintf ("Empty classification!\n"); // Should never normally happen.
216  Choices = new BLOB_CHOICE_LIST();
217  BLOB_CHOICE_IT temp_it;
218  temp_it.set_to_list(Choices);
219  temp_it.add_to_end(
220  new BLOB_CHOICE(0, 50.0f, -20.0f, -1, -1, NULL, 0, 0, false));
221  }
222 
223  delete Results;
224 } /* AdaptiveClassifier */
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:167
int CompareByRating(const void *arg1, const void *arg2)
void DebugAdaptiveClassifier(TBLOB *Blob, const DENORM &denorm, ADAPT_RESULTS *Results)
bool classify_enable_adaptive_debugger
Definition: classify.h:377
#define NULL
Definition: host.h:144
#define f(xc, yc)
Definition: imgscale.cpp:39
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:430
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
bool classify_bln_numeric_mode
Definition: classify.h:455
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
TBOX bounding_box() const
Definition: blobs.cpp:384
void RemoveBadMatches(ADAPT_RESULTS *Results)
void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results)
void AddLargeSpeckleTo(BLOB_CHOICE_LIST *Choices)
Definition: speckle.cpp:62
ScoredClass match[MAX_NUM_CLASSES]
Definition: adaptmatch.cpp:95
CLASS_PRUNER_RESULTS CPResults
Definition: adaptmatch.cpp:97
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
void Initialize()
Definition: adaptmatch.cpp:101
void DoAdaptiveMatch(TBLOB *Blob, const DENORM &denorm, ADAPT_RESULTS *Results)
BOOL8 LargeSpeckle(TBLOB *blob)
Definition: speckle.cpp:103
bool tesseract::Classify::AdaptiveClassifierIsFull ( )
inline

Definition at line 319 of file classify.h.

319 { return NumAdaptationsFailed > 0; }
void tesseract::Classify::AdaptToChar ( TBLOB *  Blob,
const DENORM &  denorm,
CLASS_ID  ClassId,
int  FontinfoId,
FLOAT32  Threshold 
)
Parameters
Blob: blob to add to templates for ClassId
denorm: normalization/denormalization parameters
ClassId: class to add blob to
FontinfoId: font information from pre-trained templates
Threshold: minimum match rating to existing template

Globals:

  • AdaptedTemplates current set of adapted templates
  • AllProtosOn dummy mask to match against all protos
  • AllConfigsOn dummy mask to match against all configs
Returns
none
Note
Exceptions: none
History: Thu Mar 14 09:36:03 1991, DSJ, Created.

Definition at line 933 of file adaptmatch.cpp.

937  {
938  int NumFeatures;
939  INT_FEATURE_ARRAY IntFeatures;
940  INT_RESULT_STRUCT IntResult;
941  INT_CLASS IClass;
942  ADAPT_CLASS Class;
943  TEMP_CONFIG TempConfig;
944  FEATURE_SET FloatFeatures;
945  int NewTempConfigId;
946 
948  NumCharsAdaptedTo++;
949  if (!LegalClassId (ClassId))
950  return;
951 
952  Class = AdaptedTemplates->Class[ClassId];
953  assert(Class != NULL);
954  if (IsEmptyAdaptedClass(Class)) {
955  InitAdaptedClass(Blob, denorm, ClassId, FontinfoId, Class,
957  }
958  else {
959  IClass = ClassForClassId (AdaptedTemplates->Templates, ClassId);
960 
961  NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures);
962  if (NumFeatures <= 0)
963  return;
964 
966  // Only match configs with the matching font.
967  BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
968  for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
969  if (GetFontinfoId(Class, cfg) == FontinfoId) {
970  SET_BIT(MatchingFontConfigs, cfg);
971  } else {
972  reset_bit(MatchingFontConfigs, cfg);
973  }
974  }
975  im_.Match(IClass, AllProtosOn, MatchingFontConfigs,
976  NumFeatures, IntFeatures,
979  FreeBitVector(MatchingFontConfigs);
980 
981  SetAdaptiveThreshold(Threshold);
982 
983  if (IntResult.Rating <= Threshold) {
984  if (ConfigIsPermanent (Class, IntResult.Config)) {
986  cprintf ("Found good match to perm config %d = %4.1f%%.\n",
987  IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
988  FreeFeatureSet(FloatFeatures);
989  return;
990  }
991 
992  TempConfig = TempConfigFor (Class, IntResult.Config);
993  IncreaseConfidence(TempConfig);
994  if (TempConfig->NumTimesSeen > Class->MaxNumTimesSeen) {
995  Class->MaxNumTimesSeen = TempConfig->NumTimesSeen;
996  }
998  cprintf ("Increasing reliability of temp config %d to %d.\n",
999  IntResult.Config, TempConfig->NumTimesSeen);
1000 
1001  if (TempConfigReliable(ClassId, TempConfig)) {
1002  MakePermanent(AdaptedTemplates, ClassId, IntResult.Config, denorm,
1003  Blob);
1004  UpdateAmbigsGroup(ClassId, denorm, Blob);
1005  }
1006  }
1007  else {
1008  if (classify_learning_debug_level >= 1) {
1009  cprintf ("Found poor match to temp config %d = %4.1f%%.\n",
1010  IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
1012  DisplayAdaptedChar(Blob, denorm, IClass);
1013  }
1014  NewTempConfigId = MakeNewTemporaryConfig(AdaptedTemplates,
1015  ClassId,
1016  FontinfoId,
1017  NumFeatures,
1018  IntFeatures,
1019  FloatFeatures);
1020  if (NewTempConfigId >= 0 &&
1021  TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) {
1022  MakePermanent(AdaptedTemplates, ClassId, NewTempConfigId, denorm, Blob);
1023  UpdateAmbigsGroup(ClassId, denorm, Blob);
1024  }
1025 
1026 #ifndef GRAPHICS_DISABLED
1028  DisplayAdaptedChar(Blob, denorm, IClass);
1029  }
1030 #endif
1031  }
1032  FreeFeatureSet(FloatFeatures);
1033  }
1034 } /* AdaptToChar */
#define reset_bit(array, bit)
Definition: bitvec.h:59
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
BIT_VECTOR AllProtosOn
Definition: classify.h:433
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93
void SetBaseLineMatch()
Definition: intmatcher.cpp:728
IntegerMatcher im_
Definition: classify.h:455
int classify_adapt_feature_threshold
Definition: classify.h:404
uinT8 MaxNumTimesSeen
Definition: adaptive.h:66
void InitAdaptedClass(TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
Definition: adaptmatch.cpp:758
#define ClassForClassId(T, c)
Definition: intproto.h:173
void DisplayAdaptedChar(TBLOB *blob, const DENORM &denorm, INT_CLASS_STRUCT *int_class)
void SetAdaptiveThreshold(FLOAT32 Threshold)
#define MAX_NUM_PROTOS
Definition: intproto.h:45
uinT8 NumConfigs
Definition: intproto.h:108
#define NULL
Definition: host.h:144
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:79
INT_TEMPLATES Templates
Definition: adaptive.h:77
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
Definition: adaptive.cpp:190
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:430
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:55
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, INT_RESULT Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:460
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
void UpdateAmbigsGroup(CLASS_ID class_id, const DENORM &denorm, TBLOB *Blob)
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
void ResetFeaturesHaveBeenExtracted()
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
int classify_learning_debug_level
Definition: classify.h:380
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: baseapi.h:66
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:111
bool matcher_debug_separate_windows
Definition: classify.h:415
#define LegalClassId(c)
Definition: intproto.h:171
#define IncreaseConfidence(TempConfig)
Definition: adaptive.h:108
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
Definition: adaptmatch.cpp:854
#define SET_BIT(array, bit)
Definition: bitvec.h:57
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, const DENORM &denorm, TBLOB *Blob)
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
uinT8 NumTimesSeen
Definition: adaptive.h:41
#define NO_DEBUG
Definition: adaptmatch.cpp:72
void tesseract::Classify::AdaptToPunc ( TBLOB * Blob,
const DENORM & denorm,
CLASS_ID  ClassId,
int  FontinfoId,
FLOAT32  Threshold 
)
Parameters
Blob: blob to add to templates for ClassId
denorm: normalization/denormalization parameters
ClassId: class to add blob to
FontinfoId: font information from pre-trained templates
Threshold: minimum match rating to existing template

Globals:

  • PreTrainedTemplates current set of built-in templates
Note
Exceptions: none
History: Thu Mar 14 09:36:03 1991, DSJ, Created.

Definition at line 1082 of file adaptmatch.cpp.

1086  {
1087  ADAPT_RESULTS *Results = new ADAPT_RESULTS();
1088  int i;
1089 
1090  Results->Initialize();
1091  CharNormClassifier(Blob, denorm, PreTrainedTemplates, Results);
1092  RemoveBadMatches(Results);
1093 
1094  if (Results->NumMatches != 1) {
1095  if (classify_learning_debug_level >= 1) {
1096  cprintf ("Rejecting punc = %s (Alternatives = ",
1097  unicharset.id_to_unichar(ClassId));
1098 
1099  for (i = 0; i < Results->NumMatches; i++)
1100  tprintf("%s", unicharset.id_to_unichar(Results->match[i].unichar_id));
1101  tprintf(")\n");
1102  }
1103  } else {
1104  #ifndef SECURE_NAMES
1106  cprintf ("Adapting to punc = %s, thr= %g\n",
1107  unicharset.id_to_unichar(ClassId), Threshold);
1108  #endif
1109  AdaptToChar(Blob, denorm, ClassId, FontinfoId, Threshold);
1110  }
1111  delete Results;
1112 } /* AdaptToPunc */
int CharNormClassifier(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, ADAPT_RESULTS *Results)
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
CLASS_ID unichar_id
Definition: adaptmatch.cpp:82
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:426
void AdaptToChar(TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold)
Definition: adaptmatch.cpp:933
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void RemoveBadMatches(ADAPT_RESULTS *Results)
UNICHARSET unicharset
Definition: ccutil.h:72
int classify_learning_debug_level
Definition: classify.h:380
ScoredClass match[MAX_NUM_CLASSES]
Definition: adaptmatch.cpp:95
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
void Initialize()
Definition: adaptmatch.cpp:101
void tesseract::Classify::AddNewResult ( ADAPT_RESULTS * results,
CLASS_ID  class_id,
int  shape_id,
FLOAT32  rating,
bool  adapted,
int  config,
int  fontinfo_id,
int  fontinfo_id2 
)

This routine adds the result of a classification into Results. If the new rating is much worse than the current best rating, it is not entered into results because it would end up being stripped later anyway. If the new rating is better than the old rating for the class, it replaces the old rating. If this is the first rating for the class, the class is added to the list of matched classes in Results. If the new rating is better than the best so far, it becomes the best so far.

Globals:

Parameters
[out] results: results to add new result to
class_id: class of new result
shape_id: shape index
rating: rating of new result
adapted: adapted match or not
config: config id of new result
fontinfo_id: font information of the new result
fontinfo_id2: font information of the 2nd choice result
Note
Exceptions: none
History: Tue Mar 12 18:19:29 1991, DSJ, Created.

Definition at line 1142 of file adaptmatch.cpp.

1149  {
1150  ScoredClass *old_match = FindScoredUnichar(results, class_id);
1151  ScoredClass match =
1152  { class_id,
1153  shape_id,
1154  rating,
1155  adapted,
1156  static_cast<inT16>(config),
1157  static_cast<inT16>(fontinfo_id),
1158  static_cast<inT16>(fontinfo_id2) };
1159 
1160  if (rating > results->best_match.rating + matcher_bad_match_pad ||
1161  (old_match && rating >= old_match->rating))
1162  return;
1163 
1164  if (!unicharset.get_fragment(class_id))
1165  results->HasNonfragment = true;
1166 
1167  if (old_match)
1168  old_match->rating = rating;
1169  else
1170  results->match[results->NumMatches++] = match;
1171 
1172  if (rating < results->best_match.rating &&
1173  // Ensure that fragments do not affect best rating, class and config.
1174  // This is needed so that at least one non-fragmented character is
1175  // always present in the results.
1176  // TODO(daria): verify that this helps accuracy and does not
1177  // hurt performance.
1178  !unicharset.get_fragment(class_id)) {
1179  results->best_match = match;
1180  }
1181 } /* AddNewResult */
ScoredClass best_match
Definition: adaptmatch.cpp:96
bool HasNonfragment
Definition: adaptmatch.cpp:94
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:610
FLOAT32 rating
Definition: adaptmatch.cpp:84
ScoredClass * FindScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id)
UNICHARSET unicharset
Definition: ccutil.h:72
short inT16
Definition: host.h:100
ScoredClass match[MAX_NUM_CLASSES]
Definition: adaptmatch.cpp:95
double matcher_bad_match_pad
Definition: classify.h:384
void tesseract::Classify::AmbigClassifier ( TBLOB * Blob,
const DENORM & denorm,
INT_TEMPLATES  Templates,
ADAPT_CLASS * Classes,
UNICHAR_ID * Ambiguities,
ADAPT_RESULTS * Results 
)

This routine is identical to CharNormClassifier() except that it does no class pruning. It simply matches the unknown blob against the classes listed in Ambiguities.

Globals:

Parameters
Blob: blob to be classified
denorm: normalization/denormalization parameters
Templates: built-in templates to classify against
Classes: adapted class templates
Ambiguities: array of class ids to match against, terminated by a negative id
[out] Results: place to put match results
Note
Exceptions: none
History: Tue Mar 12 19:40:36 1991, DSJ, Created.

Definition at line 1205 of file adaptmatch.cpp.

1210  {
1211  int NumFeatures;
1212  INT_FEATURE_ARRAY IntFeatures;
1213  uinT8* CharNormArray = new uinT8[unicharset.size()];
1214  INT_RESULT_STRUCT IntResult;
1215  CLASS_ID ClassId;
1216 
1217  AmbigClassifierCalls++;
1218 
1219  NumFeatures = GetCharNormFeatures(Blob, denorm, Templates, IntFeatures,
1220  NULL, CharNormArray,
1221  &(Results->BlobLength), NULL);
1222  if (NumFeatures <= 0) {
1223  delete [] CharNormArray;
1224  return;
1225  }
1226 
1227  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1228  if (debug)
1229  tprintf("AM Matches = ");
1230 
1231  int top = Blob->bounding_box().top();
1232  int bottom = Blob->bounding_box().bottom();
1233  while (*Ambiguities >= 0) {
1234  ClassId = *Ambiguities;
1235 
1237  im_.Match(ClassForClassId(Templates, ClassId),
1239  NumFeatures, IntFeatures,
1240  &IntResult,
1243 
1244  ExpandShapesAndApplyCorrections(NULL, debug, ClassId, bottom, top, 0,
1245  Results->BlobLength, CharNormArray,
1246  IntResult, Results);
1247  Ambiguities++;
1248 
1249  NumAmbigClassesTried++;
1250  }
1251  delete [] CharNormArray;
1252 } /* AmbigClassifier */
BIT_VECTOR AllProtosOn
Definition: classify.h:433
IntegerMatcher im_
Definition: classify.h:455
int classify_adapt_feature_threshold
Definition: classify.h:404
int size() const
Definition: unicharset.h:264
#define ClassForClassId(T, c)
Definition: intproto.h:173
int classify_integer_matcher_multiplier
Definition: classify.h:426
#define NULL
Definition: host.h:144
void SetCharNormMatch(int integer_matcher_multiplier)
Definition: intmatcher.cpp:734
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, INT_RESULT Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:460
inT16 top() const
Definition: rect.h:53
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
TBOX bounding_box() const
Definition: blobs.cpp:384
UNICHARSET unicharset
Definition: ccutil.h:72
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: baseapi.h:66
bool matcher_debug_separate_windows
Definition: classify.h:415
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, const uinT8 *cn_factors, INT_RESULT_STRUCT &int_result, ADAPT_RESULTS *final_results)
inT32 BlobLength
Definition: adaptmatch.cpp:92
unsigned char uinT8
Definition: host.h:99
int GetCharNormFeatures(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *PrunerNormArray, uinT8 *CharNormArray, inT32 *BlobLength, inT32 *FeatureOutlineIndex)
BIT_VECTOR AllConfigsOn
Definition: classify.h:435
#define NO_DEBUG
Definition: adaptmatch.cpp:72
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
inT16 bottom() const
Definition: rect.h:60
UNICHAR_ID * tesseract::Classify::BaselineClassifier ( TBLOB * Blob,
const DENORM & denorm,
ADAPT_TEMPLATES  Templates,
ADAPT_RESULTS * Results 
)

This routine extracts baseline normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Globals:

  • BaselineCutoffs expected num features for each class
Parameters
Blob: blob to be classified
denorm: normalization/denormalization parameters
Templates: current set of adapted templates
Results: place to put match results
Returns
Array of possible ambiguous chars that should be checked.
Note
Exceptions: none
History: Tue Mar 12 19:38:03 1991, DSJ, Created.

Definition at line 1423 of file adaptmatch.cpp.

1426  {
1427  int NumFeatures;
1428  int NumClasses;
1429  INT_FEATURE_ARRAY IntFeatures;
1430  uinT8* CharNormArray = new uinT8[unicharset.size()];
1431  CLASS_ID ClassId;
1432 
1433  BaselineClassifierCalls++;
1434 
1435  NumFeatures = GetBaselineFeatures(
1436  Blob, denorm, Templates->Templates, IntFeatures, CharNormArray,
1437  &(Results->BlobLength));
1438  if (NumFeatures <= 0) {
1439  delete [] CharNormArray;
1440  return NULL;
1441  }
1442 
1443  NumClasses = PruneClasses(Templates->Templates, NumFeatures, IntFeatures,
1444  CharNormArray, BaselineCutoffs, Results->CPResults);
1445 
1446  NumBaselineClassesTried += NumClasses;
1447 
1448  if (matcher_debug_level >= 2 || classify_debug_level > 1)
1449  cprintf ("BL Matches = ");
1450 
1452  MasterMatcher(Templates->Templates, NumFeatures, IntFeatures, CharNormArray,
1453  Templates->Class, matcher_debug_flags, NumClasses,
1454  Blob->bounding_box(), Results->CPResults, Results);
1455 
1456  delete [] CharNormArray;
1457  ClassId = Results->best_match.unichar_id;
1458  if (ClassId == NO_CLASS)
1459  return (NULL);
1460  /* this is a bug - maybe should return "" */
1461 
1462  return Templates->Class[ClassId]->
1463  Config[Results->best_match.config].Perm->Ambigs;
1464 } /* BaselineClassifier */
#define NO_CLASS
Definition: matchdefs.h:36
void SetBaseLineMatch()
Definition: intmatcher.cpp:728
IntegerMatcher im_
Definition: classify.h:455
ScoredClass best_match
Definition: adaptmatch.cpp:96
int size() const
Definition: unicharset.h:264
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int num_classes, const TBOX &blob_box, CLASS_PRUNER_RESULTS results, ADAPT_RESULTS *final_results)
#define NULL
Definition: host.h:144
CLASS_ID unichar_id
Definition: adaptmatch.cpp:82
INT_TEMPLATES Templates
Definition: adaptive.h:77
CLUSTERCONFIG Config
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, CP_RESULT_STRUCT *results)
Definition: intmatcher.cpp:406
inT16 config
Definition: adaptmatch.cpp:86
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
TBOX bounding_box() const
Definition: blobs.cpp:384
UNICHARSET unicharset
Definition: ccutil.h:72
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: baseapi.h:66
inT32 BlobLength
Definition: adaptmatch.cpp:92
unsigned char uinT8
Definition: host.h:99
int GetBaselineFeatures(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *CharNormArray, inT32 *BlobLength)
CLASS_PRUNER_RESULTS CPResults
Definition: adaptmatch.cpp:97
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
int tesseract::Classify::CharNormClassifier ( TBLOB * Blob,
const DENORM & denorm,
INT_TEMPLATES  Templates,
ADAPT_RESULTS * Results 
)

This routine extracts character normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Parameters
Blob: blob to be classified
denorm: normalization/denormalization parameters
Templates: templates to classify unknown against
Results: place to put match results

Globals:

  • CharNormCutoffs expected num features for each class
  • AllProtosOn mask that enables all protos
  • AllConfigsOn mask that enables all configs
Note
Exceptions: none
History: Tue Mar 12 16:02:52 1991, DSJ, Created.

Definition at line 1487 of file adaptmatch.cpp.

1490  {
1491  int NumFeatures;
1492  int NumClasses;
1493  INT_FEATURE_ARRAY IntFeatures;
1494 
1495  CharNormClassifierCalls++;
1496 
1497  uinT8* CharNormArray = new uinT8[unicharset.size()];
1498  int num_pruner_classes = MAX(unicharset.size(),
1500  uinT8* PrunerNormArray = new uinT8[num_pruner_classes];
1501  NumFeatures = GetCharNormFeatures(Blob, denorm, Templates, IntFeatures,
1502  PrunerNormArray, CharNormArray,
1503  &(Results->BlobLength), NULL);
1504  if (NumFeatures <= 0) {
1505  delete [] CharNormArray;
1506  delete [] PrunerNormArray;
1507  return 0;
1508  }
1509 
1510  NumClasses = PruneClasses(Templates, NumFeatures, IntFeatures,
1511  PrunerNormArray,
1512  shape_table_ != NULL ? &shapetable_cutoffs_[0]
1513  : CharNormCutoffs,
1514  Results->CPResults);
1515 
1516  if (tessedit_single_match && NumClasses > 1)
1517  NumClasses = 1;
1518  NumCharNormClassesTried += NumClasses;
1519 
1521  MasterMatcher(Templates, NumFeatures, IntFeatures, CharNormArray,
1522  NULL, matcher_debug_flags, NumClasses,
1523  Blob->bounding_box(), Results->CPResults, Results);
1524  delete [] CharNormArray;
1525  delete [] PrunerNormArray;
1526  return NumFeatures;
1527 } /* CharNormClassifier */
IntegerMatcher im_
Definition: classify.h:455
int size() const
Definition: unicharset.h:264
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int num_classes, const TBOX &blob_box, CLASS_PRUNER_RESULTS results, ADAPT_RESULTS *final_results)
int classify_integer_matcher_multiplier
Definition: classify.h:426
#define NULL
Definition: host.h:144
void SetCharNormMatch(int integer_matcher_multiplier)
Definition: intmatcher.cpp:734
ShapeTable * shape_table_
Definition: classify.h:464
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, CP_RESULT_STRUCT *results)
Definition: intmatcher.cpp:406
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:426
TBOX bounding_box() const
Definition: blobs.cpp:384
UNICHARSET unicharset
Definition: ccutil.h:72
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: baseapi.h:66
inT32 BlobLength
Definition: adaptmatch.cpp:92
unsigned char uinT8
Definition: host.h:99
#define MAX(x, y)
Definition: ndminx.h:24
CLASS_PRUNER_RESULTS CPResults
Definition: adaptmatch.cpp:97
int GetCharNormFeatures(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *PrunerNormArray, uinT8 *CharNormArray, inT32 *BlobLength, inT32 *FeatureOutlineIndex)
int tesseract::Classify::CharNormTrainingSample ( bool  pruner_only,
const TrainingSample & sample,
GenericVector< ShapeRating > *  results 
)

Definition at line 1531 of file adaptmatch.cpp.

1533  {
1534  results->clear();
1535  ADAPT_RESULTS* adapt_results = new ADAPT_RESULTS();
1536  adapt_results->Initialize();
1537  // Compute the bounding box of the features.
1538  int num_features = sample.num_features();
1539  TBOX blob_box;
1540  for (int f = 0; f < num_features; ++f) {
1541  const INT_FEATURE_STRUCT feature = sample.features()[f];
1542  TBOX fbox(feature.X, feature.Y, feature.X, feature.Y);
1543  blob_box += fbox;
1544  }
1545  // Compute the char_norm_array from the saved cn_feature.
1546  FEATURE norm_feature = NewFeature(&CharNormDesc);
1547  norm_feature->Params[CharNormY] = sample.cn_feature(CharNormY);
1548  norm_feature->Params[CharNormLength] = sample.cn_feature(CharNormLength);
1549  norm_feature->Params[CharNormRx] = sample.cn_feature(CharNormRx);
1550  norm_feature->Params[CharNormRy] = sample.cn_feature(CharNormRy);
1551  uinT8* char_norm_array = new uinT8[unicharset.size()];
1552  int num_pruner_classes = MAX(unicharset.size(),
1554  uinT8* pruner_norm_array = new uinT8[num_pruner_classes];
1555  adapt_results->BlobLength =
1556  static_cast<int>(ActualOutlineLength(norm_feature) * 20 + 0.5);
1557  ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array,
1558  pruner_norm_array);
1559 
1560  int num_classes = PruneClasses(PreTrainedTemplates, num_features,
1561  sample.features(),
1562  pruner_norm_array,
1563  shape_table_ != NULL ? &shapetable_cutoffs_[0]
1564  : CharNormCutoffs,
1565  adapt_results->CPResults);
1566  delete [] pruner_norm_array;
1567  if (pruner_only) {
1568  // Convert pruner results to output format.
1569  for (int i = 0; i < num_classes; ++i) {
1570  int class_id = adapt_results->CPResults[i].Class;
1571  int shape_id = class_id;
1572  if (shape_table_ != NULL) {
1573  // All shapes in a class have the same combination of unichars, so
1574  // it doesn't really matter which config we give it, as we aren't
1575  // trying to get the font here.
1576  shape_id = ClassAndConfigIDToFontOrShapeID(class_id, 0);
1577  }
1578  results->push_back(
1579  ShapeRating(shape_id, 1.0f - adapt_results->CPResults[i].Rating));
1580  }
1581  } else {
1583  MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
1584  char_norm_array,
1585  NULL, matcher_debug_flags, num_classes,
1586  blob_box, adapt_results->CPResults, adapt_results);
1587  // Convert master matcher results to output format.
1588  for (int i = 0; i < adapt_results->NumMatches; i++) {
1589  ScoredClass next = adapt_results->match[i];
1590  results->push_back(ShapeRating(next.shape_id, 1.0f - next.rating));
1591  }
1593  }
1594  delete [] char_norm_array;
1595  delete adapt_results;
1596  return num_features;
1597 } /* CharNormTrainingSample */
FLOAT32 ActualOutlineLength(FEATURE Feature)
Definition: normfeat.cpp:32
IntegerMatcher im_
Definition: classify.h:455
int size() const
Definition: unicharset.h:264
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int num_classes, const TBOX &blob_box, CLASS_PRUNER_RESULTS results, ADAPT_RESULTS *final_results)
virtual void clear()
int classify_integer_matcher_multiplier
Definition: classify.h:426
Definition: cluster.h:32
#define NULL
Definition: host.h:144
Definition: rect.h:29
void SetCharNormMatch(int integer_matcher_multiplier)
Definition: intmatcher.cpp:734
#define f(xc, yc)
Definition: imgscale.cpp:39
int push_back(T object)
ShapeTable * shape_table_
Definition: classify.h:464
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, CP_RESULT_STRUCT *results)
Definition: intmatcher.cpp:406
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:426
static int SortDescendingRating(const void *t1, const void *t2)
FLOAT32 rating
Definition: adaptmatch.cpp:84
FLOAT32 Params[1]
Definition: ocrfeatures.h:64
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
UNICHARSET unicharset
Definition: ccutil.h:72
inT32 BlobLength
Definition: adaptmatch.cpp:92
unsigned char uinT8
Definition: host.h:99
ScoredClass match[MAX_NUM_CLASSES]
Definition: adaptmatch.cpp:95
#define MAX(x, y)
Definition: ndminx.h:24
CLASS_PRUNER_RESULTS CPResults
Definition: adaptmatch.cpp:97
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
void Initialize()
Definition: adaptmatch.cpp:101
const FEATURE_DESC_STRUCT CharNormDesc
int tesseract::Classify::ClassAndConfigIDToFontOrShapeID ( int  class_id,
int  int_result_config 
) const

Definition at line 2733 of file adaptmatch.cpp.

2734  {
2735  int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id;
2736  // Older inttemps have no font_ids.
2737  if (font_set_id < 0)
2738  return kBlankFontinfoId;
2739  const FontSet &fs = fontset_table_.get(font_set_id);
2740  ASSERT_HOST(int_result_config >= 0 && int_result_config < fs.size);
2741  return fs.configs[int_result_config];
2742 }
UnicityTable< FontSet > fontset_table_
Definition: classify.h:451
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:426
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:122
#define ASSERT_HOST(x)
Definition: errcode.h:84
STRING tesseract::Classify::ClassIDToDebugStr ( const INT_TEMPLATES_STRUCT * templates,
int  class_id,
int  config_id 
) const

Definition at line 2720 of file adaptmatch.cpp.

2721  {
2722  STRING class_string;
2723  if (templates == PreTrainedTemplates && shape_table_ != NULL) {
2724  int shape_id = ClassAndConfigIDToFontOrShapeID(class_id, config_id);
2725  class_string = shape_table_->DebugStr(shape_id);
2726  } else {
2727  class_string = unicharset.debug_str(class_id);
2728  }
2729  return class_string;
2730 }
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:194
#define NULL
Definition: host.h:144
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:285
ShapeTable * shape_table_
Definition: classify.h:464
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:426
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
UNICHARSET unicharset
Definition: ccutil.h:72
Definition: strngs.h:40
void tesseract::Classify::ClassifyAsNoise ( ADAPT_RESULTS * Results )

This routine computes a rating which reflects the likelihood that the blob being classified is a noise blob. NOTE: assumes that the blob length has already been computed and placed into Results.

Parameters
Results: results to add noise classification to

Globals:

  • matcher_avg_noise_size avg. length of a noise blob
Note
Exceptions: none
History: Tue Mar 12 18:36:52 1991, DSJ, Created.

Definition at line 1615 of file adaptmatch.cpp.

1615  {
1616  register FLOAT32 Rating;
1617 
1618  Rating = Results->BlobLength / matcher_avg_noise_size;
1619  Rating *= Rating;
1620  Rating /= 1.0 + Rating;
1621 
1622  AddNewResult(Results, NO_CLASS, -1, Rating, false, -1,
1623  kBlankFontinfoId, kBlankFontinfoId);
1624 } /* ClassifyAsNoise */
#define NO_CLASS
Definition: matchdefs.h:36
float FLOAT32
Definition: host.h:111
void AddNewResult(ADAPT_RESULTS *results, CLASS_ID class_id, int shape_id, FLOAT32 rating, bool adapted, int config, int fontinfo_id, int fontinfo_id2)
double matcher_avg_noise_size
Definition: classify.h:386
inT32 BlobLength
Definition: adaptmatch.cpp:92
void tesseract::Classify::ClearCharNormArray ( uinT8 * char_norm_array )

For each class in the unicharset, clears the corresponding entry in char_norm_array. char_norm_array is indexed by unichar_id.

Globals:

  • none
Parameters
char_norm_array: array to be cleared
Note
Exceptions: none
History: Wed Feb 20 11:20:54 1991, DSJ, Created.

Definition at line 48 of file float2int.cpp.

48  {
49  memset(char_norm_array, 0, sizeof(*char_norm_array) * unicharset.size());
50 } /* ClearCharNormArray */
int size() const
Definition: unicharset.h:264
UNICHARSET unicharset
Definition: ccutil.h:72
void tesseract::Classify::ComputeCharNormArrays ( FEATURE_STRUCT * norm_feature,
INT_TEMPLATES_STRUCT * templates,
uinT8 * char_norm_array,
uinT8 * pruner_array 
)

Definition at line 2092 of file adaptmatch.cpp.

2095  {
2096  ComputeIntCharNormArray(*norm_feature, char_norm_array);
2097  if (pruner_array != NULL) {
2098  if (shape_table_ == NULL) {
2099  ComputeIntCharNormArray(*norm_feature, pruner_array);
2100  } else {
2101  memset(pruner_array, MAX_UINT8,
2102  templates->NumClasses * sizeof(pruner_array[0]));
2103  // Each entry in the pruner norm array is the MIN of all the entries of
2104  // the corresponding unichars in the CharNormArray.
2105  for (int id = 0; id < templates->NumClasses; ++id) {
2106  int font_set_id = templates->Class[id]->font_set_id;
2107  const FontSet &fs = fontset_table_.get(font_set_id);
2108  for (int config = 0; config < fs.size; ++config) {
2109  const Shape& shape = shape_table_->GetShape(fs.configs[config]);
2110  for (int c = 0; c < shape.size(); ++c) {
2111  if (char_norm_array[shape[c].unichar_id] < pruner_array[id])
2112  pruner_array[id] = char_norm_array[shape[c].unichar_id];
2113  }
2114  }
2115  }
2116  }
2117  }
2118  FreeFeature(norm_feature);
2119 }
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
Definition: float2int.cpp:69
#define MAX_UINT8
Definition: host.h:121
UnicityTable< FontSet > fontset_table_
Definition: classify.h:451
void FreeFeature(FEATURE Feature)
Definition: ocrfeatures.cpp:59
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:179
#define NULL
Definition: host.h:144
ShapeTable * shape_table_
Definition: classify.h:464
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:122
double tesseract::Classify::ComputeCorrectedRating ( bool  debug,
int  unichar_id,
double  cp_rating,
double  im_rating,
int  feature_misses,
int  bottom,
int  top,
int  blob_length,
const uinT8 * cn_factors 
)

Definition at line 1360 of file adaptmatch.cpp.

1365  {
1366  // Compute class feature corrections.
1367  double cn_corrected = im_.ApplyCNCorrection(im_rating, blob_length,
1368  cn_factors[unichar_id]);
1369  double miss_penalty = tessedit_class_miss_scale * feature_misses;
1370  double vertical_penalty = 0.0;
1371  // Penalize non-alnums for being vertical misfits.
1372  if (!unicharset.get_isalpha(unichar_id) &&
1373  !unicharset.get_isdigit(unichar_id) &&
1374  cn_factors[unichar_id] != 0 && classify_misfit_junk_penalty > 0.0) {
1375  int min_bottom, max_bottom, min_top, max_top;
1376  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
1377  &min_top, &max_top);
1378  if (debug) {
1379  tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
1380  top, min_top, max_top, bottom, min_bottom, max_bottom);
1381  }
1382  if (top < min_top || top > max_top ||
1383  bottom < min_bottom || bottom > max_bottom) {
1384  vertical_penalty = classify_misfit_junk_penalty;
1385  }
1386  }
1387  double result =cn_corrected + miss_penalty + vertical_penalty;
1388  if (result > WORST_POSSIBLE_RATING)
1389  result = WORST_POSSIBLE_RATING;
1390  if (debug) {
1391  tprintf("%s: %2.1f(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
1392  unicharset.id_to_unichar(unichar_id),
1393  result * 100.0,
1394  cp_rating * 100.0,
1395  im_rating * 100.0,
1396  (cn_corrected - im_rating) * 100.0,
1397  cn_factors[unichar_id],
1398  miss_penalty * 100.0,
1399  vertical_penalty * 100.0);
1400  }
1401  return result;
1402 }
#define WORST_POSSIBLE_RATING
Definition: adaptmatch.cpp:79
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:392
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
IntegerMatcher im_
Definition: classify.h:455
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:413
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
Definition: unicharset.h:459
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor)
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
double classify_misfit_junk_penalty
Definition: classify.h:396
UNICHARSET unicharset
Definition: ccutil.h:72
double tessedit_class_miss_scale
Definition: classify.h:400
void tesseract::Classify::ComputeIntCharNormArray ( const FEATURE_STRUCT & norm_feature,
uinT8 * char_norm_array 
)

For each class in unicharset, computes the match between norm_feature and the normalization protos for that class. Converts this number to the range from 0 - 255 and stores it into char_norm_array. char_norm_array is indexed by unichar_id.

Globals:

  • none
Parameters
norm_feature: character normalization feature
[out] char_norm_array: place to put results; must hold unicharset.size() entries
Note
Exceptions: none
History: Wed Feb 20 11:20:54 1991, DSJ, Created.

Definition at line 69 of file float2int.cpp.

70  {
71  for (int i = 0; i < unicharset.size(); i++) {
72  int norm_adjust = static_cast<int>(INT_CHAR_NORM_RANGE *
73  ComputeNormMatch(i, norm_feature, FALSE));
74  char_norm_array[i] = ClipToRange(norm_adjust, 0, MAX_INT_CHAR_NORM);
75  }
76 } /* ComputeIntCharNormArray */
#define INT_CHAR_NORM_RANGE
Definition: intproto.h:131
int size() const
Definition: unicharset.h:264
#define MAX_INT_CHAR_NORM
Definition: float2int.cpp:28
#define FALSE
Definition: capi.h:28
UNICHARSET unicharset
Definition: ccutil.h:72
FLOAT32 ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch)
Definition: normmatch.cpp:73
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:64
void tesseract::Classify::ComputeIntFeatures ( FEATURE_SET  Features,
INT_FEATURE_ARRAY  IntFeatures 
)

This routine converts each floating point pico-feature in Features into integer format and saves it into IntFeatures.

Globals:

  • none
Parameters
Features: floating point pico-features to be converted
[out] IntFeatures: array to put converted features into
Note
Exceptions: none
History: Wed Feb 20 10:58:45 1991, DSJ, Created.

Definition at line 94 of file float2int.cpp.

95  {
96  int Fid;
97  FEATURE Feature;
98  FLOAT32 YShift;
99 
101  YShift = BASELINE_Y_SHIFT;
102  else
103  YShift = Y_SHIFT;
104 
105  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
106  Feature = Features->Features[Fid];
107 
108  IntFeatures[Fid].X = BucketFor (Feature->Params[PicoFeatX],
110  IntFeatures[Fid].Y = BucketFor (Feature->Params[PicoFeatY],
111  YShift, INT_FEAT_RANGE);
112  IntFeatures[Fid].Theta = CircBucketFor (Feature->Params[PicoFeatDir],
114  IntFeatures[Fid].CP_misses = 0;
115  }
116 } /* ComputeIntFeatures */
#define BASELINE_Y_SHIFT
Definition: float2int.h:28
float FLOAT32
Definition: host.h:111
#define X_SHIFT
Definition: intproto.h:38
int BucketFor(FLOAT32 Param, FLOAT32 Offset, int NumBuckets)
Definition: intproto.cpp:425
#define ANGLE_SHIFT
Definition: intproto.h:37
FEATURE Features[1]
Definition: ocrfeatures.h:71
FLOAT32 Params[1]
Definition: ocrfeatures.h:64
#define Y_SHIFT
Definition: intproto.h:39
#define INT_FEAT_RANGE
Definition: float2int.h:27
int CircBucketFor(FLOAT32 Param, FLOAT32 Offset, int NumBuckets)
Definition: intproto.cpp:447
FLOAT32 tesseract::Classify::ComputeNormMatch ( CLASS_ID  ClassId,
const FEATURE_STRUCT & feature,
BOOL8  DebugMatch 
)

Definition at line 73 of file normmatch.cpp.

75  {
76 /*
77  ** Parameters:
78  ** ClassId id of class to match against
79  ** Feature character normalization feature
80  ** DebugMatch controls dump of debug info
81  ** Globals:
82  ** NormProtos character normalization prototypes
83  ** Operation: This routine compares Features against each character
84  ** normalization proto for ClassId and returns the match
85  ** rating of the best match.
86  ** Return: Best match rating for Feature against protos of ClassId.
87  ** Exceptions: none
88  ** History: Wed Dec 19 16:56:12 1990, DSJ, Created.
89  */
90  LIST Protos;
91  FLOAT32 BestMatch;
92  FLOAT32 Match;
93  FLOAT32 Delta;
94  PROTOTYPE *Proto;
95  int ProtoId;
96 
97  /* handle requests for classification as noise */
98  if (ClassId == NO_CLASS) {
99  /* kludge - clean up constants and make into control knobs later */
100  Match = (feature.Params[CharNormLength] *
101  feature.Params[CharNormLength] * 500.0 +
102  feature.Params[CharNormRx] *
103  feature.Params[CharNormRx] * 8000.0 +
104  feature.Params[CharNormRy] *
105  feature.Params[CharNormRy] * 8000.0);
106  return (1.0 - NormEvidenceOf (Match));
107  }
108 
109  BestMatch = MAX_FLOAT32;
110  Protos = NormProtos->Protos[ClassId];
111 
112  if (DebugMatch) {
113  tprintf("\nChar norm for class %s\n", unicharset.id_to_unichar(ClassId));
114  }
115 
116  ProtoId = 0;
117  iterate(Protos) {
118  Proto = (PROTOTYPE *) first_node (Protos);
119  Delta = feature.Params[CharNormY] - Proto->Mean[CharNormY];
120  Match = Delta * Delta * Proto->Weight.Elliptical[CharNormY];
121  if (DebugMatch) {
122  tprintf("YMiddle: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
123  Proto->Mean[CharNormY], Delta,
124  Proto->Weight.Elliptical[CharNormY], Match);
125  }
126  Delta = feature.Params[CharNormRx] - Proto->Mean[CharNormRx];
127  Match += Delta * Delta * Proto->Weight.Elliptical[CharNormRx];
128  if (DebugMatch) {
129  tprintf("Height: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
130  Proto->Mean[CharNormRx], Delta,
131  Proto->Weight.Elliptical[CharNormRx], Match);
132  }
133  // Ry is width! See intfx.cpp.
134  Delta = feature.Params[CharNormRy] - Proto->Mean[CharNormRy];
135  if (DebugMatch) {
136  tprintf("Width: Proto=%g, Delta=%g, Var=%g\n",
137  Proto->Mean[CharNormRy], Delta,
138  Proto->Weight.Elliptical[CharNormRy]);
139  }
140  Delta = Delta * Delta * Proto->Weight.Elliptical[CharNormRy];
141  Delta *= kWidthErrorWeighting;
142  Match += Delta;
143  if (DebugMatch) {
144  tprintf("Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n",
145  Match, Match / classify_norm_adj_midpoint,
146  NormEvidenceOf(Match), 256 * (1 - NormEvidenceOf(Match)));
147  }
148 
149  if (Match < BestMatch)
150  BestMatch = Match;
151 
152  ProtoId++;
153  }
154  return 1.0 - NormEvidenceOf(BestMatch);
155 } /* ComputeNormMatch */
#define NO_CLASS
Definition: matchdefs.h:36
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
double NormEvidenceOf(register double NormAdj)
Definition: normmatch.cpp:179
LIST * Protos
Definition: normmatch.cpp:42
FLOAT32 * Elliptical
Definition: cluster.h:64
float FLOAT32
Definition: host.h:111
const double kWidthErrorWeighting
Definition: normmatch.cpp:66
FLOATUNION Weight
Definition: cluster.h:83
#define MAX_FLOAT32
Definition: host.h:124
FLOAT32 Params[1]
Definition: ocrfeatures.h:64
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
UNICHARSET unicharset
Definition: ccutil.h:72
FLOAT32 * Mean
Definition: cluster.h:78
NORM_PROTOS * NormProtos
Definition: classify.h:441
#define iterate(l)
Definition: oldlist.h:159
#define first_node(l)
Definition: oldlist.h:139
double classify_norm_adj_midpoint
Definition: normmatch.cpp:63
void tesseract::Classify::ConvertMatchesToChoices ( const DENORM denorm,
const TBOX box,
ADAPT_RESULTS Results,
BLOB_CHOICE_LIST *  Choices 
)

The function converts the given match ratings to the list of blob choices with ratings and certainties (used by the context checkers). If character fragments are present in the results, this function also makes sure that there is at least one non-fragmented classification included. For each classification result check the unicharset for "definite" ambiguities and modify the resulting Choices accordingly.

Definition at line 1675 of file adaptmatch.cpp.

1677  {
1678  assert(Choices != NULL);
1679  FLOAT32 Rating;
1680  FLOAT32 Certainty;
1681  BLOB_CHOICE_IT temp_it;
1682  bool contains_nonfrag = false;
1683  temp_it.set_to_list(Choices);
1684  int choices_length = 0;
1685  // With no shape_table_ maintain the previous MAX_MATCHES as the maximum
1686  // number of returned results, but with a shape_table_ we want to have room
1687  // for at least the biggest shape (which might contain hundreds of Indic
1688  // grapheme fragments) and more, so use double the size of the biggest shape
1689  // if that is more than the default.
1690  int max_matches = MAX_MATCHES;
1691  if (shape_table_ != NULL) {
1692  max_matches = shape_table_->MaxNumUnichars() * 2;
1693  if (max_matches < MAX_MATCHES)
1694  max_matches = MAX_MATCHES;
1695  }
1696 
1697  for (int i = 0; i < Results->NumMatches; i++) {
1698  ScoredClass next = Results->match[i];
1699  int fontinfo_id = next.fontinfo_id;
1700  int fontinfo_id2 = next.fontinfo_id2;
1701  bool adapted = next.adapted;
1702  bool current_is_frag = (unicharset.get_fragment(next.unichar_id) != NULL);
1703  if (temp_it.length()+1 == max_matches &&
1704  !contains_nonfrag && current_is_frag) {
1705  continue; // look for a non-fragmented character to fill the
1706  // last spot in Choices if only fragments are present
1707  }
1708  // BlobLength can never be legally 0, this means recognition failed.
1709  // But we must return a classification result because some invoking
1710  // functions (chopper/permuter) do not anticipate a null blob choice.
1711  // So we need to assign a poor, but not infinitely bad score.
1712  if (Results->BlobLength == 0) {
1713  Certainty = -20;
1714  Rating = 100; // should be -certainty * real_blob_length
1715  } else {
1716  Rating = Certainty = next.rating;
1717  Rating *= rating_scale * Results->BlobLength;
1718  Certainty *= -(getDict().certainty_scale);
1719  }
1720  inT16 min_xheight, max_xheight;
1721  denorm.XHeightRange(next.unichar_id, unicharset, box,
1722  &min_xheight, &max_xheight);
1723  temp_it.add_to_end(new BLOB_CHOICE(next.unichar_id, Rating, Certainty,
1724  fontinfo_id, fontinfo_id2,
1726  min_xheight, max_xheight, adapted));
1727  contains_nonfrag |= !current_is_frag; // update contains_nonfrag
1728  choices_length++;
1729  if (choices_length >= max_matches) break;
1730  }
1731  Results->NumMatches = choices_length;
1732 } // ConvertMatchesToChoices
bool XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, inT16 *min_xht, inT16 *max_xht) const
Definition: normalis.cpp:275
int MaxNumUnichars() const
Definition: shapetable.cpp:358
#define NULL
Definition: host.h:144
CLASS_ID unichar_id
Definition: adaptmatch.cpp:82
double certainty_scale
Definition: dict.h:845
float FLOAT32
Definition: host.h:111
ShapeTable * shape_table_
Definition: classify.h:464
Dict & getDict()
Definition: classify.h:62
int get_script(UNICHAR_ID unichar_id) const
Definition: unicharset.h:552
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:610
FLOAT32 rating
Definition: adaptmatch.cpp:84
UNICHARSET unicharset
Definition: ccutil.h:72
short inT16
Definition: host.h:100
#define MAX_MATCHES
Definition: adaptmatch.cpp:70
inT16 fontinfo_id2
Definition: adaptmatch.cpp:88
inT32 BlobLength
Definition: adaptmatch.cpp:92
ScoredClass match[MAX_NUM_CLASSES]
Definition: adaptmatch.cpp:95
inT16 fontinfo_id
Definition: adaptmatch.cpp:87
void tesseract::Classify::ConvertProto ( PROTO  Proto,
int  ProtoId,
INT_CLASS  Class 
)

Definition at line 528 of file intproto.cpp.

528  {
529 /*
530  ** Parameters:
531  ** Proto floating-pt proto to be converted to integer format
532  ** ProtoId id of proto
533  ** Class integer class to add converted proto to
534  ** Globals: none
535  ** Operation: This routine converts Proto to integer format and
536  ** installs it as ProtoId in Class.
537  ** Return: none
538  ** Exceptions: none
539  ** History: Fri Feb 8 11:22:43 1991, DSJ, Created.
540  */
541  INT_PROTO P;
542  FLOAT32 Param;
543 
544  assert(ProtoId < Class->NumProtos);
545 
546  P = ProtoForProtoId(Class, ProtoId);
547 
548  Param = Proto->A * 128;
549  P->A = TruncateParam(Param, -128, 127, NULL);
550 
551  Param = -Proto->B * 256;
552  P->B = TruncateParam(Param, 0, 255, NULL);
553 
554  Param = Proto->C * 128;
555  P->C = TruncateParam(Param, -128, 127, NULL);
556 
557  Param = Proto->Angle * 256;
558  if (Param < 0 || Param >= 256)
559  P->Angle = 0;
560  else
561  P->Angle = (uinT8) Param;
562 
563  /* round proto length to nearest integer number of pico-features */
564  Param = (Proto->Length / GetPicoFeatureLength()) + 0.5;
565  Class->ProtoLengths[ProtoId] = TruncateParam(Param, 1, 255, NULL);
567  cprintf("Converted ffeat to (A=%d,B=%d,C=%d,L=%d)",
568  P->A, P->B, P->C, Class->ProtoLengths[ProtoId]);
569 } /* ConvertProto */
FLOAT32 B
Definition: protos.h:45
FLOAT32 Length
Definition: protos.h:50
#define NULL
Definition: host.h:144
float FLOAT32
Definition: host.h:111
uinT8 * ProtoLengths
Definition: intproto.h:110
int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id)
Definition: intproto.cpp:1927
FLOAT32 A
Definition: protos.h:44
FLOAT32 Angle
Definition: protos.h:49
int classify_learning_debug_level
Definition: classify.h:380
unsigned char uinT8
Definition: host.h:99
#define GetPicoFeatureLength()
Definition: picofeat.h:59
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
#define ProtoForProtoId(C, P)
Definition: intproto.h:163
FLOAT32 C
Definition: protos.h:46
INT_TEMPLATES tesseract::Classify::CreateIntTemplates ( CLASSES  FloatProtos,
const UNICHARSET target_unicharset 
)

Definition at line 573 of file intproto.cpp.

575  {
576 /*
577  ** Parameters:
578  ** FloatProtos prototypes in old floating pt format
579  ** Globals: none
580  ** Operation: This routine converts from the old floating point format
581  ** to the new integer format.
582  ** Return: New set of training templates in integer format.
583  ** Exceptions: none
584  ** History: Thu Feb 7 14:40:42 1991, DSJ, Created.
585  */
586  INT_TEMPLATES IntTemplates;
587  CLASS_TYPE FClass;
588  INT_CLASS IClass;
589  int ClassId;
590  int ProtoId;
591  int ConfigId;
592 
593  IntTemplates = NewIntTemplates();
594 
595  for (ClassId = 0; ClassId < target_unicharset.size(); ClassId++) {
596  FClass = &(FloatProtos[ClassId]);
597  if (FClass->NumProtos == 0 && FClass->NumConfigs == 0 &&
598  strcmp(target_unicharset.id_to_unichar(ClassId), " ") != 0) {
599  cprintf("Warning: no protos/configs for %s in CreateIntTemplates()\n",
600  target_unicharset.id_to_unichar(ClassId));
601  }
602  assert(UnusedClassIdIn(IntTemplates, ClassId));
603  IClass = NewIntClass(FClass->NumProtos, FClass->NumConfigs);
604  FontSet fs;
605  fs.size = FClass->font_set.size();
606  fs.configs = new int[fs.size];
607  for (int i = 0; i < fs.size; ++i) {
608  fs.configs[i] = FClass->font_set.get(i);
609  }
610  if (this->fontset_table_.contains(fs)) {
611  IClass->font_set_id = this->fontset_table_.get_id(fs);
612  delete[] fs.configs;
613  } else {
614  IClass->font_set_id = this->fontset_table_.push_back(fs);
615  }
616  AddIntClass(IntTemplates, ClassId, IClass);
617 
618  for (ProtoId = 0; ProtoId < FClass->NumProtos; ProtoId++) {
619  AddIntProto(IClass);
620  ConvertProto(ProtoIn(FClass, ProtoId), ProtoId, IClass);
621  AddProtoToProtoPruner(ProtoIn(FClass, ProtoId), ProtoId, IClass,
623  AddProtoToClassPruner(ProtoIn(FClass, ProtoId), ClassId, IntTemplates);
624  }
625 
626  for (ConfigId = 0; ConfigId < FClass->NumConfigs; ConfigId++) {
627  AddIntConfig(IClass);
628  ConvertConfig(FClass->Configurations[ConfigId], ConfigId, IClass);
629  }
630  }
631  return (IntTemplates);
632 } /* CreateIntTemplates */
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:494
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
UnicityTable< FontSet > fontset_table_
Definition: classify.h:451
int size() const
Definition: unicharset.h:264
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class)
Definition: intproto.cpp:224
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:257
inT16 NumProtos
Definition: protos.h:59
CONFIGS Configurations
Definition: protos.h:64
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:683
#define ProtoIn(Class, Pid)
Definition: protos.h:123
UnicityTableEqEq< int > font_set
Definition: protos.h:65
inT16 NumConfigs
Definition: protos.h:62
void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates)
Definition: intproto.cpp:320
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:749
const T & get(int id) const
Return the object from an id.
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:528
#define UnusedClassIdIn(T, c)
Definition: intproto.h:172
int size() const
Return the size used.
int classify_learning_debug_level
Definition: classify.h:380
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:281
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:364
void tesseract::Classify::DebugAdaptiveClassifier ( TBLOB * Blob,
const DENORM & denorm,
ADAPT_RESULTS * Results 
)
Parameters
Blob: blob whose classification is being debugged
denorm: normalization/denormalization parameters
Results: results of match being debugged

Globals: none

Note
Exceptions: none
History: Wed Mar 13 16:44:41 1991, DSJ, Created.

Definition at line 1748 of file adaptmatch.cpp.

1750  {
1751  for (int i = 0; i < Results->NumMatches; i++) {
1752  if (Results->match[i].rating < Results->best_match.rating)
1753  Results->best_match = Results->match[i];
1754  }
1755  const char *Prompt =
1756  "Left-click in IntegerMatch Window to continue or right click to debug...";
1757  CLASS_ID unichar_id = Results->best_match.unichar_id;
1758  int shape_id = Results->best_match.shape_id;
1759  bool adaptive_on = true;
1760  bool pretrained_on = true;
1761 
1762  const char* debug_mode;
1763  do {
1764  if (!pretrained_on)
1765  debug_mode = "Adaptive Templates Only";
1766  else if (!adaptive_on)
1767  debug_mode = "PreTrained Templates Only";
1768  else
1769  debug_mode = "All Templates";
1770  ShowMatchDisplay();
1771  tprintf("Debugging class %d = %s in mode %s ...",
1772  unichar_id, unicharset.id_to_unichar(unichar_id), debug_mode);
1773  if (shape_id >= 0 && shape_table_ != NULL) {
1774  tprintf(" from shape %s\n", shape_table_->DebugStr(shape_id).string());
1775  }
1776  ShowBestMatchFor(Blob, denorm, unichar_id, shape_id, adaptive_on,
1777  pretrained_on, Results);
1779  } while ((unichar_id = GetClassToDebug(Prompt, &adaptive_on,
1780  &pretrained_on, &shape_id)) != 0);
1781 } /* DebugAdaptiveClassifier */
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:194
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
ScoredClass best_match
Definition: adaptmatch.cpp:96
void UpdateMatchDisplay()
Definition: intproto.cpp:476
CLASS_ID GetClassToDebug(const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
Definition: intproto.cpp:1432
#define NULL
Definition: host.h:144
CLASS_ID unichar_id
Definition: adaptmatch.cpp:82
ShapeTable * shape_table_
Definition: classify.h:464
const char * string() const
Definition: strngs.cpp:156
FLOAT32 rating
Definition: adaptmatch.cpp:84
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
UNICHARSET unicharset
Definition: ccutil.h:72
ScoredClass match[MAX_NUM_CLASSES]
Definition: adaptmatch.cpp:95
void ShowBestMatchFor(TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int shape_id, BOOL8 AdaptiveOn, BOOL8 PreTrainedOn, ADAPT_RESULTS *Results)
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
void tesseract::Classify::DisplayAdaptedChar ( TBLOB * blob,
const DENORM & denorm,
INT_CLASS_STRUCT * int_class 
)

Definition at line 1036 of file adaptmatch.cpp.

1037  {
1038 #ifndef GRAPHICS_DISABLED
1039  int bloblength = 0;
1040  INT_FEATURE_ARRAY features;
1041  uinT8* norm_array = new uinT8[unicharset.size()];
1042  int num_features = GetBaselineFeatures(blob, denorm, PreTrainedTemplates,
1043  features,
1044  norm_array, &bloblength);
1045  delete [] norm_array;
1046  INT_RESULT_STRUCT IntResult;
1047 
1048  im_.Match(int_class, AllProtosOn, AllConfigsOn,
1049  num_features, features,
1052  cprintf ("Best match to temp config %d = %4.1f%%.\n",
1053  IntResult.Config, (1.0 - IntResult.Rating) * 100.0);
1054  if (classify_learning_debug_level >= 2) {
1055  uinT32 ConfigMask;
1056  ConfigMask = 1 << IntResult.Config;
1057  ShowMatchDisplay();
1058  im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask,
1059  num_features, features,
1061  6 | 0x19, matcher_debug_separate_windows);
1063  }
1064 #endif
1065 }
BIT_VECTOR AllProtosOn
Definition: classify.h:433
IntegerMatcher im_
Definition: classify.h:455
int classify_adapt_feature_threshold
Definition: classify.h:404
int size() const
Definition: unicharset.h:264
void UpdateMatchDisplay()
Definition: intproto.cpp:476
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, INT_RESULT Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:460
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:426
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
UNICHARSET unicharset
Definition: ccutil.h:72
int classify_learning_debug_level
Definition: classify.h:380
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: baseapi.h:66
bool matcher_debug_separate_windows
Definition: classify.h:415
unsigned char uinT8
Definition: host.h:99
int GetBaselineFeatures(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *CharNormArray, inT32 *BlobLength)
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
unsigned int uinT32
Definition: host.h:103
BIT_VECTOR AllConfigsOn
Definition: classify.h:435
#define NO_DEBUG
Definition: adaptmatch.cpp:72
void tesseract::Classify::DoAdaptiveMatch ( TBLOB * Blob,
const DENORM & denorm,
ADAPT_RESULTS * Results 
)

This routine performs an adaptive classification. If we have not yet adapted to enough classes, a simple classification to the pre-trained templates is performed. Otherwise, we match the blob against the adapted templates. If the adapted templates do not match well, we try a match against the pre-trained templates. If an adapted template match is found, we do a match to any pre-trained templates which could be ambiguous. The results from all of these classifications are merged together into Results.

Parameters
Blob: blob to be classified
denorm: normalization/denormalization parameters
Results: place to put match results

Globals:

  • PreTrainedTemplates built-in training templates
  • AdaptedTemplates templates adapted for this page
  • matcher_great_threshold rating limit for a great match
Note
Exceptions: none
History: Tue Mar 12 08:50:11 1991, DSJ, Created.

Definition at line 1808 of file adaptmatch.cpp.

1810  {
1811  UNICHAR_ID *Ambiguities;
1812 
1813  AdaptiveMatcherCalls++;
1814  InitIntFX();
1815 
1817  tess_cn_matching) {
1818  CharNormClassifier(Blob, denorm, PreTrainedTemplates, Results);
1819  } else {
1820  Ambiguities = BaselineClassifier(Blob, denorm, AdaptedTemplates, Results);
1821  if ((Results->NumMatches > 0 &&
1822  MarginalMatch (Results->best_match.rating) &&
1823  !tess_bn_matching) ||
1824  Results->NumMatches == 0) {
1825  CharNormClassifier(Blob, denorm, PreTrainedTemplates, Results);
1826  } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
1827  AmbigClassifier(Blob, denorm,
1830  Ambiguities,
1831  Results);
1832  }
1833  }
1834 
1835  // Force the blob to be classified as noise
1836  // if the results contain only fragments.
1837  // TODO(daria): verify that this is better than
1838  // just adding a NULL classification.
1839  if (!Results->HasNonfragment || Results->NumMatches == 0)
1840  ClassifyAsNoise(Results);
1841 } /* DoAdaptiveMatch */
int CharNormClassifier(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, ADAPT_RESULTS *Results)
#define MarginalMatch(Rating)
Definition: adaptmatch.cpp:124
int UNICHAR_ID
Definition: unichar.h:31
ScoredClass best_match
Definition: adaptmatch.cpp:96
bool HasNonfragment
Definition: adaptmatch.cpp:94
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const DENORM &denorm, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:430
int matcher_permanent_classes_min
Definition: classify.h:387
#define InitIntFX()
Definition: adaptmatch.cpp:127
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:426
FLOAT32 rating
Definition: adaptmatch.cpp:84
void ClassifyAsNoise(ADAPT_RESULTS *Results)
void AmbigClassifier(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, ADAPT_CLASS *Classes, UNICHAR_ID *Ambiguities, ADAPT_RESULTS *Results)
void tesseract::Classify::EndAdaptiveClassifier ( )

This routine performs cleanup operations on the adaptive classifier. It should be called before the program is terminated. Its main function is to save the adapted templates to a file.

Globals:

Note
Exceptions: none
History: Tue Mar 19 14:37:06 1991, DSJ, Created.

Definition at line 476 of file adaptmatch.cpp.

476  {
477  STRING Filename;
478  FILE *File;
479 
480  #ifndef SECURE_NAMES
481  if (AdaptedTemplates != NULL &&
483  Filename = imagefile + ADAPT_TEMPLATE_SUFFIX;
484  File = fopen (Filename.string(), "wb");
485  if (File == NULL)
486  cprintf ("Unable to save adapted templates to %s!\n", Filename.string());
487  else {
488  cprintf ("\nSaving adapted templates to %s ...", Filename.string());
489  fflush(stdout);
491  cprintf ("\n");
492  fclose(File);
493  }
494  }
495  #endif
496 
497  if (AdaptedTemplates != NULL) {
500  }
501 
502  if (PreTrainedTemplates != NULL) {
505  }
507  FreeNormProtos();
508  if (AllProtosOn != NULL) {
515  AllProtosOn = NULL;
516  PrunedProtos = NULL;
517  AllConfigsOn = NULL;
518  AllProtosOff = NULL;
521  }
522  delete shape_table_;
523  shape_table_ = NULL;
524 } /* EndAdaptiveClassifier */
BIT_VECTOR AllProtosOn
Definition: classify.h:433
void EndDangerousAmbigs()
Definition: stopper.cpp:778
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
BIT_VECTOR AllProtosOff
Definition: classify.h:436
#define NULL
Definition: host.h:144
bool classify_enable_adaptive_matcher
Definition: classify.h:372
ShapeTable * shape_table_
Definition: classify.h:464
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:430
void free_int_templates(INT_TEMPLATES templates)
Definition: intproto.cpp:774
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:55
Dict & getDict()
Definition: classify.h:62
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:426
BIT_VECTOR AllConfigsOff
Definition: classify.h:437
const char * string() const
Definition: strngs.cpp:156
Definition: strngs.h:40
#define ADAPT_TEMPLATE_SUFFIX
Definition: adaptmatch.cpp:68
STRING imagefile
Definition: ccutil.h:74
bool classify_save_adapted_templates
Definition: classify.h:376
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:507
BIT_VECTOR TempProtoMask
Definition: classify.h:438
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
BIT_VECTOR PrunedProtos
Definition: classify.h:434
BIT_VECTOR AllConfigsOn
Definition: classify.h:435
void tesseract::Classify::ExpandShapesAndApplyCorrections ( ADAPT_CLASS * classes,
bool  debug,
int  class_id,
int  bottom,
int  top,
float  cp_rating,
int  blob_length,
const uinT8 * cn_factors,
INT_RESULT_STRUCT & int_result,
ADAPT_RESULTS * final_results 
)

Definition at line 1295 of file adaptmatch.cpp.

1298  {
1299  // Compute the fontinfo_ids.
1300  int fontinfo_id = kBlankFontinfoId;
1301  int fontinfo_id2 = kBlankFontinfoId;
1302  if (classes != NULL) {
1303  // Adapted result.
1304  fontinfo_id = GetFontinfoId(classes[class_id], int_result.Config);
1305  if (int_result.Config2 >= 0)
1306  fontinfo_id2 = GetFontinfoId(classes[class_id], int_result.Config2);
1307  } else {
1308  // Pre-trained result.
1309  fontinfo_id = ClassAndConfigIDToFontOrShapeID(class_id, int_result.Config);
1310  if (int_result.Config2 >= 0) {
1311  fontinfo_id2 = ClassAndConfigIDToFontOrShapeID(class_id,
1312  int_result.Config2);
1313  }
1314  if (shape_table_ != NULL) {
1315  // Actually fontinfo_id is an index into the shape_table_ and it
1316  // contains a list of unchar_id/font_id pairs.
1317  int shape_id = fontinfo_id;
1318  const Shape& shape = shape_table_->GetShape(fontinfo_id);
1319  double min_rating = 0.0;
1320  for (int c = 0; c < shape.size(); ++c) {
1321  int unichar_id = shape[c].unichar_id;
1322  fontinfo_id = shape[c].font_ids[0];
1323  if (shape[c].font_ids.size() > 1)
1324  fontinfo_id2 = shape[c].font_ids[1];
1325  else if (fontinfo_id2 != kBlankFontinfoId)
1326  fontinfo_id2 = shape_table_->GetShape(fontinfo_id2)[0].font_ids[0];
1327  double rating = ComputeCorrectedRating(debug, unichar_id, cp_rating,
1328  int_result.Rating,
1329  int_result.FeatureMisses,
1330  bottom, top, blob_length,
1331  cn_factors);
1332  if (c == 0 || rating < min_rating)
1333  min_rating = rating;
1334  if (unicharset.get_enabled(unichar_id)) {
1335  AddNewResult(final_results, unichar_id, shape_id, rating,
1336  classes != NULL, int_result.Config,
1337  fontinfo_id, fontinfo_id2);
1338  }
1339  }
1340  int_result.Rating = min_rating;
1341  return;
1342  }
1343  }
1344  double rating = ComputeCorrectedRating(debug, class_id, cp_rating,
1345  int_result.Rating,
1346  int_result.FeatureMisses,
1347  bottom, top, blob_length,
1348  cn_factors);
1349  if (unicharset.get_enabled(class_id)) {
1350  AddNewResult(final_results, class_id, -1, rating,
1351  classes != NULL, int_result.Config,
1352  fontinfo_id, fontinfo_id2);
1353  }
1354  int_result.Rating = rating;
1355 }
uinT16 FeatureMisses
Definition: intmatcher.h:45
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, const uinT8 *cn_factors)
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:179
#define NULL
Definition: host.h:144
ShapeTable * shape_table_
Definition: classify.h:464
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
Definition: adaptive.cpp:190
void AddNewResult(ADAPT_RESULTS *results, CLASS_ID class_id, int shape_id, FLOAT32 rating, bool adapted, int config, int fontinfo_id, int fontinfo_id2)
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
UNICHARSET unicharset
Definition: ccutil.h:72
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:747
FEATURE_SET tesseract::Classify::ExtractOutlineFeatures ( TBLOB * Blob)

Definition at line 36 of file outfeat.cpp.

36  {
37 /*
38  ** Parameters:
39  ** Blob blob to extract pico-features from
40  ** LineStats statistics on text row blob is in
41  ** Globals: none
42  ** Operation: Convert each segment in the outline to a feature
43  ** and return the features.
44  ** Return: Outline-features for Blob.
45  ** Exceptions: none
46  ** History: 11/13/90, DSJ, Created.
47  ** 05/24/91, DSJ, Updated for either char or baseline normalize.
48  */
49  LIST Outlines;
50  LIST RemainingOutlines;
51  MFOUTLINE Outline;
52  FEATURE_SET FeatureSet;
53  FLOAT32 XScale, YScale;
54 
55  FeatureSet = NewFeatureSet (MAX_OUTLINE_FEATURES);
56  if (Blob == NULL)
57  return (FeatureSet);
58 
59  Outlines = ConvertBlob (Blob);
60 
61  NormalizeOutlines(Outlines, &XScale, &YScale);
62  RemainingOutlines = Outlines;
63  iterate(RemainingOutlines) {
64  Outline = (MFOUTLINE) first_node (RemainingOutlines);
65  ConvertToOutlineFeatures(Outline, FeatureSet);
66  }
68  NormalizeOutlineX(FeatureSet);
69  FreeOutlines(Outlines);
70  return (FeatureSet);
71 } /* ExtractOutlineFeatures */
FEATURE_SET NewFeatureSet(int NumFeatures)
void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
Definition: mfoutline.cpp:346
#define NULL
Definition: host.h:144
float FLOAT32
Definition: host.h:111
void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: outfeat.cpp:111
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:227
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:41
void NormalizeOutlineX(FEATURE_SET FeatureSet)
Definition: outfeat.cpp:155
LIST MFOUTLINE
Definition: mfoutline.h:33
#define iterate(l)
Definition: oldlist.h:159
#define MAX_OUTLINE_FEATURES
Definition: outfeat.h:35
#define first_node(l)
Definition: oldlist.h:139
FEATURE_SET tesseract::Classify::ExtractPicoFeatures ( TBLOB * Blob)

Definition at line 57 of file picofeat.cpp.

57  {
58 /*
59  ** Parameters:
60  ** Blob blob to extract pico-features from
61  ** LineStats statistics on text row blob is in
62  ** Globals:
63  ** classify_norm_method normalization method currently specified
64  ** Operation: Dummy for now.
65  ** Return: Pico-features for Blob.
66  ** Exceptions: none
67  ** History: 9/4/90, DSJ, Created.
68  */
69  LIST Outlines;
70  LIST RemainingOutlines;
71  MFOUTLINE Outline;
72  FEATURE_SET FeatureSet;
73  FLOAT32 XScale, YScale;
74 
75  FeatureSet = NewFeatureSet(MAX_PICO_FEATURES);
76  Outlines = ConvertBlob(Blob);
77  NormalizeOutlines(Outlines, &XScale, &YScale);
78  RemainingOutlines = Outlines;
79  iterate(RemainingOutlines) {
80  Outline = (MFOUTLINE) first_node (RemainingOutlines);
81  ConvertToPicoFeatures2(Outline, FeatureSet);
82  }
84  NormalizePicoX(FeatureSet);
85  FreeOutlines(Outlines);
86  return (FeatureSet);
87 
88 } /* ExtractPicoFeatures */
FEATURE_SET NewFeatureSet(int NumFeatures)
void NormalizePicoX(FEATURE_SET FeatureSet)
Definition: picofeat.cpp:197
void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
Definition: mfoutline.cpp:346
float FLOAT32
Definition: host.h:111
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:151
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:227
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:41
#define MAX_PICO_FEATURES
Definition: picofeat.h:47
LIST MFOUTLINE
Definition: mfoutline.h:33
#define iterate(l)
Definition: oldlist.h:159
#define first_node(l)
Definition: oldlist.h:139
void tesseract::Classify::FreeNormProtos ( )

Definition at line 157 of file normmatch.cpp.

157  {
158  if (NormProtos != NULL) {
159  for (int i = 0; i < NormProtos->NumProtos; i++)
163  Efree(NormProtos);
164  NormProtos = NULL;
165  }
166 }
void Efree(void *ptr)
Definition: emalloc.cpp:85
LIST * Protos
Definition: normmatch.cpp:42
#define NULL
Definition: host.h:144
void FreeProtoList(LIST *ProtoList)
Definition: cluster.cpp:560
PARAM_DESC * ParamDesc
Definition: normmatch.cpp:41
NORM_PROTOS * NormProtos
Definition: classify.h:441
UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( )
inline

Definition at line 336 of file classify.h.

336  {
337  return fontinfo_table_;
338  }
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:443
UnicityTable<FontSet>& tesseract::Classify::get_fontset_table ( )
inline

Definition at line 339 of file classify.h.

339  {
340  return fontset_table_;
341  }
UnicityTable< FontSet > fontset_table_
Definition: classify.h:451
int tesseract::Classify::GetAdaptiveFeatures ( TBLOB Blob,
INT_FEATURE_ARRAY  IntFeatures,
FEATURE_SET FloatFeatures 
)

This routine sets up the feature extractor to extract baseline normalized pico-features.

The extracted pico-features are converted to integer form and placed in IntFeatures. The original floating-pt. features are returned in FloatFeatures.

Globals: none

Parameters
Blob: blob to extract features from
[out] IntFeatures: array to fill with integer features
[out] FloatFeatures: place to return actual floating-pt features
Returns
Number of pico-features returned (0 if an error occurred)
Note
Exceptions: none
History: Tue Mar 12 17:55:18 1991, DSJ, Created.

Definition at line 854 of file adaptmatch.cpp.

856  {
857  FEATURE_SET Features;
858  int NumFeatures;
859 
860  classify_norm_method.set_value(baseline);
861  Features = ExtractPicoFeatures(Blob);
862 
863  NumFeatures = Features->NumFeatures;
864  if (NumFeatures > UNLIKELY_NUM_FEAT) {
865  FreeFeatureSet(Features);
866  return 0;
867  }
868 
869  ComputeIntFeatures(Features, IntFeatures);
870  *FloatFeatures = Features;
871 
872  return NumFeatures;
873 } /* GetAdaptiveFeatures */
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
Definition: float2int.cpp:94
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:79
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:57
#define UNLIKELY_NUM_FEAT
Definition: adaptmatch.cpp:71
void tesseract::Classify::GetAdaptThresholds ( TWERD Word,
const DENORM denorm,
const WERD_CHOICE BestChoice,
const WERD_CHOICE BestRawChoice,
FLOAT32  Thresholds[] 
)

This routine tries to estimate how tight the adaptation threshold should be set for each character in the current word. In general, the routine tries to set tighter thresholds for a character when the current set of templates would have made an error on that character. It tries to set a threshold tight enough to eliminate the error. Two different sets of rules can be used to determine the desired thresholds.

Parameters
Word: current word
denorm: normalization/denormalization parameters
BestChoice: best choice for current word with context
BestRawChoice: best choice for current word without context
[out] Thresholds: array of thresholds to be filled in

Globals:

  • matcher_good_threshold
  • matcher_perfect_threshold
  • matcher_rating_margin
Returns
none (results are returned in Thresholds)
Note
Exceptions: none
History: Fri May 31 09:22:08 1991, DSJ, Created.

Definition at line 1869 of file adaptmatch.cpp.

1873  {
1877  Thresholds);
1878 } /* GetAdaptThresholds */
double matcher_rating_margin
Definition: classify.h:385
double matcher_good_threshold
Definition: classify.h:381
Dict & getDict()
Definition: classify.h:62
void FindClassifierErrors(FLOAT32 MinRating, FLOAT32 MaxRating, FLOAT32 RatingMargin, FLOAT32 Thresholds[])
Definition: stopper.cpp:394
double matcher_perfect_threshold
Definition: classify.h:383
UNICHAR_ID * tesseract::Classify::GetAmbiguities ( TBLOB Blob,
const DENORM denorm,
CLASS_ID  CorrectClass 
)

This routine matches blob to the built-in templates to find out if there are any classes other than the correct class which are potential ambiguities.

Parameters
Blob: blob to get classification ambiguities for
denorm: normalization/denormalization parameters
CorrectClass: correct class for Blob

Globals:

  • CurrentRatings used by qsort compare routine
  • PreTrainedTemplates built-in templates
Returns
String containing all possible ambiguous classes.
Note
Exceptions: none
History: Fri Mar 15 08:08:22 1991, DSJ, Created.

Definition at line 1898 of file adaptmatch.cpp.

1900  {
1901  ADAPT_RESULTS *Results = new ADAPT_RESULTS();
1902  UNICHAR_ID *Ambiguities;
1903  int i;
1904 
1905  Results->Initialize();
1906 
1907  CharNormClassifier(Blob, denorm, PreTrainedTemplates, Results);
1908  RemoveBadMatches(Results);
1909  qsort((void *)Results->match, Results->NumMatches,
1910  sizeof(ScoredClass), CompareByRating);
1911 
1912  /* copy the class id's into a string of ambiguities - don't copy if
1913  the correct class is the only class id matched */
1914  Ambiguities = (UNICHAR_ID *) Emalloc (sizeof (UNICHAR_ID) *
1915  (Results->NumMatches + 1));
1916  if (Results->NumMatches > 1 ||
1917  (Results->NumMatches == 1 &&
1918  Results->match[0].unichar_id != CorrectClass)) {
1919  for (i = 0; i < Results->NumMatches; i++)
1920  Ambiguities[i] = Results->match[i].unichar_id;
1921  Ambiguities[i] = -1;
1922  } else {
1923  Ambiguities[0] = -1;
1924  }
1925 
1926  delete Results;
1927  return Ambiguities;
1928 } /* GetAmbiguities */
int CharNormClassifier(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, ADAPT_RESULTS *Results)
int UNICHAR_ID
Definition: unichar.h:31
int CompareByRating(const void *arg1, const void *arg2)
CLASS_ID unichar_id
Definition: adaptmatch.cpp:82
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:426
void * Emalloc(size_t Size)
Definition: emalloc.cpp:35
void RemoveBadMatches(ADAPT_RESULTS *Results)
ScoredClass match[MAX_NUM_CLASSES]
Definition: adaptmatch.cpp:95
void Initialize()
Definition: adaptmatch.cpp:101
int tesseract::Classify::GetBaselineFeatures ( TBLOB Blob,
const DENORM denorm,
INT_TEMPLATES  Templates,
INT_FEATURE_ARRAY  IntFeatures,
uinT8 CharNormArray,
inT32 BlobLength 
)

This routine calls the integer (Hardware) feature extractor if it has not been called before for this blob. The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features. It then copies the baseline features into the IntFeatures array provided by the caller.

Parameters
Blob: blob to extract features from
denorm: normalization/denormalization parameters
Templates: used to compute char norm adjustments
IntFeatures: array to fill with integer features
CharNormArray: array to fill with dummy char norm adjustments
BlobLength: length of blob in baseline-normalized units

Globals:

  • FeaturesHaveBeenExtracted TRUE if fx has been done
  • BaselineFeatures holds extracted baseline feat
  • CharNormFeatures holds extracted char norm feat
  • FXInfo holds misc. FX info
Returns
Number of features extracted or 0 if an error occurred.
Note
Exceptions: none
History: Tue May 28 10:40:52 1991, DSJ, Created.

Definition at line 1957 of file adaptmatch.cpp.

1962  {
1963  register INT_FEATURE Src, Dest, End;
1964 
1965  if (!FeaturesHaveBeenExtracted) {
1966  FeaturesOK = ExtractIntFeat(Blob, denorm, BaselineFeatures,
1967  CharNormFeatures, &FXInfo, NULL);
1968  FeaturesHaveBeenExtracted = TRUE;
1969  }
1970 
1971  if (!FeaturesOK) {
1972  *BlobLength = FXInfo.NumBL;
1973  return 0;
1974  }
1975 
1976  for (Src = BaselineFeatures, End = Src + FXInfo.NumBL, Dest = IntFeatures;
1977  Src < End;
1978  *Dest++ = *Src++);
1979 
1980  ClearCharNormArray(CharNormArray);
1981  *BlobLength = FXInfo.NumBL;
1982  return FXInfo.NumBL;
1983 } /* GetBaselineFeatures */
#define NULL
Definition: host.h:144
void ClearCharNormArray(uinT8 *char_norm_array)
Definition: float2int.cpp:48
int ExtractIntFeat(TBLOB *Blob, const DENORM &denorm, INT_FEATURE_ARRAY BLFeat, INT_FEATURE_ARRAY CNFeat, INT_FX_RESULT_STRUCT *Results, inT32 *FeatureOutlineArray)
Definition: intfx.cpp:143
#define TRUE
Definition: capi.h:27
int tesseract::Classify::GetCharNormFeatures ( TBLOB Blob,
const DENORM denorm,
INT_TEMPLATES  Templates,
INT_FEATURE_ARRAY  IntFeatures,
uinT8 PrunerNormArray,
uinT8 CharNormArray,
inT32 BlobLength,
inT32 FeatureOutlineArray 
)

This routine calls the integer (Hardware) feature extractor if it has not been called before for this blob.

The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features.

It then copies the char norm features into the IntFeatures array provided by the caller.

Parameters
Blob: blob to extract features from
denorm: normalization/denormalization parameters
Templates: used to compute char norm adjustments
IntFeatures: array to fill with integer features
PrunerNormArray: Array of factors from blob normalization process
CharNormArray: array to fill with dummy char norm adjustments
BlobLength: length of blob in baseline-normalized units
FeatureOutlineArray: optional output array (skipped when NULL) that receives FXInfo.NumCN entries copied from FeatureOutlineIndex

Globals:

  • FeaturesHaveBeenExtracted TRUE if fx has been done
  • BaselineFeatures holds extracted baseline feat
  • CharNormFeatures holds extracted char norm feat
  • FXInfo holds misc. FX info
Returns
Number of features extracted or 0 if an error occurred.
Note
Exceptions: none
History: Tue May 28 10:40:52 1991, DSJ, Created.

Definition at line 2045 of file adaptmatch.cpp.

2052  {
2053  register INT_FEATURE Src, Dest, End;
2054  FEATURE NormFeature;
2055  FLOAT32 Baseline, Scale;
2056  inT32 FeatureOutlineIndex[MAX_NUM_INT_FEATURES];
2057 
2058  if (!FeaturesHaveBeenExtracted) {
2059  FeaturesOK = ExtractIntFeat(Blob, denorm, BaselineFeatures,
2060  CharNormFeatures, &FXInfo,
2061  FeatureOutlineIndex);
2062  FeaturesHaveBeenExtracted = TRUE;
2063  }
2064 
2065  if (!FeaturesOK) {
2066  *BlobLength = FXInfo.NumBL;
2067  return (0);
2068  }
2069 
2070  for (Src = CharNormFeatures, End = Src + FXInfo.NumCN, Dest = IntFeatures;
2071  Src < End;
2072  *Dest++ = *Src++);
2073  for (int i = 0; FeatureOutlineArray && i < FXInfo.NumCN; ++i) {
2074  FeatureOutlineArray[i] = FeatureOutlineIndex[i];
2075  }
2076 
2077  NormFeature = NewFeature(&CharNormDesc);
2078  Baseline = BASELINE_OFFSET;
2079  Scale = MF_SCALE_FACTOR;
2080  NormFeature->Params[CharNormY] = (FXInfo.Ymean - Baseline) * Scale;
2081  NormFeature->Params[CharNormLength] =
2082  FXInfo.Length * Scale / LENGTH_COMPRESSION;
2083  NormFeature->Params[CharNormRx] = FXInfo.Rx * Scale;
2084  NormFeature->Params[CharNormRy] = FXInfo.Ry * Scale;
2085  ComputeCharNormArrays(NormFeature, Templates, CharNormArray, PrunerNormArray);
2086  *BlobLength = FXInfo.NumBL;
2087  return (FXInfo.NumCN);
2088 } /* GetCharNormFeatures */
#define BASELINE_OFFSET
Definition: baseline.h:38
#define LENGTH_COMPRESSION
Definition: normfeat.h:26
int inT32
Definition: host.h:102
float FLOAT32
Definition: host.h:111
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
#define MF_SCALE_FACTOR
Definition: mfoutline.h:71
#define MAX_NUM_INT_FEATURES
Definition: baseapi.h:63
FLOAT32 Params[1]
Definition: ocrfeatures.h:64
int ExtractIntFeat(TBLOB *Blob, const DENORM &denorm, INT_FEATURE_ARRAY BLFeat, INT_FEATURE_ARRAY CNFeat, INT_FX_RESULT_STRUCT *Results, inT32 *FeatureOutlineArray)
Definition: intfx.cpp:143
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
#define TRUE
Definition: capi.h:27
const FEATURE_DESC_STRUCT CharNormDesc
CLASS_ID tesseract::Classify::GetClassToDebug ( const char *  Prompt,
bool *  adaptive_on,
bool *  pretrained_on,
int *  shape_id 
)

Definition at line 1432 of file intproto.cpp.

1433  {
1434 /*
1435  ** Parameters:
1436  ** Prompt prompt to print while waiting for input from window
1437  ** Globals: none
1438  ** Operation: This routine prompts the user with Prompt and waits
1439  ** for the user to enter something in the debug window.
1440  ** Return: Character entered in the debug window.
1441  ** Exceptions: none
1442  ** History: Thu Mar 21 16:55:13 1991, DSJ, Created.
1443  */
1444  tprintf("%s\n", Prompt);
1445  SVEvent* ev;
1446  SVEventType ev_type;
1447  int unichar_id = INVALID_UNICHAR_ID;
1448  // Wait until a click or popup event.
1449  do {
1451  ev_type = ev->type;
1452  if (ev_type == SVET_POPUP) {
1453  if (ev->command_id == IDA_SHAPE_INDEX) {
1454  if (shape_table_ != NULL) {
1455  *shape_id = atoi(ev->parameter);
1456  *adaptive_on = false;
1457  *pretrained_on = true;
1458  if (*shape_id >= 0 && *shape_id < shape_table_->NumShapes()) {
1459  int font_id;
1460  shape_table_->GetFirstUnicharAndFont(*shape_id, &unichar_id,
1461  &font_id);
1462  tprintf("Shape %d, first unichar=%d, font=%d\n",
1463  *shape_id, unichar_id, font_id);
1464  return unichar_id;
1465  }
1466  tprintf("Shape index '%s' not found in shape table\n", ev->parameter);
1467  } else {
1468  tprintf("No shape table loaded!\n");
1469  }
1470  } else {
1472  unichar_id = unicharset.unichar_to_id(ev->parameter);
1473  if (ev->command_id == IDA_ADAPTIVE) {
1474  *adaptive_on = true;
1475  *pretrained_on = false;
1476  *shape_id = -1;
1477  } else if (ev->command_id == IDA_STATIC) {
1478  *adaptive_on = false;
1479  *pretrained_on = true;
1480  } else {
1481  *adaptive_on = true;
1482  *pretrained_on = true;
1483  }
1484  if (ev->command_id == IDA_ADAPTIVE || shape_table_ == NULL) {
1485  *shape_id = -1;
1486  return unichar_id;
1487  }
1488  for (int s = 0; s < shape_table_->NumShapes(); ++s) {
1489  if (shape_table_->GetShape(s).ContainsUnichar(unichar_id)) {
1490  tprintf("%s\n", shape_table_->DebugStr(s).string());
1491  }
1492  }
1493  } else {
1494  tprintf("Char class '%s' not found in unicharset",
1495  ev->parameter);
1496  }
1497  }
1498  }
1499  delete ev;
1500  } while (ev_type != SVET_CLICK);
1501  return 0;
1502 } /* GetClassToDebug */
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:444
void GetFirstUnicharAndFont(int shape_id, int *unichar_id, int *font_id) const
Definition: shapetable.cpp:308
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:194
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:176
int NumShapes() const
Definition: shapetable.h:140
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:179
#define NULL
Definition: host.h:144
ShapeTable * shape_table_
Definition: classify.h:464
SVEventType type
Definition: scrollview.h:64
ScrollView * IntMatchWindow
Definition: intproto.cpp:180
const char * string() const
Definition: strngs.cpp:156
char * parameter
Definition: scrollview.h:71
int command_id
Definition: scrollview.h:70
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:543
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
UNICHARSET unicharset
Definition: ccutil.h:72
SVEventType
Definition: scrollview.h:45
bool ContainsUnichar(int unichar_id) const
Definition: shapetable.cpp:121
Dict& tesseract::Classify::getDict ( )
inline

Definition at line 62 of file classify.h.

62  {
63  return dict_;
64  }
int tesseract::Classify::GetFontinfoId ( ADAPT_CLASS  Class,
uinT8  ConfigId 
)

Definition at line 190 of file adaptive.cpp.

190  {
191  return (ConfigIsPermanent(Class, ConfigId) ?
192  PermConfigFor(Class, ConfigId)->FontinfoId :
193  TempConfigFor(Class, ConfigId)->FontinfoId);
194 }
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:105
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
void tesseract::Classify::InitAdaptedClass ( TBLOB Blob,
const DENORM denorm,
CLASS_ID  ClassId,
int  FontinfoId,
ADAPT_CLASS  Class,
ADAPT_TEMPLATES  Templates 
)

This routine creates a new adapted class and uses Blob as the model for the first config in that class.

Parameters
Blob: blob to model new class after
denorm: normalization/denormalization parameters
ClassId: id of the class to be initialized
FontinfoId: font information inferred from pre-trained templates
Class: adapted class to be initialized
Templates: adapted templates to add new class to

Globals:

Note
Exceptions: none
History: Thu Mar 14 12:49:39 1991, DSJ, Created.

Definition at line 758 of file adaptmatch.cpp.

763  {
764  FEATURE_SET Features;
765  int Fid, Pid;
766  FEATURE Feature;
767  int NumFeatures;
768  TEMP_PROTO TempProto;
769  PROTO Proto;
770  INT_CLASS IClass;
772 
773  classify_norm_method.set_value(baseline);
774  Features = ExtractOutlineFeatures(Blob);
775  NumFeatures = Features->NumFeatures;
776  if (NumFeatures > UNLIKELY_NUM_FEAT || NumFeatures <= 0) {
777  FreeFeatureSet(Features);
778  return;
779  }
780 
781  Config = NewTempConfig(NumFeatures - 1, FontinfoId);
782  TempConfigFor(Class, 0) = Config;
783 
784  /* this is a kludge to construct cutoffs for adapted templates */
785  if (Templates == AdaptedTemplates)
786  BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
787 
788  IClass = ClassForClassId (Templates->Templates, ClassId);
789 
790  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
791  Pid = AddIntProto (IClass);
792  assert (Pid != NO_PROTO);
793 
794  Feature = Features->Features[Fid];
795  TempProto = NewTempProto ();
796  Proto = &(TempProto->Proto);
797 
798  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
799  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
800  instead of the -0.25 to 0.75 used in baseline normalization */
801  Proto->Angle = Feature->Params[OutlineFeatDir];
802  Proto->X = Feature->Params[OutlineFeatX];
803  Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET;
804  Proto->Length = Feature->Params[OutlineFeatLength];
805  FillABC(Proto);
806 
807  TempProto->ProtoId = Pid;
808  SET_BIT (Config->Protos, Pid);
809 
810  ConvertProto(Proto, Pid, IClass);
811  AddProtoToProtoPruner(Proto, Pid, IClass,
813 
814  Class->TempProtos = push (Class->TempProtos, TempProto);
815  }
816  FreeFeatureSet(Features);
817 
818  AddIntConfig(IClass);
819  ConvertConfig (AllProtosOn, 0, IClass);
820 
822  cprintf ("Added new class '%s' with class id %d and %d protos.\n",
823  unicharset.id_to_unichar(ClassId), ClassId, NumFeatures);
825  DisplayAdaptedChar(Blob, denorm, IClass);
826  }
827 
828  if (IsEmptyAdaptedClass(Class))
829  (Templates->NumNonEmptyClasses)++;
830 } /* InitAdaptedClass */
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
BIT_VECTOR AllProtosOn
Definition: classify.h:433
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:494
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
PROTO_STRUCT Proto
Definition: adaptive.h:32
#define ClassForClassId(T, c)
Definition: intproto.h:173
FLOAT32 Length
Definition: protos.h:50
void DisplayAdaptedChar(TBLOB *blob, const DENORM &denorm, INT_CLASS_STRUCT *int_class)
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:257
LIST push(LIST list, void *element)
Definition: oldlist.cpp:323
FLOAT32 Y
Definition: protos.h:48
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:254
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:79
INT_TEMPLATES Templates
Definition: adaptive.h:77
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:430
CLUSTERCONFIG Config
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:223
uinT16 ProtoId
Definition: adaptive.h:30
FEATURE Features[1]
Definition: ocrfeatures.h:71
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
FLOAT32 Angle
Definition: protos.h:49
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:528
FLOAT32 Params[1]
Definition: ocrfeatures.h:64
#define Y_DIM_OFFSET
Definition: adaptmatch.cpp:77
FLOAT32 X
Definition: protos.h:47
BIT_VECTOR Protos
Definition: adaptive.h:45
#define NO_PROTO
Definition: matchdefs.h:42
UNICHARSET unicharset
Definition: ccutil.h:72
int classify_learning_debug_level
Definition: classify.h:380
#define UNLIKELY_NUM_FEAT
Definition: adaptmatch.cpp:71
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
Definition: outfeat.cpp:36
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:281
#define SET_BIT(array, bit)
Definition: bitvec.h:57
void FillABC(PROTO Proto)
Definition: protos.cpp:198
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:364
void tesseract::Classify::InitAdaptiveClassifier ( bool  load_pre_trained_templates)

This routine reads in the training information needed by the adaptive classifier and saves it into global variables.

Parameters:
load_pre_trained_templates: Indicates whether the pre-trained templates (inttemp, normproto and pffmtable components) should be loaded. Should only be set to true if the necessary classifier components are present in the [lang].traineddata file.

Globals:

  • BuiltInTemplatesFile: file to get built-in temps from
  • BuiltInCutoffsFile: file to get avg. feat per class from
  • classify_use_pre_adapted_templates: enables use of pre-adapted templates

Note
History: Mon Mar 11 12:49:34 1991, DSJ, Created.

Definition at line 545 of file adaptmatch.cpp.

545  {
547  return;
548  if (AllProtosOn != NULL)
549  EndAdaptiveClassifier(); // Don't leak with multiple inits.
550 
551  // If there is no language_data_path_prefix, the classifier will be
552  // adaptive only.
553  if (language_data_path_prefix.length() > 0 &&
554  load_pre_trained_templates) {
558  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded inttemp\n");
559 
561  shape_table_ = new ShapeTable(unicharset);
564  tprintf("Error loading shape table!\n");
565  delete shape_table_;
566  shape_table_ = NULL;
567  } else if (tessdata_manager.DebugLevel() > 0) {
568  tprintf("Successfully loaded shape table!\n");
569  }
570  }
571 
576  CharNormCutoffs);
577  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded pffmtable\n");
578 
580  NormProtos =
583  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded normproto\n");
584  }
585 
587  InitIntegerFX();
588 
600 
601  for (int i = 0; i < MAX_NUM_CLASSES; i++) {
602  BaselineCutoffs[i] = 0;
603  }
604 
606  FILE *File;
607  STRING Filename;
608 
609  Filename = imagefile;
610  Filename += ADAPT_TEMPLATE_SUFFIX;
611  File = fopen(Filename.string(), "rb");
612  if (File == NULL) {
614  } else {
615  #ifndef SECURE_NAMES
616  cprintf("\nReading pre-adapted templates from %s ...\n",
617  Filename.string());
618  fflush(stdout);
619  #endif
621  cprintf("\n");
622  fclose(File);
624 
625  for (int i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) {
626  BaselineCutoffs[i] = CharNormCutoffs[i];
627  }
628  }
629  } else {
630  if (AdaptedTemplates != NULL)
633  }
634 } /* InitAdaptiveClassifier */
BIT_VECTOR AllProtosOn
Definition: classify.h:433
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:167
#define MAX_NUM_CONFIGS
Definition: intproto.h:44
IntegerMatcher im_
Definition: classify.h:455
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
NORM_PROTOS * ReadNormProtos(FILE *File, inT64 end_offset)
Definition: normmatch.cpp:230
BIT_VECTOR AllProtosOff
Definition: classify.h:436
void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs)
Definition: cutoffs.cpp:42
int classify_integer_matcher_multiplier
Definition: classify.h:426
inT32 length() const
Definition: strngs.cpp:151
#define zero_all_bits(array, length)
Definition: bitvec.h:33
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:273
FILE * GetDataFilePtr() const
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
void InitIntegerFX()
Definition: intfx.cpp:74
#define MAX_NUM_PROTOS
Definition: intproto.h:45
#define NULL
Definition: host.h:144
TessdataManager tessdata_manager
Definition: ccutil.h:71
STRING language_data_path_prefix
Definition: ccutil.h:70
bool classify_enable_adaptive_matcher
Definition: classify.h:372
INT_TEMPLATES Templates
Definition: adaptive.h:77
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:476
ShapeTable * shape_table_
Definition: classify.h:464
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:430
void Init(tesseract::IntParam *classify_debug_level, int classify_integer_matcher_multiplier)
Definition: intmatcher.cpp:696
bool SeekToStart(TessdataType tessdata_type)
#define set_all_bits(array, length)
Definition: bitvec.h:41
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:426
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
BIT_VECTOR AllConfigsOff
Definition: classify.h:437
const char * string() const
Definition: strngs.cpp:156
INT_TEMPLATES ReadIntTemplates(FILE *File)
Definition: intproto.cpp:786
inT64 GetEndOffset(TessdataType tessdata_type) const
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
bool DeSerialize(bool swap, FILE *fp)
Definition: shapetable.cpp:188
UNICHARSET unicharset
Definition: ccutil.h:72
Definition: strngs.h:40
#define ADAPT_TEMPLATE_SUFFIX
Definition: adaptmatch.cpp:68
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:111
STRING imagefile
Definition: ccutil.h:74
bool classify_use_pre_adapted_templates
Definition: classify.h:374
BIT_VECTOR TempProtoMask
Definition: classify.h:438
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
#define ASSERT_HOST(x)
Definition: errcode.h:84
ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File)
Definition: adaptive.cpp:371
BIT_VECTOR PrunedProtos
Definition: classify.h:434
NORM_PROTOS * NormProtos
Definition: classify.h:441
BIT_VECTOR AllConfigsOn
Definition: classify.h:435
void tesseract::Classify::LearnPieces ( const char *  filename,
int  start,
int  length,
float  threshold,
CharSegmentationType  segmentation,
const char *  correct_text,
WERD_RES word 
)

Definition at line 394 of file adaptmatch.cpp.

396  {
397  // TODO(daria) Remove/modify this if/when we want
398  // to train and/or adapt to n-grams.
399  if (segmentation != CST_WHOLE &&
400  (segmentation != CST_FRAGMENT || disable_character_fragments))
401  return;
402 
403  if (length > 1) {
405  start, start + length - 1);
406  }
407  TBLOB* blob = word->chopped_word->blobs;
408  for (int i = 0; i < start; ++i)
409  blob = blob->next;
410  // Rotate the blob if needed for classification.
411  const DENORM* denorm = &word->denorm;
412  TBLOB* rotated_blob = blob->ClassifyNormalizeIfNeeded(&denorm);
413  if (rotated_blob == NULL)
414  rotated_blob = blob;
415 
416  #ifndef GRAPHICS_DISABLED
417  // Draw debug windows showing the blob that is being learned if needed.
418  if (strcmp(classify_learn_debug_str.string(), correct_text) == 0) {
419  RefreshDebugWindow(&learn_debug_win_, "LearnPieces", 600,
420  word->chopped_word->bounding_box());
421  rotated_blob->plot(learn_debug_win_, ScrollView::GREEN, ScrollView::BROWN);
422  learn_debug_win_->Update();
423  window_wait(learn_debug_win_);
424  }
425  if (classify_debug_character_fragments && segmentation == CST_FRAGMENT) {
426  ASSERT_HOST(learn_fragments_debug_win_ != NULL); // set up in LearnWord
427  blob->plot(learn_fragments_debug_win_,
429  learn_fragments_debug_win_->Update();
430  }
431  #endif // GRAPHICS_DISABLED
432 
433  if (filename != NULL) {
434  classify_norm_method.set_value(character); // force char norm spc 30/11/93
435  tess_bn_matching.set_value(false); // turn it off
436  tess_cn_matching.set_value(false);
437  LearnBlob(feature_defs_, filename, rotated_blob, *denorm,
438  correct_text);
439  } else if (unicharset.contains_unichar(correct_text)) {
440  UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
441  int font_id = word->fontinfo != NULL
442  ? fontinfo_table_.get_id(*word->fontinfo)
443  : 0;
445  tprintf("Adapting to char = %s, thr= %g font_id= %d\n",
446  unicharset.id_to_unichar(class_id), threshold, font_id);
447  // If filename is NULL we are doing recognition
448  // (as opposed to training), so we must have already set word fonts.
449  AdaptToChar(rotated_blob, *denorm, class_id, font_id, threshold);
450  } else if (classify_debug_level >= 1) {
451  tprintf("Can't adapt to %s not in unicharset\n", correct_text);
452  }
453  if (rotated_blob != blob) {
454  delete rotated_blob;
455  delete denorm;
456  }
457 
458  break_pieces(blob, word->seam_array, start, start + length - 1);
459 } // LearnPieces.
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
int UNICHAR_ID
Definition: unichar.h:31
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:176
const FontInfo * fontinfo
Definition: pageres.h:424
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:228
void break_pieces(TBLOB *blobs, SEAMS seams, inT16 start, inT16 end)
Definition: seam.cpp:535
char * classify_learn_debug_str
Definition: classify.h:416
#define NULL
Definition: host.h:144
TBLOB * blobs
Definition: blobs.h:274
char window_wait(ScrollView *win)
Definition: callcpp.cpp:112
void join_pieces(TBLOB *piece_blobs, SEAMS seams, inT16 start, inT16 end)
Definition: seam.cpp:564
SEAMS seam_array
Definition: pageres.h:358
static void Update()
Definition: scrollview.cpp:710
bool disable_character_fragments
Definition: classify.h:407
Definition: blobs.h:174
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:443
void AdaptToChar(TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold)
Definition: adaptmatch.cpp:933
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:543
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
DENORM denorm
Definition: pageres.h:346
TBOX bounding_box() const
Definition: blobs.cpp:483
UNICHARSET unicharset
Definition: ccutil.h:72
int classify_learning_debug_level
Definition: classify.h:380
TWERD * chopped_word
Definition: pageres.h:357
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING &filename, TBLOB *Blob, const DENORM &denorm, const char *BlobText)
Definition: blobclass.cpp:52
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
Definition: blobs.cpp:396
#define ASSERT_HOST(x)
Definition: errcode.h:84
TBLOB * ClassifyNormalizeIfNeeded(const DENORM **denorm) const
Definition: blobs.cpp:281
TBLOB * next
Definition: blobs.h:228
bool classify_debug_character_fragments
Definition: classify.h:412
FEATURE_DEFS_STRUCT feature_defs_
Definition: classify.h:459
void tesseract::Classify::LearnWord ( const char *  filename,
const char *  rejmap,
WERD_RES * word 
)

Definition at line 254 of file adaptmatch.cpp.

255  {
256  int word_len = word->correct_text.size();
257  if (word_len == 0) return;
258 
259  float* thresholds = NULL;
260  if (filename == NULL) {
261  // Adaption mode.
262  if (!EnableLearning || word->best_choice == NULL ||
263  // If word->best_choice is not recorded at the top of accumulator's
264  // best choices (which could happen for choices that are
265  // altered with ReplaceAmbig()) we skip the adaption.
266  !getDict().CurrentBestChoiceIs(*(word->best_choice)))
267  return; // Can't or won't adapt.
268 
269  NumWordsAdaptedTo++;
271  tprintf("\n\nAdapting to word = %s\n",
272  word->best_choice->debug_string().string());
273  thresholds = new float[word_len];
274  GetAdaptThresholds(word->rebuild_word, word->denorm, *word->best_choice,
275  *word->raw_choice, thresholds);
276  }
277  int start_blob = 0;
278  char prev_map_char = '0';
279 
280  #ifndef GRAPHICS_DISABLED
282  if (learn_fragmented_word_debug_win_ != NULL) {
283  window_wait(learn_fragmented_word_debug_win_);
284  }
285  RefreshDebugWindow(&learn_fragments_debug_win_, "LearnPieces", 400,
286  word->chopped_word->bounding_box());
287  RefreshDebugWindow(&learn_fragmented_word_debug_win_, "LearnWord", 200,
288  word->chopped_word->bounding_box());
289  word->chopped_word->plot(learn_fragmented_word_debug_win_);
291  }
292  #endif // GRAPHICS_DISABLED
293 
294  for (int ch = 0; ch < word_len; ++ch) {
296  tprintf("\nLearning %s\n", word->correct_text[ch].string());
297  }
298  char rej_map_char = rejmap != NULL ? *rejmap++ : '1';
299 
300  if (word->correct_text[ch].length() > 0 && rej_map_char == '1') {
301  float threshold = thresholds != NULL ? thresholds[ch] : 0.0f;
302 
303  LearnPieces(filename, start_blob, word->best_state[ch],
304  threshold, CST_WHOLE, word->correct_text[ch].string(), word);
305 
306  if (word->best_state[ch] > 1 && !disable_character_fragments) {
307  // Check that the character breaks into meaningful fragments
308  // that each match a whole character with at least
309  // classify_character_fragments_garbage_certainty_threshold
310  bool garbage = false;
311  TBLOB* frag_blob = word->chopped_word->blobs;
312  for (int i = 0; i < start_blob; ++i) frag_blob = frag_blob->next;
313  int frag;
314  for (frag = 0; frag < word->best_state[ch]; ++frag) {
316  garbage |= LooksLikeGarbage(word->denorm, frag_blob);
317  }
318  frag_blob = frag_blob->next;
319  }
320  // Learn the fragments.
321  if (!garbage) {
322  bool pieces_all_natural = word->PiecesAllNatural(start_blob,
323  word->best_state[ch]);
324  if (pieces_all_natural || !prioritize_division) {
325  for (frag = 0; frag < word->best_state[ch]; ++frag) {
326  GenericVector<STRING> tokens;
327  word->correct_text[ch].split(' ', &tokens);
328 
329  tokens[0] = CHAR_FRAGMENT::to_string(
330  tokens[0].string(), frag, word->best_state[ch],
331  pieces_all_natural);
332 
333  STRING full_string;
334  for (int i = 0; i < tokens.size(); i++) {
335  full_string += tokens[i];
336  if (i != tokens.size() - 1)
337  full_string += ' ';
338  }
339  LearnPieces(filename, start_blob + frag, 1,
340  threshold, CST_FRAGMENT, full_string.string(), word);
341  }
342  }
343  }
344  }
345 
346  // TODO(rays): re-enable this part of the code when we switch to the
347  // new classifier that needs to see examples of garbage.
348  /*
349  char next_map_char = ch + 1 < word_len
350  ? (rejmap != NULL ? *rejmap : '1')
351  : '0';
352  if (word->best_state[ch] > 1) {
353  // If the next blob is good, make junk with the rightmost fragment.
354  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0 &&
355  next_map_char == '1') {
356  LearnPieces(filename, start_blob + word->best_state[ch] - 1,
357  word->best_state[ch + 1] + 1,
358  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
359  }
360  // If the previous blob is good, make junk with the leftmost fragment.
361  if (ch > 0 && word->correct_text[ch - 1].length() > 0 &&
362  prev_map_char == '1') {
363  LearnPieces(filename, start_blob - word->best_state[ch - 1],
364  word->best_state[ch - 1] + 1,
365  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
366  }
367  }
368  // If the next blob is good, make a join with it.
369  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0 &&
370  next_map_char == '1') {
371  STRING joined_text = word->correct_text[ch];
372  joined_text += word->correct_text[ch + 1];
373  LearnPieces(filename, start_blob,
374  word->best_state[ch] + word->best_state[ch + 1],
375  threshold, CST_NGRAM, joined_text.string(), word);
376  }
377  */
378  }
379  start_blob += word->best_state[ch];
380  prev_map_char = rej_map_char;
381  }
382  delete [] thresholds;
383 } // LearnWord.
TWERD * rebuild_word
Definition: pageres.h:381
const STRING debug_string() const
Definition: ratngs.h:373
void GetAdaptThresholds(TWERD *Word, const DENORM &denorm, const WERD_CHOICE &BestChoice, const WERD_CHOICE &BestRawChoice, FLOAT32 Thresholds[])
void LearnPieces(const char *filename, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
Definition: adaptmatch.cpp:394
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:228
bool LooksLikeGarbage(const DENORM &denorm, TBLOB *blob)
STRING to_string() const
Definition: unicharset.h:61
#define NULL
Definition: host.h:144
GenericVector< int > best_state
Definition: pageres.h:392
TBLOB * blobs
Definition: blobs.h:274
char window_wait(ScrollView *win)
Definition: callcpp.cpp:112
Dict & getDict()
Definition: classify.h:62
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:410
static void Update()
Definition: scrollview.cpp:710
bool PiecesAllNatural(int start, int count) const
Definition: pageres.cpp:733
bool disable_character_fragments
Definition: classify.h:407
Definition: blobs.h:174
const char * string() const
Definition: strngs.cpp:156
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
DENORM denorm
Definition: pageres.h:346
WERD_CHOICE * raw_choice
Definition: pageres.h:360
TBOX bounding_box() const
Definition: blobs.cpp:483
Definition: strngs.h:40
int classify_learning_debug_level
Definition: classify.h:380
int size() const
Definition: genericvector.h:59
void plot(ScrollView *window)
Definition: blobs.cpp:522
int length() const
Definition: genericvector.h:63
GenericVector< STRING > correct_text
Definition: pageres.h:396
TWERD * chopped_word
Definition: pageres.h:357
bool prioritize_division
Definition: classify.h:354
TBLOB * next
Definition: blobs.h:228
bool classify_debug_character_fragments
Definition: classify.h:412
WERD_CHOICE * best_choice
Definition: pageres.h:359
bool tesseract::Classify::LooksLikeGarbage ( const DENORM & denorm,
TBLOB * blob 
)

Definition at line 1991 of file adaptmatch.cpp.

1991  {
1992  BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST();
1993  AdaptiveClassifier(blob, denorm, ratings, NULL);
1994  BLOB_CHOICE_IT ratings_it(ratings);
1997  print_ratings_list("======================\nLooksLikeGarbage() got ",
1998  ratings, unicharset);
1999  }
2000  for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list();
2001  ratings_it.forward()) {
2002  if (unicharset.get_fragment(ratings_it.data()->unichar_id()) != NULL) {
2003  continue;
2004  }
2005  delete ratings;
2006  return (ratings_it.data()->certainty() <
2008  }
2009  delete ratings;
2010  return true; // no whole characters in ratings
2011 }
void AdaptiveClassifier(TBLOB *Blob, const DENORM &denorm, BLOB_CHOICE_LIST *Choices, CLASS_PRUNER_RESULTS cp_results)
Definition: adaptmatch.cpp:178
#define NULL
Definition: host.h:144
const UNICHARSET & getUnicharset() const
Definition: dict.h:100
Dict & getDict()
Definition: classify.h:62
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:410
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:511
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:610
UNICHARSET unicharset
Definition: ccutil.h:72
bool classify_debug_character_fragments
Definition: classify.h:412
int tesseract::Classify::MakeNewTemporaryConfig ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  FontinfoId,
int  NumFeatures,
INT_FEATURE_ARRAY  Features,
FEATURE_SET  FloatFeatures 
)
Parameters
Templates: adapted templates to add new config to
ClassId: class id to associate with new config
FontinfoId: font information inferred from pre-trained templates
NumFeatures: number of features in Features
Features: features describing model for new config
FloatFeatures: floating-pt representation of features
Returns
The id of the new config created, a negative integer in case of error.
Note
Exceptions: none
History: Fri Mar 15 08:49:46 1991, DSJ, Created.

Definition at line 2136 of file adaptmatch.cpp.

2141  {
2142  INT_CLASS IClass;
2143  ADAPT_CLASS Class;
2144  PROTO_ID OldProtos[MAX_NUM_PROTOS];
2145  FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES];
2146  int NumOldProtos;
2147  int NumBadFeatures;
2148  int MaxProtoId, OldMaxProtoId;
2149  int BlobLength = 0;
2150  int MaskSize;
2151  int ConfigId;
2153  int i;
2154  int debug_level = NO_DEBUG;
2155 
2157  debug_level =
2159 
2160  IClass = ClassForClassId(Templates->Templates, ClassId);
2161  Class = Templates->Class[ClassId];
2162 
2163  if (IClass->NumConfigs >= MAX_NUM_CONFIGS) {
2164  ++NumAdaptationsFailed;
2166  cprintf("Cannot make new temporary config: maximum number exceeded.\n");
2167  return -1;
2168  }
2169 
2170  OldMaxProtoId = IClass->NumProtos - 1;
2171 
2172  NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff,
2173  BlobLength, NumFeatures, Features,
2174  OldProtos, classify_adapt_proto_threshold,
2175  debug_level);
2176 
2177  MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS);
2178  zero_all_bits(TempProtoMask, MaskSize);
2179  for (i = 0; i < NumOldProtos; i++)
2180  SET_BIT(TempProtoMask, OldProtos[i]);
2181 
2182  NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn,
2183  BlobLength, NumFeatures, Features,
2184  BadFeatures,
2186  debug_level);
2187 
2188  MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures,
2189  IClass, Class, TempProtoMask);
2190  if (MaxProtoId == NO_PROTO) {
2191  ++NumAdaptationsFailed;
2193  cprintf("Cannot make new temp protos: maximum number exceeded.\n");
2194  return -1;
2195  }
2196 
2197  ConfigId = AddIntConfig(IClass);
2198  ConvertConfig(TempProtoMask, ConfigId, IClass);
2199  Config = NewTempConfig(MaxProtoId, FontinfoId);
2200  TempConfigFor(Class, ConfigId) = Config;
2202 
2204  cprintf("Making new temp config %d fontinfo id %d"
2205  " using %d old and %d new protos.\n",
2206  ConfigId, Config->FontinfoId,
2207  NumOldProtos, MaxProtoId - OldMaxProtoId);
2208 
2209  return ConfigId;
2210 } /* MakeNewTemporaryConfig */
BIT_VECTOR AllProtosOn
Definition: classify.h:433
#define MAX_NUM_CONFIGS
Definition: intproto.h:44
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:494
IntegerMatcher im_
Definition: classify.h:455
int classify_adapt_feature_threshold
Definition: classify.h:404
#define ClassForClassId(T, c)
Definition: intproto.h:173
inT16 PROTO_ID
Definition: matchdefs.h:41
#define zero_all_bits(array, length)
Definition: bitvec.h:33
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:257
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
int classify_adapt_proto_threshold
Definition: classify.h:402
#define MAX_NUM_PROTOS
Definition: intproto.h:45
uinT8 NumConfigs
Definition: intproto.h:108
INT_TEMPLATES Templates
Definition: adaptive.h:77
uinT8 FEATURE_ID
Definition: matchdefs.h:47
CLUSTERCONFIG Config
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:625
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
#define copy_all_bits(source, dest, length)
Definition: bitvec.h:49
uinT8 ProtoVectorSize
Definition: adaptive.h:42
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:223
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
BIT_VECTOR AllConfigsOff
Definition: classify.h:437
#define MAX_NUM_INT_FEATURES
Definition: baseapi.h:63
BIT_VECTOR Protos
Definition: adaptive.h:45
#define NO_PROTO
Definition: matchdefs.h:42
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:546
int classify_learning_debug_level
Definition: classify.h:380
#define PRINT_PROTO_MATCHES
Definition: intproto.h:186
#define PRINT_MATCH_SUMMARY
Definition: intproto.h:182
uinT16 NumProtos
Definition: intproto.h:106
#define SET_BIT(array, bit)
Definition: bitvec.h:57
BIT_VECTOR TempProtoMask
Definition: classify.h:438
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
#define PRINT_FEATURE_MATCHES
Definition: intproto.h:185
BIT_VECTOR AllConfigsOn
Definition: classify.h:435
#define NO_DEBUG
Definition: adaptmatch.cpp:72
PROTO_ID tesseract::Classify::MakeNewTempProtos ( FEATURE_SET  Features,
int  NumBadFeat,
FEATURE_ID  BadFeat[],
INT_CLASS  IClass,
ADAPT_CLASS  Class,
BIT_VECTOR  TempProtoMask 
)

This routine finds sets of sequential bad features that all have the same angle and converts each set into a new temporary proto. The temp proto is added to the proto pruner for IClass, pushed onto the list of temp protos in Class, and added to TempProtoMask.

Parameters
Features: floating-pt features describing new character
NumBadFeat: number of bad features to turn into protos
BadFeat: feature id's of bad features
IClass: integer class templates to add new protos to
Class: adapted class templates to add new protos to
TempProtoMask: proto mask to add new protos to

Globals: none

Returns
Max proto id in class after all protos have been added, or NO_PROTO if a new proto could not be added to IClass.
Exceptions: none
History: Fri Mar 15 11:39:38 1991, DSJ, Created.

Definition at line 2233 of file adaptmatch.cpp.

2238  {
2239  FEATURE_ID *ProtoStart;
2240  FEATURE_ID *ProtoEnd;
2241  FEATURE_ID *LastBad;
2242  TEMP_PROTO TempProto;
2243  PROTO Proto;
2244  FEATURE F1, F2;
2245  FLOAT32 X1, X2, Y1, Y2;
2246  FLOAT32 A1, A2, AngleDelta;
2247  FLOAT32 SegmentLength;
2248  PROTO_ID Pid;
2249 
2250  for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
2251  ProtoStart < LastBad; ProtoStart = ProtoEnd) {
2252  F1 = Features->Features[*ProtoStart];
2253  X1 = F1->Params[PicoFeatX];
2254  Y1 = F1->Params[PicoFeatY];
2255  A1 = F1->Params[PicoFeatDir];
2256 
2257  for (ProtoEnd = ProtoStart + 1,
2258  SegmentLength = GetPicoFeatureLength();
2259  ProtoEnd < LastBad;
2260  ProtoEnd++, SegmentLength += GetPicoFeatureLength()) {
2261  F2 = Features->Features[*ProtoEnd];
2262  X2 = F2->Params[PicoFeatX];
2263  Y2 = F2->Params[PicoFeatY];
2264  A2 = F2->Params[PicoFeatDir];
2265 
2266  AngleDelta = fabs(A1 - A2);
2267  if (AngleDelta > 0.5)
2268  AngleDelta = 1.0 - AngleDelta;
2269 
2270  if (AngleDelta > matcher_clustering_max_angle_delta ||
2271  fabs(X1 - X2) > SegmentLength ||
2272  fabs(Y1 - Y2) > SegmentLength)
2273  break;
2274  }
2275 
2276  F2 = Features->Features[*(ProtoEnd - 1)];
2277  X2 = F2->Params[PicoFeatX];
2278  Y2 = F2->Params[PicoFeatY];
2279  A2 = F2->Params[PicoFeatDir];
2280 
2281  Pid = AddIntProto(IClass);
2282  if (Pid == NO_PROTO)
2283  return (NO_PROTO);
2284 
2285  TempProto = NewTempProto();
2286  Proto = &(TempProto->Proto);
2287 
2288  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
2289  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
2290  instead of the -0.25 to 0.75 used in baseline normalization */
2291  Proto->Length = SegmentLength;
2292  Proto->Angle = A1;
2293  Proto->X = (X1 + X2) / 2.0;
2294  Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET;
2295  FillABC(Proto);
2296 
2297  TempProto->ProtoId = Pid;
2298  SET_BIT(TempProtoMask, Pid);
2299 
2300  ConvertProto(Proto, Pid, IClass);
2301  AddProtoToProtoPruner(Proto, Pid, IClass,
2303 
2304  Class->TempProtos = push(Class->TempProtos, TempProto);
2305  }
2306  return IClass->NumProtos - 1;
2307 } /* MakeNewTempProtos */
PROTO_STRUCT Proto
Definition: adaptive.h:32
inT16 PROTO_ID
Definition: matchdefs.h:41
FLOAT32 Length
Definition: protos.h:50
LIST push(LIST list, void *element)
Definition: oldlist.cpp:323
double matcher_clustering_max_angle_delta
Definition: classify.h:393
FLOAT32 Y
Definition: protos.h:48
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:254
float FLOAT32
Definition: host.h:111
uinT8 FEATURE_ID
Definition: matchdefs.h:47
uinT16 ProtoId
Definition: adaptive.h:30
FEATURE Features[1]
Definition: ocrfeatures.h:71
FLOAT32 Angle
Definition: protos.h:49
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:528
FLOAT32 Params[1]
Definition: ocrfeatures.h:64
#define Y_DIM_OFFSET
Definition: adaptmatch.cpp:77
FLOAT32 X
Definition: protos.h:47
#define NO_PROTO
Definition: matchdefs.h:42
int classify_learning_debug_level
Definition: classify.h:380
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:281
uinT16 NumProtos
Definition: intproto.h:106
#define GetPicoFeatureLength()
Definition: picofeat.h:59
#define SET_BIT(array, bit)
Definition: bitvec.h:57
BIT_VECTOR TempProtoMask
Definition: classify.h:438
void FillABC(PROTO Proto)
Definition: protos.cpp:198
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:364
void tesseract::Classify::MakePermanent ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  ConfigId,
const DENORM & denorm,
TBLOB * Blob 
)
Parameters
Templates: current set of adaptive templates
ClassId: class containing config to be made permanent
ConfigId: config to be made permanent
denorm: normalization/denormalization parameters
Blob: current blob being adapted to

Globals: none

Note
Exceptions: none
History: Thu Mar 14 15:54:08 1991, DSJ, Created.

Definition at line 2323 of file adaptmatch.cpp.

2327  {
2328  UNICHAR_ID *Ambigs;
2330  ADAPT_CLASS Class;
2331  PROTO_KEY ProtoKey;
2332 
2333  Class = Templates->Class[ClassId];
2334  Config = TempConfigFor(Class, ConfigId);
2335 
2336  MakeConfigPermanent(Class, ConfigId);
2337  if (Class->NumPermConfigs == 0)
2338  Templates->NumPermClasses++;
2339  Class->NumPermConfigs++;
2340 
2341  // Initialize permanent config.
2342  Ambigs = GetAmbiguities(Blob, denorm, ClassId);
2344  "PERM_CONFIG_STRUCT");
2345  Perm->Ambigs = Ambigs;
2346  Perm->FontinfoId = Config->FontinfoId;
2347 
2348  // Free memory associated with temporary config (since ADAPTED_CONFIG
2349  // is a union we need to clean up before we record permanent config).
2350  ProtoKey.Templates = Templates;
2351  ProtoKey.ClassId = ClassId;
2352  ProtoKey.ConfigId = ConfigId;
2353  Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, MakeTempProtoPerm);
2354  FreeTempConfig(Config);
2355 
2356  // Record permanent config.
2357  PermConfigFor(Class, ConfigId) = Perm;
2358 
2359  if (classify_learning_debug_level >= 1) {
2360  tprintf("Making config %d for %s (ClassId %d) permanent:"
2361  " fontinfo id %d, ambiguities '",
2362  ConfigId, getDict().getUnicharset().debug_str(ClassId).string(),
2363  ClassId, PermConfigFor(Class, ConfigId)->FontinfoId);
2364  for (UNICHAR_ID *AmbigsPointer = Ambigs;
2365  *AmbigsPointer >= 0; ++AmbigsPointer)
2366  tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
2367  tprintf("'.\n");
2368  }
2369 } /* MakePermanent */
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
int UNICHAR_ID
Definition: unichar.h:31
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:105
LIST delete_d(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:125
UNICHAR_ID * Ambigs
Definition: adaptive.h:52
CLASS_ID ClassId
Definition: adaptmatch.cpp:117
ADAPT_TEMPLATES Templates
Definition: adaptmatch.cpp:116
PERM_CONFIG_STRUCT * PERM_CONFIG
Definition: adaptive.h:55
CLUSTERCONFIG Config
Dict & getDict()
Definition: classify.h:62
#define MakeConfigPermanent(Class, ConfigId)
Definition: adaptive.h:96
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
uinT8 NumPermConfigs
Definition: adaptive.h:65
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
int MakeTempProtoPerm(void *item1, void *item2)
void FreeTempConfig(TEMP_CONFIG Config)
Definition: adaptive.cpp:80
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
UNICHARSET unicharset
Definition: ccutil.h:72
int classify_learning_debug_level
Definition: classify.h:380
void * alloc_struct(inT32 count, const char *)
Definition: memry.cpp:40
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, const DENORM &denorm, CLASS_ID CorrectClass)
void tesseract::Classify::MasterMatcher ( INT_TEMPLATES  templates,
inT16  num_features,
const INT_FEATURE_STRUCT * features,
const uinT8 * norm_factors,
ADAPT_CLASS * classes,
int  debug,
int  num_classes,
const TBOX & blob_box,
CLASS_PRUNER_RESULTS  results,
ADAPT_RESULTS * final_results 
)

Factored-out calls to IntegerMatcher based on class pruner results. Returns integer matcher results inside CLASS_PRUNER_RESULTS structure.

Definition at line 1257 of file adaptmatch.cpp.

1266  {
1267  int top = blob_box.top();
1268  int bottom = blob_box.bottom();
1269  for (int c = 0; c < num_classes; c++) {
1270  CLASS_ID class_id = results[c].Class;
1271  INT_RESULT_STRUCT& int_result = results[c].IMResult;
1272  BIT_VECTOR protos = classes != NULL ? classes[class_id]->PermProtos
1273  : AllProtosOn;
1274  BIT_VECTOR configs = classes != NULL ? classes[class_id]->PermConfigs
1275  : AllConfigsOn;
1276 
1277  im_.Match(ClassForClassId(templates, class_id),
1278  protos, configs,
1279  num_features, features,
1280  &int_result, classify_adapt_feature_threshold, debug,
1282  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1283  ExpandShapesAndApplyCorrections(classes, debug, class_id, bottom, top,
1284  results[c].Rating,
1285  final_results->BlobLength, norm_factors,
1286  int_result, final_results);
1287  }
1288 }
BIT_VECTOR AllProtosOn
Definition: classify.h:433
IntegerMatcher im_
Definition: classify.h:455
int classify_adapt_feature_threshold
Definition: classify.h:404
#define ClassForClassId(T, c)
Definition: intproto.h:173
#define NULL
Definition: host.h:144
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, INT_RESULT Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:460
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
inT16 top() const
Definition: rect.h:53
BIT_VECTOR PermProtos
Definition: adaptive.h:68
bool matcher_debug_separate_windows
Definition: classify.h:415
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, const uinT8 *cn_factors, INT_RESULT_STRUCT &int_result, ADAPT_RESULTS *final_results)
inT32 BlobLength
Definition: adaptmatch.cpp:92
BIT_VECTOR PermConfigs
Definition: adaptive.h:69
BIT_VECTOR AllConfigsOn
Definition: classify.h:435
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
inT16 bottom() const
Definition: rect.h:60
ADAPT_TEMPLATES tesseract::Classify::NewAdaptedTemplates ( bool  InitFromUnicharset)

Allocates memory for adapted templates.

Parameters
InitFromUnicharset: if true, add an empty class for each char in unicharset to the newly created templates
Returns
Ptr to new adapted templates.
Note
Globals: none
Exceptions: none
History: Fri Mar 8 10:15:28 1991, DSJ, Created.

Definition at line 167 of file adaptive.cpp.

167  {
168  ADAPT_TEMPLATES Templates;
169  int i;
170 
171  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
172 
173  Templates->Templates = NewIntTemplates ();
174  Templates->NumPermClasses = 0;
175  Templates->NumNonEmptyClasses = 0;
176 
177  /* Insert an empty class for each unichar id in unicharset */
178  for (i = 0; i < MAX_NUM_CLASSES; i++) {
179  Templates->Class[i] = NULL;
180  if (InitFromUnicharset && i < unicharset.size()) {
181  AddAdaptedClass(Templates, NewAdaptedClass(), i);
182  }
183  }
184 
185  return (Templates);
186 
187 } /* NewAdaptedTemplates */
int size() const
Definition: unicharset.h:264
ADAPT_TEMPLATES_STRUCT * ADAPT_TEMPLATES
Definition: adaptive.h:83
#define NULL
Definition: host.h:144
INT_TEMPLATES Templates
Definition: adaptive.h:77
void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId)
Definition: adaptive.cpp:49
ADAPT_CLASS NewAdaptedClass()
Definition: adaptive.cpp:113
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:749
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
void * Emalloc(size_t Size)
Definition: emalloc.cpp:35
UNICHARSET unicharset
Definition: ccutil.h:72
void tesseract::Classify::NormalizeOutlines ( LIST  Outlines,
FLOAT32 * XScale,
FLOAT32 * YScale 
)

Definition at line 346 of file mfoutline.cpp.

348  {
349 /*
350  ** Parameters:
351  ** Outlines list of outlines to be normalized
352  ** XScale x-direction scale factor used by routine
353  ** YScale y-direction scale factor used by routine
354  ** Globals:
355  ** classify_norm_method method being used for normalization
356  ** classify_char_norm_range map radius of gyration to this value
357  ** Operation: This routine normalizes every outline in Outlines
358  ** according to the currently selected normalization method.
359  ** It also returns the scale factors that it used to do this
360  ** scaling. The scale factors returned represent the x and
361  ** y sizes in the normalized coordinate system that correspond
362  ** to 1 pixel in the original coordinate system.
363  ** Return: none (Outlines are changed and XScale and YScale are updated)
364  ** Exceptions: none
365  ** History: Fri Dec 14 08:14:55 1990, DSJ, Created.
366  */
367  MFOUTLINE Outline;
368  OUTLINE_STATS OutlineStats;
369  FLOAT32 BaselineScale;
370 
371  switch (classify_norm_method) {
372  case character:
373  ComputeOutlineStats(Outlines, &OutlineStats);
374 
375  /* limit scale factor to avoid overscaling small blobs (.,`'),
376  thin blobs (l1ift), and merged blobs */
377  *XScale = *YScale = BaselineScale = MF_SCALE_FACTOR;
378  *XScale *= OutlineStats.Ry;
379  *YScale *= OutlineStats.Rx;
380  if (*XScale < classify_min_norm_scale_x)
381  *XScale = classify_min_norm_scale_x;
382  if (*YScale < classify_min_norm_scale_y)
383  *YScale = classify_min_norm_scale_y;
384  if (*XScale > classify_max_norm_scale_x &&
385  *YScale <= classify_max_norm_scale_y)
386  *XScale = classify_max_norm_scale_x;
387  *XScale = classify_char_norm_range * BaselineScale / *XScale;
388  *YScale = classify_char_norm_range * BaselineScale / *YScale;
389 
390  iterate(Outlines) {
391  Outline = (MFOUTLINE) first_node (Outlines);
392  CharNormalizeOutline (Outline,
393  OutlineStats.x, OutlineStats.y,
394  *XScale, *YScale);
395  }
396  break;
397 
398  case baseline:
399  iterate(Outlines) {
400  Outline = (MFOUTLINE) first_node(Outlines);
401  NormalizeOutline(Outline, 0.0);
402  }
403  *XScale = *YScale = MF_SCALE_FACTOR;
404  break;
405  }
406 } /* NormalizeOutlines */
FLOAT64 Ry
Definition: mfoutline.h:58
void CharNormalizeOutline(MFOUTLINE Outline, FLOAT32 XCenter, FLOAT32 YCenter, FLOAT32 XScale, FLOAT32 YScale)
Definition: mfoutline.cpp:439
double classify_max_norm_scale_y
Definition: classify.h:367
float FLOAT32
Definition: host.h:111
double classify_char_norm_range
Definition: classify.h:363
FLOAT64 y
Definition: mfoutline.h:56
void ComputeOutlineStats(LIST Outlines, OUTLINE_STATS *OutlineStats)
Definition: mfoutline.cpp:108
#define MF_SCALE_FACTOR
Definition: mfoutline.h:71
void NormalizeOutline(MFOUTLINE Outline, FLOAT32 XOrigin)
Definition: mfoutline.cpp:312
double classify_min_norm_scale_y
Definition: classify.h:366
double classify_min_norm_scale_x
Definition: classify.h:364
FLOAT64 Rx
Definition: mfoutline.h:58
LIST MFOUTLINE
Definition: mfoutline.h:33
FLOAT64 x
Definition: mfoutline.h:56
#define iterate(l)
Definition: oldlist.h:159
#define first_node(l)
Definition: oldlist.h:139
double classify_max_norm_scale_x
Definition: classify.h:365
void tesseract::Classify::PrintAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES  Templates 
)

This routine prints a summary of the adapted templates in Templates to File.

Parameters
File: open text file to print Templates to
Templates: adapted templates to print to File
Note
Globals: none
Exceptions: none
History: Wed Mar 20 13:35:29 1991, DSJ, Created.

Definition at line 273 of file adaptive.cpp.

273  {
274  int i;
275  INT_CLASS IClass;
276  ADAPT_CLASS AClass;
277 
278  #ifndef SECURE_NAMES
279  fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
280  fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
281  Templates->NumNonEmptyClasses, Templates->NumPermClasses);
282  fprintf (File, " Id NC NPC NP NPP\n");
283  fprintf (File, "------------------------\n");
284 
285  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
286  IClass = Templates->Templates->Class[i];
287  AClass = Templates->Class[i];
288  if (!IsEmptyAdaptedClass (AClass)) {
289  fprintf (File, "%5d %s %3d %3d %3d %3d\n",
291  IClass->NumConfigs, AClass->NumPermConfigs,
292  IClass->NumProtos,
293  IClass->NumProtos - count (AClass->TempProtos));
294  }
295  }
296  #endif
297  fprintf (File, "\n");
298 
299 } /* PrintAdaptedTemplates */
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
uinT8 NumConfigs
Definition: intproto.h:108
INT_TEMPLATES Templates
Definition: adaptive.h:77
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
uinT8 NumPermConfigs
Definition: adaptive.h:65
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:122
UNICHARSET unicharset
Definition: ccutil.h:72
uinT16 NumProtos
Definition: intproto.h:106
int count(LIST var_list)
Definition: oldlist.cpp:108
void tesseract::Classify::PrintAdaptiveMatchResults ( FILE *  File,
ADAPT_RESULTS * Results 
)

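This routine writes the matches in Results to File. (Note: in the listing below the output is actually emitted through tprintf; the File argument is not referenced.)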

Parameters
File: open text file to write Results to
Results: match results to write to File

Globals: none

Note
Exceptions: none
History: Mon Mar 18 09:24:53 1991, DSJ, Created.

Definition at line 2424 of file adaptmatch.cpp.

2424  {
2425  for (int i = 0; i < Results->NumMatches; ++i) {
2426  tprintf("%s(%d), shape %d, %.2f ",
2427  unicharset.debug_str(Results->match[i].unichar_id).string(),
2428  Results->match[i].unichar_id, Results->match[i].shape_id,
2429  Results->match[i].rating * 100.0);
2430  }
2431  tprintf("\n");
2432 } /* PrintAdaptiveMatchResults */
CLASS_ID unichar_id
Definition: adaptmatch.cpp:82
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:285
const char * string() const
Definition: strngs.cpp:156
FLOAT32 rating
Definition: adaptmatch.cpp:84
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
UNICHARSET unicharset
Definition: ccutil.h:72
ScoredClass match[MAX_NUM_CLASSES]
Definition: adaptmatch.cpp:95
void tesseract::Classify::PrintAdaptiveStatistics ( FILE *  File)

Print to File the statistics which have been gathered for the adaptive matcher.

Parameters
File: open text file to print adaptive statistics to

Globals: none

Note
Exceptions: none
History: Thu Apr 18 14:37:37 1991, DSJ, Created.

Definition at line 659 of file adaptmatch.cpp.

659  {
660  #ifndef SECURE_NAMES
661 
662  fprintf (File, "\nADAPTIVE MATCHER STATISTICS:\n");
663  fprintf (File, "\tNum blobs classified = %d\n", AdaptiveMatcherCalls);
664  fprintf (File, "\tNum classes output = %d (Avg = %4.2f)\n",
665  NumClassesOutput,
666  ((AdaptiveMatcherCalls == 0) ? (0.0) :
667  ((float) NumClassesOutput / AdaptiveMatcherCalls)));
668  fprintf (File, "\t\tBaseline Classifier: %4d calls (%4.2f classes/call)\n",
669  BaselineClassifierCalls,
670  ((BaselineClassifierCalls == 0) ? (0.0) :
671  ((float) NumBaselineClassesTried / BaselineClassifierCalls)));
672  fprintf (File, "\t\tCharNorm Classifier: %4d calls (%4.2f classes/call)\n",
673  CharNormClassifierCalls,
674  ((CharNormClassifierCalls == 0) ? (0.0) :
675  ((float) NumCharNormClassesTried / CharNormClassifierCalls)));
676  fprintf (File, "\t\tAmbig Classifier: %4d calls (%4.2f classes/call)\n",
677  AmbigClassifierCalls,
678  ((AmbigClassifierCalls == 0) ? (0.0) :
679  ((float) NumAmbigClassesTried / AmbigClassifierCalls)));
680 
681  fprintf (File, "\nADAPTIVE LEARNER STATISTICS:\n");
682  fprintf (File, "\tNumber of words adapted to: %d\n", NumWordsAdaptedTo);
683  fprintf (File, "\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo);
684 
685  PrintAdaptedTemplates(File, AdaptedTemplates);
686  #endif
687 } /* PrintAdaptiveStatistics */
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:273
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:430
int tesseract::Classify::PruneClasses ( const INT_TEMPLATES_STRUCT int_templates,
int  num_features,
const INT_FEATURE_STRUCT features,
const uinT8 normalization_factors,
const uinT16 expected_num_features,
CP_RESULT_STRUCT results 
)

Definition at line 406 of file intmatcher.cpp.

411  {
412 /*
413  ** Operation:
414  ** Prunes the classes using a modified fast match table.
415  ** Returns a sorted list of classes along with the number
416  ** of pruned classes in that list.
417  ** Return: Number of pruned classes.
418  ** Exceptions: none
419  ** History: Tue Feb 19 10:24:24 MST 1991, RWM, Created.
420  */
421  ClassPruner pruner(int_templates->NumClasses);
422  // Compute initial match scores for all classes.
423  pruner.ComputeScores(int_templates, num_features, features);
424  // Adjust match scores for number of expected features.
425  pruner.AdjustForExpectedNumFeatures(expected_num_features,
426  classify_cp_cutoff_strength);
427  // Apply disabled classes in unicharset - only works without a shape_table.
428  if (shape_table_ == NULL)
429  pruner.DisableDisabledClasses(unicharset);
430  // If fragments are disabled, remove them, also only without a shape table.
431  if (disable_character_fragments && shape_table_ == NULL)
432  pruner.DisableFragments(unicharset);
433 
434  // If we have good x-heights, apply the given normalization factors.
435  if (normalization_factors != NULL) {
436  pruner.NormalizeForXheight(classify_class_pruner_multiplier,
437  normalization_factors);
438  } else {
439  pruner.NoNormalization();
440  }
441  // Do the actual pruning and sort the short-list.
442  pruner.PruneAndSort(classify_class_pruner_threshold,
443  shape_table_ == NULL, unicharset);
444 
445  if (classify_debug_level > 2) {
446  pruner.DebugMatch(*this, int_templates, features);
447  }
448  if (classify_debug_level > 1) {
449  pruner.SummarizeResult(*this, int_templates, expected_num_features,
450  classify_class_pruner_multiplier,
451  normalization_factors);
452  }
453  // Convert to the expected output format.
454  return pruner.SetupResults(results);
455 }
int classify_class_pruner_threshold
Definition: classify.h:420
#define NULL
Definition: host.h:144
ShapeTable * shape_table_
Definition: classify.h:464
bool disable_character_fragments
Definition: classify.h:407
UNICHARSET unicharset
Definition: ccutil.h:72
int classify_cp_cutoff_strength
Definition: classify.h:424
int classify_class_pruner_multiplier
Definition: classify.h:422
ADAPT_TEMPLATES tesseract::Classify::ReadAdaptedTemplates ( FILE *  File)

Read a set of adapted templates from File and return a ptr to the templates.

Parameters
File: open text file to read adapted templates from
Returns
Ptr to adapted templates read from File.
Note
Globals: none
Exceptions: none
History: Mon Mar 18 15:18:10 1991, DSJ, Created.

Definition at line 371 of file adaptive.cpp.

371  {
372  int i;
373  ADAPT_TEMPLATES Templates;
374 
375  /* first read the high level adaptive template struct */
376  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
377  fread ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
378 
379  /* then read in the basic integer templates */
380  Templates->Templates = ReadIntTemplates (File);
381 
382  /* then read in the adaptive info for each class */
383  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
384  Templates->Class[i] = ReadAdaptedClass (File);
385  }
386  return (Templates);
387 
388 } /* ReadAdaptedTemplates */
ADAPT_CLASS ReadAdaptedClass(FILE *File)
Definition: adaptive.cpp:315
ADAPT_TEMPLATES_STRUCT * ADAPT_TEMPLATES
Definition: adaptive.h:83
INT_TEMPLATES Templates
Definition: adaptive.h:77
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
INT_TEMPLATES ReadIntTemplates(FILE *File)
Definition: intproto.cpp:786
void * Emalloc(size_t Size)
Definition: emalloc.cpp:35
void tesseract::Classify::ReadClassFile ( )

Definition at line 293 of file protos.cpp.

293  {
294  FILE *File;
295  char TextLine[CHARS_PER_LINE];
296  char unichar[CHARS_PER_LINE];
297 
298  cprintf ("Reading training data from '%s' ...",
299  static_cast<STRING>(classify_training_file).string());
300  fflush(stdout);
301 
302  File = open_file(static_cast<STRING>(classify_training_file).string(), "r");
303  while (fgets (TextLine, CHARS_PER_LINE, File) != NULL) {
304 
305  sscanf(TextLine, "%s", unichar);
306  ReadClassFromFile (File, unicharset.unichar_to_id(unichar));
307  fgets(TextLine, CHARS_PER_LINE, File);
308  fgets(TextLine, CHARS_PER_LINE, File);
309  }
310  fclose(File);
311  new_line();
312 }
char * classify_training_file
Definition: protos.cpp:50
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:176
FILE * open_file(const char *filename, const char *mode)
Definition: cutil.cpp:82
#define NULL
Definition: host.h:144
#define CHARS_PER_LINE
Definition: cutil.h:57
UNICHARSET unicharset
Definition: ccutil.h:72
#define new_line()
Definition: cutil.h:83
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
void ReadClassFromFile(FILE *File, UNICHAR_ID unichar_id)
Definition: protos.cpp:321
INT_TEMPLATES tesseract::Classify::ReadIntTemplates ( FILE *  File)

Definition at line 786 of file intproto.cpp.

786  {
787 /*
788  ** Parameters:
789  ** File open file to read templates from
790  ** Globals: none
791  ** Operation: This routine reads a set of integer templates from
792  ** File. File must already be open and must be in the
793  ** correct binary format.
794  ** Return: Pointer to integer templates read from File.
795  ** Exceptions: none
796  ** History: Wed Feb 27 11:48:46 1991, DSJ, Created.
797  */
798  int i, j, w, x, y, z;
799  BOOL8 swap;
800  int nread;
801  int unicharset_size;
802  int version_id = 0;
803  INT_TEMPLATES Templates;
804  CLASS_PRUNER_STRUCT* Pruner;
805  INT_CLASS Class;
806  uinT8 *Lengths;
807  PROTO_SET ProtoSet;
808 
809  /* variables for conversion from older inttemp formats */
810  int b, bit_number, last_cp_bit_number, new_b, new_i, new_w;
811  CLASS_ID class_id, max_class_id;
812  inT16 *IndexFor = new inT16[MAX_NUM_CLASSES];
813  CLASS_ID *ClassIdFor = new CLASS_ID[MAX_NUM_CLASSES];
814  CLASS_PRUNER_STRUCT **TempClassPruner =
815  new CLASS_PRUNER_STRUCT*[MAX_NUM_CLASS_PRUNERS];
816  uinT32 SetBitsForMask = // word with NUM_BITS_PER_CLASS
817  (1 << NUM_BITS_PER_CLASS) - 1; // set starting at bit 0
818  uinT32 Mask, NewMask, ClassBits;
819  int MaxNumConfigs = MAX_NUM_CONFIGS;
820  int WerdsPerConfigVec = WERDS_PER_CONFIG_VEC;
821 
822  /* first read the high level template struct */
823  Templates = NewIntTemplates();
824  // Read Templates in parts for 64 bit compatibility.
825  if (fread(&unicharset_size, sizeof(int), 1, File) != 1)
826  cprintf("Bad read of inttemp!\n");
827  if (fread(&Templates->NumClasses,
828  sizeof(Templates->NumClasses), 1, File) != 1 ||
829  fread(&Templates->NumClassPruners,
830  sizeof(Templates->NumClassPruners), 1, File) != 1)
831  cprintf("Bad read of inttemp!\n");
832  // Swap status is determined automatically.
833  swap = Templates->NumClassPruners < 0 ||
834  Templates->NumClassPruners > MAX_NUM_CLASS_PRUNERS;
835  if (swap) {
836  Reverse32(&Templates->NumClassPruners);
837  Reverse32(&Templates->NumClasses);
838  Reverse32(&unicharset_size);
839  }
840  if (Templates->NumClasses < 0) {
841  // This file has a version id!
842  version_id = -Templates->NumClasses;
843  if (fread(&Templates->NumClasses, sizeof(Templates->NumClasses),
844  1, File) != 1)
845  cprintf("Bad read of inttemp!\n");
846  if (swap)
847  Reverse32(&Templates->NumClasses);
848  }
849 
850  if (version_id < 3) {
851  MaxNumConfigs = OLD_MAX_NUM_CONFIGS;
852  WerdsPerConfigVec = OLD_WERDS_PER_CONFIG_VEC;
853  }
854 
855  if (version_id < 2) {
856  for (i = 0; i < unicharset_size; ++i) {
857  if (fread(&IndexFor[i], sizeof(inT16), 1, File) != 1)
858  cprintf("Bad read of inttemp!\n");
859  }
860  for (i = 0; i < Templates->NumClasses; ++i) {
861  if (fread(&ClassIdFor[i], sizeof(CLASS_ID), 1, File) != 1)
862  cprintf("Bad read of inttemp!\n");
863  }
864  if (swap) {
865  for (i = 0; i < Templates->NumClasses; i++)
866  Reverse16(&IndexFor[i]);
867  for (i = 0; i < Templates->NumClasses; i++)
868  Reverse32(&ClassIdFor[i]);
869  }
870  }
871 
872  /* then read in the class pruners */
873  for (i = 0; i < Templates->NumClassPruners; i++) {
874  Pruner = new CLASS_PRUNER_STRUCT;
875  if ((nread =
876  fread(Pruner, 1, sizeof(CLASS_PRUNER_STRUCT),
877  File)) != sizeof(CLASS_PRUNER_STRUCT))
878  cprintf("Bad read of inttemp!\n");
879  if (swap) {
880  for (x = 0; x < NUM_CP_BUCKETS; x++) {
881  for (y = 0; y < NUM_CP_BUCKETS; y++) {
882  for (z = 0; z < NUM_CP_BUCKETS; z++) {
883  for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
884  Reverse32(&Pruner->p[x][y][z][w]);
885  }
886  }
887  }
888  }
889  }
890  if (version_id < 2) {
891  TempClassPruner[i] = Pruner;
892  } else {
893  Templates->ClassPruners[i] = Pruner;
894  }
895  }
896 
897  /* fix class pruners if they came from an old version of inttemp */
898  if (version_id < 2) {
899  // Allocate enough class pruners to cover all the class ids.
900  max_class_id = 0;
901  for (i = 0; i < Templates->NumClasses; i++)
902  if (ClassIdFor[i] > max_class_id)
903  max_class_id = ClassIdFor[i];
904  for (i = 0; i <= CPrunerIdFor(max_class_id); i++) {
905  Templates->ClassPruners[i] = new CLASS_PRUNER_STRUCT;
906  memset(Templates->ClassPruners[i], 0, sizeof(CLASS_PRUNER_STRUCT));
907  }
908  // Convert class pruners from the old format (indexed by class index)
909  // to the new format (indexed by class id).
910  last_cp_bit_number = NUM_BITS_PER_CLASS * Templates->NumClasses - 1;
911  for (i = 0; i < Templates->NumClassPruners; i++) {
912  for (x = 0; x < NUM_CP_BUCKETS; x++)
913  for (y = 0; y < NUM_CP_BUCKETS; y++)
914  for (z = 0; z < NUM_CP_BUCKETS; z++)
915  for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
916  if (TempClassPruner[i]->p[x][y][z][w] == 0)
917  continue;
918  for (b = 0; b < BITS_PER_WERD; b += NUM_BITS_PER_CLASS) {
919  bit_number = i * BITS_PER_CP_VECTOR + w * BITS_PER_WERD + b;
920  if (bit_number > last_cp_bit_number)
921  break; // the rest of the bits in this word are not used
922  class_id = ClassIdFor[bit_number / NUM_BITS_PER_CLASS];
923  // Single out NUM_BITS_PER_CLASS bits relating to class_id.
924  Mask = SetBitsForMask << b;
925  ClassBits = TempClassPruner[i]->p[x][y][z][w] & Mask;
926  // Move these bits to the new position in which they should
927  // appear (indexed corresponding to the class_id).
928  new_i = CPrunerIdFor(class_id);
929  new_w = CPrunerWordIndexFor(class_id);
930  new_b = CPrunerBitIndexFor(class_id) * NUM_BITS_PER_CLASS;
931  if (new_b > b) {
932  ClassBits <<= (new_b - b);
933  } else {
934  ClassBits >>= (b - new_b);
935  }
936  // Copy bits relating to class_id to the correct position
937  // in Templates->ClassPruner.
938  NewMask = SetBitsForMask << new_b;
939  Templates->ClassPruners[new_i]->p[x][y][z][new_w] &= ~NewMask;
940  Templates->ClassPruners[new_i]->p[x][y][z][new_w] |= ClassBits;
941  }
942  }
943  }
944  for (i = 0; i < Templates->NumClassPruners; i++) {
945  delete TempClassPruner[i];
946  }
947  }
948 
949  /* then read in each class */
950  for (i = 0; i < Templates->NumClasses; i++) {
951  /* first read in the high level struct for the class */
952  Class = (INT_CLASS) Emalloc (sizeof (INT_CLASS_STRUCT));
953  if (fread(&Class->NumProtos, sizeof(Class->NumProtos), 1, File) != 1 ||
954  fread(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File) != 1 ||
955  fread(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File) != 1)
956  cprintf ("Bad read of inttemp!\n");
957  if (version_id == 0) {
958  // Only version 0 writes 5 pointless pointers to the file.
959  for (j = 0; j < 5; ++j) {
960  int junk;
961  if (fread(&junk, sizeof(junk), 1, File) != 1)
962  cprintf ("Bad read of inttemp!\n");
963  }
964  }
965  if (version_id < 4) {
966  for (j = 0; j < MaxNumConfigs; ++j) {
967  if (fread(&Class->ConfigLengths[j], sizeof(uinT16), 1, File) != 1)
968  cprintf ("Bad read of inttemp!\n");
969  }
970  if (swap) {
971  Reverse16(&Class->NumProtos);
972  for (j = 0; j < MaxNumConfigs; j++)
973  Reverse16(&Class->ConfigLengths[j]);
974  }
975  } else {
976  ASSERT_HOST(Class->NumConfigs < MaxNumConfigs);
977  for (j = 0; j < Class->NumConfigs; ++j) {
978  if (fread(&Class->ConfigLengths[j], sizeof(uinT16), 1, File) != 1)
979  cprintf ("Bad read of inttemp!\n");
980  }
981  if (swap) {
982  Reverse16(&Class->NumProtos);
983  for (j = 0; j < MaxNumConfigs; j++)
984  Reverse16(&Class->ConfigLengths[j]);
985  }
986  }
987  if (version_id < 2) {
988  ClassForClassId (Templates, ClassIdFor[i]) = Class;
989  } else {
990  ClassForClassId (Templates, i) = Class;
991  }
992 
993  /* then read in the proto lengths */
994  Lengths = NULL;
995  if (MaxNumIntProtosIn (Class) > 0) {
996  Lengths = (uinT8 *)Emalloc(sizeof(uinT8) * MaxNumIntProtosIn(Class));
997  if ((nread =
998  fread((char *)Lengths, sizeof(uinT8),
999  MaxNumIntProtosIn(Class), File)) != MaxNumIntProtosIn (Class))
1000  cprintf ("Bad read of inttemp!\n");
1001  }
1002  Class->ProtoLengths = Lengths;
1003 
1004  /* then read in the proto sets */
1005  for (j = 0; j < Class->NumProtoSets; j++) {
1006  ProtoSet = (PROTO_SET)Emalloc(sizeof(PROTO_SET_STRUCT));
1007  if (version_id < 3) {
1008  if ((nread =
1009  fread((char *) &ProtoSet->ProtoPruner, 1,
1010  sizeof(PROTO_PRUNER), File)) != sizeof(PROTO_PRUNER))
1011  cprintf("Bad read of inttemp!\n");
1012  for (x = 0; x < PROTOS_PER_PROTO_SET; x++) {
1013  if ((nread = fread((char *) &ProtoSet->Protos[x].A, 1,
1014  sizeof(inT8), File)) != sizeof(inT8) ||
1015  (nread = fread((char *) &ProtoSet->Protos[x].B, 1,
1016  sizeof(uinT8), File)) != sizeof(uinT8) ||
1017  (nread = fread((char *) &ProtoSet->Protos[x].C, 1,
1018  sizeof(inT8), File)) != sizeof(inT8) ||
1019  (nread = fread((char *) &ProtoSet->Protos[x].Angle, 1,
1020  sizeof(uinT8), File)) != sizeof(uinT8))
1021  cprintf("Bad read of inttemp!\n");
1022  for (y = 0; y < WerdsPerConfigVec; y++)
1023  if ((nread = fread((char *) &ProtoSet->Protos[x].Configs[y], 1,
1024  sizeof(uinT32), File)) != sizeof(uinT32))
1025  cprintf("Bad read of inttemp!\n");
1026  }
1027  } else {
1028  if ((nread =
1029  fread((char *) ProtoSet, 1, sizeof(PROTO_SET_STRUCT),
1030  File)) != sizeof(PROTO_SET_STRUCT))
1031  cprintf("Bad read of inttemp!\n");
1032  }
1033  if (swap) {
1034  for (x = 0; x < NUM_PP_PARAMS; x++)
1035  for (y = 0; y < NUM_PP_BUCKETS; y++)
1036  for (z = 0; z < WERDS_PER_PP_VECTOR; z++)
1037  Reverse32(&ProtoSet->ProtoPruner[x][y][z]);
1038  for (x = 0; x < PROTOS_PER_PROTO_SET; x++)
1039  for (y = 0; y < WerdsPerConfigVec; y++)
1040  Reverse32(&ProtoSet->Protos[x].Configs[y]);
1041  }
1042  Class->ProtoSets[j] = ProtoSet;
1043  }
1044  if (version_id < 4)
1045  Class->font_set_id = -1;
1046  else {
1047  fread(&Class->font_set_id, sizeof(int), 1, File);
1048  if (swap)
1049  Reverse32(&Class->font_set_id);
1050  }
1051  }
1052 
1053  if (version_id < 2) {
1054  /* add an empty NULL class with class id 0 */
1055  assert(UnusedClassIdIn (Templates, 0));
1056  ClassForClassId (Templates, 0) = NewIntClass (1, 1);
1057  ClassForClassId (Templates, 0)->font_set_id = -1;
1058  Templates->NumClasses++;
1059  /* make sure the classes are contiguous */
1060  for (i = 0; i < MAX_NUM_CLASSES; i++) {
1061  if (i < Templates->NumClasses) {
1062  if (ClassForClassId (Templates, i) == NULL) {
1063  fprintf(stderr, "Non-contiguous class ids in inttemp\n");
1064  exit(1);
1065  }
1066  } else {
1067  if (ClassForClassId (Templates, i) != NULL) {
1068  fprintf(stderr, "Class id %d exceeds NumClassesIn (Templates) %d\n",
1069  i, Templates->NumClasses);
1070  exit(1);
1071  }
1072  }
1073  }
1074  }
1075  if (version_id >= 4) {
1076  this->fontinfo_table_.read(File, NewPermanentTessCallback(read_info), swap);
1077  if (version_id >= 5) {
1078  this->fontinfo_table_.read(File,
1079  NewPermanentTessCallback(read_spacing_info),
1080  swap);
1081  }
1082  this->fontset_table_.read(File, NewPermanentTessCallback(read_set), swap);
1083  }
1084 
1085  // Clean up.
1086  delete[] IndexFor;
1087  delete[] ClassIdFor;
1088  delete[] TempClassPruner;
1089 
1090  return (Templates);
1091 } /* ReadIntTemplates */
#define MAX_NUM_CONFIGS
Definition: intproto.h:44
void Reverse32(void *ptr)
Definition: helpers.h:142
uinT32 Configs[WERDS_PER_CONFIG_VEC]
Definition: intproto.h:84
UnicityTable< FontSet > fontset_table_
Definition: classify.h:451
#define ClassForClassId(T, c)
Definition: intproto.h:173
unsigned char BOOL8
Definition: host.h:113
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:109
uinT8 NumConfigs
Definition: intproto.h:108
#define NULL
Definition: host.h:144
struct PROTO_SET_STRUCT * PROTO_SET
#define NUM_CP_BUCKETS
Definition: intproto.h:50
#define OLD_MAX_NUM_CONFIGS
Definition: intproto.cpp:113
#define WERDS_PER_CP_VECTOR
Definition: intproto.h:59
#define CPrunerBitIndexFor(c)
Definition: intproto.h:178
uinT8 * ProtoLengths
Definition: intproto.h:110
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:683
#define CPrunerWordIndexFor(c)
Definition: intproto.h:177
#define OLD_WERDS_PER_CONFIG_VEC
Definition: intproto.cpp:114
uinT32 p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
Definition: intproto.h:75
uinT32 PROTO_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR]
Definition: intproto.h:90
#define CPrunerIdFor(c)
Definition: intproto.h:175
void Reverse16(void *ptr)
Definition: helpers.h:137
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:749
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
struct INT_CLASS_STRUCT * INT_CLASS
#define NUM_BITS_PER_CLASS
Definition: intproto.h:52
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:443
#define UnusedClassIdIn(T, c)
Definition: intproto.h:172
INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]
Definition: intproto.h:95
#define BITS_PER_WERD
Definition: intproto.h:42
#define WERDS_PER_PP_VECTOR
Definition: intproto.h:60
#define PROTOS_PER_PROTO_SET
Definition: intproto.h:46
void * Emalloc(size_t Size)
Definition: emalloc.cpp:35
unsigned short uinT16
Definition: host.h:101
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:123
bool read_set(FILE *f, FontSet *fs, bool swap)
Definition: fontinfo.cpp:140
#define NUM_PP_PARAMS
Definition: intproto.h:48
bool read_info(FILE *f, FontInfo *fi, bool swap)
Definition: fontinfo.cpp:57
short inT16
Definition: host.h:100
#define NUM_PP_BUCKETS
Definition: intproto.h:49
SIGNED char inT8
Definition: host.h:98
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
#define MaxNumIntProtosIn(C)
Definition: intproto.h:160
uinT16 ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:111
uinT16 NumProtos
Definition: intproto.h:106
unsigned char uinT8
Definition: host.h:99
#define BITS_PER_CP_VECTOR
Definition: intproto.h:56
PROTO_PRUNER ProtoPruner
Definition: intproto.h:94
bool read_spacing_info(FILE *f, FontInfo *fi, bool swap)
Definition: fontinfo.cpp:80
#define MAX_NUM_CLASS_PRUNERS
Definition: intproto.h:57
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
#define ASSERT_HOST(x)
Definition: errcode.h:84
unsigned int uinT32
Definition: host.h:103
#define WERDS_PER_CONFIG_VEC
Definition: intproto.h:66
uinT8 NumProtoSets
Definition: intproto.h:107
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
void tesseract::Classify::ReadNewCutoffs ( FILE *  CutoffFile,
bool  swap,
inT64  end_offset,
CLASS_CUTOFF_ARRAY  Cutoffs 
)

Definition at line 42 of file cutoffs.cpp.

43  {
44 /*
45  ** Parameters:
46  ** Filename name of file containing cutoff definitions
47  ** Cutoffs array to put cutoffs into
48  ** Globals: none
49  ** Operation: Open Filename, read in all of the class-id/cutoff pairs
50  ** and insert them into the Cutoffs array. Cutoffs are
51  ** indexed in the array by class id. Unused entries in the
52  ** array are set to an arbitrarily high cutoff value.
53  ** Return: none
54  ** Exceptions: none
55  ** History: Wed Feb 20 09:38:26 1991, DSJ, Created.
56  */
57  char Class[UNICHAR_LEN + 1];
58  CLASS_ID ClassId;
59  int Cutoff;
60  int i;
61 
62  if (shape_table_ != NULL) {
63  if (!shapetable_cutoffs_.DeSerialize(swap, CutoffFile)) {
64  tprintf("Error during read of shapetable pffmtable!\n");
65  }
66  }
67  for (i = 0; i < MAX_NUM_CLASSES; i++)
68  Cutoffs[i] = MAX_CUTOFF;
69 
70  while ((end_offset < 0 || ftell(CutoffFile) < end_offset) &&
71  fscanf(CutoffFile, "%" REALLY_QUOTE_IT(UNICHAR_LEN) "s %d",
72  Class, &Cutoff) == 2) {
73  if (strcmp(Class, "NULL") == 0) {
74  ClassId = unicharset.unichar_to_id(" ");
75  } else {
76  ClassId = unicharset.unichar_to_id(Class);
77  }
78  Cutoffs[ClassId] = Cutoff;
79  SkipNewline(CutoffFile);
80  }
81 } /* ReadNewCutoffs */
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:176
void SkipNewline(FILE *file)
Definition: helpers.h:41
#define MAX_CUTOFF
Definition: cutoffs.cpp:35
virtual bool DeSerialize(bool swap, FILE *fp)
#define NULL
Definition: host.h:144
ShapeTable * shape_table_
Definition: classify.h:464
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
#define REALLY_QUOTE_IT(x)
Definition: cutoffs.cpp:33
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
UNICHARSET unicharset
Definition: ccutil.h:72
#define UNICHAR_LEN
Definition: unichar.h:28
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
NORM_PROTOS * tesseract::Classify::ReadNormProtos ( FILE *  File,
inT64  end_offset 
)

Definition at line 230 of file normmatch.cpp.

230  {
231 /*
232  ** Parameters:
233  ** File open text file to read normalization protos from
234  ** Globals: none
235  ** Operation: This routine allocates a new data structure to hold
236  ** a set of character normalization protos. It then fills in
237  ** the data structure by reading from the specified File.
238  ** Return: Character normalization protos.
239  ** Exceptions: none
240  ** History: Wed Dec 19 16:38:49 1990, DSJ, Created.
241  */
242  NORM_PROTOS *NormProtos;
243  int i;
244  char unichar[2 * UNICHAR_LEN + 1];
245  UNICHAR_ID unichar_id;
246  LIST Protos;
247  int NumProtos;
248 
249  /* allocate and initialization data structure */
250  NormProtos = (NORM_PROTOS *) Emalloc (sizeof (NORM_PROTOS));
251  NormProtos->NumProtos = unicharset.size();
252  NormProtos->Protos = (LIST *) Emalloc (NormProtos->NumProtos * sizeof(LIST));
253  for (i = 0; i < NormProtos->NumProtos; i++)
254  NormProtos->Protos[i] = NIL_LIST;
255 
256  /* read file header and save in data structure */
257  NormProtos->NumParams = ReadSampleSize (File);
258  NormProtos->ParamDesc = ReadParamDesc (File, NormProtos->NumParams);
259 
260  /* read protos for each class into a separate list */
261  while ((end_offset < 0 || ftell(File) < end_offset) &&
262  fscanf(File, "%s %d", unichar, &NumProtos) == 2) {
263  if (unicharset.contains_unichar(unichar)) {
264  unichar_id = unicharset.unichar_to_id(unichar);
265  Protos = NormProtos->Protos[unichar_id];
266  for (i = 0; i < NumProtos; i++)
267  Protos =
268  push_last (Protos, ReadPrototype (File, NormProtos->NumParams));
269  NormProtos->Protos[unichar_id] = Protos;
270  } else {
271  cprintf("Error: unichar %s in normproto file is not in unichar set.\n",
272  unichar);
273  for (i = 0; i < NumProtos; i++)
274  FreePrototype(ReadPrototype (File, NormProtos->NumParams));
275  }
276  SkipNewline(File);
277  }
278  return (NormProtos);
279 } /* ReadNormProtos */
PARAM_DESC * ReadParamDesc(FILE *File, uinT16 N)
Definition: clusttool.cpp:68
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:338
int UNICHAR_ID
Definition: unichar.h:31
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:176
void SkipNewline(FILE *file)
Definition: helpers.h:41
int size() const
Definition: unicharset.h:264
LIST * Protos
Definition: normmatch.cpp:42
#define NIL_LIST
Definition: oldlist.h:126
uinT16 ReadSampleSize(FILE *File)
Definition: clusttool.cpp:46
void FreePrototype(void *arg)
Definition: cluster.cpp:575
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:543
void * Emalloc(size_t Size)
Definition: emalloc.cpp:35
PROTOTYPE * ReadPrototype(FILE *File, uinT16 N)
Definition: clusttool.cpp:115
UNICHARSET unicharset
Definition: ccutil.h:72
#define UNICHAR_LEN
Definition: unichar.h:28
PARAM_DESC * ParamDesc
Definition: normmatch.cpp:41
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
NORM_PROTOS * NormProtos
Definition: classify.h:441
void tesseract::Classify::RefreshDebugWindow ( ScrollView **  win,
const char *  msg,
int  y_offset,
const TBOX wbox 
)

Definition at line 228 of file adaptmatch.cpp.

229  {
230  #ifndef GRAPHICS_DISABLED
231  const int kSampleSpaceWidth = 500;
232  if (*win == NULL) {
233  *win = new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
234  kSampleSpaceWidth * 2, 200, true);
235  }
236  (*win)->Clear();
237  (*win)->Pen(64, 64, 64);
238  (*win)->Line(-kSampleSpaceWidth, kBlnBaselineOffset,
239  kSampleSpaceWidth, kBlnBaselineOffset);
240  (*win)->Line(-kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset,
241  kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset);
242  (*win)->ZoomToRectangle(wbox.left(), wbox.top(),
243  wbox.right(), wbox.bottom());
244  #endif // GRAPHICS_DISABLED
245 }
const int kBlnXHeight
Definition: normalis.h:27
const int kBlnBaselineOffset
Definition: normalis.h:28
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
inT16 right() const
Definition: rect.h:74
inT16 top() const
Definition: rect.h:53
inT16 bottom() const
Definition: rect.h:60
void tesseract::Classify::RemoveBadMatches ( ADAPT_RESULTS Results)

This routine steps through each matching class in Results and removes it from the match list if its rating is worse than the BestRating plus a pad. In other words, all good matches get moved to the front of the classes array.

Parameters
Results: contains matches to be filtered

Globals:

  • matcher_bad_match_pad defines a "bad match"
Note
Exceptions: none
History: Tue Mar 12 13:51:03 1991, DSJ, Created.

Definition at line 2450 of file adaptmatch.cpp.

2450  {
2451  int Next, NextGood;
2452  FLOAT32 BadMatchThreshold;
2453  static const char* romans = "i v x I V X";
2454  BadMatchThreshold = Results->best_match.rating + matcher_bad_match_pad;
2455 
2456  if (classify_bln_numeric_mode) {
2457  UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ?
2458  unicharset.unichar_to_id("1") : -1;
2459  UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ?
2460  unicharset.unichar_to_id("0") : -1;
2461  ScoredClass scored_one = ScoredUnichar(Results, unichar_id_one);
2462  ScoredClass scored_zero = ScoredUnichar(Results, unichar_id_zero);
2463 
2464  for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
2465  if (Results->match[Next].rating <= BadMatchThreshold) {
2466  ScoredClass match = Results->match[Next];
2467  if (!unicharset.get_isalpha(match.unichar_id) ||
2468  strstr(romans,
2469  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2470  Results->match[NextGood++] = Results->match[Next];
2471  } else if (unicharset.eq(match.unichar_id, "l") &&
2472  scored_one.rating >= BadMatchThreshold) {
2473  Results->match[NextGood] = scored_one;
2474  Results->match[NextGood].rating = match.rating;
2475  NextGood++;
2476  } else if (unicharset.eq(match.unichar_id, "O") &&
2477  scored_zero.rating >= BadMatchThreshold) {
2478  Results->match[NextGood] = scored_zero;
2479  Results->match[NextGood].rating = match.rating;
2480  NextGood++;
2481  }
2482  }
2483  }
2484  } else {
2485  for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
2486  if (Results->match[Next].rating <= BadMatchThreshold)
2487  Results->match[NextGood++] = Results->match[Next];
2488  }
2489  }
2490  Results->NumMatches = NextGood;
2491 } /* RemoveBadMatches */
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:392
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
int UNICHAR_ID
Definition: unichar.h:31
ScoredClass best_match
Definition: adaptmatch.cpp:96
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:176
ScoredClass ScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id)
#define NULL
Definition: host.h:144
CLASS_ID unichar_id
Definition: adaptmatch.cpp:82
float FLOAT32
Definition: host.h:111
bool classify_bln_numeric_mode
Definition: classify.h:455
FLOAT32 rating
Definition: adaptmatch.cpp:84
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
Definition: unicharset.cpp:555
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:543
UNICHARSET unicharset
Definition: ccutil.h:72
ScoredClass match[MAX_NUM_CLASSES]
Definition: adaptmatch.cpp:95
double matcher_bad_match_pad
Definition: classify.h:384
void tesseract::Classify::RemoveExtraPuncs ( ADAPT_RESULTS Results)

This routine discards extra digits or punctuation from the results. We keep only the top 2 punctuation answers and the top 1 digit answer if present.

Parameters
Results: contains matches to be filtered
Note
History: Tue Mar 12 13:51:03 1991, DSJ, Created.

Definition at line 2503 of file adaptmatch.cpp.

2503  {
2504  int Next, NextGood;
2505  int punc_count; /*no of garbage characters */
2506  int digit_count;
2507  /*garbage characters */
2508  static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2509  static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
2510 
2511  punc_count = 0;
2512  digit_count = 0;
2513  for (Next = NextGood = 0; Next < Results->NumMatches; Next++) {
2514  ScoredClass match = Results->match[Next];
2515  if (strstr(punc_chars,
2516  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2517  if (punc_count < 2)
2518  Results->match[NextGood++] = match;
2519  punc_count++;
2520  } else {
2521  if (strstr(digit_chars,
2522  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2523  if (digit_count < 1)
2524  Results->match[NextGood++] = match;
2525  digit_count++;
2526  } else {
2527  Results->match[NextGood++] = match;
2528  }
2529  }
2530  }
2531  Results->NumMatches = NextGood;
2532 } /* RemoveExtraPuncs */
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
#define NULL
Definition: host.h:144
CLASS_ID unichar_id
Definition: adaptmatch.cpp:82
UNICHARSET unicharset
Definition: ccutil.h:72
ScoredClass match[MAX_NUM_CLASSES]
Definition: adaptmatch.cpp:95
void tesseract::Classify::ResetAdaptiveClassifierInternal ( )

Definition at line 636 of file adaptmatch.cpp.

636  {
638  tprintf("Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
639  NumAdaptationsFailed);
640  }
643  NumAdaptationsFailed = 0;
644 }
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
#define NULL
Definition: host.h:144
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:430
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int classify_learning_debug_level
Definition: classify.h:380
void tesseract::Classify::ResetFeaturesHaveBeenExtracted ( )

Definition at line 1985 of file adaptmatch.cpp.

1985  {
1986  FeaturesHaveBeenExtracted = FALSE;
1987 }
#define FALSE
Definition: capi.h:28
void tesseract::Classify::SetAdaptiveThreshold ( FLOAT32  Threshold)

This routine resets the internal thresholds inside the integer matcher to correspond to the specified threshold.

Parameters
Threshold threshold for creating new templates

Globals:

  • matcher_good_threshold default good match rating
Note
Exceptions: none
History: Tue Apr 9 08:33:13 1991, DSJ, Created.

Definition at line 2548 of file adaptmatch.cpp.

2548  {
2549  Threshold = (Threshold == matcher_good_threshold) ? 0.9: (1.0 - Threshold);
2551  ClipToRange<int>(255 * Threshold, 0, 255));
2553  ClipToRange<int>(255 * Threshold, 0, 255));
2554 } /* SetAdaptiveThreshold */
int classify_adapt_feature_threshold
Definition: classify.h:404
int classify_adapt_proto_threshold
Definition: classify.h:402
double matcher_good_threshold
Definition: classify.h:381
void tesseract::Classify::SettupPass1 ( )

This routine prepares the adaptive matcher for the start of the first pass. Learning is enabled (unless it is disabled for the whole program).

Note
This is somewhat redundant: it simply says that if learning is enabled then it will remain enabled on the first pass, and if it is disabled then it will remain disabled. It is only put here to make it very clear that learning is controlled directly by the global setting of EnableLearning.

Globals:

Note
Exceptions: none
History: Mon Apr 15 16:39:29 1991, DSJ, Created.

Definition at line 710 of file adaptmatch.cpp.

710  {
712 
714 
715 } /* SettupPass1 */
Dict & getDict()
Definition: classify.h:62
bool classify_enable_learning
Definition: classify.h:356
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:780
void tesseract::Classify::SettupPass2 ( )

This routine prepares the adaptive matcher for the start of the second pass. Further learning is disabled.

Globals:

Note
Exceptions: none
History: Mon Apr 15 16:39:29 1991, DSJ, Created.

Definition at line 730 of file adaptmatch.cpp.

730  {
733 
734 } /* SettupPass2 */
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:784
#define FALSE
Definition: capi.h:28
Dict & getDict()
Definition: classify.h:62
const ShapeTable* tesseract::Classify::shape_table ( ) const
inline

Definition at line 66 of file classify.h.

66  {
67  return shape_table_;
68  }
ShapeTable * shape_table_
Definition: classify.h:464
int tesseract::Classify::ShapeIDToClassID ( int  shape_id) const

Definition at line 2746 of file adaptmatch.cpp.

2746  {
2747  for (int id = 0; id < PreTrainedTemplates->NumClasses; ++id) {
2748  int font_set_id = PreTrainedTemplates->Class[id]->font_set_id;
2749  ASSERT_HOST(font_set_id >= 0);
2750  const FontSet &fs = fontset_table_.get(font_set_id);
2751  for (int config = 0; config < fs.size; ++config) {
2752  if (fs.configs[config] == shape_id)
2753  return id;
2754  }
2755  }
2756  tprintf("Shape %d not found\n", shape_id);
2757  return -1;
2758 }
UnicityTable< FontSet > fontset_table_
Definition: classify.h:451
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:426
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:122
#define ASSERT_HOST(x)
Definition: errcode.h:84
void tesseract::Classify::ShowBestMatchFor ( TBLOB * Blob,
const DENORM & denorm,
CLASS_ID  ClassId,
int  shape_id,
BOOL8  AdaptiveOn,
BOOL8  PreTrainedOn,
ADAPT_RESULTS * Results 
)

This routine compares Blob to both sets of templates (adaptive and pre-trained) and then displays debug information for the config which matched best.

Parameters
Blob blob to show best matching config for
denorm normalization/denormalization parameters
ClassId class whose configs are to be searched
shape_id shape index
AdaptiveOn TRUE if adaptive configs are enabled
PreTrainedOn TRUE if pretrained configs are enabled
Results results of match being debugged

Globals:

  • PreTrainedTemplates built-in training
  • AdaptedTemplates adaptive templates
  • AllProtosOn dummy proto mask
  • AllConfigsOn dummy config mask
Note
Exceptions: none
History: Fri Mar 22 08:43:52 1991, DSJ, Created.

Definition at line 2579 of file adaptmatch.cpp.

2585  {
2586  int NumCNFeatures = 0, NumBLFeatures = 0;
2587  INT_FEATURE_ARRAY CNFeatures, BLFeatures;
2588  INT_RESULT_STRUCT CNResult, BLResult;
2589  inT32 BlobLength;
2590  uinT32 ConfigMask;
2591  static int next_config = -1;
2592 
2593  if (PreTrainedOn) next_config = -1;
2594 
2595  CNResult.Rating = BLResult.Rating = 2.0;
2596 
2597  if (!LegalClassId (ClassId)) {
2598  cprintf ("%d is not a legal class id!!\n", ClassId);
2599  return;
2600  }
2601 
2602  uinT8 *CNAdjust = new uinT8[MAX_NUM_CLASSES];
2603  uinT8 *BLAdjust = new uinT8[MAX_NUM_CLASSES];
2604 
2605  if (shape_table_ == NULL)
2606  shape_id = ClassId;
2607  else
2608  shape_id = ShapeIDToClassID(shape_id);
2609  if (PreTrainedOn && shape_id >= 0) {
2610  if (UnusedClassIdIn(PreTrainedTemplates, shape_id)) {
2611  tprintf("No built-in templates for class/shape %d\n", shape_id);
2612  } else {
2613  NumCNFeatures = GetCharNormFeatures(Blob, denorm, PreTrainedTemplates,
2614  CNFeatures, NULL, CNAdjust,
2615  &BlobLength, NULL);
2616  if (NumCNFeatures <= 0) {
2617  tprintf("Illegal blob (char norm features)!\n");
2618  } else {
2622  NumCNFeatures, CNFeatures,
2623  &CNResult,
2626  ExpandShapesAndApplyCorrections(NULL, false, shape_id,
2627  Blob->bounding_box().bottom(),
2628  Blob->bounding_box().top(),
2629  0, BlobLength, CNAdjust,
2630  CNResult, Results);
2631  }
2632  }
2633  }
2634 
2635  if (AdaptiveOn) {
2636  if (ClassId < 0 || ClassId >= AdaptedTemplates->Templates->NumClasses) {
2637  tprintf("Invalid adapted class id: %d\n", ClassId);
2638  } else if (UnusedClassIdIn(AdaptedTemplates->Templates, ClassId) ||
2639  AdaptedTemplates->Class[ClassId] == NULL ||
2641  tprintf("No AD templates for class %d = %s\n",
2642  ClassId, unicharset.id_to_unichar(ClassId));
2643  } else {
2644  NumBLFeatures = GetBaselineFeatures(Blob,
2645  denorm,
2647  BLFeatures, BLAdjust,
2648  &BlobLength);
2649  if (NumBLFeatures <= 0)
2650  tprintf("Illegal blob (baseline features)!\n");
2651  else {
2655  NumBLFeatures, BLFeatures,
2656  &BLResult,
2660  AdaptedTemplates->Class, false,
2661  ClassId, Blob->bounding_box().bottom(),
2662  Blob->bounding_box().top(), 0, BlobLength, CNAdjust,
2663  BLResult, Results);
2664  }
2665  }
2666  }
2667 
2668  tprintf("\n");
2669  if (BLResult.Rating < CNResult.Rating) {
2670  if (next_config < 0) {
2671  ConfigMask = 1 << BLResult.Config;
2672  next_config = 0;
2673  } else {
2674  ConfigMask = 1 << next_config;
2675  ++next_config;
2676  }
2677  classify_norm_method.set_value(baseline);
2678 
2680  tprintf("Adaptive Class ID: %d\n", ClassId);
2682  AllProtosOn, (BIT_VECTOR) &ConfigMask,
2683  NumBLFeatures, BLFeatures,
2684  &BLResult,
2689  AdaptedTemplates->Class, true,
2690  ClassId, Blob->bounding_box().bottom(),
2691  Blob->bounding_box().top(), 0, BlobLength, CNAdjust,
2692  BLResult, Results);
2693  } else if (shape_id >= 0) {
2694  ConfigMask = 1 << CNResult.Config;
2695  classify_norm_method.set_value(character);
2696 
2697  tprintf("Static Shape ID: %d\n", shape_id);
2700  AllProtosOn, (BIT_VECTOR) & ConfigMask,
2701  NumCNFeatures, CNFeatures,
2702  &CNResult,
2706  ExpandShapesAndApplyCorrections(NULL, true, shape_id,
2707  Blob->bounding_box().bottom(),
2708  Blob->bounding_box().top(),
2709  0, BlobLength, CNAdjust,
2710  CNResult, Results);
2711  }
2712 
2713  // Clean up.
2714  delete[] CNAdjust;
2715  delete[] BLAdjust;
2716 } /* ShowBestMatchFor */
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
BIT_VECTOR AllProtosOn
Definition: classify.h:433
void SetBaseLineMatch()
Definition: intmatcher.cpp:728
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
IntegerMatcher im_
Definition: classify.h:455
int classify_adapt_feature_threshold
Definition: classify.h:404
#define ClassForClassId(T, c)
Definition: intproto.h:173
int classify_integer_matcher_multiplier
Definition: classify.h:426
#define NULL
Definition: host.h:144
int inT32
Definition: host.h:102
void SetCharNormMatch(int integer_matcher_multiplier)
Definition: intmatcher.cpp:734
INT_TEMPLATES Templates
Definition: adaptive.h:77
int ShapeIDToClassID(int shape_id) const
ShapeTable * shape_table_
Definition: classify.h:464
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:430
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, INT_RESULT Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:460
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:426
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
#define UnusedClassIdIn(T, c)
Definition: intproto.h:172
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
inT16 top() const
Definition: rect.h:53
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
TBOX bounding_box() const
Definition: blobs.cpp:384
UNICHARSET unicharset
Definition: ccutil.h:72
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: baseapi.h:66
bool matcher_debug_separate_windows
Definition: classify.h:415
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, const uinT8 *cn_factors, INT_RESULT_STRUCT &int_result, ADAPT_RESULTS *final_results)
#define LegalClassId(c)
Definition: intproto.h:171
unsigned char uinT8
Definition: host.h:99
int GetBaselineFeatures(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *CharNormArray, inT32 *BlobLength)
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
int GetCharNormFeatures(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *PrunerNormArray, uinT8 *CharNormArray, inT32 *BlobLength, inT32 *FeatureOutlineIndex)
unsigned int uinT32
Definition: host.h:103
BIT_VECTOR AllConfigsOn
Definition: classify.h:435
#define NO_DEBUG
Definition: adaptmatch.cpp:72
inT16 bottom() const
Definition: rect.h:60
void tesseract::Classify::ShowMatchDisplay ( )

Definition at line 1096 of file intproto.cpp.

1096  {
1097 /*
1098  ** Parameters: none
1099  ** Globals:
1100  ** FeatureShapes display list containing feature matches
1101  ** ProtoShapes display list containing proto matches
1102  ** Operation: This routine sends the shapes in the global display
1103  ** lists to the match debugger window.
1104  ** Return: none
1105  ** Exceptions: none
1106  ** History: Thu Mar 21 15:47:33 1991, DSJ, Created.
1107  */
1109  if (ProtoDisplayWindow) {
1111  }
1112  if (FeatureDisplayWindow) {
1114  }
1116  static_cast<NORM_METHOD>(static_cast<int>(classify_norm_method)),
1117  IntMatchWindow);
1119  INT_MAX_X, INT_MAX_Y);
1120  if (ProtoDisplayWindow) {
1122  INT_MAX_X, INT_MAX_Y);
1123  }
1124  if (FeatureDisplayWindow) {
1126  INT_MAX_X, INT_MAX_Y);
1127  }
1128 } /* ShowMatchDisplay */
void Clear()
Definition: scrollview.cpp:590
ScrollView * FeatureDisplayWindow
Definition: intproto.cpp:181
void ZoomToRectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:760
#define INT_MAX_Y
Definition: intproto.cpp:67
void InitIntMatchWindowIfReqd()
Definition: intproto.cpp:1949
#define INT_MIN_Y
Definition: intproto.cpp:65
ScrollView * IntMatchWindow
Definition: intproto.cpp:180
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView *window)
Definition: intproto.cpp:1132
ScrollView * ProtoDisplayWindow
Definition: intproto.cpp:182
#define INT_MIN_X
Definition: intproto.cpp:64
#define INT_MAX_X
Definition: intproto.cpp:66
bool tesseract::Classify::TempConfigReliable ( CLASS_ID  class_id,
const TEMP_CONFIG & config 
)

Definition at line 2762 of file adaptmatch.cpp.

2763  {
2764  if (classify_learning_debug_level >= 1) {
2765  tprintf("NumTimesSeen for config of %s is %d\n",
2766  getDict().getUnicharset().debug_str(class_id).string(),
2767  config->NumTimesSeen);
2768  }
2770  return true;
2771  } else if (config->NumTimesSeen < matcher_min_examples_for_prototyping) {
2772  return false;
2773  } else if (use_ambigs_for_adaption) {
2774  // Go through the ambigs vector and see whether we have already seen
2775  // enough times all the characters represented by the ambigs vector.
2776  const UnicharIdVector *ambigs =
2778  int ambigs_size = (ambigs == NULL) ? 0 : ambigs->size();
2779  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2780  ADAPT_CLASS ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]];
2781  assert(ambig_class != NULL);
2782  if (ambig_class->NumPermConfigs == 0 &&
2783  ambig_class->MaxNumTimesSeen <
2785  if (classify_learning_debug_level >= 1) {
2786  tprintf("Ambig %s has not been seen enough times,"
2787  " not making config for %s permanent\n",
2788  getDict().getUnicharset().debug_str(
2789  (*ambigs)[ambig]).string(),
2790  getDict().getUnicharset().debug_str(class_id).string());
2791  }
2792  return false;
2793  }
2794  }
2795  }
2796  return true;
2797 }
uinT8 MaxNumTimesSeen
Definition: adaptive.h:66
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:178
GenericVector< UNICHAR_ID > UnicharIdVector
Definition: ambigs.h:34
#define NULL
Definition: host.h:144
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:430
Dict & getDict()
Definition: classify.h:62
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
uinT8 NumPermConfigs
Definition: adaptive.h:65
int matcher_min_examples_for_prototyping
Definition: classify.h:389
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:391
int classify_learning_debug_level
Definition: classify.h:380
int size() const
Definition: genericvector.h:59
bool use_ambigs_for_adaption
Definition: ccutil.h:93
const UnicharAmbigs & getUnicharAmbigs()
Definition: dict.h:106
uinT8 NumTimesSeen
Definition: adaptive.h:41
void tesseract::Classify::UpdateAmbigsGroup ( CLASS_ID  class_id,
const DENORM & denorm,
TBLOB * Blob 
)

Definition at line 2799 of file adaptmatch.cpp.

2800  {
2801  const UnicharIdVector *ambigs =
2803  int ambigs_size = (ambigs == NULL) ? 0 : ambigs->size();
2804  if (classify_learning_debug_level >= 1) {
2805  tprintf("Running UpdateAmbigsGroup for %s class_id=%d\n",
2806  getDict().getUnicharset().debug_str(class_id).string(), class_id);
2807  }
2808  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2809  CLASS_ID ambig_class_id = (*ambigs)[ambig];
2810  const ADAPT_CLASS ambigs_class = AdaptedTemplates->Class[ambig_class_id];
2811  for (int cfg = 0; cfg < MAX_NUM_CONFIGS; ++cfg) {
2812  if (ConfigIsPermanent(ambigs_class, cfg)) continue;
2813  const TEMP_CONFIG config =
2814  TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg);
2815  if (config != NULL && TempConfigReliable(ambig_class_id, config)) {
2816  if (classify_learning_debug_level >= 1) {
2817  tprintf("Making config %d of %s permanent\n", cfg,
2818  getDict().getUnicharset().debug_str(
2819  ambig_class_id).string());
2820  }
2821  MakePermanent(AdaptedTemplates, ambig_class_id, cfg, denorm, Blob);
2822  }
2823  }
2824  }
2825 }
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93
#define MAX_NUM_CONFIGS
Definition: intproto.h:44
GenericVector< UNICHAR_ID > UnicharIdVector
Definition: ambigs.h:34
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:187
#define NULL
Definition: host.h:144
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:430
Dict & getDict()
Definition: classify.h:62
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int classify_learning_debug_level
Definition: classify.h:380
int size() const
Definition: genericvector.h:59
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, const DENORM &denorm, TBLOB *Blob)
const UnicharAmbigs & getUnicharAmbigs()
Definition: dict.h:106
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
void tesseract::Classify::WriteAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES  Templates 
)

This routine saves Templates to File in a binary format.

Parameters
File open file to write Templates to (written in binary format)
Templates set of adapted templates to write to File
Note
Globals: none
Exceptions: none
History: Mon Mar 18 15:07:32 1991, DSJ, Created.

Definition at line 507 of file adaptive.cpp.

507  {
508  int i;
509 
510  /* first write the high level adaptive template struct */
511  fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
512 
513  /* then write out the basic integer templates */
514  WriteIntTemplates (File, Templates->Templates, unicharset);
515 
516  /* then write out the adaptive info for each class */
517  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
518  WriteAdaptedClass (File, Templates->Class[i],
519  Templates->Templates->Class[i]->NumConfigs);
520  }
521 } /* WriteAdaptedTemplates */
uinT8 NumConfigs
Definition: intproto.h:108
INT_TEMPLATES Templates
Definition: adaptive.h:77
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:1155
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:122
UNICHARSET unicharset
Definition: ccutil.h:72
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs)
Definition: adaptive.cpp:461
void tesseract::Classify::WriteIntTemplates ( FILE *  File,
INT_TEMPLATES  Templates,
const UNICHARSET & target_unicharset 
)

Definition at line 1155 of file intproto.cpp.

1156  {
1157 /*
1158  ** Parameters:
1159  ** File open file to write templates to
1160  ** Templates templates to save into File
1161  ** Globals: none
1162  ** Operation: This routine writes Templates to File. The format
1163  ** is an efficient binary format. File must already be open
1164  ** for writing.
1165  ** Return: none
1166  ** Exceptions: none
1167  ** History: Wed Feb 27 11:48:46 1991, DSJ, Created.
1168  */
1169  int i, j;
1170  INT_CLASS Class;
1171  int unicharset_size = target_unicharset.size();
1172  int version_id = -5; // When negated by the reader -1 becomes +1 etc.
1173 
1174  if (Templates->NumClasses != unicharset_size) {
1175  cprintf("Warning: executing WriteIntTemplates() with %d classes in"
1176  " Templates, while target_unicharset size is %d\n",
1177  Templates->NumClasses, unicharset_size);
1178  }
1179 
1180  /* first write the high level template struct */
1181  fwrite(&unicharset_size, sizeof(unicharset_size), 1, File);
1182  fwrite(&version_id, sizeof(version_id), 1, File);
1183  fwrite(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners),
1184  1, File);
1185  fwrite(&Templates->NumClasses, sizeof(Templates->NumClasses), 1, File);
1186 
1187  /* then write out the class pruners */
1188  for (i = 0; i < Templates->NumClassPruners; i++)
1189  fwrite(Templates->ClassPruners[i],
1190  sizeof(CLASS_PRUNER_STRUCT), 1, File);
1191 
1192  /* then write out each class */
1193  for (i = 0; i < Templates->NumClasses; i++) {
1194  Class = Templates->Class[i];
1195 
1196  /* first write out the high level struct for the class */
1197  fwrite(&Class->NumProtos, sizeof(Class->NumProtos), 1, File);
1198  fwrite(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File);
1199  ASSERT_HOST(Class->NumConfigs == this->fontset_table_.get(Class->font_set_id).size);
1200  fwrite(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File);
1201  for (j = 0; j < Class->NumConfigs; ++j) {
1202  fwrite(&Class->ConfigLengths[j], sizeof(uinT16), 1, File);
1203  }
1204 
1205  /* then write out the proto lengths */
1206  if (MaxNumIntProtosIn (Class) > 0) {
1207  fwrite ((char *) (Class->ProtoLengths), sizeof (uinT8),
1208  MaxNumIntProtosIn (Class), File);
1209  }
1210 
1211  /* then write out the proto sets */
1212  for (j = 0; j < Class->NumProtoSets; j++)
1213  fwrite ((char *) Class->ProtoSets[j],
1214  sizeof (PROTO_SET_STRUCT), 1, File);
1215 
1216  /* then write the fonts info */
1217  fwrite(&Class->font_set_id, sizeof(int), 1, File);
1218  }
1219 
1220  /* Write the fonts info tables */
1222  this->fontinfo_table_.write(File,
1225 } /* WriteIntTemplates */
bool write_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:72
UnicityTable< FontSet > fontset_table_
Definition: classify.h:451
int size() const
Definition: unicharset.h:264
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:109
uinT8 NumConfigs
Definition: intproto.h:108
bool write_spacing_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:112
bool write_set(FILE *f, const FontSet &fs)
Definition: fontinfo.cpp:153
uinT8 * ProtoLengths
Definition: intproto.h:110
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:443
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:122
unsigned short uinT16
Definition: host.h:101
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:123
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
#define MaxNumIntProtosIn(C)
Definition: intproto.h:160
uinT16 ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:111
unsigned char uinT8
Definition: host.h:99
uinT16 NumProtos
Definition: intproto.h:106
void cprintf(const char *format,...)
Definition: callcpp.cpp:41
#define ASSERT_HOST(x)
Definition: errcode.h:84
uinT8 NumProtoSets
Definition: intproto.h:107

Member Data Documentation

ADAPT_TEMPLATES tesseract::Classify::AdaptedTemplates

Definition at line 430 of file classify.h.

BIT_VECTOR tesseract::Classify::AllConfigsOff

Definition at line 437 of file classify.h.

BIT_VECTOR tesseract::Classify::AllConfigsOn

Definition at line 435 of file classify.h.

BIT_VECTOR tesseract::Classify::AllProtosOff

Definition at line 436 of file classify.h.

BIT_VECTOR tesseract::Classify::AllProtosOn

Definition at line 433 of file classify.h.

double tesseract::Classify::certainty_scale = 20.0

"Certainty scaling factor"

Definition at line 398 of file classify.h.

int tesseract::Classify::classify_adapt_feature_threshold = 230

"Threshold for good features during adaptive 0-255"

Definition at line 404 of file classify.h.

int tesseract::Classify::classify_adapt_proto_threshold = 230

"Threshold for good protos during adaptive 0-255"

Definition at line 402 of file classify.h.

bool tesseract::Classify::classify_bln_numeric_mode = 0

"Assume the input is numbers [0-9]."

Definition at line 455 of file classify.h.

double tesseract::Classify::classify_char_norm_range = 0.2

"Character Normalization Range ..."

Definition at line 363 of file classify.h.

double tesseract::Classify::classify_character_fragments_garbage_certainty_threshold = -3.0

"Exclude fragments that do not match any whole character" " with at least this certainty"

Definition at line 410 of file classify.h.

int tesseract::Classify::classify_class_pruner_multiplier = 30

"Class Pruner Multiplier 0-255: "

Definition at line 422 of file classify.h.

int tesseract::Classify::classify_class_pruner_threshold = 229

"Class Pruner Threshold 0-255"

Definition at line 420 of file classify.h.

int tesseract::Classify::classify_cp_cutoff_strength = 7

"Class Pruner CutoffStrength: "

Definition at line 424 of file classify.h.

bool tesseract::Classify::classify_debug_character_fragments = FALSE

"Bring up graphical debugging windows for fragments training"

Definition at line 412 of file classify.h.

int tesseract::Classify::classify_debug_level = 0

"Classify debug level"

Definition at line 357 of file classify.h.

bool tesseract::Classify::classify_enable_adaptive_debugger = 0

"Enable match debugger"

Definition at line 377 of file classify.h.

bool tesseract::Classify::classify_enable_adaptive_matcher = 1

"Enable adaptive classifier"

Definition at line 372 of file classify.h.

bool tesseract::Classify::classify_enable_learning = true

"Enable adaptive classifier"

Definition at line 356 of file classify.h.

int tesseract::Classify::classify_integer_matcher_multiplier = 14

"Integer Matcher Multiplier 0-255: "

Definition at line 426 of file classify.h.

char* tesseract::Classify::classify_learn_debug_str = ""

"Class str to debug learning"

Definition at line 416 of file classify.h.

int tesseract::Classify::classify_learning_debug_level = 0

"Learning Debug Level: "

Definition at line 380 of file classify.h.

double tesseract::Classify::classify_max_norm_scale_x = 0.325

"Max char x-norm scale ..."

Definition at line 365 of file classify.h.

double tesseract::Classify::classify_max_norm_scale_y = 0.325

"Max char y-norm scale ..."

Definition at line 367 of file classify.h.

double tesseract::Classify::classify_min_norm_scale_x = 0.0

"Min char x-norm scale ..."

Definition at line 364 of file classify.h.

double tesseract::Classify::classify_min_norm_scale_y = 0.0

"Min char y-norm scale ..."

Definition at line 366 of file classify.h.

double tesseract::Classify::classify_misfit_junk_penalty = 0.0

"Penalty to apply when a non-alnum is vertically out of " "its expected textline position"

Definition at line 396 of file classify.h.

int tesseract::Classify::classify_norm_method = character

"Normalization Method ..."

Definition at line 361 of file classify.h.

bool tesseract::Classify::classify_save_adapted_templates = 0

"Save adapted templates to a file"

Definition at line 376 of file classify.h.

bool tesseract::Classify::classify_use_pre_adapted_templates = 0

"Use pre-adapted classifier templates"

Definition at line 374 of file classify.h.

bool tesseract::Classify::disable_character_fragments = TRUE

"Do not include character fragments in the" " results of the classifier"

Definition at line 407 of file classify.h.

bool tesseract::Classify::EnableLearning

Definition at line 439 of file classify.h.

FEATURE_DEFS_STRUCT tesseract::Classify::feature_defs_
protected

Definition at line 459 of file classify.h.

UnicityTable<FontInfo> tesseract::Classify::fontinfo_table_

Definition at line 443 of file classify.h.

UnicityTable<FontSet> tesseract::Classify::fontset_table_

Definition at line 451 of file classify.h.

int tesseract::Classify::il1_adaption_test = 0

"Dont adapt to i/I at beginning of word"

Definition at line 453 of file classify.h.

IntegerMatcher tesseract::Classify::im_
protected

Definition at line 455 of file classify.h.

double tesseract::Classify::matcher_avg_noise_size = 12.0

"Avg. noise blob length: "

Definition at line 386 of file classify.h.

double tesseract::Classify::matcher_bad_match_pad = 0.15

"Bad Match Pad (0-1)"

Definition at line 384 of file classify.h.

double tesseract::Classify::matcher_clustering_max_angle_delta = 0.015

"Maximum angle delta for prototype clustering"

Definition at line 393 of file classify.h.

int tesseract::Classify::matcher_debug_flags = 0

"Matcher Debug Flags"

Definition at line 379 of file classify.h.

int tesseract::Classify::matcher_debug_level = 0

"Matcher Debug Level"

Definition at line 378 of file classify.h.

bool tesseract::Classify::matcher_debug_separate_windows = FALSE

"Use two different windows for debugging the matching: " "One for the protos and one for the features."

Definition at line 415 of file classify.h.

double tesseract::Classify::matcher_good_threshold = 0.125

"Good Match (0-1)"

Definition at line 381 of file classify.h.

double tesseract::Classify::matcher_great_threshold = 0.0

"Great Match (0-1)"

Definition at line 382 of file classify.h.

int tesseract::Classify::matcher_min_examples_for_prototyping = 3

"Reliable Config Threshold"

Definition at line 389 of file classify.h.

double tesseract::Classify::matcher_perfect_threshold = 0.02

"Perfect Match (0-1)"

Definition at line 383 of file classify.h.

int tesseract::Classify::matcher_permanent_classes_min = 1

"Min # of permanent classes"

Definition at line 387 of file classify.h.

double tesseract::Classify::matcher_rating_margin = 0.1

"New template margin (0-1)"

Definition at line 385 of file classify.h.

int tesseract::Classify::matcher_sufficient_examples_for_prototyping = 5

"Enable adaption even if the ambiguities have not been seen"

Definition at line 391 of file classify.h.

NORM_PROTOS* tesseract::Classify::NormProtos

Definition at line 441 of file classify.h.

INT_TEMPLATES tesseract::Classify::PreTrainedTemplates

Definition at line 426 of file classify.h.

bool tesseract::Classify::prioritize_division = FALSE

"Prioritize blob division over chopping"

Definition at line 354 of file classify.h.

BIT_VECTOR tesseract::Classify::PrunedProtos

Definition at line 434 of file classify.h.

double tesseract::Classify::rating_scale = 1.5

"Rating scaling factor"

Definition at line 397 of file classify.h.

ShapeTable* tesseract::Classify::shape_table_
protected

Definition at line 464 of file classify.h.

BIT_VECTOR tesseract::Classify::TempProtoMask

Definition at line 438 of file classify.h.

bool tesseract::Classify::tess_bn_matching = 0

"Baseline Normalized Matching"

Definition at line 371 of file classify.h.

bool tesseract::Classify::tess_cn_matching = 0

"Character Normalized Matching"

Definition at line 370 of file classify.h.

double tesseract::Classify::tessedit_class_miss_scale = 0.00390625

"Scale factor for features not used"

Definition at line 400 of file classify.h.

int tesseract::Classify::tessedit_single_match = FALSE

"Top choice only from CP"

Definition at line 355 of file classify.h.


The documentation for this class was generated from the following files: