19 #ifndef TESSERACT_CLASSIFY_CLASSIFY_H__
20 #define TESSERACT_CLASSIFY_CLASSIFY_H__
41 static const int kUnknownFontinfoId = -1;
42 static const int kBlankFontinfoId = -2;
90 const uinT8* normalization_factors,
91 const uinT16* expected_num_features,
130 const char* correct_text,
WERD_RES *word);
152 const uinT8* norm_factors,
156 const TBOX& blob_box,
170 const uinT8* cn_factors,
177 double im_rating,
int feature_misses,
179 int blob_length,
const uinT8* cn_factors);
182 BLOB_CHOICE_LIST *Choices);
195 #ifndef GRAPHICS_DISABLED
237 int class_id,
int config_id)
const;
249 int int_result_config)
const;
288 BLOB_CHOICE_LIST *Choices,
297 uinT8* CharNormArray,
303 uinT8* PrunerNormArray,
304 uinT8* CharNormArray,
306 inT32 *FeatureOutlineIndex);
312 uinT8* char_norm_array,
313 uinT8* pruner_array);
322 int y_offset,
const TBOX &wbox);
326 uinT8* char_norm_array);
333 bool* pretrained_on,
int* shape_id);
354 "Prioritize blob division over chopping");
363 "Character Normalization Range ...");
374 "Use pre-adapted classifier templates");
376 "Save adapted templates to a file");
389 "Reliable Config Threshold");
391 "Enable adaption even if the ambiguities have not been seen");
393 "Maximum angle delta for prototype clustering");
395 "Penalty to apply when a non-alnum is vertically out of "
396 "its expected textline position");
400 "Scale factor for features not used");
402 "Threshold for good protos during adaptive 0-255");
404 "Threshold for good features during adaptive 0-255");
406 "Do not include character fragments in the"
407 " results of the classifier");
409 "Exclude fragments that do not match any whole character"
410 " with at least this certainty");
412 "Bring up graphical debugging windows for fragments training");
414 "Use two different windows for debugging the matching: "
415 "One for the protos and one for the features.");
420 "Class Pruner Threshold 0-255");
422 "Class Pruner Multiplier 0-255: ");
424 "Class Pruner CutoffStrength: ");
426 "Integer Matcher Multiplier 0-255: ");
455 "Assume the input is numbers [0-9].");
471 int AdaptiveMatcherCalls;
472 int BaselineClassifierCalls;
473 int CharNormClassifierCalls;
474 int AmbigClassifierCalls;
475 int NumWordsAdaptedTo;
476 int NumCharsAdaptedTo;
477 int NumBaselineClassesTried;
478 int NumCharNormClassesTried;
479 int NumAmbigClassesTried;
480 int NumClassesOutput;
481 int NumAdaptationsFailed;
487 bool FeaturesHaveBeenExtracted;
510 #endif // TESSERACT_CLASSIFY_CLASSIFY_H__
int CharNormClassifier(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, ADAPT_RESULTS *Results)
int classify_class_pruner_threshold
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
double matcher_rating_margin
CP_RESULT_STRUCT CLASS_PRUNER_RESULTS[MAX_NUM_CLASSES]
void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
void AdaptiveClassifier(TBLOB *Blob, const DENORM &denorm, BLOB_CHOICE_LIST *Choices, CLASS_PRUNER_RESULTS cp_results)
int classify_adapt_feature_threshold
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, const uinT8 *cn_factors)
void DebugAdaptiveClassifier(TBLOB *Blob, const DENORM &denorm, ADAPT_RESULTS *Results)
UnicityTable< FontSet > fontset_table_
void InitAdaptedClass(TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
void GetAdaptThresholds(TWERD *Word, const DENORM &denorm, const WERD_CHOICE &BestChoice, const WERD_CHOICE &BestRawChoice, FLOAT32 Thresholds[])
NORM_PROTOS * ReadNormProtos(FILE *File, inT64 end_offset)
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int num_classes, const TBOX &blob_box, CLASS_PRUNER_RESULTS results, ADAPT_RESULTS *final_results)
void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs)
#define BOOL_VAR_H(name, val, comment)
int classify_integer_matcher_multiplier
CLASS_ID GetClassToDebug(const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
void LearnPieces(const char *filename, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
UnicityTable< FontSet > & get_fontset_table()
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
uinT16 CLASS_CUTOFF_ARRAY[MAX_NUM_CLASSES]
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
double classify_max_norm_scale_y
void DisplayAdaptedChar(TBLOB *blob, const DENORM &denorm, INT_CLASS_STRUCT *int_class)
bool LooksLikeGarbage(const DENORM &denorm, TBLOB *blob)
void LearnWord(const char *filename, const char *rejmap, WERD_RES *word)
char * classify_learn_debug_str
bool classify_enable_adaptive_debugger
void SetAdaptiveThreshold(FLOAT32 Threshold)
int classify_adapt_proto_threshold
double matcher_clustering_max_angle_delta
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
bool classify_enable_adaptive_matcher
void InitAdaptiveClassifier(bool load_pre_trained_templates)
int ShapeIDToClassID(int shape_id) const
void EndAdaptiveClassifier()
ShapeTable * shape_table_
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const DENORM &denorm, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
void AddNewResult(ADAPT_RESULTS *results, CLASS_ID class_id, int shape_id, FLOAT32 rating, bool adapted, int config, int fontinfo_id, int fontinfo_id2)
ADAPT_TEMPLATES AdaptedTemplates
int matcher_permanent_classes_min
int CharNormTrainingSample(bool pruner_only, const TrainingSample &sample, GenericVector< ShapeRating > *results)
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
double classify_char_norm_range
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, CP_RESULT_STRUCT *results)
#define double_VAR_H(name, val, comment)
int AdaptableWord(TWERD *Word, const WERD_CHOICE &BestChoiceWord, const WERD_CHOICE &RawChoiceWord)
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
#define STRING_VAR_H(name, val, comment)
double matcher_avg_noise_size
double matcher_good_threshold
void ClearCharNormArray(uinT8 *char_norm_array)
INT_TEMPLATES PreTrainedTemplates
void UpdateAmbigsGroup(CLASS_ID class_id, const DENORM &denorm, TBLOB *Blob)
double classify_character_fragments_garbage_certainty_threshold
bool disable_character_fragments
bool classify_bln_numeric_mode
UnicityTable< FontInfo > & get_fontinfo_table()
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
UnicityTable< FontInfo > fontinfo_table_
void ResetFeaturesHaveBeenExtracted()
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
int matcher_min_examples_for_prototyping
bool classify_enable_learning
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
INT_TEMPLATES ReadIntTemplates(FILE *File)
int tessedit_single_match
void AdaptToChar(TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold)
double classify_misfit_junk_penalty
int matcher_sufficient_examples_for_prototyping
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
void RemoveBadMatches(ADAPT_RESULTS *Results)
int classify_learning_debug_level
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results)
void ClassifyAsNoise(ADAPT_RESULTS *Results)
bool matcher_debug_separate_windows
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, const uinT8 *cn_factors, INT_RESULT_STRUCT &int_result, ADAPT_RESULTS *final_results)
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
bool AdaptiveClassifierIsFull()
#define INT_VAR_H(name, val, comment)
bool classify_use_pre_adapted_templates
FLOAT32 ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch)
bool classify_save_adapted_templates
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
int classify_cp_cutoff_strength
void AdaptToPunc(TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold)
double classify_min_norm_scale_y
int GetBaselineFeatures(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *CharNormArray, inT32 *BlobLength)
INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos, const UNICHARSET &target_unicharset)
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, const DENORM &denorm, TBLOB *Blob)
double matcher_perfect_threshold
void ShowBestMatchFor(TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int shape_id, BOOL8 AdaptiveOn, BOOL8 PreTrainedOn, ADAPT_RESULTS *Results)
void AmbigClassifier(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, ADAPT_CLASS *Classes, UNICHAR_ID *Ambiguities, ADAPT_RESULTS *Results)
const ShapeTable * shape_table() const
double classify_min_norm_scale_x
double tessedit_class_miss_scale
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
void PrintAdaptiveStatistics(FILE *File)
int GetCharNormFeatures(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *PrunerNormArray, uinT8 *CharNormArray, inT32 *BlobLength, inT32 *FeatureOutlineIndex)
int classify_class_pruner_multiplier
ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File)
double matcher_bad_match_pad
STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
double matcher_great_threshold
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
void ResetAdaptiveClassifierInternal()
void DoAdaptiveMatch(TBLOB *Blob, const DENORM &denorm, ADAPT_RESULTS *Results)
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, const DENORM &denorm, CLASS_ID CorrectClass)
double classify_max_norm_scale_x
bool classify_debug_character_fragments
FEATURE_DEFS_STRUCT feature_defs_