68 #define ADAPT_TEMPLATE_SUFFIX ".a"
70 #define MAX_MATCHES 10
71 #define UNLIKELY_NUM_FEAT 200
73 #define MAX_ADAPTABLE_WERD_SIZE 40
75 #define ADAPTABLE_WERD_ADJUSTMENT (0.05)
77 #define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT)
79 #define WORST_POSSIBLE_RATING (1.0)
124 #define MarginalMatch(Rating) \
125 ((Rating) > matcher_great_threshold)
127 #define InitIntFX() (FeaturesHaveBeenExtracted = FALSE)
180 BLOB_CHOICE_LIST *Choices,
182 assert(Choices !=
NULL);
189 if (CPResults !=
NULL)
207 #ifndef GRAPHICS_DISABLED
212 NumClassesOutput += Choices->length();
213 if (Choices->length() == 0) {
215 tprintf (
"Empty classification!\n");
216 Choices =
new BLOB_CHOICE_LIST();
217 BLOB_CHOICE_IT temp_it;
218 temp_it.set_to_list(Choices);
229 int y_offset,
const TBOX &wbox) {
230 #ifndef GRAPHICS_DISABLED
231 const int kSampleSpaceWidth = 500;
233 *win =
new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
234 kSampleSpaceWidth * 2, 200,
true);
237 (*win)->Pen(64, 64, 64);
242 (*win)->ZoomToRectangle(wbox.
left(), wbox.
top(),
244 #endif // GRAPHICS_DISABLED
257 if (word_len == 0)
return;
259 float* thresholds =
NULL;
260 if (filename ==
NULL) {
271 tprintf(
"\n\nAdapting to word = %s\n",
273 thresholds =
new float[word_len];
278 char prev_map_char =
'0';
280 #ifndef GRAPHICS_DISABLED
282 if (learn_fragmented_word_debug_win_ !=
NULL) {
292 #endif // GRAPHICS_DISABLED
294 for (
int ch = 0; ch < word_len; ++ch) {
298 char rej_map_char = rejmap !=
NULL ? *rejmap++ :
'1';
301 float threshold = thresholds !=
NULL ? thresholds[ch] : 0.0f;
310 bool garbage =
false;
312 for (
int i = 0; i < start_blob; ++i) frag_blob = frag_blob->
next;
314 for (frag = 0; frag < word->
best_state[ch]; ++frag) {
318 frag_blob = frag_blob->
next;
325 for (frag = 0; frag < word->
best_state[ch]; ++frag) {
330 tokens[0].
string(), frag, word->
best_state[ch],
334 for (
int i = 0; i < tokens.
size(); i++) {
335 full_string += tokens[i];
336 if (i != tokens.
size() - 1)
380 prev_map_char = rej_map_char;
382 delete [] thresholds;
396 const char* correct_text,
WERD_RES *word) {
405 start, start + length - 1);
408 for (
int i = 0; i < start; ++i)
413 if (rotated_blob ==
NULL)
416 #ifndef GRAPHICS_DISABLED
422 learn_debug_win_->
Update();
427 blob->
plot(learn_fragments_debug_win_,
429 learn_fragments_debug_win_->
Update();
431 #endif // GRAPHICS_DISABLED
433 if (filename !=
NULL) {
445 tprintf(
"Adapting to char = %s, thr= %g font_id= %d\n",
449 AdaptToChar(rotated_blob, *denorm, class_id, font_id, threshold);
451 tprintf(
"Can't adapt to %s not in unicharset\n", correct_text);
453 if (rotated_blob != blob) {
484 File = fopen (Filename.
string(),
"wb");
486 cprintf (
"Unable to save adapted templates to %s!\n", Filename.
string());
488 cprintf (
"\nSaving adapted templates to %s ...", Filename.
string());
554 load_pre_trained_templates) {
564 tprintf(
"Error loading shape table!\n");
568 tprintf(
"Successfully loaded shape table!\n");
602 BaselineCutoffs[i] = 0;
611 File = fopen(Filename.
string(),
"rb");
616 cprintf(
"\nReading pre-adapted templates from %s ...\n",
626 BaselineCutoffs[i] = CharNormCutoffs[i];
638 tprintf(
"Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
639 NumAdaptationsFailed);
643 NumAdaptationsFailed = 0;
662 fprintf (File,
"\nADAPTIVE MATCHER STATISTICS:\n");
663 fprintf (File,
"\tNum blobs classified = %d\n", AdaptiveMatcherCalls);
664 fprintf (File,
"\tNum classes output = %d (Avg = %4.2f)\n",
666 ((AdaptiveMatcherCalls == 0) ? (0.0) :
667 ((
float) NumClassesOutput / AdaptiveMatcherCalls)));
668 fprintf (File,
"\t\tBaseline Classifier: %4d calls (%4.2f classes/call)\n",
669 BaselineClassifierCalls,
670 ((BaselineClassifierCalls == 0) ? (0.0) :
671 ((
float) NumBaselineClassesTried / BaselineClassifierCalls)));
672 fprintf (File,
"\t\tCharNorm Classifier: %4d calls (%4.2f classes/call)\n",
673 CharNormClassifierCalls,
674 ((CharNormClassifierCalls == 0) ? (0.0) :
675 ((
float) NumCharNormClassesTried / CharNormClassifierCalls)));
676 fprintf (File,
"\t\tAmbig Classifier: %4d calls (%4.2f classes/call)\n",
677 AmbigClassifierCalls,
678 ((AmbigClassifierCalls == 0) ? (0.0) :
679 ((
float) NumAmbigClassesTried / AmbigClassifierCalls)));
681 fprintf (File,
"\nADAPTIVE LEARNER STATISTICS:\n");
682 fprintf (File,
"\tNumber of words adapted to: %d\n", NumWordsAdaptedTo);
683 fprintf (File,
"\tNumber of chars adapted to: %d\n", NumCharsAdaptedTo);
786 BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
790 for (Fid = 0; Fid < Features->
NumFeatures; Fid++) {
796 Proto = &(TempProto->
Proto);
822 cprintf (
"Added new class '%s' with class id %d and %d protos.\n",
870 *FloatFeatures = Features;
897 int BestChoiceLength = BestChoiceWord.
length();
898 float adaptable_score =
901 BestChoiceLength > 0 &&
902 BestChoiceLength == Word->
NumBlobs() &&
953 assert(Class !=
NULL);
962 if (NumFeatures <= 0)
968 for (
int cfg = 0; cfg < IClass->
NumConfigs; ++cfg) {
970 SET_BIT(MatchingFontConfigs, cfg);
976 NumFeatures, IntFeatures,
983 if (IntResult.
Rating <= Threshold) {
986 cprintf (
"Found good match to perm config %d = %4.1f%%.\n",
998 cprintf (
"Increasing reliability of temp config %d to %d.\n",
1009 cprintf (
"Found poor match to temp config %d = %4.1f%%.\n",
1020 if (NewTempConfigId >= 0 &&
1026 #ifndef GRAPHICS_DISABLED
1038 #ifndef GRAPHICS_DISABLED
1044 norm_array, &bloblength);
1045 delete [] norm_array;
1049 num_features, features,
1052 cprintf (
"Best match to temp config %d = %4.1f%%.\n",
1056 ConfigMask = 1 << IntResult.
Config;
1059 num_features, features,
1096 cprintf (
"Rejecting punc = %s (Alternatives = ",
1104 #ifndef SECURE_NAMES
1106 cprintf (
"Adapting to punc = %s, thr= %g\n",
1109 AdaptToChar(Blob, denorm, ClassId, FontinfoId, Threshold);
1156 static_cast<inT16>(config),
1157 static_cast<inT16>(fontinfo_id),
1158 static_cast<inT16>(fontinfo_id2) };
1161 (old_match && rating >= old_match->
rating))
1168 old_match->
rating = rating;
1172 if (rating < results->best_match.rating &&
1217 AmbigClassifierCalls++;
1220 NULL, CharNormArray,
1222 if (NumFeatures <= 0) {
1223 delete [] CharNormArray;
1233 while (*Ambiguities >= 0) {
1234 ClassId = *Ambiguities;
1239 NumFeatures, IntFeatures,
1246 IntResult, Results);
1249 NumAmbigClassesTried++;
1251 delete [] CharNormArray;
1260 const uinT8* norm_factors,
1264 const TBOX& blob_box,
1267 int top = blob_box.
top();
1268 int bottom = blob_box.
bottom();
1269 for (
int c = 0; c < num_classes; c++) {
1270 CLASS_ID class_id = results[c].Class;
1279 num_features, features,
1286 int_result, final_results);
1296 ADAPT_CLASS* classes,
bool debug,
int class_id,
int bottom,
int top,
1297 float cp_rating,
int blob_length,
const uinT8* cn_factors,
1300 int fontinfo_id = kBlankFontinfoId;
1301 int fontinfo_id2 = kBlankFontinfoId;
1302 if (classes !=
NULL) {
1310 if (int_result.
Config2 >= 0) {
1317 int shape_id = fontinfo_id;
1319 double min_rating = 0.0;
1320 for (
int c = 0; c < shape.
size(); ++c) {
1321 int unichar_id = shape[c].unichar_id;
1322 fontinfo_id = shape[c].font_ids[0];
1323 if (shape[c].font_ids.
size() > 1)
1324 fontinfo_id2 = shape[c].font_ids[1];
1325 else if (fontinfo_id2 != kBlankFontinfoId)
1330 bottom, top, blob_length,
1332 if (c == 0 || rating < min_rating)
1333 min_rating = rating;
1335 AddNewResult(final_results, unichar_id, shape_id, rating,
1337 fontinfo_id, fontinfo_id2);
1340 int_result.
Rating = min_rating;
1347 bottom, top, blob_length,
1352 fontinfo_id, fontinfo_id2);
1354 int_result.
Rating = rating;
1361 double cp_rating,
double im_rating,
1363 int bottom,
int top,
1365 const uinT8* cn_factors) {
1368 cn_factors[unichar_id]);
1370 double vertical_penalty = 0.0;
1375 int min_bottom, max_bottom, min_top, max_top;
1377 &min_top, &max_top);
1379 tprintf(
"top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
1380 top, min_top, max_top, bottom, min_bottom, max_bottom);
1382 if (top < min_top || top > max_top ||
1383 bottom < min_bottom || bottom > max_bottom) {
1387 double result =cn_corrected + miss_penalty + vertical_penalty;
1391 tprintf(
"%s: %2.1f(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
1396 (cn_corrected - im_rating) * 100.0,
1397 cn_factors[unichar_id],
1398 miss_penalty * 100.0,
1399 vertical_penalty * 100.0);
1433 BaselineClassifierCalls++;
1436 Blob, denorm, Templates->
Templates, IntFeatures, CharNormArray,
1438 if (NumFeatures <= 0) {
1439 delete [] CharNormArray;
1444 CharNormArray, BaselineCutoffs, Results->
CPResults);
1446 NumBaselineClassesTried += NumClasses;
1456 delete [] CharNormArray;
1462 return Templates->
Class[ClassId]->
1495 CharNormClassifierCalls++;
1500 uinT8* PrunerNormArray =
new uinT8[num_pruner_classes];
1502 PrunerNormArray, CharNormArray,
1504 if (NumFeatures <= 0) {
1505 delete [] CharNormArray;
1506 delete [] PrunerNormArray;
1510 NumClasses =
PruneClasses(Templates, NumFeatures, IntFeatures,
1518 NumCharNormClassesTried += NumClasses;
1521 MasterMatcher(Templates, NumFeatures, IntFeatures, CharNormArray,
1524 delete [] CharNormArray;
1525 delete [] PrunerNormArray;
1540 for (
int f = 0;
f < num_features; ++
f) {
1542 TBOX fbox(feature.
X, feature.
Y, feature.
X, feature.
Y);
1554 uinT8* pruner_norm_array =
new uinT8[num_pruner_classes];
1566 delete [] pruner_norm_array;
1569 for (
int i = 0; i < num_classes; ++i) {
1570 int class_id = adapt_results->
CPResults[i].Class;
1571 int shape_id = class_id;
1586 blob_box, adapt_results->
CPResults, adapt_results);
1588 for (
int i = 0; i < adapt_results->
NumMatches; i++) {
1594 delete [] char_norm_array;
1595 delete adapt_results;
1596 return num_features;
1620 Rating /= 1.0 + Rating;
1623 kBlankFontinfoId, kBlankFontinfoId);
1631 for (
int i = 0; i < results->
NumMatches; i++) {
1633 return &results->
match[i];
1643 kBlankFontinfoId, kBlankFontinfoId};
1645 return (entry ==
NULL) ? poor_result : *entry;
1677 BLOB_CHOICE_LIST *Choices) {
1678 assert(Choices !=
NULL);
1681 BLOB_CHOICE_IT temp_it;
1682 bool contains_nonfrag =
false;
1683 temp_it.set_to_list(Choices);
1684 int choices_length = 0;
1697 for (
int i = 0; i < Results->
NumMatches; i++) {
1703 if (temp_it.length()+1 == max_matches &&
1704 !contains_nonfrag && current_is_frag) {
1716 Rating = Certainty = next.
rating;
1720 inT16 min_xheight, max_xheight;
1722 &min_xheight, &max_xheight);
1724 fontinfo_id, fontinfo_id2,
1726 min_xheight, max_xheight, adapted));
1727 contains_nonfrag |= !current_is_frag;
1729 if (choices_length >= max_matches)
break;
1736 #ifndef GRAPHICS_DISABLED
1751 for (
int i = 0; i < Results->
NumMatches; i++) {
1755 const char *Prompt =
1756 "Left-click in IntegerMatch Window to continue or right click to debug...";
1759 bool adaptive_on =
true;
1760 bool pretrained_on =
true;
1762 const char* debug_mode;
1765 debug_mode =
"Adaptive Templates Only";
1766 else if (!adaptive_on)
1767 debug_mode =
"PreTrained Templates Only";
1769 debug_mode =
"All Templates";
1771 tprintf(
"Debugging class %d = %s in mode %s ...",
1777 pretrained_on, Results);
1780 &pretrained_on, &shape_id)) != 0);
1813 AdaptiveMatcherCalls++;
1921 Ambiguities[i] = -1;
1923 Ambiguities[0] = -1;
1961 uinT8* CharNormArray,
1962 inT32 *BlobLength) {
1965 if (!FeaturesHaveBeenExtracted) {
1967 CharNormFeatures, &FXInfo,
NULL);
1968 FeaturesHaveBeenExtracted =
TRUE;
1972 *BlobLength = FXInfo.
NumBL;
1976 for (Src = BaselineFeatures, End = Src + FXInfo.
NumBL, Dest = IntFeatures;
1981 *BlobLength = FXInfo.
NumBL;
1982 return FXInfo.
NumBL;
1986 FeaturesHaveBeenExtracted =
FALSE;
1992 BLOB_CHOICE_LIST *ratings =
new BLOB_CHOICE_LIST();
1994 BLOB_CHOICE_IT ratings_it(ratings);
1998 ratings, unicharset);
2000 for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list();
2001 ratings_it.forward()) {
2006 return (ratings_it.data()->certainty() <
2049 uinT8* PrunerNormArray,
2050 uinT8* CharNormArray,
2052 inT32 *FeatureOutlineArray) {
2058 if (!FeaturesHaveBeenExtracted) {
2060 CharNormFeatures, &FXInfo,
2061 FeatureOutlineIndex);
2062 FeaturesHaveBeenExtracted =
TRUE;
2066 *BlobLength = FXInfo.
NumBL;
2070 for (Src = CharNormFeatures, End = Src + FXInfo.
NumCN, Dest = IntFeatures;
2073 for (
int i = 0; FeatureOutlineArray && i < FXInfo.
NumCN; ++i) {
2074 FeatureOutlineArray[i] = FeatureOutlineIndex[i];
2086 *BlobLength = FXInfo.
NumBL;
2087 return (FXInfo.
NumCN);
2094 uinT8* char_norm_array,
2095 uinT8* pruner_array) {
2097 if (pruner_array !=
NULL) {
2102 templates->
NumClasses *
sizeof(pruner_array[0]));
2105 for (
int id = 0;
id < templates->
NumClasses; ++id) {
2108 for (
int config = 0; config < fs.
size; ++config) {
2110 for (
int c = 0; c < shape.
size(); ++c) {
2111 if (char_norm_array[shape[c].unichar_id] < pruner_array[
id])
2112 pruner_array[id] = char_norm_array[shape[c].unichar_id];
2148 int MaxProtoId, OldMaxProtoId;
2161 Class = Templates->
Class[ClassId];
2164 ++NumAdaptationsFailed;
2166 cprintf(
"Cannot make new temporary config: maximum number exceeded.\n");
2173 BlobLength, NumFeatures, Features,
2179 for (i = 0; i < NumOldProtos; i++)
2183 BlobLength, NumFeatures, Features,
2191 ++NumAdaptationsFailed;
2193 cprintf(
"Cannot make new temp protos: maximum number exceeded.\n");
2204 cprintf(
"Making new temp config %d fontinfo id %d"
2205 " using %d old and %d new protos.\n",
2207 NumOldProtos, MaxProtoId - OldMaxProtoId);
2250 for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
2251 ProtoStart < LastBad; ProtoStart = ProtoEnd) {
2252 F1 = Features->
Features[*ProtoStart];
2257 for (ProtoEnd = ProtoStart + 1,
2261 F2 = Features->
Features[*ProtoEnd];
2266 AngleDelta = fabs(A1 - A2);
2267 if (AngleDelta > 0.5)
2268 AngleDelta = 1.0 - AngleDelta;
2271 fabs(X1 - X2) > SegmentLength ||
2272 fabs(Y1 - Y2) > SegmentLength)
2276 F2 = Features->
Features[*(ProtoEnd - 1)];
2286 Proto = &(TempProto->
Proto);
2291 Proto->
Length = SegmentLength;
2293 Proto->
X = (X1 + X2) / 2.0;
2333 Class = Templates->
Class[ClassId];
2344 "PERM_CONFIG_STRUCT");
2360 tprintf(
"Making config %d for %s (ClassId %d) permanent:"
2361 " fontinfo id %d, ambiguities '",
2362 ConfigId,
getDict().getUnicharset().debug_str(ClassId).
string(),
2365 *AmbigsPointer >= 0; ++AmbigsPointer)
2425 for (
int i = 0; i < Results->
NumMatches; ++i) {
2426 tprintf(
"%s(%d), shape %d, %.2f ",
2453 static const char* romans =
"i v x I V X";
2464 for (Next = NextGood = 0; Next < Results->
NumMatches; Next++) {
2465 if (Results->
match[Next].
rating <= BadMatchThreshold) {
2470 Results->
match[NextGood++] = Results->
match[Next];
2472 scored_one.
rating >= BadMatchThreshold) {
2473 Results->
match[NextGood] = scored_one;
2477 scored_zero.
rating >= BadMatchThreshold) {
2478 Results->
match[NextGood] = scored_zero;
2485 for (Next = NextGood = 0; Next < Results->
NumMatches; Next++) {
2486 if (Results->
match[Next].
rating <= BadMatchThreshold)
2487 Results->
match[NextGood++] = Results->
match[Next];
2508 static char punc_chars[] =
". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2509 static char digit_chars[] =
"0 1 2 3 4 5 6 7 8 9";
2513 for (Next = NextGood = 0; Next < Results->
NumMatches; Next++) {
2515 if (strstr(punc_chars,
2518 Results->
match[NextGood++] = match;
2521 if (strstr(digit_chars,
2523 if (digit_count < 1)
2524 Results->
match[NextGood++] = match;
2527 Results->
match[NextGood++] = match;
2551 ClipToRange<int>(255 * Threshold, 0, 255));
2553 ClipToRange<int>(255 * Threshold, 0, 255));
2586 int NumCNFeatures = 0, NumBLFeatures = 0;
2591 static int next_config = -1;
2593 if (PreTrainedOn) next_config = -1;
2598 cprintf (
"%d is not a legal class id!!\n", ClassId);
2609 if (PreTrainedOn && shape_id >= 0) {
2611 tprintf(
"No built-in templates for class/shape %d\n", shape_id);
2614 CNFeatures,
NULL, CNAdjust,
2616 if (NumCNFeatures <= 0) {
2617 tprintf(
"Illegal blob (char norm features)!\n");
2622 NumCNFeatures, CNFeatures,
2629 0, BlobLength, CNAdjust,
2637 tprintf(
"Invalid adapted class id: %d\n", ClassId);
2641 tprintf(
"No AD templates for class %d = %s\n",
2647 BLFeatures, BLAdjust,
2649 if (NumBLFeatures <= 0)
2650 tprintf(
"Illegal blob (baseline features)!\n");
2655 NumBLFeatures, BLFeatures,
2670 if (next_config < 0) {
2671 ConfigMask = 1 << BLResult.
Config;
2674 ConfigMask = 1 << next_config;
2680 tprintf(
"Adaptive Class ID: %d\n", ClassId);
2683 NumBLFeatures, BLFeatures,
2693 }
else if (shape_id >= 0) {
2694 ConfigMask = 1 << CNResult.
Config;
2697 tprintf(
"Static Shape ID: %d\n", shape_id);
2701 NumCNFeatures, CNFeatures,
2709 0, BlobLength, CNAdjust,
2721 int class_id,
int config_id)
const {
2729 return class_string;
2734 int int_result_config)
const {
2737 if (font_set_id < 0)
2738 return kBlankFontinfoId;
2741 return fs.
configs[int_result_config];
2751 for (
int config = 0; config < fs.
size; ++config) {
2752 if (fs.
configs[config] == shape_id)
2756 tprintf(
"Shape %d not found\n", shape_id);
2765 tprintf(
"NumTimesSeen for config of %s is %d\n",
2766 getDict().getUnicharset().debug_str(class_id).
string(),
2778 int ambigs_size = (ambigs ==
NULL) ? 0 : ambigs->
size();
2779 for (
int ambig = 0; ambig < ambigs_size; ++ambig) {
2781 assert(ambig_class !=
NULL);
2786 tprintf(
"Ambig %s has not been seen enough times,"
2787 " not making config for %s permanent\n",
2788 getDict().getUnicharset().debug_str(
2789 (*ambigs)[ambig]).
string(),
2790 getDict().getUnicharset().debug_str(class_id).
string());
2803 int ambigs_size = (ambigs ==
NULL) ? 0 : ambigs->
size();
2805 tprintf(
"Running UpdateAmbigsGroup for %s class_id=%d\n",
2806 getDict().getUnicharset().debug_str(class_id).
string(), class_id);
2808 for (
int ambig = 0; ambig < ambigs_size; ++ambig) {
2809 CLASS_ID ambig_class_id = (*ambigs)[ambig];
2817 tprintf(
"Making config %d of %s permanent\n", cfg,
2818 getDict().getUnicharset().debug_str(
2819 ambig_class_id).
string());
int CharNormClassifier(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, ADAPT_RESULTS *Results)
#define reset_bit(array, bit)
float cn_feature(int index) const
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
#define IsEmptyAdaptedClass(Class)
#define WORST_POSSIBLE_RATING
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
bool get_isalpha(UNICHAR_ID unichar_id) const
double matcher_rating_margin
#define MarginalMatch(Rating)
CP_RESULT_STRUCT CLASS_PRUNER_RESULTS[MAX_NUM_CLASSES]
void EndDangerousAmbigs()
#define LENGTH_COMPRESSION
STRING DebugStr(int shape_id) const
#define ConfigIsPermanent(Class, ConfigId)
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
FLOAT32 ActualOutlineLength(FEATURE Feature)
const char *const id_to_unichar(UNICHAR_ID id) const
void AdaptiveClassifier(TBLOB *Blob, const DENORM &denorm, BLOB_CHOICE_LIST *Choices, CLASS_PRUNER_RESULTS cp_results)
int classify_adapt_feature_threshold
int CompareByRating(const void *arg1, const void *arg2)
const UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, const uinT8 *cn_factors)
void DebugAdaptiveClassifier(TBLOB *Blob, const DENORM &denorm, ADAPT_RESULTS *Results)
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
UnicityTable< FontSet > fontset_table_
void InitAdaptedClass(TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
const STRING debug_string() const
const int kBlnBaselineOffset
void GetAdaptThresholds(TWERD *Word, const DENORM &denorm, const WERD_CHOICE &BestChoice, const WERD_CHOICE &BestRawChoice, FLOAT32 Thresholds[])
void free_adapted_templates(ADAPT_TEMPLATES templates)
NORM_PROTOS * ReadNormProtos(FILE *File, inT64 end_offset)
#define ClassForClassId(T, c)
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int num_classes, const TBOX &blob_box, CLASS_PRUNER_RESULTS results, ADAPT_RESULTS *final_results)
bool XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, inT16 *min_xht, inT16 *max_xht) const
int MaxNumUnichars() const
void UpdateMatchDisplay()
void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs)
const FontInfo * fontinfo
void FreeFeature(FEATURE Feature)
int classify_integer_matcher_multiplier
CLASS_ID GetClassToDebug(const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
void LearnPieces(const char *filename, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
#define zero_all_bits(array, length)
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
#define PermConfigFor(Class, ConfigId)
void DisplayAdaptedChar(TBLOB *blob, const DENORM &denorm, INT_CLASS_STRUCT *int_class)
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
bool LooksLikeGarbage(const DENORM &denorm, TBLOB *blob)
void break_pieces(TBLOB *blobs, SEAMS seams, inT16 start, inT16 end)
void LearnWord(const char *filename, const char *rejmap, WERD_RES *word)
int AddIntConfig(INT_CLASS Class)
FILE * GetDataFilePtr() const
char * classify_learn_debug_str
#define WordsInVectorOfSize(NumBits)
bool classify_enable_adaptive_debugger
const Shape & GetShape(int shape_id) const
void SetAdaptiveThreshold(FLOAT32 Threshold)
int classify_adapt_proto_threshold
ScoredClass ScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id)
LIST delete_d(LIST list, void *key, int_compare is_equal)
LIST push(LIST list, void *element)
double matcher_clustering_max_angle_delta
ADAPT_TEMPLATES Templates
const UNICHARSET & getUnicharset() const
TEMP_PROTO NewTempProto()
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
GenericVector< int > best_state
TessdataManager tessdata_manager
STRING language_data_path_prefix
void SetCharNormMatch(int integer_matcher_multiplier)
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
bool classify_enable_adaptive_matcher
void InitMatcherRatings(register FLOAT32 *Rating)
void InitAdaptiveClassifier(bool load_pre_trained_templates)
void FreeFeatureSet(FEATURE_SET FeatureSet)
bool get_isdigit(UNICHAR_ID unichar_id) const
void SetAdaptiveThreshold(FLOAT32 Threshold)
char window_wait(ScrollView *win)
STRING debug_str(UNICHAR_ID id) const
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
int ShapeIDToClassID(int shape_id) const
#define test_bit(array, bit)
void EndAdaptiveClassifier()
ShapeTable * shape_table_
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const DENORM &denorm, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
void join_pieces(TBLOB *piece_blobs, SEAMS seams, inT16 start, inT16 end)
void AddNewResult(ADAPT_RESULTS *results, CLASS_ID class_id, int shape_id, FLOAT32 rating, bool adapted, int config, int fontinfo_id, int fontinfo_id2)
ADAPT_TEMPLATES AdaptedTemplates
int matcher_permanent_classes_min
PERM_CONFIG_STRUCT * PERM_CONFIG
void free_int_templates(INT_TEMPLATES templates)
void Init(tesseract::IntParam *classify_debug_level, int classify_integer_matcher_multiplier)
int CharNormTrainingSample(bool pruner_only, const TrainingSample &sample, GenericVector< ShapeRating > *results)
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
bool SeekToStart(TessdataType tessdata_type)
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, CP_RESULT_STRUCT *results)
void FreeBitVector(BIT_VECTOR BitVector)
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
int AdaptableWord(TWERD *Word, const WERD_CHOICE &BestChoiceWord, const WERD_CHOICE &RawChoiceWord)
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
#define set_all_bits(array, length)
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
#define MAX_ADAPTABLE_WERD_SIZE
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor)
#define copy_all_bits(source, dest, length)
double matcher_avg_noise_size
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
double matcher_good_threshold
#define MakeProtoPermanent(Class, ProtoId)
void ClearCharNormArray(uinT8 *char_norm_array)
FLOAT32 CurrentBestChoiceAdjustFactor()
Returns the adjustment factor for the best choice for the current word.
#define MakeConfigPermanent(Class, ConfigId)
void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates)
TEMP_PROTO_STRUCT * TEMP_PROTO
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, INT_RESULT Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
ADAPT_CLASS Class[MAX_NUM_CLASSES]
INT_TEMPLATES PreTrainedTemplates
void UpdateAmbigsGroup(CLASS_ID class_id, const DENORM &denorm, TBLOB *Blob)
double classify_character_fragments_garbage_certainty_threshold
#define ADAPTABLE_WERD_ADJUSTMENT
void FreeTempProto(void *arg)
int get_script(UNICHAR_ID unichar_id) const
bool AlternativeChoicesWorseThan(FLOAT32 Threshold)
#define TempConfigFor(Class, ConfigId)
bool PiecesAllNatural(int start, int count) const
bool disable_character_fragments
#define MAX_NUM_INT_FEATURES
bool classify_bln_numeric_mode
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
static int SortDescendingRating(const void *t1, const void *t2)
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
const char * string() const
UnicityTable< FontInfo > fontinfo_table_
bool CurrentBestChoiceIs(const WERD_CHOICE &WordChoice)
Returns true if WordChoice is the same as the current best choice.
int MakeTempProtoPerm(void *item1, void *item2)
void ResetFeaturesHaveBeenExtracted()
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
#define UnusedClassIdIn(T, c)
int matcher_min_examples_for_prototyping
void FreeTempConfig(TEMP_CONFIG Config)
bool classify_enable_learning
ScoredClass * FindScoredUnichar(ADAPT_RESULTS *results, UNICHAR_ID id)
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
INT_TEMPLATES ReadIntTemplates(FILE *File)
void FindClassifierErrors(FLOAT32 MinRating, FLOAT32 MaxRating, FLOAT32 RatingMargin, FLOAT32 Thresholds[])
int tessedit_single_match
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
void AdaptToChar(TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold)
bool contains_unichar(const char *const unichar_repr) const
inT64 GetEndOffset(TessdataType tessdata_type) const
DLLSYM void tprintf(const char *format,...)
double classify_misfit_junk_penalty
INT_CLASS Class[MAX_NUM_CLASSES]
void * Emalloc(size_t Size)
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
int matcher_sufficient_examples_for_prototyping
TBOX bounding_box() const
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
TBOX bounding_box() const
void RemoveBadMatches(ADAPT_RESULTS *Results)
bool DeSerialize(bool swap, FILE *fp)
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
int classify_learning_debug_level
#define UNLIKELY_NUM_FEAT
#define ADAPT_TEMPLATE_SUFFIX
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
BIT_VECTOR NewBitVector(int NumBits)
void plot(ScrollView *window)
void PrintAdaptiveMatchResults(FILE *File, ADAPT_RESULTS *Results)
void ClassifyAsNoise(ADAPT_RESULTS *Results)
GenericVector< STRING > correct_text
bool matcher_debug_separate_windows
bool get_enabled(UNICHAR_ID unichar_id) const
int ExtractIntFeat(TBLOB *Blob, const DENORM &denorm, INT_FEATURE_ARRAY BLFeat, INT_FEATURE_ARRAY CNFeat, INT_FX_RESULT_STRUCT *Results, inT32 *FeatureOutlineArray)
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, const uinT8 *cn_factors, INT_RESULT_STRUCT &int_result, ADAPT_RESULTS *final_results)
void * alloc_struct(inT32 count, const char *)
#define PRINT_PROTO_MATCHES
bool use_ambigs_for_adaption
#define IncreaseConfidence(TempConfig)
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
int AddIntProto(INT_CLASS Class)
#define PRINT_MATCH_SUMMARY
bool classify_use_pre_adapted_templates
double segment_penalty_dict_case_ok
void AddLargeSpeckleTo(BLOB_CHOICE_LIST *Choices)
bool classify_save_adapted_templates
ScoredClass match[MAX_NUM_CLASSES]
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
void AdaptToPunc(TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold)
#define GetPicoFeatureLength()
#define SET_BIT(array, bit)
int GetBaselineFeatures(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *CharNormArray, inT32 *BlobLength)
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, const DENORM &denorm, TBLOB *Blob)
double matcher_perfect_threshold
void ShowBestMatchFor(TBLOB *Blob, const DENORM &denorm, CLASS_ID ClassId, int shape_id, BOOL8 AdaptiveOn, BOOL8 PreTrainedOn, ADAPT_RESULTS *Results)
void AmbigClassifier(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, ADAPT_CLASS *Classes, UNICHAR_ID *Ambiguities, ADAPT_RESULTS *Results)
void FillABC(PROTO Proto)
void LearnBlob(const FEATURE_DEFS_STRUCT &FeatureDefs, const STRING &filename, TBLOB *Blob, const DENORM &denorm, const char *BlobText)
CLASS_PRUNER_RESULTS CPResults
double tessedit_class_miss_scale
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
void PrintAdaptiveStatistics(FILE *File)
void cprintf(const char *format,...)
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
int GetCharNormFeatures(TBLOB *Blob, const DENORM &denorm, INT_TEMPLATES Templates, INT_FEATURE_ARRAY IntFeatures, uinT8 *PrunerNormArray, uinT8 *CharNormArray, inT32 *BlobLength, inT32 *FeatureOutlineIndex)
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
TBLOB * ClassifyNormalizeIfNeeded(const DENORM **denorm) const
const UnicharAmbigs & getUnicharAmbigs()
#define PRINT_FEATURE_MATCHES
ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File)
double matcher_bad_match_pad
const INT_FEATURE_STRUCT * features() const
STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
void ResetAdaptiveClassifierInternal()
void DoAdaptiveMatch(TBLOB *Blob, const DENORM &denorm, ADAPT_RESULTS *Results)
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, const DENORM &denorm, CLASS_ID CorrectClass)
BOOL8 LargeSpeckle(TBLOB *blob)
bool classify_debug_character_fragments
WERD_CHOICE * best_choice
const FEATURE_DESC_STRUCT CharNormDesc
FEATURE_DEFS_STRUCT feature_defs_