Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
intmatcher.h
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: intmatcher.h
3  ** Purpose: Interface to high level generic classifier routines.
4  ** Author: Robert Moss
5  ** History: Wed Feb 13 15:24:15 MST 1991, RWM, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 #ifndef INTMATCHER_H
19 #define INTMATCHER_H
20 
21 #include "params.h"
22 
23 // Character fragments could be present in the trained templates
24 // but turned on/off on a language-by-language basis or depending
25 // on particular properties of the corpus (e.g. when we expect the
26 // images to have low exposure).
28  "Do not include character fragments in the"
29  " results of the classifier");
30 
32  "Integer Matcher Multiplier 0-255: ");
33 
34 
38 #include "intproto.h"
39 #include "cutoffs.h"
40 
46 };
47 
49 
50 
55 };
56 
58 
59 /*----------------------------------------------------------------------------
60  Variables
61 -----------------------------------------------------------------------------*/
62 
64  "Threshold for good protos during adaptive 0-255: ");
65 
67  "Threshold for good features during adaptive 0-255: ");
68 
// Size parameters for the similarity-to-evidence lookup table.
// SE_TABLE_SIZE is the element count of
// IntegerMatcher::similarity_evidence_table_; note that 512 == 1 << 9,
// i.e. SE_TABLE_SIZE == 1 << SE_TABLE_BITS.
// NOTE(review): the two values presumably must be kept in sync, and
// IntegerMatcher::kEvidenceTableBits carries the same value (9) -- confirm
// whether both constants are still needed.
73 #define SE_TABLE_BITS 9
74 #define SE_TABLE_SIZE 512
75 
80 
81  void Clear(const INT_CLASS class_template);
82  void ClearFeatureEvidence(const INT_CLASS class_template);
83  void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures,
84  inT32 used_features);
// The opening line of this declaration (listing line 85) was lost in the
// extracted listing and is restored here.
// NOTE(review): presumably folds the per-proto evidence into the per-config
// sums for the configurations selected by ConfigMask -- confirm against the
// definition.
85  void UpdateSumOfProtoEvidences(
86  INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures);
87 };
88 
89 
91  public:
92  // Integer Matcher Theta Fudge (0-255).
93  static const int kIntThetaFudge = 128;
94  // Bits in Similarity to Evidence Lookup (8-9).
95  static const int kEvidenceTableBits = 9;
96  // Integer Evidence Truncation Bits (8-14).
97  static const int kIntEvidenceTruncBits = 14;
98  // Similarity to Evidence Table Exponential Multiplier.
99  static const float kSEExponentialMultiplier;
100  // Center of Similarity Curve.
101  static const float kSimilarityCenter;
102 
// Default constructor: only classify_debug_level_ is initialized (to a null
// pointer). The similarity/evidence table and the mask, shift and multiplier
// members are left uninitialized by this constructor.
// NOTE(review): presumably Init() must be called before any matching is
// done -- confirm.
103  IntegerMatcher() : classify_debug_level_(0) {}
104 
// Takes the debug-level parameter object and the integer matcher
// multiplier. The second parameter line (listing line 106) was lost in the
// extracted listing and is restored here.
// NOTE(review): presumably stores classify_debug_level in
// classify_debug_level_ and prepares the similarity-to-evidence table --
// confirm in intmatcher.cpp.
105  void Init(tesseract::IntParam *classify_debug_level,
106  int classify_integer_matcher_multiplier);
107 
108  void SetBaseLineMatch();
109  void SetCharNormMatch(int integer_matcher_multiplier);
110 
// Declaration only; the definition lives in intmatcher.cpp.
// Features is taken as a read-only pointer (const INT_FEATURE_STRUCT*), and
// Result is an INT_RESULT, i.e. a pointer to INT_RESULT_STRUCT.
// NOTE(review): presumably matches the NumFeatures entries of Features
// against ClassTemplate (restricted by ProtoMask/ConfigMask) and writes the
// outcome through Result -- confirm against the definition.
// NOTE(review): Debug and SeparateDebugWindows look like debug-display
// controls only -- confirm they do not affect the result.
111  void Match(INT_CLASS ClassTemplate,
112  BIT_VECTOR ProtoMask,
113  BIT_VECTOR ConfigMask,
114  inT16 NumFeatures,
115  const INT_FEATURE_STRUCT* Features,
116  INT_RESULT Result,
117  int AdaptFeatureThreshold,
118  int Debug,
119  bool SeparateDebugWindows);
120 
121  // Applies the CN normalization factor to the given rating and returns
122  // the modified rating.
123  float ApplyCNCorrection(float rating, int blob_length,
124  int normalization_factor);
125 
// Declaration only; the definition lives in intmatcher.cpp.
// Unlike Match(), Features is declared as a non-const INT_FEATURE_ARRAY
// (an array typedef of INT_FEATURE_STRUCT); as a parameter it is passed as a
// pointer to its first element.
// ProtoArray points to PROTO_ID (inT16) entries.
// NOTE(review): presumably ProtoArray is an output buffer filled with the
// protos scoring at or above AdaptProtoThreshold, and the int return value
// is a count -- confirm against the definition, including the required
// capacity of ProtoArray.
126  int FindGoodProtos(INT_CLASS ClassTemplate,
127  BIT_VECTOR ProtoMask,
128  BIT_VECTOR ConfigMask,
129  uinT16 BlobLength,
130  inT16 NumFeatures,
131  INT_FEATURE_ARRAY Features,
132  PROTO_ID *ProtoArray,
133  int AdaptProtoThreshold,
134  int Debug);
135 
// Declaration only; the definition lives in intmatcher.cpp.
// Same parameter layout as FindGoodProtos(), except that the output-style
// argument is a FEATURE_ID* (FEATURE_ID is uinT8) and the threshold is
// AdaptFeatureThreshold.
// Features is a non-const INT_FEATURE_ARRAY; as a parameter it is passed as
// a pointer to its first element.
// NOTE(review): presumably FeatureArray is an output buffer filled with the
// features whose evidence falls below AdaptFeatureThreshold, and the int
// return value is a count -- confirm against the definition.
136  int FindBadFeatures(INT_CLASS ClassTemplate,
137  BIT_VECTOR ProtoMask,
138  BIT_VECTOR ConfigMask,
139  uinT16 BlobLength,
140  inT16 NumFeatures,
141  INT_FEATURE_ARRAY Features,
142  FEATURE_ID *FeatureArray,
143  int AdaptFeatureThreshold,
144  int Debug);
145 
146  private:
147  int UpdateTablesForFeature(
148  INT_CLASS ClassTemplate,
149  BIT_VECTOR ProtoMask,
150  BIT_VECTOR ConfigMask,
151  int FeatureNum,
152  const INT_FEATURE_STRUCT* Feature,
153  ScratchEvidence *evidence,
154  int Debug);
155 
156  int FindBestMatch(INT_CLASS ClassTemplate,
157  const ScratchEvidence &tables,
158  INT_RESULT Result);
159 
160 #ifndef GRAPHICS_DISABLED
161  void DebugFeatureProtoError(
162  INT_CLASS ClassTemplate,
163  BIT_VECTOR ProtoMask,
164  BIT_VECTOR ConfigMask,
165  const ScratchEvidence &tables,
166  inT16 NumFeatures,
167  int Debug);
168 
169  void DisplayProtoDebugInfo(
170  INT_CLASS ClassTemplate,
171  BIT_VECTOR ProtoMask,
172  BIT_VECTOR ConfigMask,
173  const ScratchEvidence &tables,
174  bool SeparateDebugWindows);
175 
176  void DisplayFeatureDebugInfo(
177  INT_CLASS ClassTemplate,
178  BIT_VECTOR ProtoMask,
179  BIT_VECTOR ConfigMask,
180  inT16 NumFeatures,
181  const INT_FEATURE_STRUCT* Features,
182  int AdaptFeatureThreshold,
183  int Debug,
184  bool SeparateDebugWindows);
185 
186  void DebugBestMatch(int BestMatch, INT_RESULT Result);
187 #endif
188 
189 
190  private:
191  uinT8 similarity_evidence_table_[SE_TABLE_SIZE];
192  uinT32 evidence_table_mask_;
193  uinT32 mult_trunc_shift_bits_;
194  uinT32 table_trunc_shift_bits_;
195  inT16 local_matcher_multiplier_;
196  tesseract::IntParam *classify_debug_level_;
197  uinT32 evidence_mult_mask_;
198 };
199 
// Free-function declaration (not a member of IntegerMatcher).
// Parameter types: ActualProtoNum is an unsigned 16-bit value, Evidence an
// unsigned 8-bit value, ConfigMask a BIT_VECTOR (uinT32*), and ConfigWord an
// unsigned 32-bit value.
// NOTE(review): going by the name this is a debug-output helper; the
// definition is not visible here -- confirm it is still defined and used.
203 void IMDebugConfiguration(INT_FEATURE FeatureNum,
204  uinT16 ActualProtoNum,
205  uinT8 Evidence,
206  BIT_VECTOR ConfigMask,
207  uinT32 ConfigWord);
208 
// Free-function declaration (not a member of IntegerMatcher).
// FeatureEvidence points to unsigned 8-bit values; ConfigCount is a signed
// 32-bit integer.
// NOTE(review): presumably ConfigCount is the number of entries readable
// through FeatureEvidence -- confirm against the definition, which is not
// visible here.
209 void IMDebugConfigurationSum(INT_FEATURE FeatureNum,
210  uinT8 *FeatureEvidence,
211  inT32 ConfigCount);
212 
// Free-function declaration taking an element count and two int arrays
// (both passed as pointers to their first element).
// NOTE(review): presumably sorts ra and applies the same permutation to rb
// -- confirm ordering and indexing base against the definition.
// NOTE(review): the `register` storage-class specifier on the parameters is
// deprecated since C++11 and removed in C++17; it will need to be dropped
// (here and in the definition) before building with a newer standard.
213 void HeapSort (int n, register int ra[], register int rb[]);
214 
218 #endif
uinT16 FeatureMisses
Definition: intmatcher.h:45
CP_RESULT_STRUCT CLASS_PRUNER_RESULTS[MAX_NUM_CLASSES]
Definition: intmatcher.h:57
uinT8 feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:77
void SetBaseLineMatch()
Definition: intmatcher.cpp:728
#define MAX_NUM_CONFIGS
Definition: intproto.h:44
void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features)
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:239
inT16 PROTO_ID
Definition: matchdefs.h:41
#define MAX_PROTO_INDEX
Definition: intproto.h:41
#define MAX_NUM_PROTOS
Definition: intproto.h:45
FLOAT32 Rating
Definition: intmatcher.h:52
INT_RESULT_STRUCT * INT_RESULT
Definition: intmatcher.h:48
int inT32
Definition: host.h:102
void SetCharNormMatch(int integer_matcher_multiplier)
Definition: intmatcher.cpp:734
#define FALSE
Definition: capi.h:28
uinT8 proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]
Definition: intmatcher.h:79
int sum_feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:78
float FLOAT32
Definition: host.h:111
bool disable_character_fragments
uinT8 FEATURE_ID
Definition: matchdefs.h:47
void ClearFeatureEvidence(const INT_CLASS class_template)
Definition: intmatcher.cpp:749
void Init(tesseract::IntParam *classify_debug_level, int classify_integer_matcher_multiplier)
Definition: intmatcher.cpp:696
void HeapSort(int n, register int ra[], register int rb[])
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:625
int classify_adapt_feature_thresh
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor)
INT_RESULT_STRUCT IMResult
Definition: intmatcher.h:53
static const float kSEExponentialMultiplier
Definition: intmatcher.h:99
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, INT_RESULT Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:460
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
void IMDebugConfigurationSum(INT_FEATURE FeatureNum, uinT8 *FeatureEvidence, inT32 ConfigCount)
#define SE_TABLE_SIZE
Definition: intmatcher.h:74
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
int classify_integer_matcher_multiplier
static const int kEvidenceTableBits
Definition: intmatcher.h:95
int classify_adapt_proto_thresh
static const int kIntEvidenceTruncBits
Definition: intmatcher.h:97
unsigned short uinT16
Definition: host.h:101
static const int kIntThetaFudge
Definition: intmatcher.h:93
void UpdateSumOfProtoEvidences(INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures)
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:546
short inT16
Definition: host.h:100
CLASS_ID Class
Definition: intmatcher.h:54
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: baseapi.h:66
#define INT_VAR_H(name, val, comment)
Definition: params.h:236
static const float kSimilarityCenter
Definition: intmatcher.h:101
unsigned char uinT8
Definition: host.h:99
void IMDebugConfiguration(INT_FEATURE FeatureNum, uinT16 ActualProtoNum, uinT8 Evidence, BIT_VECTOR ConfigMask, uinT32 ConfigWord)
void Clear(const INT_CLASS class_template)
Definition: intmatcher.cpp:742
unsigned int uinT32
Definition: host.h:103
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35