Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
adaptive.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: adaptive.c
3  ** Purpose: Adaptive matcher.
4  ** Author: Dan Johnson
5  ** History: Fri Mar 8 10:00:21 1991, DSJ, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 
19 /*----------------------------------------------------------------------------
20  Include Files and Type Defines
21 ----------------------------------------------------------------------------*/
22 #include "adaptive.h"
23 #include "emalloc.h"
24 #include "freelist.h"
25 #include "globals.h"
26 #include "classify.h"
27 
28 #ifdef __UNIX__
29 #include <assert.h>
30 #endif
31 #include <stdio.h>
32 
33 /*----------------------------------------------------------------------------
34  Public Code
35 ----------------------------------------------------------------------------*/
36 /*---------------------------------------------------------------------------*/
50  ADAPT_CLASS Class,
51  CLASS_ID ClassId) {
52  INT_CLASS IntClass;
53 
54  assert (Templates != NULL);
55  assert (Class != NULL);
56  assert (LegalClassId (ClassId));
57  assert (UnusedClassIdIn (Templates->Templates, ClassId));
58  assert (Class->NumPermConfigs == 0);
59 
60  IntClass = NewIntClass (1, 1);
61  AddIntClass (Templates->Templates, ClassId, IntClass);
62 
63  assert (Templates->Class[ClassId] == NULL);
64  Templates->Class[ClassId] = Class;
65 
66 } /* AddAdaptedClass */
67 
68 
69 /*---------------------------------------------------------------------------*/
81  assert (Config != NULL);
82 
84  FreeBitVector (Config->Protos);
85  free_struct (Config, sizeof (TEMP_CONFIG_STRUCT), "TEMP_CONFIG_STRUCT");
86 
87 } /* FreeTempConfig */
88 
89 /*---------------------------------------------------------------------------*/
90 void FreeTempProto(void *arg) {
91  PROTO proto = (PROTO) arg;
92 
93  free_struct (proto, sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT");
94 }
95 
97  assert(Config != NULL);
98  Efree(Config->Ambigs);
99  free_struct(Config, sizeof(PERM_CONFIG_STRUCT), "PERM_CONFIG_STRUCT");
100 }
101 
102 /*---------------------------------------------------------------------------*/
114  ADAPT_CLASS Class;
115  int i;
116 
117  Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
118  Class->NumPermConfigs = 0;
119  Class->MaxNumTimesSeen = 0;
120  Class->TempProtos = NIL_LIST;
121 
126 
127  for (i = 0; i < MAX_NUM_CONFIGS; i++)
128  TempConfigFor (Class, i) = NULL;
129 
130  return (Class);
131 
132 } /* NewAdaptedClass */
133 
134 
135 /*-------------------------------------------------------------------------*/
136 void free_adapted_class(ADAPT_CLASS adapt_class) {
137  int i;
138 
139  for (i = 0; i < MAX_NUM_CONFIGS; i++) {
140  if (ConfigIsPermanent (adapt_class, i)
141  && PermConfigFor (adapt_class, i) != NULL)
142  FreePermConfig (PermConfigFor (adapt_class, i));
143  else if (!ConfigIsPermanent (adapt_class, i)
144  && TempConfigFor (adapt_class, i) != NULL)
145  FreeTempConfig (TempConfigFor (adapt_class, i));
146  }
147  FreeBitVector (adapt_class->PermProtos);
148  FreeBitVector (adapt_class->PermConfigs);
149  destroy_nodes (adapt_class->TempProtos, FreeTempProto);
150  Efree(adapt_class);
151 }
152 
153 
154 /*---------------------------------------------------------------------------*/
155 namespace tesseract {
168  ADAPT_TEMPLATES Templates;
169  int i;
170 
171  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
172 
173  Templates->Templates = NewIntTemplates ();
174  Templates->NumPermClasses = 0;
175  Templates->NumNonEmptyClasses = 0;
176 
177  /* Insert an empty class for each unichar id in unicharset */
178  for (i = 0; i < MAX_NUM_CLASSES; i++) {
179  Templates->Class[i] = NULL;
180  if (InitFromUnicharset && i < unicharset.size()) {
181  AddAdaptedClass(Templates, NewAdaptedClass(), i);
182  }
183  }
184 
185  return (Templates);
186 
187 } /* NewAdaptedTemplates */
188 
189 // Returns FontinfoId of the given config of the given adapted class.
191  return (ConfigIsPermanent(Class, ConfigId) ?
192  PermConfigFor(Class, ConfigId)->FontinfoId :
193  TempConfigFor(Class, ConfigId)->FontinfoId);
194 }
195 
196 } // namespace tesseract
197 
198 /*----------------------------------------------------------------------------*/
200 
201  if (templates != NULL) {
202  int i;
203  for (i = 0; i < (templates->Templates)->NumClasses; i++)
204  free_adapted_class (templates->Class[i]);
205  free_int_templates (templates->Templates);
206  Efree(templates);
207  }
208 }
209 
210 
211 /*---------------------------------------------------------------------------*/
223 TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) {
225  int NumProtos = MaxProtoId + 1;
226 
227  Config =
229  "TEMP_CONFIG_STRUCT");
230  Config->Protos = NewBitVector (NumProtos);
231 
232  Config->NumTimesSeen = 1;
233  Config->MaxProtoId = MaxProtoId;
234  Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos);
235  Config->ContextsSeen = NIL_LIST;
236  zero_all_bits (Config->Protos, Config->ProtoVectorSize);
237  Config->FontinfoId = FontinfoId;
238 
239  return (Config);
240 
241 } /* NewTempConfig */
242 
243 
244 /*---------------------------------------------------------------------------*/
255  return ((TEMP_PROTO)
256  alloc_struct (sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT"));
257 } /* NewTempProto */
258 
259 
260 /*---------------------------------------------------------------------------*/
261 namespace tesseract {
274  int i;
275  INT_CLASS IClass;
276  ADAPT_CLASS AClass;
277 
278  #ifndef SECURE_NAMES
279  fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
280  fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
281  Templates->NumNonEmptyClasses, Templates->NumPermClasses);
282  fprintf (File, " Id NC NPC NP NPP\n");
283  fprintf (File, "------------------------\n");
284 
285  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
286  IClass = Templates->Templates->Class[i];
287  AClass = Templates->Class[i];
288  if (!IsEmptyAdaptedClass (AClass)) {
289  fprintf (File, "%5d %s %3d %3d %3d %3d\n",
291  IClass->NumConfigs, AClass->NumPermConfigs,
292  IClass->NumProtos,
293  IClass->NumProtos - count (AClass->TempProtos));
294  }
295  }
296  #endif
297  fprintf (File, "\n");
298 
299 } /* PrintAdaptedTemplates */
300 } // namespace tesseract
301 
302 
303 /*---------------------------------------------------------------------------*/
316  int NumTempProtos;
317  int NumConfigs;
318  int i;
319  ADAPT_CLASS Class;
320  TEMP_PROTO TempProto;
321 
322  /* first read high level adapted class structure */
323  Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
324  fread ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
325 
326  /* then read in the definitions of the permanent protos and configs */
329  fread ((char *) Class->PermProtos, sizeof (uinT32),
331  fread ((char *) Class->PermConfigs, sizeof (uinT32),
333 
334  /* then read in the list of temporary protos */
335  fread ((char *) &NumTempProtos, sizeof (int), 1, File);
336  Class->TempProtos = NIL_LIST;
337  for (i = 0; i < NumTempProtos; i++) {
338  TempProto =
340  "TEMP_PROTO_STRUCT");
341  fread ((char *) TempProto, sizeof (TEMP_PROTO_STRUCT), 1, File);
342  Class->TempProtos = push_last (Class->TempProtos, TempProto);
343  }
344 
345  /* then read in the adapted configs */
346  fread ((char *) &NumConfigs, sizeof (int), 1, File);
347  for (i = 0; i < NumConfigs; i++)
348  if (test_bit (Class->PermConfigs, i))
349  Class->Config[i].Perm = ReadPermConfig (File);
350  else
351  Class->Config[i].Temp = ReadTempConfig (File);
352 
353  return (Class);
354 
355 } /* ReadAdaptedClass */
356 
357 
358 /*---------------------------------------------------------------------------*/
359 namespace tesseract {
372  int i;
373  ADAPT_TEMPLATES Templates;
374 
375  /* first read the high level adaptive template struct */
376  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
377  fread ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
378 
379  /* then read in the basic integer templates */
380  Templates->Templates = ReadIntTemplates (File);
381 
382  /* then read in the adaptive info for each class */
383  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
384  Templates->Class[i] = ReadAdaptedClass (File);
385  }
386  return (Templates);
387 
388 } /* ReadAdaptedTemplates */
389 } // namespace tesseract
390 
391 
392 /*---------------------------------------------------------------------------*/
406  "PERM_CONFIG_STRUCT");
407  uinT8 NumAmbigs;
408  fread ((char *) &NumAmbigs, sizeof(uinT8), 1, File);
409  Config->Ambigs = (UNICHAR_ID *)Emalloc(sizeof(UNICHAR_ID) * (NumAmbigs + 1));
410  fread(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
411  Config->Ambigs[NumAmbigs] = -1;
412  fread(&(Config->FontinfoId), sizeof(int), 1, File);
413 
414  return (Config);
415 
416 } /* ReadPermConfig */
417 
418 
419 /*---------------------------------------------------------------------------*/
433 
434  Config =
436  "TEMP_CONFIG_STRUCT");
437  fread ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
438 
439  Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG);
440  fread ((char *) Config->Protos, sizeof (uinT32),
441  Config->ProtoVectorSize, File);
442 
443  return (Config);
444 
445 } /* ReadTempConfig */
446 
447 
448 /*---------------------------------------------------------------------------*/
461 void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) {
462  int NumTempProtos;
463  LIST TempProtos;
464  int i;
465 
466  /* first write high level adapted class structure */
467  fwrite ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
468 
469  /* then write out the definitions of the permanent protos and configs */
470  fwrite ((char *) Class->PermProtos, sizeof (uinT32),
472  fwrite ((char *) Class->PermConfigs, sizeof (uinT32),
474 
475  /* then write out the list of temporary protos */
476  NumTempProtos = count (Class->TempProtos);
477  fwrite ((char *) &NumTempProtos, sizeof (int), 1, File);
478  TempProtos = Class->TempProtos;
479  iterate (TempProtos) {
480  void* proto = first_node(TempProtos);
481  fwrite ((char *) proto, sizeof (TEMP_PROTO_STRUCT), 1, File);
482  }
483 
484  /* then write out the adapted configs */
485  fwrite ((char *) &NumConfigs, sizeof (int), 1, File);
486  for (i = 0; i < NumConfigs; i++)
487  if (test_bit (Class->PermConfigs, i))
488  WritePermConfig (File, Class->Config[i].Perm);
489  else
490  WriteTempConfig (File, Class->Config[i].Temp);
491 
492 } /* WriteAdaptedClass */
493 
494 
495 /*---------------------------------------------------------------------------*/
496 namespace tesseract {
508  int i;
509 
510  /* first write the high level adaptive template struct */
511  fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
512 
513  /* then write out the basic integer templates */
514  WriteIntTemplates (File, Templates->Templates, unicharset);
515 
516  /* then write out the adaptive info for each class */
517  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
518  WriteAdaptedClass (File, Templates->Class[i],
519  Templates->Templates->Class[i]->NumConfigs);
520  }
521 } /* WriteAdaptedTemplates */
522 } // namespace tesseract
523 
524 
525 /*---------------------------------------------------------------------------*/
537 void WritePermConfig(FILE *File, PERM_CONFIG Config) {
538  uinT8 NumAmbigs = 0;
539 
540  assert (Config != NULL);
541  while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs;
542 
543  fwrite((char *) &NumAmbigs, sizeof(uinT8), 1, File);
544  fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
545  fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
546 } /* WritePermConfig */
547 
548 
549 /*---------------------------------------------------------------------------*/
561 void WriteTempConfig(FILE *File, TEMP_CONFIG Config) {
562  assert (Config != NULL);
563  /* contexts not yet implemented */
564  assert (Config->ContextsSeen == NULL);
565 
566  fwrite ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
567  fwrite ((char *) Config->Protos, sizeof (uinT32),
568  Config->ProtoVectorSize, File);
569 
570 } /* WriteTempConfig */
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
void Efree(void *ptr)
Definition: emalloc.cpp:85
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:338
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:167
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93
#define MAX_NUM_CONFIGS
Definition: intproto.h:44
const char *const id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:233
int UNICHAR_ID
Definition: unichar.h:31
uinT8 MaxNumTimesSeen
Definition: adaptive.h:66
ADAPT_CLASS ReadAdaptedClass(FILE *File)
Definition: adaptive.cpp:315
int size() const
Definition: unicharset.h:264
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
PROTO_ID MaxProtoId
Definition: adaptive.h:43
void memfree(void *element)
Definition: freelist.cpp:30
TEMP_CONFIG Temp
Definition: adaptive.h:59
#define zero_all_bits(array, length)
Definition: bitvec.h:33
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class)
Definition: intproto.cpp:224
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:273
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:105
void FreePermConfig(PERM_CONFIG Config)
Definition: adaptive.cpp:96
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
#define NIL_LIST
Definition: oldlist.h:126
ADAPT_TEMPLATES_STRUCT * ADAPT_TEMPLATES
Definition: adaptive.h:83
#define MAX_NUM_PROTOS
Definition: intproto.h:45
uinT8 NumConfigs
Definition: intproto.h:108
void free_struct(void *deadstruct, inT32, const char *)
Definition: memry.cpp:44
#define NULL
Definition: host.h:144
UNICHAR_ID * Ambigs
Definition: adaptive.h:52
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:254
ADAPT_CLASS_STRUCT * ADAPT_CLASS
Definition: adaptive.h:73
TEMP_CONFIG ReadTempConfig(FILE *File)
Definition: adaptive.cpp:431
INT_TEMPLATES Templates
Definition: adaptive.h:77
void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId)
Definition: adaptive.cpp:49
#define test_bit(array, bit)
Definition: bitvec.h:61
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
Definition: adaptive.cpp:190
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:683
PERM_CONFIG_STRUCT * PERM_CONFIG
Definition: adaptive.h:55
void free_int_templates(INT_TEMPLATES templates)
Definition: intproto.cpp:774
CLUSTERCONFIG Config
ADAPT_CLASS NewAdaptedClass()
Definition: adaptive.cpp:113
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:55
void destroy_nodes(LIST list, void_dest destructor)
Definition: oldlist.cpp:204
uinT8 ProtoVectorSize
Definition: adaptive.h:42
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:223
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:749
TEMP_PROTO_STRUCT * TEMP_PROTO
Definition: adaptive.h:37
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
PERM_CONFIG ReadPermConfig(FILE *File)
Definition: adaptive.cpp:404
void FreeTempProto(void *arg)
Definition: adaptive.cpp:90
uinT8 NumPermConfigs
Definition: adaptive.h:65
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:1155
#define UnusedClassIdIn(T, c)
Definition: intproto.h:172
void FreeTempConfig(TEMP_CONFIG Config)
Definition: adaptive.cpp:80
INT_TEMPLATES ReadIntTemplates(FILE *File)
Definition: intproto.cpp:786
PERM_CONFIG Perm
Definition: adaptive.h:60
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:122
void * Emalloc(size_t Size)
Definition: emalloc.cpp:35
BIT_VECTOR Protos
Definition: adaptive.h:45
PROTO_STRUCT * PROTO
Definition: protos.h:52
UNICHARSET unicharset
Definition: ccutil.h:72
void free_adapted_class(ADAPT_CLASS adapt_class)
Definition: adaptive.cpp:136
TEMP_CONFIG_STRUCT * TEMP_CONFIG
Definition: adaptive.h:48
void WritePermConfig(FILE *File, PERM_CONFIG Config)
Definition: adaptive.cpp:537
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:111
BIT_VECTOR PermProtos
Definition: adaptive.h:68
#define LegalClassId(c)
Definition: intproto.h:171
void * alloc_struct(inT32 count, const char *)
Definition: memry.cpp:40
BIT_VECTOR PermConfigs
Definition: adaptive.h:69
uinT16 NumProtos
Definition: intproto.h:106
unsigned char uinT8
Definition: host.h:99
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:507
#define BITSINLONG
Definition: bitvec.h:27
ADAPTED_CONFIG Config[MAX_NUM_CONFIGS]
Definition: adaptive.h:71
unsigned int uinT32
Definition: host.h:103
int count(LIST var_list)
Definition: oldlist.cpp:108
ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File)
Definition: adaptive.cpp:371
void WriteTempConfig(FILE *File, TEMP_CONFIG Config)
Definition: adaptive.cpp:561
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs)
Definition: adaptive.cpp:461
#define iterate(l)
Definition: oldlist.h:159
uinT8 NumTimesSeen
Definition: adaptive.h:41
#define first_node(l)
Definition: oldlist.h:139
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35