Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
adaptions.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: adaptions.cpp (Formerly adaptions.c)
3  * Description: Functions used to adapt to blobs already confidently
4  * identified
5  * Author: Chris Newton
6  * Created: Thu Oct 7 10:17:28 BST 1993
7  *
8  * (C) Copyright 1992, Hewlett-Packard Ltd.
9  ** Licensed under the Apache License, Version 2.0 (the "License");
10  ** you may not use this file except in compliance with the License.
11  ** You may obtain a copy of the License at
12  ** http://www.apache.org/licenses/LICENSE-2.0
13  ** Unless required by applicable law or agreed to in writing, software
14  ** distributed under the License is distributed on an "AS IS" BASIS,
15  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  ** See the License for the specific language governing permissions and
17  ** limitations under the License.
18  *
19  **********************************************************************/
20 
21 #ifdef _MSC_VER
22 #pragma warning(disable:4244) // Conversion warnings
23 #pragma warning(disable:4305) // int/float warnings
24 #endif
25 
26 #include "mfcpch.h"
27 
28 #ifdef __UNIX__
29 #include <assert.h>
30 #endif
31 #include <ctype.h>
32 #include <string.h>
33 #include "tessbox.h"
34 #include "tessvars.h"
35 #include "memry.h"
36 #include "imgs.h"
37 #include "scaleimg.h"
38 #include "reject.h"
39 #include "control.h"
40 #include "stopper.h"
41 #include "secname.h"
42 #include "tesseractclass.h"
43 
44 // Include automatically generated configuration file if running autoconf.
45 #ifdef HAVE_CONFIG_H
46 #include "config_auto.h"
47 #endif
48 
49 namespace tesseract {
50 BOOL8 Tesseract::word_adaptable( //should we adapt?
51  WERD_RES *word,
52  uinT16 mode) {
54  tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n",
55  word->best_choice == NULL ? "" :
57  word->best_choice->rating(), word->best_choice->certainty());
58  }
59 
60  BOOL8 status = FALSE;
61  BITS16 flags(mode);
62 
63  enum MODES
64  {
65  ADAPTABLE_WERD,
66  ACCEPTABLE_WERD,
67  CHECK_DAWGS,
68  CHECK_SPACES,
69  CHECK_ONE_ELL_CONFLICT,
70  CHECK_AMBIG_WERD
71  };
72 
73  /*
74  0: NO adaption
75  */
76  if (mode == 0) {
77  if (tessedit_adaption_debug) tprintf("adaption disabled\n");
78  return FALSE;
79  }
80 
81  if (flags.bit (ADAPTABLE_WERD)) {
82  status |= word->tess_would_adapt; // result of Classify::AdaptableWord()
83  if (tessedit_adaption_debug && !status) {
84  tprintf("tess_would_adapt bit is false\n");
85  }
86  }
87 
88  if (flags.bit (ACCEPTABLE_WERD)) {
89  status |= word->tess_accepted;
90  if (tessedit_adaption_debug && !status) {
91  tprintf("tess_accepted bit is false\n");
92  }
93  }
94 
95  if (!status) { // If not set then
96  return FALSE; // ignore other checks
97  }
98 
99  if (flags.bit (CHECK_DAWGS) &&
100  (word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&
101  (word->best_choice->permuter () != FREQ_DAWG_PERM) &&
102  (word->best_choice->permuter () != USER_DAWG_PERM) &&
103  (word->best_choice->permuter () != NUMBER_PERM)) {
104  if (tessedit_adaption_debug) tprintf("word not in dawgs\n");
105  return FALSE;
106  }
107 
108  if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, FALSE)) {
109  if (tessedit_adaption_debug) tprintf("word has ell conflict\n");
110  return FALSE;
111  }
112 
113  if (flags.bit (CHECK_SPACES) &&
114  (strchr(word->best_choice->unichar_string().string(), ' ') != NULL)) {
115  if (tessedit_adaption_debug) tprintf("word contains spaces\n");
116  return FALSE;
117  }
118 
119 // if (flags.bit (CHECK_AMBIG_WERD) && test_ambig_word (word))
120  if (flags.bit (CHECK_AMBIG_WERD) &&
121  !getDict().NoDangerousAmbig(word->best_choice, NULL, false, NULL, NULL)) {
122  if (tessedit_adaption_debug) tprintf("word is ambiguous\n");
123  return FALSE;
124  }
125 
126  // Do not adapt to words that are composed from fragments if
127  // tessedit_adapt_to_char_fragments is false.
129  const char *fragment_lengths = word->best_choice->fragment_lengths();
130  if (fragment_lengths != NULL && *fragment_lengths != '\0') {
131  for (int i = 0; i < word->best_choice->length(); ++i) {
132  if (fragment_lengths[i] > 1) {
133  if (tessedit_adaption_debug) tprintf("won't adapt to fragments\n");
134  return false; // found a character composed from fragments
135  }
136  }
137  }
138  }
139 
141  tprintf("returning status %d\n", status);
142  }
143  return status;
144 }
145 
146 } // namespace tesseract
int length() const
Definition: ratngs.h:214
const STRING & unichar_string() const
Definition: ratngs.h:395
float certainty() const
Definition: ratngs.h:234
unsigned char BOOL8
Definition: host.h:113
#define NULL
Definition: host.h:144
BOOL8 one_ell_conflict(WERD_RES *word_res, BOOL8 update_map)
Definition: reject.cpp:456
#define FALSE
Definition: capi.h:28
BOOL8 tess_would_adapt
Definition: pageres.h:418
const char * fragment_lengths() const
Definition: ratngs.h:224
Dict & getDict()
Definition: classify.h:62
bool NoDangerousAmbig(WERD_CHOICE *BestChoice, DANGERR *fixpt, bool fix_replaceable, BLOB_CHOICE_LIST_VECTOR *Choices, bool *modified_blobs)
Definition: stopper.cpp:581
uinT8 permuter() const
Definition: ratngs.h:237
const char * string() const
Definition: strngs.cpp:156
CMD_EVENTS mode
Definition: pgedit.cpp:115
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
unsigned short uinT16
Definition: host.h:101
BOOL8 tess_accepted
Definition: pageres.h:417
BOOL8 word_adaptable(WERD_RES *word, uinT16 mode)
Definition: adaptions.cpp:50
BOOL8 bit(uinT8 bit_num) const
Definition: bits16.h:56
Definition: bits16.h:25
float rating() const
Definition: ratngs.h:231
WERD_CHOICE * best_choice
Definition: pageres.h:359