56 #define VARDIR "configs/"
58 #define API_CONFIG "configs/api_config"
73 if ((fp = fopen(path.
string(),
"rb")) !=
NULL) {
77 path +=
"tessconfigs/";
79 if ((fp = fopen(path.
string(),
"rb")) !=
NULL) {
99 const char *arg0,
const char *textbase,
const char *language,
103 bool set_only_non_debug_params) {
108 lang = language !=
NULL ? language :
"eng";
127 tprintf(
"Loaded language config file\n");
136 for (
int i = 0; i < configs_size; ++i) {
142 if (vars_vec !=
NULL && vars_values !=
NULL) {
143 for (
int i = 0; i < vars_vec->
size(); ++i) {
145 (*vars_values)[i].
string(),
146 set_params_constraint, this->
params())) {
147 tprintf(
"Error setting param %s\n", (*vars_vec)[i].
string());
154 FILE *params_file = fopen(tessedit_write_params_to_file.
string(),
"wb");
155 if (params_file !=
NULL) {
159 tprintf(
"Wrote parameters to %s\n",
160 tessedit_write_params_to_file.
string());
163 tprintf(
"Failed to open %s for writing params.\n",
164 tessedit_write_params_to_file.
string());
171 tprintf(
"Loading Tesseract/Cube with tessedit_ocr_engine_mode %d\n",
179 tprintf(
"Returning after loading config file\n");
190 tprintf(
"Error: Size of unicharset is greater than MAX_NUM_CLASSES\n");
209 tprintf(
"Loaded Cube w/out combiner\n");
213 tprintf(
"Loaded Cube with combiner\n");
220 static bool IsStrInList(
const STRING& str,
222 for (
int i = 0; i < str_list.
size(); ++i) {
223 if (str_list[i] == str)
238 while (remains.
length() > 0) {
240 const char* start = remains.
string();
241 while (*start ==
'+')
245 target = not_to_load;
249 int end = strlen(start);
250 const char* plus = strchr(start,
'+');
251 if (plus !=
NULL && plus - start < end)
258 if (!IsStrInList(lang_code, *target)) {
260 tprintf(
"Adding language '%s' to list\n", lang_code.
string());
271 const char *arg0,
const char *textbase,
const char *language,
275 bool set_only_non_debug_params) {
280 sub_langs_.delete_data_pointers();
284 bool loaded_primary =
false;
286 for (
int lang_index = 0; lang_index < langs_to_load.
size(); ++lang_index) {
287 if (!IsStrInList(langs_to_load[lang_index], langs_not_to_load)) {
288 const char *lang_str = langs_to_load[lang_index].string();
290 if (!loaded_primary) {
297 arg0, textbase, lang_str, oem, configs, configs_size,
298 vars_vec, vars_values, set_only_non_debug_params);
300 if (!loaded_primary) {
302 tprintf(
"Failed loading language '%s'\n", lang_str);
305 tprintf(
"Loaded language '%s' as main language\n", lang_str);
307 &langs_to_load, &langs_not_to_load);
308 loaded_primary =
true;
312 tprintf(
"Failed loading language '%s'\n", lang_str);
316 tprintf(
"Loaded language '%s' as secondary language\n", lang_str);
317 sub_langs_.push_back(tess_to_init);
320 &langs_to_load, &langs_not_to_load);
325 if (!loaded_primary) {
326 tprintf(
"Tesseract couldn't load any languages!\n");
350 const char *arg0,
const char *textbase,
const char *language,
354 bool set_only_non_debug_params) {
356 configs_size, vars_vec, vars_values,
357 set_only_non_debug_params)) {
366 bool init_tesseract_classifier =
382 for (
int i = 0; i < new_fonts.
size(); ++i) {
391 for (
int i = 0; i < lang_fonts->
size(); ++i) {
392 int index = all_fonts.
get_id(lang_fonts->
get(i));
408 for (
int i = 0; i < sub_langs_.size(); ++i) {
413 for (
int i = 0; i < sub_langs_.size(); ++i) {
416 font_table_size_ = all_fonts.
size();
421 const char *textbase,
422 const char *language) {
void truncate_at(inT32 index)
int init_tesseract_internal(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params)
void ParseLanguageString(const char *lang_str, GenericVector< STRING > *to_load, GenericVector< STRING > *not_to_load)
void LoadUnicharAmbigs(FILE *ambigs_file, inT64 end_offset, int debug_level, bool use_ambigs_for_adaption, UNICHARSET *unicharset)
bool tessedit_init_config_only
bool Init(const char *data_file_name, int debug_level)
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
int tessedit_ocr_engine_mode
FILE * GetDataFilePtr() const
TessdataManager tessdata_manager
void program_editup(const char *textbase, bool init_classifier, bool init_permute)
STRING language_data_path_prefix
static bool ReadParamsFile(const char *file, SetParamConstraint constraint, ParamsVectors *member_params)
bool load_from_file(const char *const filename, bool skip_fragments)
static bool ReadParamsFromFp(FILE *fp, inT64 end_offset, SetParamConstraint constraint, ParamsVectors *member_params)
char * tessedit_load_sublangs
bool init_cube_objects(bool load_combiner, TessdataManager *tessdata_manager)
void read_config_file(const char *filename, SetParamConstraint constraint)
bool SeekToStart(TessdataType tessdata_type)
ETEXT_DESC * global_monitor
int push_back(T object)
Add an element in the table.
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language)
const T & get(int id) const
Return the object from an id.
UnicityTable< FontInfo > & get_fontinfo_table()
const char * string() const
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params)
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
void SetupUniversalFontIds()
inT64 GetEndOffset(TessdataType tessdata_type) const
DLLSYM void tprintf(const char *format,...)
void set_compare_callback(TessResultCallback2< bool, T const &, T const & > *cb)
int tessdata_manager_debug_level
int size() const
Return the size used.
bool init_tesseract_lang_data(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params)
bool major_right_to_left() const
bool use_ambigs_for_adaption
char * tessedit_write_params_to_file
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
bool tessedit_ambigs_training
void main_setup(const char *argv0, const char *basename)
int get_id(T object) const
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
UnicharAmbigs unichar_ambigs