#include <tessdatamanager.h>
Definition at line 131 of file tessdatamanager.h.
| tesseract::TessdataManager::TessdataManager |
( |
| ) |
|
|
inline |
Definition at line 133 of file tessdatamanager.h.
135 actual_tessdata_num_entries_ = 0;
137 offset_table_[i] = -1;
| tesseract::TessdataManager::~TessdataManager |
( |
| ) |
|
|
inline |
| bool tesseract::TessdataManager::CombineDataFiles |
( |
const char * |
language_data_path_prefix, |
|
|
const char * |
output_filename |
|
) |
| |
|
static |
Reads all the standard tesseract config and data files for a language at the given path and bundles them up into one binary data file. Returns true if the combined traineddata file was successfully written.
Definition at line 108 of file tessdatamanager.cpp.
114 FILE *output_file = fopen(output_filename,
"wb");
115 if (output_file ==
NULL) {
116 tprintf(
"Error opening %s for writing\n", output_filename);
121 sizeof(
inT32) +
sizeof(
inT64) * TESSDATA_NUM_ENTRIES, SEEK_SET);
124 bool text_file =
false;
130 kTessdataFileSuffixes[i], &type, &text_file));
132 filename += kTessdataFileSuffixes[i];
133 file_ptr[i] = fopen(filename.
string(),
"rb");
134 if (file_ptr[i] !=
NULL) {
135 offset_table[type] = ftell(output_file);
136 CopyFile(file_ptr[i], output_file, text_file, -1);
143 tprintf(
"Error opening unicharset file\n");
150 tprintf(
"Error opening pffmtable and/or normproto files"
151 " while inttemp file was present\n");
static void WriteMetadata(inT64 *offset_table, FILE *output_file)
static void CopyFile(FILE *input_file, FILE *output_file, bool newline_end, inT64 num_bytes_to_copy)
static bool TessdataTypeFromFileSuffix(const char *suffix, TessdataType *type, bool *text_file)
const char * string() const
DLLSYM void tprintf(const char *format,...)
| void tesseract::TessdataManager::CopyFile |
( |
FILE * |
input_file, |
|
|
FILE * |
output_file, |
|
|
bool |
newline_end, |
|
|
inT64 |
num_bytes_to_copy |
|
) |
| |
|
static |
Copies data from the given input file to the output_file provided. If num_bytes_to_copy is >= 0, only num_bytes_to_copy bytes are copied from the input file; otherwise, all the data in the input file is copied.
Definition at line 68 of file tessdatamanager.cpp.
70 if (num_bytes_to_copy == 0)
return;
71 int buffer_size = 1024;
72 if (num_bytes_to_copy > 0 && buffer_size > num_bytes_to_copy) {
73 buffer_size = num_bytes_to_copy;
75 inT64 num_bytes_copied = 0;
76 char *chunk =
new char[buffer_size];
79 while ((bytes_read = fread(chunk,
sizeof(
char),
80 buffer_size, input_file))) {
81 fwrite(chunk,
sizeof(
char), bytes_read, output_file);
82 last_char = chunk[bytes_read-1];
83 if (num_bytes_to_copy > 0) {
84 num_bytes_copied += bytes_read;
85 if (num_bytes_copied == num_bytes_to_copy)
break;
86 if (num_bytes_copied + buffer_size > num_bytes_to_copy) {
87 buffer_size = num_bytes_to_copy - num_bytes_copied;
| int tesseract::TessdataManager::DebugLevel |
( |
| ) |
|
|
inline |
| void tesseract::TessdataManager::End |
( |
| ) |
|
|
inline |
| bool tesseract::TessdataManager::ExtractToFile |
( |
const char * |
filename | ) |
|
Extracts the tessdata component implied by the given filename from the combined traineddata loaded into TessdataManager, and writes the extracted component to the file with that name. E.g. if the filename given is somepath/somelang.unicharset, unicharset will be extracted from the data loaded into the TessdataManager and will be written to somepath/somelang.unicharset.
Returns: true if the component was successfully extracted, false if the component was not present in the traineddata loaded into TessdataManager.
Definition at line 233 of file tessdatamanager.cpp.
235 bool text_file =
false;
240 FILE *output_file = fopen(
filename,
"wb");
241 if (output_file ==
NULL) {
242 printf(
"Error openning %s\n",
filename);
249 end_offset - begin_offset + 1);
FILE * GetDataFilePtr() const
static void CopyFile(FILE *input_file, FILE *output_file, bool newline_end, inT64 num_bytes_to_copy)
static bool TessdataTypeFromFileName(const char *filename, TessdataType *type, bool *text_file)
bool SeekToStart(TessdataType tessdata_type)
inT64 GetEndOffset(TessdataType tessdata_type) const
| FILE* tesseract::TessdataManager::GetDataFilePtr |
( |
| ) |
const |
|
inline |
Returns data file pointer.
Definition at line 150 of file tessdatamanager.h.
150 {
return data_file_; }
Returns the end offset for the given tesseract data file type.
Definition at line 173 of file tessdatamanager.h.
174 int index = tessdata_type + 1;
175 while (index < actual_tessdata_num_entries_ && offset_table_[index] == -1) {
179 tprintf(
"TessdataManager: end offset for type %d is %lld\n",
181 (index == actual_tessdata_num_entries_) ? -1
182 : offset_table_[index]);
184 return (index == actual_tessdata_num_entries_) ? -1 : offset_table_[index] - 1;
DLLSYM void tprintf(const char *format,...)
| bool tesseract::TessdataManager::Init |
( |
const char * |
data_file_name, |
|
|
int |
debug_level |
|
) |
| |
Opens the given data file and reads the offset table. Returns true on success.
Definition at line 35 of file tessdatamanager.cpp.
37 debug_level_ = debug_level;
38 data_file_ = fopen(data_file_name,
"rb");
39 if (data_file_ ==
NULL) {
40 tprintf(
"Error opening data file %s\n", data_file_name);
41 tprintf(
"Please make sure the TESSDATA_PREFIX environment variable is set "
42 "to the parent directory of your \"tessdata\" directory.\n");
45 fread(&actual_tessdata_num_entries_,
sizeof(
inT32), 1, data_file_);
46 swap_ = (actual_tessdata_num_entries_ > kMaxNumTessdataEntries);
48 actual_tessdata_num_entries_ =
reverse32(actual_tessdata_num_entries_);
51 fread(offset_table_,
sizeof(
inT64),
52 actual_tessdata_num_entries_, data_file_);
54 for (i = 0 ; i < actual_tessdata_num_entries_; ++i) {
55 offset_table_[i] =
reverse64(offset_table_[i]);
59 tprintf(
"TessdataManager loaded %d types of tesseract data files.\n",
60 actual_tessdata_num_entries_);
61 for (i = 0; i < actual_tessdata_num_entries_; ++i) {
62 tprintf(
"Offset for type %d is %lld\n", i, offset_table_[i]);
DLLSYM uinT64 reverse64(uinT64 num)
DLLSYM uinT32 reverse32(uinT32 num)
DLLSYM void tprintf(const char *format,...)
| bool tesseract::TessdataManager::OverwriteComponents |
( |
const char * |
new_traineddata_filename, |
|
|
char ** |
component_filenames, |
|
|
int |
num_new_components |
|
) |
| |
Gets the individual components from the data_file_ with which the class was initialized. Overwrites the components specified by component_filenames. Writes the updated traineddata file to new_traineddata_filename.
Definition at line 160 of file tessdatamanager.cpp.
167 bool text_file =
false;
170 offset_table[i] = -1;
173 FILE *output_file = fopen(new_traineddata_filename,
"wb");
174 if (output_file ==
NULL) {
175 tprintf(
"Error opening %s for writing\n", new_traineddata_filename);
181 sizeof(
inT32) +
sizeof(
inT64) * TESSDATA_NUM_ENTRIES, SEEK_SET);
184 for (i = 0; i < num_new_components; ++i) {
186 file_ptr[type] = fopen(component_filenames[i],
"rb");
191 if (file_ptr[i] !=
NULL) {
193 offset_table[i] = ftell(output_file);
194 CopyFile(file_ptr[i], output_file, kTessdataFileIsText[i], -1);
199 offset_table[i] = ftell(output_file);
200 CopyFile(data_file_, output_file, kTessdataFileIsText[i],
202 ftell(data_file_) + 1);
static void WriteMetadata(inT64 *offset_table, FILE *output_file)
static void CopyFile(FILE *input_file, FILE *output_file, bool newline_end, inT64 num_bytes_to_copy)
static bool TessdataTypeFromFileName(const char *filename, TessdataType *type, bool *text_file)
bool SeekToStart(TessdataType tessdata_type)
inT64 GetEndOffset(TessdataType tessdata_type) const
DLLSYM void tprintf(const char *format,...)
| bool tesseract::TessdataManager::SeekToStart |
( |
TessdataType |
tessdata_type | ) |
|
|
inline |
Returns false if there is no data of the given type. Otherwise does a seek on the data_file_ to position the pointer at the start of the data of the given type.
Definition at line 157 of file tessdatamanager.h.
159 tprintf(
"TessdataManager: seek to offset %lld - start of tessdata"
160 "type %d (%s))\n", offset_table_[tessdata_type],
161 tessdata_type, kTessdataFileSuffixes[tessdata_type]);
163 if (offset_table_[tessdata_type] < 0) {
167 static_cast<size_t>(offset_table_[tessdata_type]),
DLLSYM void tprintf(const char *format,...)
| bool tesseract::TessdataManager::swap |
( |
| ) |
const |
|
inline |
| bool tesseract::TessdataManager::TessdataTypeFromFileName |
( |
const char * |
filename, |
|
|
TessdataType * |
type, |
|
|
bool * |
text_file |
|
) |
| |
|
static |
Tries to determine the tessdata component file suffix from filename. Returns true on success.
Definition at line 225 of file tessdatamanager.cpp.
228 const char *suffix = strrchr(
filename,
'.');
229 if (suffix ==
NULL || *(++suffix) ==
'\0')
return false;
static bool TessdataTypeFromFileSuffix(const char *suffix, TessdataType *type, bool *text_file)
| bool tesseract::TessdataManager::TessdataTypeFromFileSuffix |
( |
const char * |
suffix, |
|
|
TessdataType * |
type, |
|
|
bool * |
text_file |
|
) |
| |
|
static |
Fills type with the TessdataType of the tessdata component represented by the given file name suffix. E.g. the suffix of tessdata/eng.unicharset -> TESSDATA_UNICHARSET. Sets *text_file to true if the component is in text format (e.g. unicharset, unichar ambigs, config).
Returns: true if the tessdata component type could be determined from the given file name.
Definition at line 211 of file tessdatamanager.cpp.
214 if (strcmp(kTessdataFileSuffixes[i], suffix) == 0) {
216 *text_file = kTessdataFileIsText[i];
220 printf(
"TessdataManager can't determine which tessdata"
221 " component is represented by %s\n", suffix);
| void tesseract::TessdataManager::WriteMetadata |
( |
inT64 * |
offset_table, |
|
|
FILE * |
output_file |
|
) |
| |
|
static |
Writes the number of entries and the given offset table to output_file.
Definition at line 95 of file tessdatamanager.cpp.
96 fseek(output_file, 0, SEEK_SET);
98 fwrite(&num_entries,
sizeof(
inT32), 1, output_file);
102 tprintf(
"TessdataManager combined tesseract data files.\n");
104 tprintf(
"Offset for type %d is %lld\n", i, offset_table[i]);
DLLSYM void tprintf(const char *format,...)
The documentation for this class was generated from the following files: