Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
classifier_tester.cpp File Reference
#include "base/commandlineflags.h"
#include "baseapi.h"
#include "commontraining.h"
#include "cubeclassifier.h"
#include "mastertrainer.h"
#include "params.h"
#include "strngs.h"
#include "tessclassifier.h"

Go to the source code of this file.

Enumerations

enum  ClassifierName {
  CN_PRUNER, CN_FULL, CN_CUBE, CN_CUBETESS,
  CN_COUNT
}
 

Functions

 STRING_PARAM_FLAG (classifier,"","Classifier to test")
 
 STRING_PARAM_FLAG (lang,"eng","Language to test")
 
 STRING_PARAM_FLAG (tessdata_dir,"","Directory of traineddata files")
 
int main (int argc, char **argv)
 

Variables

const char * names [] = {"pruner", "full", "cube", "cubetess", NULL }
 

Enumeration Type Documentation

Enumerator
CN_PRUNER 
CN_FULL 
CN_CUBE 
CN_CUBETESS 
CN_COUNT 

Definition at line 34 of file classifier_tester.cpp.

Function Documentation

int main ( int  argc,
char **  argv 
)

Public Function Prototypes

Definition at line 65 of file classifier_tester.cpp.

65  {
66  ParseArguments(&argc, &argv);
67  // Decode the classifier string.
68  ClassifierName classifier = CN_COUNT;
69  for (int c = 0; c < CN_COUNT; ++c) {
70  if (strcmp(FLAGS_classifier.c_str(), names[c]) == 0) {
71  classifier = static_cast<ClassifierName>(c);
72  break;
73  }
74  }
75  if (classifier == CN_COUNT) {
76  fprintf(stderr, "Invalid classifier name:%s\n", FLAGS_classifier.c_str());
77  return 1;
78  }
79 
80  STRING file_prefix;
81  tesseract::MasterTrainer* trainer = tesseract::LoadTrainingData(
82  argc, argv, true, NULL, &file_prefix);
83  // We want to test junk as well if it is available.
84  trainer->IncludeJunk();
85  // We want to test with replicated samples too.
86  trainer->ReplicateAndRandomizeSamplesIfRequired();
87 
88  // We need to initialize tesseract to test.
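89  tesseract::TessBaseAPI api;
90  tesseract::OcrEngineMode engine_mode = tesseract::OEM_TESSERACT_ONLY;
91  if (classifier == CN_CUBE || classifier == CN_CUBETESS)
92  engine_mode = tesseract::OEM_TESSERACT_CUBE_COMBINED;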
93  if (api.Init(FLAGS_tessdata_dir.c_str(), FLAGS_lang.c_str(),
94  engine_mode) < 0) {
95  fprintf(stderr, "Tesseract initialization failed!\n");
96  return 1;
97  }
98  tesseract::ShapeClassifier* shape_classifier = NULL;
99  tesseract::Tesseract* tesseract =
100  const_cast<tesseract::Tesseract*>(api.tesseract());
101  tesseract::Classify* classify =
102  reinterpret_cast<tesseract::Classify*>(tesseract);
103  // Copy the shape_table from the classifier and add the space character if
104  // not already present to count junk.
105  tesseract::ShapeTable shape_table;
106  shape_table.set_unicharset(classify->shape_table()->unicharset());
107  shape_table.AppendMasterShapes(*classify->shape_table());
108  if (shape_table.FindShape(0, -1) < 0)
109  shape_table.AddShape(0, 0);
110  if (classifier == CN_PRUNER) {
111  shape_classifier = new tesseract::TessClassifier(true, classify);
112  } else if (classifier == CN_FULL) {
113  shape_classifier = new tesseract::TessClassifier(false, classify);
114  } else if (classifier == CN_CUBE) {
115  shape_classifier = new tesseract::CubeClassifier(tesseract);
116  } else if (classifier == CN_CUBETESS) {
117  shape_classifier = new tesseract::CubeTessClassifier(tesseract);
118  } else {
119  fprintf(stderr, "%s tester not yet implemented\n",
120  FLAGS_classifier.c_str());
121  return 1;
122  }
123  tprintf("Testing classifier %s:\n", FLAGS_classifier.c_str());
124  trainer->TestClassifierOnSamples(3, false, shape_classifier, NULL);
125  if (classifier != CN_CUBE && classifier != CN_CUBETESS) {
126  // Test with replicated samples as well.
127  trainer->TestClassifierOnSamples(3, true, shape_classifier, NULL);
128  }
129  delete shape_classifier;
130  delete trainer;
131 
132  return 0;
133 } /* main */
int AddShape(int unichar_id, int font_id)
Definition: shapetable.cpp:249
MasterTrainer * LoadTrainingData(int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)
void ParseArguments(int *argc, char ***argv)
void set_unicharset(const UNICHARSET &unicharset)
Definition: shapetable.h:148
#define NULL
Definition: host.h:144
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
Definition: baseapi.cpp:213
ClassifierName
const UNICHARSET & unicharset() const
Definition: shapetable.h:143
void TestClassifierOnSamples(int report_level, bool replicate_samples, ShapeClassifier *test_classifier, STRING *report_string)
void AppendMasterShapes(const ShapeTable &other)
Definition: shapetable.cpp:439
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void ReplicateAndRandomizeSamplesIfRequired()
STRING
Definition: strngs.h:40
const char * names[]
int FindShape(int unichar_id, int font_id) const
Definition: shapetable.cpp:290
const ShapeTable * shape_table() const
Definition: classify.h:66
Tesseract *const tesseract() const
Definition: baseapi.h:653
STRING_PARAM_FLAG ( classifier  ,
""  ,
"Classifier to test"   
)
STRING_PARAM_FLAG ( lang  ,
"eng"  ,
"Language to test"   
)
STRING_PARAM_FLAG ( tessdata_dir  ,
""  ,
"Directory of traineddata files"   
)

Variable Documentation

const char* names[] = {"pruner", "full", "cube", "cubetess", NULL }

Definition at line 42 of file classifier_tester.cpp.