Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::ParagraphModelSmearer Class Reference

#include <paragraphs_internal.h>

Public Member Functions

 ParagraphModelSmearer (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
 
void Smear ()
 

Detailed Description

Definition at line 234 of file paragraphs_internal.h.

Constructor & Destructor Documentation

tesseract::ParagraphModelSmearer::ParagraphModelSmearer ( GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
ParagraphTheory *  theory 
)

Definition at line 1258 of file paragraphs.cpp.

1261  : theory_(theory), rows_(rows), row_start_(row_start),
1262  row_end_(row_end) {
1263  if (!AcceptableRowArgs(0, 0, __func__, rows, row_start, row_end)) {
1264  row_start_ = 0;
1265  row_end_ = 0;
1266  return;
1267  }
1268  SetOfModels no_models;
1269  for (int row = row_start - 1; row <= row_end; row++) {
1270  open_models_.push_back(no_models);
1271  }
1272 }
GenericVectorEqEq< const ParagraphModel * > SetOfModels

Member Function Documentation

void tesseract::ParagraphModelSmearer::Smear ( )

Definition at line 1305 of file paragraphs.cpp.

1305  {
1306  CalculateOpenModels(row_start_, row_end_);
1307 
1308  // For each row which we're unsure about (that is, it is LT_UNKNOWN or
1309  // we have multiple LT_START hypotheses), see if there's a model that
1310  // was recently used (an "open" model) which might model it well.
1311  for (int i = row_start_; i < row_end_; i++) {
1312  RowScratchRegisters &row = (*rows_)[i];
1313  if (row.ri_->num_words == 0)
1314  continue;
1315 
1316  // Step One:
1317  // Figure out if there are "open" models which are left-aligned or
1318  // right-aligned. This is important for determining whether the
1319  // "first" word in a row would fit at the "end" of the previous row.
1320  bool left_align_open = false;
1321  bool right_align_open = false;
1322  for (int m = 0; m < OpenModels(i).size(); m++) {
1323  switch (OpenModels(i)[m]->justification()) {
1324  case JUSTIFICATION_LEFT: left_align_open = true; break;
1325  case JUSTIFICATION_RIGHT: right_align_open = true; break;
1326  default: left_align_open = right_align_open = true;
1327  }
1328  }
1329  // Step Two:
1330  // Use that knowledge to figure out if this row is likely to
1331  // start a paragraph.
1332  bool likely_start;
1333  if (i == 0) {
1334  likely_start = true;
1335  } else {
1336  if ((left_align_open && right_align_open) ||
1337  (!left_align_open && !right_align_open)) {
1338  likely_start = LikelyParagraphStart((*rows_)[i - 1], row,
1339  JUSTIFICATION_LEFT) ||
1340  LikelyParagraphStart((*rows_)[i - 1], row,
1341  JUSTIFICATION_RIGHT);
1342  } else if (left_align_open) {
1343  likely_start = LikelyParagraphStart((*rows_)[i - 1], row,
1344  JUSTIFICATION_LEFT);
1345  } else {
1346  likely_start = LikelyParagraphStart((*rows_)[i - 1], row,
1347  JUSTIFICATION_RIGHT);
1348  }
1349  }
1350 
1351  // Step Three:
1352  // If this text line seems like an obvious first line of an
1353  // open model, or an obvious continuation of an existing
1354  // modelled paragraph, mark it up.
1355  if (likely_start) {
1356  // Add Start Hypotheses for all Open models that fit.
1357  for (int m = 0; m < OpenModels(i).size(); m++) {
1358  if (ValidFirstLine(rows_, i, OpenModels(i)[m])) {
1359  row.AddStartLine(OpenModels(i)[m]);
1360  }
1361  }
1362  } else {
1363  // Add relevant body line hypotheses.
1364  SetOfModels last_line_models;
1365  if (i > 0) {
1366  (*rows_)[i - 1].StrongHypotheses(&last_line_models);
1367  } else {
1368  theory_->NonCenteredModels(&last_line_models);
1369  }
1370  for (int m = 0; m < last_line_models.size(); m++) {
1371  const ParagraphModel *model = last_line_models[m];
1372  if (ValidBodyLine(rows_, i, model))
1373  row.AddBodyLine(model);
1374  }
1375  }
1376 
1377  // Step Four:
1378  // If we're still quite unsure about this line, go through all
1379  // models in our theory and see if this row could be the start
1380  // of any of our models.
1381  if (row.GetLineType() == LT_UNKNOWN ||
1382  (row.GetLineType() == LT_START && !row.UniqueStartHypothesis())) {
1383  SetOfModels all_models;
1384  theory_->NonCenteredModels(&all_models);
1385  for (int m = 0; m < all_models.size(); m++) {
1386  if (ValidFirstLine(rows_, i, all_models[m])) {
1387  row.AddStartLine(all_models[m]);
1388  }
1389  }
1390  }
1391  // Step Five:
1392  // Since we may have updated the hypotheses about this row, we need
1393  // to recalculate the Open models for the rest of rows[i + 1, row_end)
1394  if (row.GetLineType() != LT_UNKNOWN) {
1395  CalculateOpenModels(i + 1, row_end_);
1396  }
1397  }
1398 }
void NonCenteredModels(SetOfModels *models)
bool ValidBodyLine(const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
bool ValidFirstLine(const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
int size() const
Definition: genericvector.h:59
bool LikelyParagraphStart(const RowScratchRegisters &before, const RowScratchRegisters &after)
GenericVectorEqEq< const ParagraphModel * > SetOfModels

The documentation for this class was generated from the following files: