Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ocrblock.cpp File Reference
#include "mfcpch.h"
#include <stdlib.h>
#include "blckerr.h"
#include "ocrblock.h"
#include "stepblob.h"
#include "tprintf.h"

Go to the source code of this file.

Macros

#define BLOCK_LABEL_HEIGHT   150
 
#define ROW_SPACING   5
 

Functions

int decreasing_top_order (const void *row1, const void *row2)
 
bool LeftMargin (ICOORDELT_LIST *segments, int x, int *margin)
 
bool RightMargin (ICOORDELT_LIST *segments, int x, int *margin)
 
void PrintSegmentationStats (BLOCK_LIST *block_list)
 
void ExtractBlobsFromSegmentation (BLOCK_LIST *blocks, C_BLOB_LIST *output_blob_list)
 
void RefreshWordBlobsFromNewBlobs (BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs, C_BLOB_LIST *not_found_blobs)
 

Macro Definition Documentation

#define BLOCK_LABEL_HEIGHT   150

Definition at line 27 of file ocrblock.cpp.

#define ROW_SPACING   5

Function Documentation

int decreasing_top_order ( const void *  row1,
const void *  row2 
)

decreasing_top_order

Sort Comparator: Return <0 if row1 top > row2 top, so that rows sort in decreasing order of their bounding-box top.

Definition at line 72 of file ocrblock.cpp.

74  {
75  return (*(ROW **) row2)->bounding_box ().top () -
76  (*(ROW **) row1)->bounding_box ().top ();
77 }
Definition: ocrrow.h:32
void ExtractBlobsFromSegmentation ( BLOCK_LIST *  blocks,
C_BLOB_LIST *  output_blob_list 
)

Definition at line 433 of file ocrblock.cpp.

434  {
435  C_BLOB_IT return_list_it(output_blob_list);
436  BLOCK_IT block_it(blocks);
437  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
438  BLOCK* block = block_it.data();
439  ROW_IT row_it(block->row_list());
440  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
441  ROW* row = row_it.data();
442  // Iterate over all werds in the row.
443  WERD_IT werd_it(row->word_list());
444  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
445  WERD* werd = werd_it.data();
446  return_list_it.move_to_last();
447  return_list_it.add_list_after(werd->cblob_list());
448  return_list_it.move_to_last();
449  return_list_it.add_list_after(werd->rej_cblob_list());
450  }
451  }
452  }
453 }
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
C_BLOB_LIST * rej_cblob_list()
Definition: werd.h:95
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:121
Definition: ocrrow.h:32
Definition: ocrblock.h:31
Definition: werd.h:60
WERD_LIST * word_list()
Definition: ocrrow.h:52
bool LeftMargin ( ICOORDELT_LIST *  segments,
int  x,
int *  margin 
)

Definition at line 244 of file ocrblock.cpp.

244  {
245  bool found = false;
246  *margin = 0;
247  if (segments->empty())
248  return found;
249  ICOORDELT_IT seg_it(segments);
250  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
251  int cur_margin = x - seg_it.data()->x();
252  if (cur_margin >= 0) {
253  if (!found) {
254  *margin = cur_margin;
255  } else if (cur_margin < *margin) {
256  *margin = cur_margin;
257  }
258  found = true;
259  }
260  }
261  return found;
262 }
void PrintSegmentationStats ( BLOCK_LIST *  block_list)

Definition at line 400 of file ocrblock.cpp.

400  {
401  int num_blocks = 0;
402  int num_rows = 0;
403  int num_words = 0;
404  int num_blobs = 0;
405  BLOCK_IT block_it(block_list);
406  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
407  BLOCK* block = block_it.data();
408  ++num_blocks;
409  ROW_IT row_it(block->row_list());
410  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
411  ++num_rows;
412  ROW* row = row_it.data();
413  // Iterate over all werds in the row.
414  WERD_IT werd_it(row->word_list());
415  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
416  WERD* werd = werd_it.data();
417  ++num_words;
418  num_blobs += werd->cblob_list()->length();
419  }
420  }
421  }
422  tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n",
423  num_blocks, num_rows, num_words, num_blobs);
424 }
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:121
Definition: ocrrow.h:32
Definition: ocrblock.h:31
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
Definition: werd.h:60
WERD_LIST * word_list()
Definition: ocrrow.h:52
void RefreshWordBlobsFromNewBlobs ( BLOCK_LIST *  block_list,
C_BLOB_LIST *  new_blobs,
C_BLOB_LIST *  not_found_blobs 
)

Definition at line 468 of file ocrblock.cpp.

470  {
471  // Iterate over all the words in every row of every block in block_list, and
472  // replace the corresponding c-blobs inside the werds.
473  BLOCK_IT block_it(block_list);
474  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
475  BLOCK* block = block_it.data();
476  // Iterate over all rows in the block.
477  ROW_IT row_it(block->row_list());
478  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
479  ROW* row = row_it.data();
480  // Iterate over all werds in the row.
481  WERD_IT werd_it(row->word_list());
482  WERD_LIST new_words;
483  WERD_IT new_words_it(&new_words);
484  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
485  WERD* werd = werd_it.extract();
486  WERD* new_werd = werd->ConstructWerdWithNewBlobs(new_blobs,
487  not_found_blobs);
488  if (new_werd) {
489  // Insert this new werd into the actual row's werd-list. Remove the
490  // existing one.
491  new_words_it.add_after_then_move(new_werd);
492  delete werd;
493  } else {
494  // Reinsert the older word back, for lack of better options.
495  // This is critical since dropping the words messes up segmentation:
496  // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on.
497  new_words_it.add_after_then_move(werd);
498  }
499  }
500  // Get rid of the old word list & replace it with the new one.
501  row->word_list()->clear();
502  werd_it.move_to_first();
503  werd_it.add_list_after(&new_words);
504  }
505  }
506 }
WERD * ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs)
Definition: werd.cpp:402
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:121
Definition: ocrrow.h:32
Definition: ocrblock.h:31
Definition: werd.h:60
WERD_LIST * word_list()
Definition: ocrrow.h:52
bool RightMargin ( ICOORDELT_LIST *  segments,
int  x,
int *  margin 
)

Definition at line 274 of file ocrblock.cpp.

274  {
275  bool found = false;
276  *margin = 0;
277  if (segments->empty())
278  return found;
279  ICOORDELT_IT seg_it(segments);
280  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
281  int cur_margin = seg_it.data()->x() + seg_it.data()->y() - x;
282  if (cur_margin >= 0) {
283  if (!found) {
284  *margin = cur_margin;
285  } else if (cur_margin < *margin) {
286  *margin = cur_margin;
287  }
288  found = true;
289  }
290  }
291  return found;
292 }