Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
makerow.cpp File Reference
#include "mfcpch.h"
#include "stderr.h"
#include "blobbox.h"
#include "ccstruct.h"
#include "detlinefit.h"
#include "statistc.h"
#include "drawtord.h"
#include "blkocc.h"
#include "sortflts.h"
#include "oldbasel.h"
#include "textord.h"
#include "tordmain.h"
#include "underlin.h"
#include "makerow.h"
#include "tprintf.h"
#include "tovars.h"

Go to the source code of this file.

Namespaces

 tesseract
 

Macros

#define MAX_HEIGHT_MODES   12
 

Functions

float MakeRowFromSubBlobs (TO_BLOCK *block, C_BLOB *blob, TO_ROW_IT *row_it)
 
make_single_row

Arrange the blobs into a single row. If there is only a single blob, it actually makes 2 rows, in case the top-level blob is a container of the real blobs to recognize.

float make_single_row (ICOORD page_tr, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
 
make_rows

Arrange the blobs into rows.

float make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
 
make_initial_textrows

Arrange the good blobs into rows of text.

void make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
 
fit_lms_line

Fit an LMS line to a row.

void fit_lms_line (TO_ROW *row)
 
find_best_dropout_row

Delete this row if it has a neighbour with better dropout characteristics. TRUE is returned if the row should be deleted.

BOOL8 find_best_dropout_row (TO_ROW *row, inT32 distance, float dist_limit, inT32 line_index, TO_ROW_IT *row_it, BOOL8 testing_on)
 
deskew_block_coords

Compute the bounding box of all the blobs in the block if they were deskewed without actually doing it.

TBOX deskew_block_coords (TO_BLOCK *block, float gradient)
 
compute_line_occupation

Compute the pixel projection back on the y axis given the global skew. Also compute the 1st derivative.

void compute_line_occupation (TO_BLOCK *block, float gradient, inT32 min_y, inT32 max_y, inT32 *occupation, inT32 *deltas)
 
void compute_occupation_threshold (inT32 low_window, inT32 high_window, inT32 line_count, inT32 *occupation, inT32 *thresholds)
 
compute_dropout_distances

Compute the distance from each coordinate to the nearest dropout.

void compute_dropout_distances (inT32 *occupation, inT32 *thresholds, inT32 line_count)
 
expand_rows

Expand each row to the lesser of its allowed size and the size at which it touches its neighbours. If the expansion would entirely swallow a neighbouring row then do so.

void expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 
void adjust_row_limits (TO_BLOCK *block)
 
compute_row_stats

Compute the linespacing and offset.

void compute_row_stats (TO_BLOCK *block, BOOL8 testing_on)
 
fill_heights

Fill the given heights with heights of the blobs that are legal candidates for estimating xheight.

void fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
 
compute_xheight_from_modes

Given a STATS object heights, looks for the two most frequently occurring heights that look like xheight and xheight + ascrise. If found, sets the values of *xheight and *ascrise accordingly; otherwise sets *xheight to any most frequently occurring height and sets *ascrise to 0. Returns the number of times xheight occurred in heights. For each mode that is considered for being an xheight, the count of floating blobs (stored in floating_heights) is subtracted from the total count of the blobs of this height. This is done because blobs that sit far above the baseline could represent valid ascenders, but it is highly unlikely that such a character's height will be an xheight (e.g. -, ', =, ^, `, ", ', etc.). If cap_only, then force finding of only the top mode.

int compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
 
compute_row_descdrop

Estimates the descdrop of this row. This function looks for "significant" descenders of lowercase letters (those that could not just be the small descenders of upper case letters like Q,J). The function also takes into account how many potential ascenders this row might contain. If the number of potential ascenders along with descenders is close to the expected fraction of the total number of blobs in the row, the function returns the descender height; otherwise it returns 0.

inT32 compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
 
compute_height_modes

Find the top maxmodes values in the input array and put their indices in the output in the order in which they occurred.

inT32 compute_height_modes (STATS *heights, inT32 min_height, inT32 max_height, inT32 *modes, inT32 maxmodes)
 
correct_row_xheight

Adjust the xheight etc of this row if not within reasonable limits of the average for the block.

void correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop)
 
separate_underlines

Test wide objects for being potential underlines. If they are then put them in a separate list in the block.

void separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, BOOL8 testing_on)
 
pre_associate_blobs

Associate overlapping blobs and fake chop wide blobs.

void pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
 
fit_parallel_rows

Re-fit the rows in the block to the given gradient.

void fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 
fit_parallel_lms

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void fit_parallel_lms (float gradient, TO_ROW *row)
 
make_baseline_spline

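Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly. (Note: this description is identical to that of fit_parallel_lms, and this function takes no gradient argument, so it appears to have been copied; judging by its name, the function presumably fits a baseline spline to the row.)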

void make_baseline_spline (TO_ROW *row, TO_BLOCK *block)
 
segment_baseline

Divide the baseline up into segments which require a different quadratic fitted to them. Return TRUE if enough blobs were far enough away to need a quadratic.

BOOL8 segment_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
 
linear_spline_baseline

Divide the baseline up into segments which require a different quadratic fitted to them.

Returns
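TRUE if enough blobs were far enough away to need a quadratic. (Note: the declared return type is double *, not BOOL8, so this description appears to have been carried over from segment_baseline.)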
double * linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
 
assign_blobs_to_rows

Make enough rows to allocate each of the given blobs to one. If a block skew is given, use that, else attempt to track it.

void assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
 
most_overlapping_row

Return the row which most overlaps the blob.

OVERLAP_STATE most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, BOOL8 testing_blob)
 
blob_x_order

Sort function to sort blobs in x from page left.

int blob_x_order (const void *item1, const void *item2)
 
row_y_order

Sort function to sort rows in y from page top.

int row_y_order (const void *item1, const void *item2)
 
row_spacing_order

Qsort style function to compare 2 TO_ROWS based on their spacing value.

int row_spacing_order (const void *item1, const void *item2)
 
mark_repeated_chars

Mark blobs marked with BTFT_LEADER in repeated sets using the repeated_set member of BLOBNBOX.

void mark_repeated_chars (TO_ROW *row)
 

Variables

bool textord_heavy_nr = FALSE
 
bool textord_show_initial_rows = FALSE
 
bool textord_show_parallel_rows = FALSE
 
bool textord_show_expanded_rows = FALSE
 
bool textord_show_final_rows = FALSE
 
bool textord_show_final_blobs = FALSE
 
bool textord_test_landscape = FALSE
 
bool textord_parallel_baselines = TRUE
 
bool textord_straight_baselines = FALSE
 
bool textord_old_baselines = TRUE
 
bool textord_old_xheight = FALSE
 
bool textord_fix_xheight_bug = TRUE
 
bool textord_fix_makerow_bug = TRUE
 
bool textord_debug_xheights = FALSE
 
bool textord_biased_skewcalc = TRUE
 
bool textord_interpolating_skew = TRUE
 
int textord_skewsmooth_offset = 2
 
int textord_skewsmooth_offset2 = 1
 
int textord_test_x = -1
 
int textord_test_y = -1
 
int textord_min_blobs_in_row = 4
 
int textord_spline_minblobs = 8
 
int textord_spline_medianwin = 6
 
int textord_max_blob_overlaps = 4
 
int textord_min_xheight = 10
 
double textord_spline_shift_fraction = 0.02
 
double textord_spline_outlier_fraction = 0.1
 
double textord_skew_ile = 0.5
 
double textord_skew_lag = 0.01
 
double textord_linespace_iqrlimit = 0.2
 
double textord_width_limit = 8
 
double textord_chop_width = 1.5
 
double textord_expansion_factor = 1.0
 
double textord_overlap_x = 0.5
 
double textord_minxh = 0.25
 
double textord_min_linesize = 1.25
 
double textord_excess_blobsize = 1.3
 
double textord_occupancy_threshold = 0.4
 
double textord_underline_width = 2.0
 
double textord_min_blob_height_fraction = 0.75
 
double textord_xheight_mode_fraction = 0.4
 
double textord_ascheight_mode_fraction = 0.08
 
double textord_descheight_mode_fraction = 0.08
 
double textord_ascx_ratio_min = 1.25
 
double textord_ascx_ratio_max = 1.8
 
double textord_descx_ratio_min = 0.25
 
double textord_descx_ratio_max = 0.6
 
double textord_xheight_error_margin = 0.1
 
int textord_lms_line_trials = 12
 
bool textord_new_initial_xheight = TRUE
 
const int kMinLeaderCount = 5
 

compute_page_skew

Compute the skew over a full page by averaging the gradients over all the lines. Get the error of the same row.

const double kNoiseSize = 0.5
 
const int kMinSize = 8
 
void compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
 
void cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 
void delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 

Macro Definition Documentation

#define MAX_HEIGHT_MODES   12

Definition at line 105 of file makerow.cpp.

Function Documentation

void adjust_row_limits ( TO_BLOCK *block)

adjust_row_limits

Change the limits of rows to suit the default fractions.

Definition at line 1223 of file makerow.cpp.

1225  {
1226  TO_ROW *row; //current row
1227  float size; //size of row
1228  float ymax; //top of row
1229  float ymin; //bottom of row
1230  TO_ROW_IT row_it = block->get_rows ();
1231 
1233  tprintf("Adjusting row limits for block(%d,%d)\n",
1234  block->block->bounding_box().left(),
1235  block->block->bounding_box().top());
1236  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1237  row = row_it.data ();
1238  size = row->max_y () - row->min_y ();
1240  tprintf("Row at %f has min %f, max %f, size %f\n",
1241  row->intercept(), row->min_y(), row->max_y(), size);
1245  ymax = size * (tesseract::CCStruct::kXHeightFraction +
1248  row->set_limits (row->intercept () + ymin, row->intercept () + ymax);
1249  row->merged = FALSE;
1250  }
1251 }
float min_y() const
Definition: blobbox.h:533
float intercept() const
Definition: blobbox.h:560
void set_limits(float new_min, float new_max)
Definition: blobbox.h:594
#define FALSE
Definition: capi.h:28
static const double kXHeightFraction
Definition: ccstruct.h:35
static const double kAscenderFraction
Definition: ccstruct.h:36
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
bool textord_show_expanded_rows
Definition: makerow.cpp:48
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
float max_y() const
Definition: blobbox.h:530
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:70
BLOCK * block
Definition: blobbox.h:740
BOOL8 merged
Definition: blobbox.h:617
static const double kDescenderFraction
Definition: ccstruct.h:34
void assign_blobs_to_rows ( TO_BLOCK *block,
float *  gradient,
int  pass,
BOOL8  reject_misses,
BOOL8  make_new_rows,
BOOL8  drawing_skew 
)

Definition at line 2402 of file makerow.cpp.

2409  {
2410  OVERLAP_STATE overlap_result; //what to do with it
2411  float ycoord; //current y
2412  float top, bottom; //of blob
2413  float g_length = 1.0f; //from gradient
2414  inT16 row_count; //no of rows
2415  inT16 left_x; //left edge
2416  inT16 last_x; //previous edge
2417  float block_skew; //y delta
2418  float smooth_factor; //for new coords
2419  float near_dist; //dist to nearest row
2420  ICOORD testpt; //testing only
2421  BLOBNBOX *blob; //current blob
2422  TO_ROW *row; //current row
2423  TO_ROW *dest_row = NULL; //row to put blob in
2424  //iterators
2425  BLOBNBOX_IT blob_it = &block->blobs;
2426  TO_ROW_IT row_it = block->get_rows ();
2427 
2428  ycoord =
2429  (block->block->bounding_box ().bottom () +
2430  block->block->bounding_box ().top ()) / 2.0f;
2431  if (gradient != NULL)
2432  g_length = sqrt (1 + *gradient * *gradient);
2433 #ifndef GRAPHICS_DISABLED
2434  if (drawing_skew)
2435  to_win->SetCursor(block->block->bounding_box ().left (), ycoord);
2436 #endif
2437  testpt = ICOORD (textord_test_x, textord_test_y);
2438  blob_it.sort (blob_x_order);
2439  smooth_factor = 1.0;
2440  block_skew = 0.0f;
2441  row_count = row_it.length (); //might have rows
2442  if (!blob_it.empty ()) {
2443  left_x = blob_it.data ()->bounding_box ().left ();
2444  }
2445  else {
2446  left_x = block->block->bounding_box ().left ();
2447  }
2448  last_x = left_x;
2449  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
2450  blob = blob_it.data ();
2451  if (gradient != NULL) {
2452  block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom ()
2453  + *gradient / g_length * blob->bounding_box ().left ();
2454  }
2455  else if (blob->bounding_box ().left () - last_x > block->line_size / 2
2456  && last_x - left_x > block->line_size * 2
2458  // tprintf("Interpolating skew from %g",block_skew);
2459  block_skew *= (float) (blob->bounding_box ().left () - left_x)
2460  / (last_x - left_x);
2461  // tprintf("to %g\n",block_skew);
2462  }
2463  last_x = blob->bounding_box ().left ();
2464  top = blob->bounding_box ().top () - block_skew;
2465  bottom = blob->bounding_box ().bottom () - block_skew;
2466 #ifndef GRAPHICS_DISABLED
2467  if (drawing_skew)
2468  to_win->DrawTo(blob->bounding_box ().left (), ycoord + block_skew);
2469 #endif
2470  if (!row_it.empty ()) {
2471  for (row_it.move_to_first ();
2472  !row_it.at_last () && row_it.data ()->min_y () > top;
2473  row_it.forward ());
2474  row = row_it.data ();
2475  if (row->min_y () <= top && row->max_y () >= bottom) {
2476  //any overlap
2477  dest_row = row;
2478  overlap_result = most_overlapping_row (&row_it, dest_row,
2479  top, bottom,
2480  block->line_size,
2481  blob->bounding_box ().
2482  contains (testpt));
2483  if (overlap_result == NEW_ROW && !reject_misses)
2484  overlap_result = ASSIGN;
2485  }
2486  else {
2487  overlap_result = NEW_ROW;
2488  if (!make_new_rows) {
2489  near_dist = row_it.data_relative (-1)->min_y () - top;
2490  //below bottom
2491  if (bottom < row->min_y ()) {
2492  if (row->min_y () - bottom <=
2493  (block->line_spacing -
2495  //done it
2496  overlap_result = ASSIGN;
2497  dest_row = row;
2498  }
2499  }
2500  else if (near_dist > 0
2501  && near_dist < bottom - row->max_y ()) {
2502  row_it.backward ();
2503  dest_row = row_it.data ();
2504  if (dest_row->min_y () - bottom <=
2505  (block->line_spacing -
2507  //done it
2508  overlap_result = ASSIGN;
2509  }
2510  }
2511  else {
2512  if (top - row->max_y () <=
2513  (block->line_spacing -
2514  block->line_size) * (textord_overlap_x +
2516  //done it
2517  overlap_result = ASSIGN;
2518  dest_row = row;
2519  }
2520  }
2521  }
2522  }
2523  if (overlap_result == ASSIGN)
2524  dest_row->add_blob (blob_it.extract (), top, bottom,
2525  block->line_size);
2526  if (overlap_result == NEW_ROW) {
2527  if (make_new_rows && top - bottom < block->max_blob_size) {
2528  dest_row =
2529  new TO_ROW (blob_it.extract (), top, bottom,
2530  block->line_size);
2531  row_count++;
2532  if (bottom > row_it.data ()->min_y ())
2533  row_it.add_before_then_move (dest_row);
2534  //insert in right place
2535  else
2536  row_it.add_after_then_move (dest_row);
2537  smooth_factor =
2538  1.0 / (row_count * textord_skew_lag +
2540  }
2541  else
2542  overlap_result = REJECT;
2543  }
2544  }
2545  else if (make_new_rows && top - bottom < block->max_blob_size) {
2546  overlap_result = NEW_ROW;
2547  dest_row =
2548  new TO_ROW (blob_it.extract (), top, bottom, block->line_size);
2549  row_count++;
2550  row_it.add_after_then_move (dest_row);
2551  smooth_factor = 1.0 / (row_count * textord_skew_lag +
2553  }
2554  else
2555  overlap_result = REJECT;
2556  if (blob->bounding_box ().contains (testpt)) {
2557  if (overlap_result != REJECT) {
2558  tprintf ("Test blob assigned to row at (%g,%g) on pass %d\n",
2559  dest_row->min_y (), dest_row->max_y (), pass);
2560  }
2561  else {
2562  tprintf ("Test blob assigned to no row on pass %d\n", pass);
2563  }
2564  }
2565  if (overlap_result != REJECT) {
2566  while (!row_it.at_first ()
2567  && row_it.data ()->min_y () >
2568  row_it.data_relative (-1)->min_y ()) {
2569  row = row_it.extract ();
2570  row_it.backward ();
2571  row_it.add_before_then_move (row);
2572  }
2573  while (!row_it.at_last ()
2574  && row_it.data ()->min_y () <
2575  row_it.data_relative (1)->min_y ()) {
2576  row = row_it.extract ();
2577  row_it.forward ();
2578  //keep rows in order
2579  row_it.add_after_then_move (row);
2580  }
2581  block_skew = (1 - smooth_factor) * block_skew
2582  + smooth_factor * (blob->bounding_box ().bottom () -
2583  dest_row->initial_min_y ());
2584  }
2585  }
2586  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2587  if (row_it.data ()->blob_list ()->empty ())
2588  delete row_it.extract (); //discard empty rows
2589  }
2590 }
float min_y() const
Definition: blobbox.h:533
BLOBNBOX_LIST blobs
Definition: blobbox.h:735
EXTERN ScrollView * to_win
Definition: drawtord.cpp:40
int textord_test_y
Definition: makerow.cpp:64
void SetCursor(int x, int y)
Definition: scrollview.cpp:520
bool textord_interpolating_skew
Definition: makerow.cpp:60
const TBOX & bounding_box() const
Definition: blobbox.h:208
bool contains(const FCOORD pt) const
Definition: rect.h:323
float line_size
Definition: blobbox.h:748
double textord_skew_lag
Definition: makerow.cpp:76
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
Definition: blobbox.cpp:673
float line_spacing
Definition: blobbox.h:742
#define f(xc, yc)
Definition: imgscale.cpp:39
int textord_skewsmooth_offset2
Definition: makerow.cpp:62
OVERLAP_STATE
Definition: makerow.h:30
int textord_skewsmooth_offset
Definition: makerow.cpp:61
void DrawTo(int x, int y)
Definition: scrollview.cpp:526
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, BOOL8 testing_blob)
Definition: makerow.cpp:2598
float initial_min_y() const
Definition: blobbox.h:539
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2694
inT16 top() const
Definition: rect.h:53
static const double kAscenderFraction
Definition: ccstruct.h:36
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
double textord_overlap_x
Definition: makerow.cpp:82
Definition: makerow.h:33
short inT16
Definition: host.h:100
integer coordinate
Definition: points.h:30
int textord_test_x
Definition: makerow.cpp:63
size_t top
Definition: tessarray.h:52
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
float max_y() const
Definition: blobbox.h:530
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:70
BLOCK * block
Definition: blobbox.h:740
Definition: makerow.h:32
static const double kDescenderFraction
Definition: ccstruct.h:34
inT16 bottom() const
Definition: rect.h:60
int blob_x_order ( const void *  item1,
const void *  item2 
)

Definition at line 2694 of file makerow.cpp.

2696  {
2697  //converted ptr
2698  BLOBNBOX *blob1 = *(BLOBNBOX **) item1;
2699  //converted ptr
2700  BLOBNBOX *blob2 = *(BLOBNBOX **) item2;
2701 
2702  if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())
2703  return -1;
2704  else if (blob1->bounding_box ().left () > blob2->bounding_box ().left ())
2705  return 1;
2706  else
2707  return 0;
2708 }
const TBOX & bounding_box() const
Definition: blobbox.h:208
inT16 left() const
Definition: rect.h:67
void cleanup_rows_making ( ICOORD  page_tr,
TO_BLOCK *block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

cleanup_rows_making

Remove overlapping rows and fit all the blobs to what's left.

Definition at line 534 of file makerow.cpp.

541  {
542  //iterators
543  BLOBNBOX_IT blob_it = &block->blobs;
544  TO_ROW_IT row_it = block->get_rows ();
545 
546 #ifndef GRAPHICS_DISABLED
547  if (textord_show_parallel_rows && testing_on) {
548  if (to_win == NULL)
549  create_to_win(page_tr);
550  }
551 #endif
552  //get row coords
553  fit_parallel_rows(block,
554  gradient,
555  rotation,
556  block_edge,
557  textord_show_parallel_rows &&testing_on);
559  gradient,
560  rotation,
561  block_edge,
562  textord_show_parallel_rows &&testing_on);
563  expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
564  blob_it.set_to_list (&block->blobs);
565  row_it.set_to_list (block->get_rows ());
566  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
567  blob_it.add_list_after (row_it.data ()->blob_list ());
568  //give blobs back
569  assign_blobs_to_rows (block, &gradient, 1, FALSE, FALSE, FALSE);
570  //now new rows must be genuine
571  blob_it.set_to_list (&block->blobs);
572  blob_it.add_list_after (&block->large_blobs);
573  assign_blobs_to_rows (block, &gradient, 2, TRUE, TRUE, FALSE);
574  //safe to use big ones now
575  blob_it.set_to_list (&block->blobs);
576  //throw all blobs in
577  blob_it.add_list_after (&block->noise_blobs);
578  blob_it.add_list_after (&block->small_blobs);
579  assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE);
580 }
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:737
void create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:49
BLOBNBOX_LIST blobs
Definition: blobbox.h:735
EXTERN ScrollView * to_win
Definition: drawtord.cpp:40
void expand_rows(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:1065
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
Definition: makerow.cpp:2402
#define NULL
Definition: host.h:144
#define FALSE
Definition: capi.h:28
void delete_non_dropout_rows(TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:667
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:738
bool textord_show_parallel_rows
Definition: makerow.cpp:47
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:2051
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:739
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
#define TRUE
Definition: capi.h:27
void compute_dropout_distances ( inT32 *occupation,
inT32 *thresholds,
inT32  line_count 
)

Definition at line 1018 of file makerow.cpp.

1022  {
1023  inT32 line_index; //of thresholds line
1024  inT32 distance; //from prev dropout
1025  inT32 next_dist; //to next dropout
1026  inT32 back_index; //for back filling
1027  inT32 prev_threshold; //before overwrite
1028 
1029  distance = -line_count;
1030  line_index = 0;
1031  do {
1032  do {
1033  distance--;
1034  prev_threshold = thresholds[line_index];
1035  //distance from prev
1036  thresholds[line_index] = distance;
1037  line_index++;
1038  }
1039  while (line_index < line_count
1040  && (occupation[line_index] < thresholds[line_index]
1041  || occupation[line_index - 1] >= prev_threshold));
1042  if (line_index < line_count) {
1043  back_index = line_index - 1;
1044  next_dist = 1;
1045  while (next_dist < -distance && back_index >= 0) {
1046  thresholds[back_index] = next_dist;
1047  back_index--;
1048  next_dist++;
1049  distance++;
1050  }
1051  distance = 1;
1052  }
1053  }
1054  while (line_index < line_count);
1055 }
int inT32
Definition: host.h:102
inT32 compute_height_modes ( STATS *heights,
inT32  min_height,
inT32  max_height,
inT32 *modes,
inT32  maxmodes 
)

Definition at line 1743 of file makerow.cpp.

1747  { // size of modes
1748  inT32 pile_count; // no in source pile
1749  inT32 src_count; // no of source entries
1750  inT32 src_index; // current entry
1751  inT32 least_count; // height of smallest
1752  inT32 least_index; // index of least
1753  inT32 dest_count; // index in modes
1754 
1755  src_count = max_height + 1 - min_height;
1756  dest_count = 0;
1757  least_count = MAX_INT32;
1758  least_index = -1;
1759  for (src_index = 0; src_index < src_count; src_index++) {
1760  pile_count = heights->pile_count(min_height + src_index);
1761  if (pile_count > 0) {
1762  if (dest_count < maxmodes) {
1763  if (pile_count < least_count) {
1764  // find smallest in array
1765  least_count = pile_count;
1766  least_index = dest_count;
1767  }
1768  modes[dest_count++] = min_height + src_index;
1769  } else if (pile_count >= least_count) {
1770  while (least_index < maxmodes - 1) {
1771  modes[least_index] = modes[least_index + 1];
1772  // shuffle up
1773  least_index++;
1774  }
1775  // new one on end
1776  modes[maxmodes - 1] = min_height + src_index;
1777  if (pile_count == least_count) {
1778  // new smallest
1779  least_index = maxmodes - 1;
1780  } else {
1781  least_count = heights->pile_count(modes[0]);
1782  least_index = 0;
1783  for (dest_count = 1; dest_count < maxmodes; dest_count++) {
1784  pile_count = heights->pile_count(modes[dest_count]);
1785  if (pile_count < least_count) {
1786  // find smallest
1787  least_count = pile_count;
1788  least_index = dest_count;
1789  }
1790  }
1791  }
1792  }
1793  }
1794  }
1795  return dest_count;
1796 }
int inT32
Definition: host.h:102
inT32 pile_count(inT32 value) const
Definition: statistc.h:74
#define MAX_INT32
Definition: host.h:120
void compute_line_occupation ( TO_BLOCK *block,
float  gradient,
inT32  min_y,
inT32  max_y,
inT32 *occupation,
inT32 *deltas 
)

Definition at line 871 of file makerow.cpp.

878  {
879  inT32 line_count; //maxy-miny+1
880  inT32 line_index; //of scan line
881  int index; //array index for daft compilers
882  float top, bottom; //coords of blob
883  inT32 width; //of blob
884  TO_ROW *row; //current row
885  TO_ROW_IT row_it = block->get_rows ();
886  BLOBNBOX *blob; //current blob
887  BLOBNBOX_IT blob_it; //iterator
888  float length; //of skew vector
889  TBOX blob_box; //bounding box
890  FCOORD rotation; //inverse of skew
891 
892  line_count = max_y - min_y + 1;
893  length = sqrt (gradient * gradient + 1);
894  rotation = FCOORD (1 / length, -gradient / length);
895  for (line_index = 0; line_index < line_count; line_index++)
896  deltas[line_index] = 0;
897  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
898  row = row_it.data ();
899  blob_it.set_to_list (row->blob_list ());
900  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
901  blob_it.forward ()) {
902  blob = blob_it.data ();
903  blob_box = blob->bounding_box ();
904  blob_box.rotate (rotation);//de-skew it
905  top = blob_box.top ();
906  bottom = blob_box.bottom ();
907  width =
908  (inT32) floor ((FLOAT32) (blob_box.right () - blob_box.left ()));
909  if ((inT32) floor (bottom) < min_y
910  || (inT32) floor (bottom) - min_y >= line_count)
911  fprintf (stderr,
912  "Bad y coord of bottom, " INT32FORMAT "(" INT32FORMAT ","
913  INT32FORMAT ")\n", (inT32) floor (bottom), min_y, max_y);
914  //count transitions
915  index = (inT32) floor (bottom) - min_y;
916  deltas[index] += width;
917  if ((inT32) floor (top) < min_y
918  || (inT32) floor (top) - min_y >= line_count)
919  fprintf (stderr,
920  "Bad y coord of top, " INT32FORMAT "(" INT32FORMAT ","
921  INT32FORMAT ")\n", (inT32) floor (top), min_y, max_y);
922  index = (inT32) floor (top) - min_y;
923  deltas[index] -= width;
924  }
925  }
926  occupation[0] = deltas[0];
927  for (line_index = 1; line_index < line_count; line_index++)
928  occupation[line_index] = occupation[line_index - 1] + deltas[line_index];
929 }
const TBOX & bounding_box() const
Definition: blobbox.h:208
#define INT32FORMAT
Definition: host.h:115
inT16 left() const
Definition: rect.h:67
int inT32
Definition: host.h:102
Definition: rect.h:29
inT16 right() const
Definition: rect.h:74
float FLOAT32
Definition: host.h:111
inT16 top() const
Definition: rect.h:53
Definition: points.h:189
void rotate(const FCOORD &vec)
Definition: rect.h:182
size_t top
Definition: tessarray.h:52
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
inT16 bottom() const
Definition: rect.h:60
void compute_occupation_threshold ( inT32  low_window,
inT32  high_window,
inT32  line_count,
inT32 *occupation,
inT32 *thresholds 
)

compute_occupation_threshold

Compute thresholds for textline or not for the occupation array.

Definition at line 937 of file makerow.cpp.

943  {
944  inT32 line_index; //of thresholds line
945  inT32 low_index; //in occupation
946  inT32 high_index; //in occupation
947  inT32 sum; //current average
948  inT32 divisor; //to get thresholds
949  inT32 min_index; //of min occ
950  inT32 min_occ; //min in locality
951  inT32 test_index; //for finding min
952 
953  divisor =
954  (inT32) ceil ((low_window + high_window) / textord_occupancy_threshold);
955  if (low_window + high_window < line_count) {
956  for (sum = 0, high_index = 0; high_index < low_window; high_index++)
957  sum += occupation[high_index];
958  for (low_index = 0; low_index < high_window; low_index++, high_index++)
959  sum += occupation[high_index];
960  min_occ = occupation[0];
961  min_index = 0;
962  for (test_index = 1; test_index < high_index; test_index++) {
963  if (occupation[test_index] <= min_occ) {
964  min_occ = occupation[test_index];
965  min_index = test_index; //find min in region
966  }
967  }
968  for (line_index = 0; line_index < low_window; line_index++)
969  thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
970  //same out to end
971  for (low_index = 0; high_index < line_count; low_index++, high_index++) {
972  sum -= occupation[low_index];
973  sum += occupation[high_index];
974  if (occupation[high_index] <= min_occ) {
975  //find min in region
976  min_occ = occupation[high_index];
977  min_index = high_index;
978  }
979  //lost min from region
980  if (min_index <= low_index) {
981  min_occ = occupation[low_index + 1];
982  min_index = low_index + 1;
983  for (test_index = low_index + 2; test_index <= high_index;
984  test_index++) {
985  if (occupation[test_index] <= min_occ) {
986  min_occ = occupation[test_index];
987  //find min in region
988  min_index = test_index;
989  }
990  }
991  }
992  thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
993  }
994  }
995  else {
996  min_occ = occupation[0];
997  min_index = 0;
998  for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
999  if (occupation[low_index] < min_occ) {
1000  min_occ = occupation[low_index];
1001  min_index = low_index;
1002  }
1003  sum += occupation[low_index];
1004  }
1005  line_index = 0;
1006  }
1007  for (; line_index < line_count; line_index++)
1008  thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
1009  //same out to end
1010 }
int inT32
Definition: host.h:102
double textord_occupancy_threshold
Definition: makerow.cpp:87
void compute_page_skew ( TO_BLOCK_LIST *  blocks,
float &  page_m,
float &  page_err 
)

Definition at line 296 of file makerow.cpp.

// compute_page_skew, body listing (source lines 300-390).
// Gathers gradient/error samples from the rows of every text block, then
// writes to page_m and page_err the entries that choose_nth_item selects at
// index (inT32)(row_count * textord_skew_ile) in each sample array.
// Blocks whose poly_block() is non-NULL and not text are skipped in each pass.
// When no rows are found at all, page_m and page_err are both set to 0.
300  {
301  inT32 row_count; //total rows
302  inT32 blob_count; //total_blobs
303  inT32 row_err; //integer error
304  float *gradients; //of rows
305  float *errors; //of rows
306  inT32 row_index; //of total
307  TO_ROW *row; //current row
308  TO_BLOCK_IT block_it = blocks; //iterator
309  TO_ROW_IT row_it;
310 
// Pass 1: count the rows and blobs of all text blocks.
311  row_count = 0;
312  blob_count = 0;
313  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
314  block_it.forward ()) {
315  POLY_BLOCK* pb = block_it.data()->block->poly_block();
316  if (pb != NULL && !pb->IsText())
317  continue; // Pretend non-text blocks don't exist.
318  row_count += block_it.data ()->get_rows ()->length ();
319  //count up rows
320  row_it.set_to_list (block_it.data ()->get_rows ());
321  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
322  blob_count += row_it.data ()->blob_list ()->length ();
323  }
324  if (row_count == 0) {
325  page_m = 0.0f;
326  page_err = 0.0f;
327  return;
328  }
// Both sample arrays are sized by the total blob count, not the row count.
329  gradients = (float *) alloc_mem (blob_count * sizeof (float));
330  //get mem
331  errors = (float *) alloc_mem (blob_count * sizeof (float));
332  if (gradients == NULL || errors == NULL)
333  MEMORY_OUT.error ("compute_page_skew", ABORT, NULL);
334 
// Pass 2: collect samples. From here on blob_count is reused as a per-row value.
335  row_index = 0;
336  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
337  block_it.forward ()) {
338  POLY_BLOCK* pb = block_it.data()->block->poly_block();
339  if (pb != NULL && !pb->IsText())
340  continue; // Pretend non-text blocks don't exist.
341  row_it.set_to_list (block_it.data ()->get_rows ());
342  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
343  row = row_it.data ();
344  blob_count = row->blob_list ()->length ();
345  row_err = (inT32) ceil (row->line_error ());
346  if (row_err <= 0)
347  row_err = 1;
// NOTE(review): source line 348 is missing from this listing. It presumably
// opens the `if` that the `else if` at line 356 pairs with
// (textord_biased_skewcalc is cross-referenced for this function) - confirm
// against makerow.cpp.
// NOTE(review): blob_count is divided by row_err on line 349 and again in the
// loop initialiser on line 350 - confirm the double division is intended.
349  blob_count /= row_err;
350  for (blob_count /= row_err; blob_count > 0; blob_count--) {
351  gradients[row_index] = row->line_m ();
352  errors[row_index] = row->line_error ();
353  row_index++;
354  }
355  }
356  else if (blob_count >= textord_min_blobs_in_row) {
357  //get gradient
358  gradients[row_index] = row->line_m ();
359  errors[row_index] = row->line_error ();
360  row_index++;
361  }
362  }
363  }
// Nothing qualified above: take one sample from every row of every text block.
// NOTE(review): the arrays were allocated with one entry per blob, while this
// loop writes one entry per row - confirm rows cannot outnumber blobs here.
364  if (row_index == 0) {
365  //desperate
366  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
367  block_it.forward ()) {
368  POLY_BLOCK* pb = block_it.data()->block->poly_block();
369  if (pb != NULL && !pb->IsText())
370  continue; // Pretend non-text blocks don't exist.
371  row_it.set_to_list (block_it.data ()->get_rows ());
372  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
373  row_it.forward ()) {
374  row = row_it.data ();
375  gradients[row_index] = row->line_m ();
376  errors[row_index] = row->line_error ();
377  row_index++;
378  }
379  }
380  }
// row_count now holds the number of samples collected, not the number of rows.
381  row_count = row_index;
382  row_index = choose_nth_item ((inT32) (row_count * textord_skew_ile),
383  gradients, row_count);
384  page_m = gradients[row_index];
385  row_index = choose_nth_item ((inT32) (row_count * textord_skew_ile),
386  errors, row_count);
387  page_err = errors[row_index];
388  free_mem(gradients);
389  free_mem(errors);
390 }
float line_m() const
Definition: blobbox.h:542
float line_error() const
Definition: blobbox.h:548
Definition: errcode.h:30
int textord_min_blobs_in_row
Definition: makerow.cpp:65
#define NULL
Definition: host.h:144
void free_mem(void *oldchunk)
Definition: memry.cpp:56
int inT32
Definition: host.h:102
void * alloc_mem(inT32 count)
Definition: memry.cpp:48
void error(const char *caller, TessErrorLogCode action, const char *format,...) const
Definition: errcode.cpp:41
double textord_skew_ile
Definition: makerow.cpp:75
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
Definition: statistc.cpp:550
bool textord_biased_skewcalc
Definition: makerow.cpp:59
bool IsText() const
Definition: polyblk.h:54
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
const ERRCODE MEMORY_OUT
Definition: stderr.h:25
inT32 compute_row_descdrop ( TO_ROW *  row,
float  gradient,
int  xheight_blob_count,
STATS *  asc_heights 
)

Definition at line 1683 of file makerow.cpp.

// compute_row_descdrop, body listing (source lines 1684-1734).
// Returns the negated mode of a histogram of how far blob bottoms lie below
// the line gradient * x + row->parallel_c(), or 0 when that mode is not
// supported by enough blobs (see the threshold test near the end).
1684  {
1685  // Count how many potential ascenders are in this row.
// The bucket range [i_min, i_max] is clamped using row->xheight and
// textord_ascx_ratio_min / textord_ascx_ratio_max.
1686  int i_min = asc_heights->min_bucket();
1687  if ((i_min / row->xheight) < textord_ascx_ratio_min) {
1688  i_min = static_cast<int>(
1689  floor(row->xheight * textord_ascx_ratio_min + 0.5));
1690  }
1691  int i_max = asc_heights->max_bucket();
1692  if ((i_max / row->xheight) > textord_ascx_ratio_max) {
1693  i_max = static_cast<int>(floor(row->xheight * textord_ascx_ratio_max));
1694  }
1695  int num_potential_asc = 0;
1696  for (int i = i_min; i <= i_max; ++i) {
1697  num_potential_asc += asc_heights->pile_count(i);
1698  }
// Accepted drop range, scaled from row->xheight by textord_descx_ratio_min/max.
1699  inT32 min_height =
1700  static_cast<inT32>(floor(row->xheight * textord_descx_ratio_min + 0.5));
1701  inT32 max_height =
1702  static_cast<inT32>(floor(row->xheight * textord_descx_ratio_max));
1703  float xcentre; // centre of blob
1704  float height; // height of blob
1705  BLOBNBOX_IT blob_it = row->blob_list();
1706  BLOBNBOX *blob; // current blob
1707  STATS heights (min_height, max_height + 1);
// Histogram the drop of each blob that is not joined to its predecessor.
1708  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1709  blob = blob_it.data();
1710  if (!blob->joined_to_prev()) {
1711  xcentre = (blob->bounding_box().left() +
1712  blob->bounding_box().right()) / 2.0f;
// height here is the distance from the line down to the blob's bottom edge.
1713  height = (gradient * xcentre + row->parallel_c() -
1714  blob->bounding_box().bottom());
1715  if (height >= min_height && height <= max_height)
1716  heights.add(static_cast<int>(floor(height + 0.5)), 1);
1717  }
1718  }
1719  int blob_index = heights.mode(); // find mode
1720  int blob_count = heights.pile_count(blob_index); // get count of mode
// NOTE(review): source line 1722, the initialiser of total_fraction, is
// missing from this listing. textord_descheight_mode_fraction and
// textord_ascheight_mode_fraction are cross-referenced for this function and
// are presumably its operands - confirm against makerow.cpp.
1721  float total_fraction =
// Reject the mode when mode count plus potential ascenders is too small
// relative to xheight_blob_count.
1723  if (static_cast<float>(blob_count + num_potential_asc) <
1724  xheight_blob_count * total_fraction) {
1725  blob_count = 0;
1726  }
1727  int descdrop = blob_count > 0 ? -blob_index : 0;
1728  if (textord_debug_xheights) {
1729  tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n",
1730  descdrop, num_potential_asc, blob_count);
1731  heights.print();
1732  }
1733  return descdrop;
1734 }
float parallel_c() const
Definition: blobbox.h:551
double textord_ascx_ratio_min
Definition: makerow.cpp:97
const TBOX & bounding_box() const
Definition: blobbox.h:208
double textord_ascx_ratio_max
Definition: makerow.cpp:98
inT16 left() const
Definition: rect.h:67
int inT32
Definition: host.h:102
double textord_descheight_mode_fraction
Definition: makerow.cpp:96
#define f(xc, yc)
Definition: imgscale.cpp:39
double textord_descx_ratio_max
Definition: makerow.cpp:100
inT16 right() const
Definition: rect.h:74
double textord_descx_ratio_min
Definition: makerow.cpp:99
inT32 pile_count(inT32 value) const
Definition: statistc.h:74
bool joined_to_prev() const
Definition: blobbox.h:233
double textord_ascheight_mode_fraction
Definition: makerow.cpp:94
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
Definition: statistc.h:29
float xheight
Definition: blobbox.h:629
inT32 min_bucket() const
Definition: statistc.cpp:208
bool textord_debug_xheights
Definition: makerow.cpp:58
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
inT32 max_bucket() const
Definition: statistc.cpp:224
inT16 bottom() const
Definition: rect.h:60
void compute_row_stats ( TO_BLOCK *  block,
BOOL8  testing_on 
)

Definition at line 1259 of file makerow.cpp.

// compute_row_stats, body listing (source lines 1262-1341).
// Walks the block's rows from the last one backwards, stores in each earlier
// visited row the difference between the next visited row's intercept and its
// own, and then uses choose_nth_item with row_spacing_order to pick
// quartile/median rows from which block->key_row, line_size, line_spacing and
// baseline_offset are updated.
1262  {
1263  inT32 row_index; //of median
1264  TO_ROW *row; //current row
1265  TO_ROW *prev_row; //previous row
1266  float iqr; //inter quartile range
1267  TO_ROW_IT row_it = block->get_rows ();
1268  //number of rows
1269  inT16 rowcount = row_it.length ();
1270  TO_ROW **rows; //for choose nth
1271 
1272  rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
1273  if (rows == NULL)
1274  MEMORY_OUT.error ("compute_row_stats", ABORT, NULL);
// rowcount is reset and reused as the number of spacings collected in rows[].
1275  rowcount = 0;
1276  prev_row = NULL;
1277  row_it.move_to_last (); //start at bottom
1278  do {
1279  row = row_it.data ();
1280  if (prev_row != NULL) {
1281  rows[rowcount++] = prev_row;
1282  prev_row->spacing = row->intercept () - prev_row->intercept ();
1283  if (testing_on)
1284  tprintf ("Row at %g yields spacing of %g\n",
1285  row->intercept (), prev_row->spacing);
1286  }
1287  prev_row = row;
1288  row_it.backward ();
1289  }
1290  while (!row_it.at_last ());
// Defaults based on the final row visited; overridden below if any spacing
// was collected.
1291  block->key_row = prev_row;
1292  block->baseline_offset =
1293  fmod (prev_row->parallel_c (), block->line_spacing);
1294  if (testing_on)
1295  tprintf ("Blob based spacing=(%g,%g), offset=%g",
1296  block->line_size, block->line_spacing, block->baseline_offset);
1297  if (rowcount > 0) {
// iqr = spacing at index 3/4 minus spacing at index 1/4; afterwards row_index
// refers to the row chosen at index rowcount / 2.
1298  row_index = choose_nth_item (rowcount * 3 / 4, rows, rowcount,
1299  sizeof (TO_ROW *), row_spacing_order);
1300  iqr = rows[row_index]->spacing;
1301  row_index = choose_nth_item (rowcount / 4, rows, rowcount,
1302  sizeof (TO_ROW *), row_spacing_order);
1303  iqr -= rows[row_index]->spacing;
1304  row_index = choose_nth_item (rowcount / 2, rows, rowcount,
1305  sizeof (TO_ROW *), row_spacing_order);
1306  block->key_row = rows[row_index];
1307  if (testing_on)
1308  tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr);
1309  if (rowcount > 2
1310  && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) {
// NOTE(review): source line 1311 is missing from this listing. It presumably
// opens the `if` that the `else` at line 1321 pairs with
// (textord_new_initial_xheight is cross-referenced for this function) -
// confirm against makerow.cpp.
1312  if (rows[row_index]->spacing < block->line_spacing
1313  && rows[row_index]->spacing > block->line_size)
1314  //within range
1315  block->line_size = rows[row_index]->spacing;
1316  //spacing=size
1317  else if (rows[row_index]->spacing > block->line_spacing)
1318  block->line_size = block->line_spacing;
1319  //too big so use max
1320  }
1321  else {
1322  if (rows[row_index]->spacing < block->line_spacing)
1323  block->line_size = rows[row_index]->spacing;
1324  else
1325  block->line_size = block->line_spacing;
1326  //too big so use max
1327  }
// line_size is never allowed to fall below textord_min_xheight.
1328  if (block->line_size < textord_min_xheight)
1329  block->line_size = (float) textord_min_xheight;
1330  block->line_spacing = rows[row_index]->spacing;
// NOTE(review): source line 1332, the right-hand side of this assignment, is
// missing from this listing. textord_excess_blobsize is cross-referenced for
// this function and is presumably involved - confirm against makerow.cpp.
1331  block->max_blob_size =
1333  }
1334  block->baseline_offset = fmod (rows[row_index]->intercept (),
1335  block->line_spacing);
1336  }
1337  if (testing_on)
1338  tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n",
1339  block->line_size, block->line_spacing, block->baseline_offset);
1340  free_mem(rows);
1341 }
float parallel_c() const
Definition: blobbox.h:551
float intercept() const
Definition: blobbox.h:560
TO_ROW * key_row
Definition: blobbox.h:761
float line_size
Definition: blobbox.h:748
Definition: errcode.h:30
#define NULL
Definition: host.h:144
void free_mem(void *oldchunk)
Definition: memry.cpp:56
int inT32
Definition: host.h:102
float baseline_offset
Definition: blobbox.h:750
float line_spacing
Definition: blobbox.h:742
double textord_linespace_iqrlimit
Definition: makerow.cpp:77
void * alloc_mem(inT32 count)
Definition: memry.cpp:48
bool textord_new_initial_xheight
Definition: makerow.cpp:103
double textord_excess_blobsize
Definition: makerow.cpp:86
int row_spacing_order(const void *item1, const void *item2)
Definition: makerow.cpp:2738
float max_blob_size
Definition: blobbox.h:749
float spacing
Definition: blobbox.h:628
void error(const char *caller, TessErrorLogCode action, const char *format,...) const
Definition: errcode.cpp:41
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
int textord_min_xheight
Definition: makerow.cpp:70
short inT16
Definition: host.h:100
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
Definition: statistc.cpp:550
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
const ERRCODE MEMORY_OUT
Definition: stderr.h:25
int compute_xheight_from_modes ( STATS *  heights,
STATS *  floating_heights,
bool  cap_only,
int  min_height,
int  max_height,
float *  xheight,
float *  ascrise 
)

Definition at line 1587 of file makerow.cpp.

// compute_xheight_from_modes, body listing (source lines 1589-1669).
// Chooses *xheight and *ascrise from the modes of the heights histogram.
// Returns 0 when the histogram's mode has a zero count, otherwise best_count:
// the count behind the chosen x-height mode.
1589  {
1590  int blob_index = heights->mode(); // find mode
1591  int blob_count = heights->pile_count(blob_index); // get count of mode
1592  if (textord_debug_xheights) {
1593  tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n",
1594  min_height, max_height, blob_index, blob_count,
1595  heights->get_total());
1596  heights->print();
1597  floating_heights->print();
1598  }
1599  if (blob_count == 0) return 0;
1600  int modes[MAX_HEIGHT_MODES]; // biggest piles
1601  bool in_best_pile = FALSE;
1602  int prev_size = -MAX_INT32;
1603  int best_count = 0;
1604  int mode_count = compute_height_modes(heights, min_height, max_height,
1605  modes, MAX_HEIGHT_MODES);
// With cap_only, only the first mode is kept, so the pairing loop below does
// not run and the single-mode path is taken.
1606  if (cap_only && mode_count > 1)
1607  mode_count = 1;
1608  int x;
1609  if (textord_debug_xheights) {
1610  tprintf("found %d modes: ", mode_count);
1611  for (x = 0; x < mode_count; x++) tprintf("%d ", modes[x]);
1612  tprintf("\n");
1613  }
1614 
// Try each mode except the last as an x-height candidate, pairing it with
// later entries of modes[] as ascender-height candidates.
1615  for (x = 0; x < mode_count - 1; x++) {
1616  if (modes[x] != prev_size + 1)
1617  in_best_pile = FALSE; // had empty height
// The candidate's count excludes the blobs recorded in floating_heights.
1618  int modes_x_count = heights->pile_count(modes[x]) -
1619  floating_heights->pile_count(modes[x]);
1620  if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) &&
1621  (in_best_pile || modes_x_count > best_count)) {
1622  for (int asc = x + 1; asc < mode_count; asc++) {
1623  float ratio =
1624  static_cast<float>(modes[asc]) / static_cast<float>(modes[x]);
1625  if (textord_ascx_ratio_min < ratio &&
1626  ratio < textord_ascx_ratio_max &&
1627  (heights->pile_count(modes[asc]) >=
1628  blob_count * textord_ascheight_mode_fraction)) {
1629  if (modes_x_count > best_count) {
1630  in_best_pile = true;
1631  best_count = modes_x_count;
1632  }
1633  if (textord_debug_xheights) {
1634  tprintf("X=%d, asc=%d, count=%d, ratio=%g\n",
1635  modes[x], modes[asc]-modes[x], modes_x_count, ratio);
1636  }
1637  prev_size = modes[x];
1638  *xheight = static_cast<float>(modes[x]);
1639  *ascrise = static_cast<float>(modes[asc] - modes[x]);
1640  }
1641  }
1642  }
1643  }
// NOTE(review): *xheight is only written above when a mode pair matched, so
// this test assumes the caller passed *xheight initialised to 0 - confirm.
1644  if (*xheight == 0) { // single mode
1645  // Remove counts of the "floating" blobs (the one whose height is too
1646  // small in relation to it's top end of the bounding box) from heights
1647  // before computing the single-mode xheight.
1648  // Restore the counts in heights after the mode is found, since
1649  // floating blobs might be useful for determining potential ascenders
1650  // in compute_row_descdrop().
1651  if (floating_heights->get_total() > 0) {
1652  for (x = min_height; x < max_height; ++x) {
1653  heights->add(x, -(floating_heights->pile_count(x)));
1654  }
1655  blob_index = heights->mode(); // find the modified mode
1656  for (x = min_height; x < max_height; ++x) {
1657  heights->add(x, floating_heights->pile_count(x));
1658  }
1659  }
1660  *xheight = static_cast<float>(blob_index);
1661  *ascrise = 0.0f;
1662  best_count = heights->pile_count(blob_index);
// NOTE(review): source line 1663 is missing from this listing. Given the
// `else if (textord_debug_xheights)` on line 1665, it presumably guards the
// tprintf below with the same flag - confirm against makerow.cpp.
1664  tprintf("Single mode xheight set to %g\n", *xheight);
1665  } else if (textord_debug_xheights) {
1666  tprintf("Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise);
1667  }
1668  return best_count;
1669 }
inT32 compute_height_modes(STATS *heights, inT32 min_height, inT32 max_height, inT32 *modes, inT32 maxmodes)
Definition: makerow.cpp:1743
double textord_ascx_ratio_min
Definition: makerow.cpp:97
double textord_xheight_mode_fraction
Definition: makerow.cpp:92
double textord_ascx_ratio_max
Definition: makerow.cpp:98
inT32 get_total() const
Definition: statistc.h:82
#define FALSE
Definition: capi.h:28
void add(inT32 value, inT32 count)
Definition: statistc.cpp:103
inT32 pile_count(inT32 value) const
Definition: statistc.h:74
#define MAX_INT32
Definition: host.h:120
double textord_ascheight_mode_fraction
Definition: makerow.cpp:94
void print() const
Definition: statistc.cpp:446
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
inT32 mode() const
Definition: statistc.cpp:117
bool textord_debug_xheights
Definition: makerow.cpp:58
#define MAX_HEIGHT_MODES
Definition: makerow.cpp:105
void correct_row_xheight ( TO_ROW *  row,
float  xheight,
float  ascrise,
float  descdrop 
)

Definition at line 1805 of file makerow.cpp.

// correct_row_xheight, body listing (source lines 1806-1870).
// Adjusts row->xheight, row->ascrise and row->descdrop (and may set
// row->all_caps) according to the row's category from get_row_category() and
// the two flags normal_xheight and cap_xheight computed below. The xheight,
// ascrise and descdrop arguments supply the replacement or scaling values.
1806  {
1807  ROW_CATEGORY row_category = get_row_category(row);
1808  if (textord_debug_xheights) {
1809  tprintf("correcting row xheight: row->xheight %.4f"
1810  ", row->acrise %.4f row->descdrop %.4f\n",
1811  row->xheight, row->ascrise, row->descdrop);
1812  }
// NOTE(review): source lines 1814 and 1817 are missing from this listing, so
// both initialisers below are incomplete. Judging by the surviving line 1816
// and the cross-referenced within_error_margin(test, num, margin) and
// textord_xheight_error_margin, normal_xheight presumably tests row->xheight
// against xheight and cap_xheight against xheight + ascrise - confirm against
// makerow.cpp.
1813  bool normal_xheight =
1815  bool cap_xheight =
1816  within_error_margin(row->xheight, xheight + ascrise,
1818  // Use the average xheight/ascrise for the following cases:
1819  // -- the xheight of the row could not be determined at all
1820  // -- the row has descenders (e.g. "many groups", "ISBN 12345 p.3")
1821  // and its xheight is close to either cap height or average xheight
1822  // -- the row does not have ascenders or descenders, but its xheight
1823  // is close to the average block xheight (e.g. row with "www.mmm.com")
1824  if (row_category == ROW_ASCENDERS_FOUND) {
// The row keeps its own xheight; a non-negative descdrop is replaced by one
// scaled from the supplied descdrop / xheight ratio.
1825  if (row->descdrop >= 0.0) {
1826  row->descdrop = row->xheight * (descdrop / xheight);
1827  }
1828  } else if (row_category == ROW_INVALID ||
1829  (row_category == ROW_DESCENDERS_FOUND &&
1830  (normal_xheight || cap_xheight)) ||
1831  (row_category == ROW_UNKNOWN && normal_xheight)) {
1832  if (textord_debug_xheights) tprintf("using average xheight\n");
1833  row->xheight = xheight;
1834  row->ascrise = ascrise;
1835  row->descdrop = descdrop;
1836  } else if (row_category == ROW_DESCENDERS_FOUND) {
1837  // Assume this is a row with mostly lowercase letters and it's xheight
1838  // is computed correctly (unfortunately there is no way to distinguish
1839  // this from the case when descenders are found, but the most common
1840  // height is capheight).
1841  if (textord_debug_xheights) tprintf("lowercase, corrected ascrise\n");
1842  row->ascrise = row->xheight * (ascrise / xheight);
1843  } else if (row_category == ROW_UNKNOWN) {
1844  // Otherwise assume this row is an all-caps or small-caps row
1845  // and adjust xheight and ascrise of the row.
1846 
1847  row->all_caps = true;
1848  if (cap_xheight) { // regular all caps
1849  if (textord_debug_xheights) tprintf("all caps\n");
1850  row->xheight = xheight;
1851  row->ascrise = ascrise;
1852  row->descdrop = descdrop;
1853  } else { // small caps or caps with an odd xheight
1854  if (textord_debug_xheights) {
1855  if (row->xheight < xheight + ascrise && row->xheight > xheight) {
1856  tprintf("small caps\n");
1857  } else {
1858  tprintf("all caps with irregular xheight\n");
1859  }
1860  }
// Split the row's measured height into ascrise and xheight in the supplied
// ascrise : xheight proportion, then scale descdrop from the new xheight.
1861  row->ascrise = row->xheight * (ascrise / (xheight + ascrise));
1862  row->xheight -= row->ascrise;
1863  row->descdrop = row->xheight * (descdrop / xheight);
1864  }
1865  }
1866  if (textord_debug_xheights) {
1867  tprintf("corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
1868  " = %.4f\n", row->xheight, row->ascrise, row->descdrop);
1869  }
1870 }
float descdrop
Definition: blobbox.h:632
ROW_CATEGORY
Definition: makerow.h:37
double textord_xheight_error_margin
Definition: makerow.cpp:101
BOOL8 all_caps
Definition: blobbox.h:618
float ascrise
Definition: blobbox.h:631
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:122
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
float xheight
Definition: blobbox.h:629
bool textord_debug_xheights
Definition: makerow.cpp:58
bool within_error_margin(float test, float num, float margin)
Definition: makerow.h:128
void delete_non_dropout_rows ( TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

delete_non_dropout_rows

Compute the linespacing and offset.

Definition at line 667 of file makerow.cpp.

// delete_non_dropout_rows, body listing (source lines 673-746).
// Builds per-scan-line occupation and dropout-distance arrays for the deskewed
// block, deletes every row for which find_best_dropout_row() returns true, and
// appends the blob lists of deleted and surviving rows to block->blobs.
673  {
674  TBOX block_box; //deskewed block
675  inT32 *deltas; //change in occupation
676  inT32 *occupation; //of pixel coords
677  inT32 max_y; //in block
678  inT32 min_y;
679  inT32 line_index; //of scan line
680  inT32 line_count; //no of scan lines
681  inT32 distance; //to drop-out
682  inT32 xleft; //of block
683  inT32 ybottom; //of block
684  TO_ROW *row; //current row
685  TO_ROW_IT row_it = block->get_rows ();
686  BLOBNBOX_IT blob_it = &block->blobs;
687 
688  if (row_it.length () == 0)
689  return; //empty block
690  block_box = deskew_block_coords (block, gradient);
691  xleft = block->block->bounding_box ().left ();
692  ybottom = block->block->bounding_box ().bottom ();
693  min_y = block_box.bottom () - 1;
694  max_y = block_box.top () + 1;
// Widen [min_y, max_y] so every row's intercept lies strictly inside it. This
// keeps the index line_index - min_y used further down within the arrays.
695  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
696  line_index = (inT32) floor (row_it.data ()->intercept ());
697  if (line_index <= min_y)
698  min_y = line_index - 1;
699  if (line_index >= max_y)
700  max_y = line_index + 1;
701  }
702  line_count = max_y - min_y + 1;
703  if (line_count <= 0)
704  return; //empty block
705  deltas = (inT32 *) alloc_mem (line_count * sizeof (inT32));
706  occupation = (inT32 *) alloc_mem (line_count * sizeof (inT32));
// NOTE(review): the caller name reported here is "compute_line_spacing", not
// this function's name.
707  if (deltas == NULL || occupation == NULL)
708  MEMORY_OUT.error ("compute_line_spacing", ABORT, NULL);
709 
710  compute_line_occupation(block, gradient, min_y, max_y, occupation, deltas);
// NOTE(review): source lines 711, 713, 714, 716 and 717 are missing from this
// listing. The fragments below are presumably the arguments of a
// compute_occupation_threshold() call whose two window sizes are built from
// block->line_spacing and the cross-referenced kDescenderFraction /
// kXHeightFraction / kAscenderFraction constants - confirm the exact
// expressions against makerow.cpp.
712  ceil (block->line_spacing *
715  (inT32) ceil (block->line_spacing *
718  max_y - min_y + 1, occupation, deltas);
719 #ifndef GRAPHICS_DISABLED
720  if (testing_on) {
721  draw_occupation(xleft, ybottom, min_y, max_y, occupation, deltas);
722  }
723 #endif
724  compute_dropout_distances(occupation, deltas, line_count);
// deltas[] is read below as the per-line distance passed to
// find_best_dropout_row().
725  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
726  row = row_it.data ();
727  line_index = (inT32) floor (row->intercept ());
728  distance = deltas[line_index - min_y];
729  if (find_best_dropout_row (row, distance, block->line_spacing / 2,
730  line_index, &row_it, testing_on)) {
731 #ifndef GRAPHICS_DISABLED
732  if (testing_on)
733  plot_parallel_row(row, gradient, block_edge,
734  ScrollView::WHITE, rotation);
735 #endif
// Hand the rejected row's blobs back to the block before deleting the row.
736  blob_it.add_list_after (row_it.data ()->blob_list ());
737  delete row_it.extract (); //too far away
738  }
739  }
// The surviving rows' blob lists are also appended to block->blobs.
740  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
741  blob_it.add_list_after (row_it.data ()->blob_list ());
742  }
743 
744  free_mem(deltas);
745  free_mem(occupation);
746 }
void compute_dropout_distances(inT32 *occupation, inT32 *thresholds, inT32 line_count)
Definition: makerow.cpp:1018
TBOX deskew_block_coords(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:835
BLOBNBOX_LIST blobs
Definition: blobbox.h:735
void draw_occupation(inT32 xleft, inT32 ybottom, inT32 min_y, inT32 max_y, inT32 occupation[], inT32 thresholds[])
Definition: drawtord.cpp:166
float intercept() const
Definition: blobbox.h:560
Definition: errcode.h:30
#define NULL
Definition: host.h:144
void free_mem(void *oldchunk)
Definition: memry.cpp:56
void compute_occupation_threshold(inT32 low_window, inT32 high_window, inT32 line_count, inT32 *occupation, inT32 *thresholds)
Definition: makerow.cpp:937
int inT32
Definition: host.h:102
Definition: rect.h:29
float line_spacing
Definition: blobbox.h:742
void plot_parallel_row(TO_ROW *row, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:125
void * alloc_mem(inT32 count)
Definition: memry.cpp:48
static const double kXHeightFraction
Definition: ccstruct.h:35
BOOL8 find_best_dropout_row(TO_ROW *row, inT32 distance, float dist_limit, inT32 line_index, TO_ROW_IT *row_it, BOOL8 testing_on)
Definition: makerow.cpp:755
inT16 top() const
Definition: rect.h:53
void error(const char *caller, TessErrorLogCode action, const char *format,...) const
Definition: errcode.cpp:41
static const double kAscenderFraction
Definition: ccstruct.h:36
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
void compute_line_occupation(TO_BLOCK *block, float gradient, inT32 min_y, inT32 max_y, inT32 *occupation, inT32 *deltas)
Definition: makerow.cpp:871
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:70
BLOCK * block
Definition: blobbox.h:740
const ERRCODE MEMORY_OUT
Definition: stderr.h:25
static const double kDescenderFraction
Definition: ccstruct.h:34
inT16 bottom() const
Definition: rect.h:60
TBOX deskew_block_coords ( TO_BLOCK *  block,
float  gradient 
)

Definition at line 835 of file makerow.cpp.

// deskew_block_coords, body listing (source lines 838-862).
// Returns the accumulated bounding box of every blob in the block's rows, with
// each blob's box rotated by the vector (1, -gradient) scaled to unit length.
// Only a local copy of each box is rotated.
838  {
839  TBOX result; //block bounds
840  TBOX blob_box; //of block
841  FCOORD rotation; //deskew vector
842  float length; //of gradient vector
843  TO_ROW_IT row_it = block->get_rows ();
844  TO_ROW *row; //current row
845  BLOBNBOX *blob; //current blob
846  BLOBNBOX_IT blob_it; //iterator
847 
// length normalises (1, -gradient) to a unit vector.
848  length = sqrt (gradient * gradient + 1);
849  rotation = FCOORD (1 / length, -gradient / length);
850  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
851  row = row_it.data ();
852  blob_it.set_to_list (row->blob_list ());
853  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
854  blob_it.forward ()) {
855  blob = blob_it.data ();
// blob_box is a copy, so the rotation does not touch the blob's stored box.
856  blob_box = blob->bounding_box ();
857  blob_box.rotate (rotation);//de-skew it
858  result += blob_box;
859  }
860  }
861  return result;
862 }
const TBOX & bounding_box() const
Definition: blobbox.h:208
Definition: rect.h:29
Definition: points.h:189
void rotate(const FCOORD &vec)
Definition: rect.h:182
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
void expand_rows ( ICOORD  page_tr,
TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

Definition at line 1065 of file makerow.cpp.

// expand_rows, body listing (source lines 1072-1215).
// After refreshing row limits, blob membership and row statistics, walks the
// rows from the last one backwards and widens each row's [min_y, max_y]
// towards limits derived from row->intercept(), block->line_size and
// textord_expansion_factor. A neighbouring row lying wholly inside the widened
// range is absorbed: its blobs are appended to the current row's blob list and
// the row is deleted. A neighbour that only partly overlaps truncates or
// cancels the expansion.
1072  {
1073  BOOL8 swallowed_row; //eaten a neighbour
1074  float y_max, y_min; //new row limits
1075  float y_bottom, y_top; //allowed limits
1076  TO_ROW *test_row; //next row
1077  TO_ROW *row; //current row
1078  //iterators
1079  BLOBNBOX_IT blob_it = &block->blobs;
1080  TO_ROW_IT row_it = block->get_rows ();
1081 
1082 #ifndef GRAPHICS_DISABLED
1083  if (textord_show_expanded_rows && testing_on) {
1084  if (to_win == NULL)
1085  create_to_win(page_tr);
1086  }
1087 #endif
1088 
1089  adjust_row_limits(block); //shift min,max.
// NOTE(review): source line 1090 is missing from this listing. It presumably
// opens the block closed by the `}` on line 1094 (textord_new_initial_xheight
// is cross-referenced for this function) - confirm against makerow.cpp.
1091  if (block->get_rows ()->length () == 0)
1092  return;
1093  compute_row_stats(block, textord_show_expanded_rows &&testing_on);
1094  }
1095  assign_blobs_to_rows (block, &gradient, 4, TRUE, FALSE, FALSE);
1096  //get real membership
1097  if (block->get_rows ()->length () == 0)
1098  return;
1099  fit_parallel_rows(block,
1100  gradient,
1101  rotation,
1102  block_edge,
1103  textord_show_expanded_rows &&testing_on);
// NOTE(review): source line 1104 is missing from this listing. It presumably
// makes the compute_row_stats() call below conditional - confirm against
// makerow.cpp.
1105  compute_row_stats(block, textord_show_expanded_rows &&testing_on);
1106  row_it.move_to_last ();
1107  do {
1108  row = row_it.data ();
1109  y_max = row->max_y (); //get current limits
1110  y_min = row->min_y ();
// NOTE(review): source lines 1112, 1114 and 1115 are missing from this
// listing, so the final factors of y_bottom and y_top are not shown. The
// cross-referenced kDescenderFraction / kXHeightFraction / kAscenderFraction
// constants are presumably involved - confirm against makerow.cpp.
1111  y_bottom = row->intercept () - block->line_size * textord_expansion_factor *
1113  y_top = row->intercept () + block->line_size * textord_expansion_factor *
1116  if (y_min > y_bottom) { //expansion allowed
1117  if (textord_show_expanded_rows && testing_on)
1118  tprintf("Expanding bottom of row at %f from %f to %f\n",
1119  row->intercept(), y_min, y_bottom);
1120  //expandable
// Keep examining the next row for as long as one was absorbed.
1121  swallowed_row = TRUE;
1122  while (swallowed_row && !row_it.at_last ()) {
1123  swallowed_row = FALSE;
1124  //get next one
1125  test_row = row_it.data_relative (1);
1126  //overlaps space
1127  if (test_row->max_y () > y_bottom) {
1128  if (test_row->min_y () > y_bottom) {
1129  if (textord_show_expanded_rows && testing_on)
1130  tprintf("Eating row below at %f\n", test_row->intercept());
1131  row_it.forward ();
1132 #ifndef GRAPHICS_DISABLED
// NOTE(review): source line 1137, one argument of this call (the colour,
// judging by plot_parallel_row's cross-referenced signature), is missing
// from this listing.
1133  if (textord_show_expanded_rows && testing_on)
1134  plot_parallel_row(test_row,
1135  gradient,
1136  block_edge,
1138  rotation);
1139 #endif
1140  blob_it.set_to_list (row->blob_list ());
1141  blob_it.add_list_after (test_row->blob_list ());
1142  //swallow complete row
1143  delete row_it.extract ();
// Step back to the current row after removing the absorbed one.
1144  row_it.backward ();
1145  swallowed_row = TRUE;
1146  }
1147  else if (test_row->max_y () < y_min) {
1148  //shorter limit
1149  y_bottom = test_row->max_y ();
1150  if (textord_show_expanded_rows && testing_on)
1151  tprintf("Truncating limit to %f due to touching row at %f\n",
1152  y_bottom, test_row->intercept());
1153  }
1154  else {
1155  y_bottom = y_min; //can't expand it
1156  if (textord_show_expanded_rows && testing_on)
1157  tprintf("Not expanding limit beyond %f due to touching row at %f\n",
1158  y_bottom, test_row->intercept());
1159  }
1160  }
1161  }
1162  y_min = y_bottom; //expand it
1163  }
// Mirror of the bottom expansion, applied to the top using the previous row.
1164  if (y_max < y_top) { //expansion allowed
1165  if (textord_show_expanded_rows && testing_on)
1166  tprintf("Expanding top of row at %f from %f to %f\n",
1167  row->intercept(), y_max, y_top);
1168  swallowed_row = TRUE;
1169  while (swallowed_row && !row_it.at_first ()) {
1170  swallowed_row = FALSE;
1171  //get one above
1172  test_row = row_it.data_relative (-1);
1173  if (test_row->min_y () < y_top) {
1174  if (test_row->max_y () < y_top) {
1175  if (textord_show_expanded_rows && testing_on)
1176  tprintf("Eating row above at %f\n", test_row->intercept());
1177  row_it.backward ();
1178  blob_it.set_to_list (row->blob_list ());
1179 #ifndef GRAPHICS_DISABLED
// NOTE(review): source line 1184, one argument of this call (the colour,
// judging by plot_parallel_row's cross-referenced signature), is missing
// from this listing.
1180  if (textord_show_expanded_rows && testing_on)
1181  plot_parallel_row(test_row,
1182  gradient,
1183  block_edge,
1185  rotation);
1186 #endif
1187  blob_it.add_list_after (test_row->blob_list ());
1188  //swallow complete row
1189  delete row_it.extract ();
// Step forward to the current row after removing the absorbed one.
1190  row_it.forward ();
1191  swallowed_row = TRUE;
1192  }
1193  else if (test_row->min_y () < y_max) {
1194  //shorter limit
1195  y_top = test_row->min_y ();
1196  if (textord_show_expanded_rows && testing_on)
1197  tprintf("Truncating limit to %f due to touching row at %f\n",
1198  y_top, test_row->intercept());
1199  }
1200  else {
1201  y_top = y_max; //can't expand it
1202  if (textord_show_expanded_rows && testing_on)
1203  tprintf("Not expanding limit beyond %f due to touching row at %f\n",
1204  y_top, test_row->intercept());
1205  }
1206  }
1207  }
1208  y_max = y_top;
1209  }
1210  //new limits
1211  row->set_limits (y_min, y_max);
1212  row_it.backward ();
1213  }
1214  while (!row_it.at_last ());
1215 }
void compute_row_stats(TO_BLOCK *block, BOOL8 testing_on)
Definition: makerow.cpp:1259
void create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:49
float min_y() const
Definition: blobbox.h:533
BLOBNBOX_LIST blobs
Definition: blobbox.h:735
EXTERN ScrollView * to_win
Definition: drawtord.cpp:40
float intercept() const
Definition: blobbox.h:560
void set_limits(float new_min, float new_max)
Definition: blobbox.h:594
float line_size
Definition: blobbox.h:748
unsigned char BOOL8
Definition: host.h:113
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
Definition: makerow.cpp:2402
#define NULL
Definition: host.h:144
#define FALSE
Definition: capi.h:28
void plot_parallel_row(TO_ROW *row, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:125
bool textord_new_initial_xheight
Definition: makerow.cpp:103
static const double kXHeightFraction
Definition: ccstruct.h:35
static const double kAscenderFraction
Definition: ccstruct.h:36
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void adjust_row_limits(TO_BLOCK *block)
Definition: makerow.cpp:1223
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:2051
bool textord_show_expanded_rows
Definition: makerow.cpp:48
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
double textord_expansion_factor
Definition: makerow.cpp:81
float max_y() const
Definition: blobbox.h:530
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
static const double kDescenderFraction
Definition: ccstruct.h:34
#define TRUE
Definition: capi.h:27
void fill_heights ( TO_ROW *  row,
float  gradient,
int  min_height,
int  max_height,
STATS *  heights,
STATS *  floating_heights 
)

Definition at line 1526 of file makerow.cpp.

1527  {
1528  float xcentre; // centre of blob
1529  float top; // top y coord of blob
1530  float height; // height of blob
1531  BLOBNBOX *blob; // current blob
1532  int repeated_set;
1533  BLOBNBOX_IT blob_it = row->blob_list();
1534  if (blob_it.empty()) return; // no blobs in this row
1535  bool has_rep_chars =
1536  row->rep_chars_marked() && row->num_repeated_sets() > 0;
1537  do {
1538  blob = blob_it.data();
1539  if (!blob->joined_to_prev()) {
1540  xcentre = (blob->bounding_box().left() +
1541  blob->bounding_box().right()) / 2.0f;
1542  top = blob->bounding_box().top();
1543  height = blob->bounding_box().height();
1544  if (textord_fix_xheight_bug)
1545  top -= row->baseline.y(xcentre);
1546  else
1547  top -= gradient * xcentre + row->parallel_c();
1548  if (top >= min_height && top <= max_height) {
1549  heights->add(static_cast<inT32>(floor(top + 0.5)), 1);
1550  if (height / top < textord_min_blob_height_fraction) {
1551  floating_heights->add(static_cast<inT32>(floor(top + 0.5)), 1);
1552  }
1553  }
1554  }
1555  // Skip repeated chars, since they are likely to skew the height stats.
1556  if (has_rep_chars && blob->repeated_set() != 0) {
1557  repeated_set = blob->repeated_set();
1558  blob_it.forward();
1559  while (!blob_it.at_first() &&
1560  blob_it.data()->repeated_set() == repeated_set) {
1561  blob_it.forward();
1562  if (textord_debug_xheights)
1563  tprintf("Skipping repeated char when computing xheight\n");
1564  }
1565  } else {
1566  blob_it.forward();
1567  }
1568  } while (!blob_it.at_first());
1569 }
float parallel_c() const
Definition: blobbox.h:551
QSPLINE baseline
Definition: blobbox.h:642
const TBOX & bounding_box() const
Definition: blobbox.h:208
double textord_min_blob_height_fraction
Definition: makerow.cpp:90
inT16 left() const
Definition: rect.h:67
#define f(xc, yc)
Definition: imgscale.cpp:39
inT16 right() const
Definition: rect.h:74
void add(inT32 value, inT32 count)
Definition: statistc.cpp:103
int num_repeated_sets() const
Definition: blobbox.h:609
bool joined_to_prev() const
Definition: blobbox.h:233
bool rep_chars_marked() const
Definition: blobbox.h:603
inT16 top() const
Definition: rect.h:53
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
double y(double x) const
Definition: quspline.cpp:217
size_t top
Definition: tessarray.h:52
bool textord_fix_xheight_bug
Definition: makerow.cpp:56
bool textord_debug_xheights
Definition: makerow.cpp:58
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
int repeated_set() const
Definition: blobbox.h:239
inT16 height() const
Definition: rect.h:97
BOOL8 find_best_dropout_row ( TO_ROW *  row,
inT32  distance,
float  dist_limit,
inT32  line_index,
TO_ROW_IT *  row_it,
BOOL8  testing_on 
)

Definition at line 755 of file makerow.cpp.

762  {
763  inT32 next_index; //of neighbouring row
764  inT32 row_offset; //from current row
765  inT32 abs_dist; //absolute distance
766  inT8 row_inc; //increment to row_index
767  TO_ROW *next_row; //next row
768 
769  if (testing_on)
770  tprintf ("Row at %g(%g), dropout dist=%d,",
771  row->intercept (), row->parallel_c (), distance);
772  if (distance < 0) {
773  row_inc = 1;
774  abs_dist = -distance;
775  }
776  else {
777  row_inc = -1;
778  abs_dist = distance;
779  }
780  if (abs_dist > dist_limit) {
781  if (testing_on) {
782  tprintf (" too far - deleting\n");
783  }
784  return TRUE;
785  }
786  if ((distance < 0 && !row_it->at_last ())
787  || (distance >= 0 && !row_it->at_first ())) {
788  row_offset = row_inc;
789  do {
790  next_row = row_it->data_relative (row_offset);
791  next_index = (inT32) floor (next_row->intercept ());
792  if ((distance < 0
793  && next_index < line_index
794  && next_index > line_index + distance + distance)
795  || (distance >= 0
796  && next_index > line_index
797  && next_index < line_index + distance + distance)) {
798  if (testing_on) {
799  tprintf (" nearer neighbour (%d) at %g\n",
800  line_index + distance - next_index,
801  next_row->intercept ());
802  }
803  return TRUE; //other is nearer
804  }
805  else if (next_index == line_index
806  || next_index == line_index + distance + distance) {
807  if (row->believability () <= next_row->believability ()) {
808  if (testing_on) {
809  tprintf (" equal but more believable at %g (%g/%g)\n",
810  next_row->intercept (),
811  row->believability (),
812  next_row->believability ());
813  }
814  return TRUE; //other is more believable
815  }
816  }
817  row_offset += row_inc;
818  }
819  while ((next_index == line_index
820  || next_index == line_index + distance + distance)
821  && row_offset < row_it->length ());
822  if (testing_on)
823  tprintf (" keeping\n");
824  }
825  return FALSE;
826 }
float parallel_c() const
Definition: blobbox.h:551
float believability() const
Definition: blobbox.h:557
float intercept() const
Definition: blobbox.h:560
int inT32
Definition: host.h:102
#define FALSE
Definition: capi.h:28
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
SIGNED char inT8
Definition: host.h:98
#define TRUE
Definition: capi.h:27
void fit_lms_line ( TO_ROW *  row)

Definition at line 276 of file makerow.cpp.

276  {
277  float m, c; // fitted line
278  tesseract::DetLineFit lms;
279  BLOBNBOX_IT blob_it = row->blob_list();
280 
281  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
282  const TBOX& box = blob_it.data()->bounding_box();
283  lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
284  }
285  double error = lms.Fit(&m, &c);
286  row->set_line(m, c, error);
287 }
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:575
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:41
inT16 left() const
Definition: rect.h:67
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.cpp:49
Definition: rect.h:29
inT16 right() const
Definition: rect.h:74
integer coordinate
Definition: points.h:30
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
inT16 bottom() const
Definition: rect.h:60
void fit_parallel_lms ( float  gradient,
TO_ROW *  row 
)

Definition at line 2093 of file makerow.cpp.

2093  {
2094  float c; // fitted line
2095  int blobcount; // no of blobs
2096  tesseract::DetLineFit lms;
2097  BLOBNBOX_IT blob_it = row->blob_list();
2098 
2099  blobcount = 0;
2100  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
2101  if (!blob_it.data()->joined_to_prev()) {
2102  const TBOX& box = blob_it.data()->bounding_box();
2103  lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
2104  blobcount++;
2105  }
2106  }
2107  double error = lms.ConstrainedFit(gradient, &c);
2108  row->set_parallel_line(gradient, c, error);
2109  if (textord_straight_baselines && blobcount > textord_lms_line_trials) {
2110  error = lms.Fit(&gradient, &c);
2111  }
2112  //set the other too
2113  row->set_line(gradient, c, error);
2114 }
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:575
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:41
int textord_lms_line_trials
Definition: makerow.cpp:102
inT16 left() const
Definition: rect.h:67
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.cpp:49
Definition: rect.h:29
inT16 right() const
Definition: rect.h:74
void set_parallel_line(float gradient, float new_c, float new_error)
Definition: blobbox.h:583
double ConstrainedFit(double m, float *c)
Definition: detlinefit.cpp:159
integer coordinate
Definition: points.h:30
bool textord_straight_baselines
Definition: makerow.cpp:53
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
inT16 bottom() const
Definition: rect.h:60
void fit_parallel_rows ( TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

Definition at line 2051 of file makerow.cpp.

2057  {
2058 #ifndef GRAPHICS_DISABLED
2059  ScrollView::Color colour; //of row
2060 #endif
2061  TO_ROW_IT row_it = block->get_rows ();
2062 
2063  row_it.move_to_first ();
2064  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2065  if (row_it.data ()->blob_list ()->empty ())
2066  delete row_it.extract (); //nothing in it
2067  else
2068  fit_parallel_lms (gradient, row_it.data ());
2069  }
2070 #ifndef GRAPHICS_DISABLED
2071  if (testing_on) {
2072  colour = ScrollView::RED;
2073  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2074  plot_parallel_row (row_it.data (), gradient,
2075  block_edge, colour, rotation);
2076  colour = (ScrollView::Color) (colour + 1);
2077  if (colour > ScrollView::MAGENTA)
2078  colour = ScrollView::RED;
2079  }
2080  }
2081 #endif
2082  row_it.sort (row_y_order); //may have gone out of order
2083 }
void fit_parallel_lms(float gradient, TO_ROW *row)
Definition: makerow.cpp:2093
int row_y_order(const void *item1, const void *item2)
Definition: makerow.cpp:2716
void plot_parallel_row(TO_ROW *row, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:125
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
double* linear_spline_baseline ( TO_ROW *  row,
TO_BLOCK *  block,
inT32 &  segments,
inT32  xstarts[] 
)

Definition at line 2311 of file makerow.cpp.

2316  {
2317  int blobcount; //no of blobs
2318  int blobindex; //current blob
2319  int index1, index2; //blob numbers
2320  int blobs_per_segment; //blobs in each
2321  TBOX box; //blob box
2322  TBOX new_box; //new_it box
2323  //blobs
2324  BLOBNBOX_IT blob_it = row->blob_list ();
2325  BLOBNBOX_IT new_it = blob_it; //front end
2326  float b, c; //fitted curve
2327  tesseract::DetLineFit lms;
2328  double *coeffs; //quadratic coeffs
2329  inT32 segment; //current segment
2330 
2331  box = box_next_pre_chopped (&blob_it);
2332  xstarts[0] = box.left ();
2333  blobcount = 1;
2334  while (!blob_it.at_first ()) {
2335  blobcount++;
2336  box = box_next_pre_chopped (&blob_it);
2337  }
2338  segments = blobcount / textord_spline_medianwin;
2339  if (segments < 1)
2340  segments = 1;
2341  blobs_per_segment = blobcount / segments;
2342  coeffs = (double *) alloc_mem (segments * 3 * sizeof (double));
2343  if (textord_oldbl_debug)
2344  tprintf
2345  ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
2346  blobcount, box.left (), box.bottom (), segments, blobs_per_segment);
2347  segment = 1;
2348  for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
2349  box_next_pre_chopped(&new_it);
2350  index1 = 0;
2351  blobindex = index2;
2352  do {
2353  blobindex += blobs_per_segment;
2354  lms.Clear();
2355  while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
2356  box = box_next_pre_chopped (&blob_it);
2357  int middle = (box.left() + box.right()) / 2;
2358  lms.Add(ICOORD(middle, box.bottom()));
2359  index1++;
2360  if (index1 == blobindex - blobs_per_segment / 2
2361  || index1 == blobcount - 1) {
2362  xstarts[segment] = box.left ();
2363  }
2364  }
2365  lms.Fit(&b, &c);
2366  coeffs[segment * 3 - 3] = 0;
2367  coeffs[segment * 3 - 2] = b;
2368  coeffs[segment * 3 - 1] = c;
2369  segment++;
2370  if (segment > segments)
2371  break;
2372 
2373  blobindex += blobs_per_segment;
2374  lms.Clear();
2375  while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
2376  new_box = box_next_pre_chopped (&new_it);
2377  int middle = (new_box.left() + new_box.right()) / 2;
2378  lms.Add(ICOORD (middle, new_box.bottom()));
2379  index2++;
2380  if (index2 == blobindex - blobs_per_segment / 2
2381  || index2 == blobcount - 1) {
2382  xstarts[segment] = new_box.left ();
2383  }
2384  }
2385  lms.Fit(&b, &c);
2386  coeffs[segment * 3 - 3] = 0;
2387  coeffs[segment * 3 - 2] = b;
2388  coeffs[segment * 3 - 1] = c;
2389  segment++;
2390  }
2391  while (segment <= segments);
2392  return coeffs;
2393 }
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
Definition: blobbox.cpp:614
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:41
EXTERN bool textord_oldbl_debug
Definition: oldbasel.cpp:40
inT16 left() const
Definition: rect.h:67
int inT32
Definition: host.h:102
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.cpp:49
Definition: rect.h:29
inT16 right() const
Definition: rect.h:74
void * alloc_mem(inT32 count)
Definition: memry.cpp:48
int textord_spline_medianwin
Definition: makerow.cpp:67
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
integer coordinate
Definition: points.h:30
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
inT16 bottom() const
Definition: rect.h:60
void make_baseline_spline ( TO_ROW *  row,
TO_BLOCK *  block 
)

Definition at line 2178 of file makerow.cpp.

2179  {
2180  BLOBNBOX_IT blob_it = row->blob_list ();
2181  inT32 *xstarts; // spline boundaries
2182  double *coeffs; // quadratic coeffs
2183  inT32 segments; // no of segments
2184 
2185  xstarts =
2186  (inT32 *) alloc_mem((row->blob_list()->length() + 1) * sizeof(inT32));
2187  if (segment_baseline(row, block, segments, xstarts)
2188  && !textord_straight_baselines && !textord_parallel_baselines) {
2189  coeffs = linear_spline_baseline(row, block, segments, xstarts);
2190  } else {
2191  xstarts[1] = xstarts[segments];
2192  segments = 1;
2193  coeffs = (double *) alloc_mem (3 * sizeof (double));
2194  coeffs[0] = 0;
2195  coeffs[1] = row->line_m ();
2196  coeffs[2] = row->line_c ();
2197  }
2198  row->baseline = QSPLINE (segments, xstarts, coeffs);
2199  free_mem(coeffs);
2200  free_mem(xstarts);
2201 }
QSPLINE baseline
Definition: blobbox.h:642
float line_m() const
Definition: blobbox.h:542
void free_mem(void *oldchunk)
Definition: memry.cpp:56
float line_c() const
Definition: blobbox.h:545
int inT32
Definition: host.h:102
void * alloc_mem(inT32 count)
Definition: memry.cpp:48
double * linear_spline_baseline(TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
Definition: makerow.cpp:2311
BOOL8 segment_baseline(TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
Definition: makerow.cpp:2212
bool textord_parallel_baselines
Definition: makerow.cpp:52
bool textord_straight_baselines
Definition: makerow.cpp:53
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
void make_initial_textrows ( ICOORD  page_tr,
TO_BLOCK *  block,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 236 of file makerow.cpp.

241  {
242  TO_ROW_IT row_it = block->get_rows ();
243 
244 #ifndef GRAPHICS_DISABLED
245  ScrollView::Color colour; //of row
246 
247  if (textord_show_initial_rows && testing_on) {
248  if (to_win == NULL)
249  create_to_win(page_tr);
250  }
251 #endif
252  //guess skew
253  assign_blobs_to_rows (block, NULL, 0, TRUE, TRUE, textord_show_initial_rows && testing_on);
254  row_it.move_to_first ();
255  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
256  fit_lms_line (row_it.data ());
257 #ifndef GRAPHICS_DISABLED
258  if (textord_show_initial_rows && testing_on) {
259  colour = ScrollView::RED;
260  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
261  plot_to_row (row_it.data (), colour, rotation);
262  colour = (ScrollView::Color) (colour + 1);
263  if (colour > ScrollView::MAGENTA)
264  colour = ScrollView::RED;
265  }
266  }
267 #endif
268 }
void create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:49
EXTERN ScrollView * to_win
Definition: drawtord.cpp:40
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
Definition: makerow.cpp:2402
#define NULL
Definition: host.h:144
bool textord_show_initial_rows
Definition: makerow.cpp:46
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:276
void plot_to_row(TO_ROW *row, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:91
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
#define TRUE
Definition: capi.h:27
float make_rows ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 197 of file makerow.cpp.

197  {
198  float port_m; // global skew
199  float port_err; // global noise
200  TO_BLOCK_IT block_it; // iterator
201 
202  block_it.set_to_list(port_blocks);
203  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
204  block_it.forward())
205  make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f),
206  !(BOOL8) textord_test_landscape);
207  // compute globally
208  compute_page_skew(port_blocks, port_m, port_err);
209  block_it.set_to_list(port_blocks);
210  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
211  cleanup_rows_making(page_tr, block_it.data(), port_m, FCOORD(1.0f, 0.0f),
212  block_it.data()->block->bounding_box().left(),
213  !(BOOL8) textord_test_landscape);
214  }
215  return port_m; // global skew
216 }
void cleanup_rows_making(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:534
unsigned char BOOL8
Definition: host.h:113
void make_initial_textrows(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
Definition: makerow.cpp:236
#define f(xc, yc)
Definition: imgscale.cpp:39
bool textord_test_landscape
Definition: makerow.cpp:51
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:296
Definition: points.h:189
float make_single_row ( ICOORD  page_tr,
TO_BLOCK *  block,
TO_BLOCK_LIST *  blocks 
)

Definition at line 167 of file makerow.cpp.

167  {
168  BLOBNBOX_IT blob_it = &block->blobs;
169  TO_ROW_IT row_it = block->get_rows();
170 
171  // Include all the small blobs and large blobs.
172  blob_it.add_list_after(&block->small_blobs);
173  blob_it.add_list_after(&block->noise_blobs);
174  blob_it.add_list_after(&block->large_blobs);
175  if (block->blobs.singleton()) {
176  blob_it.move_to_first();
177  float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it);
178  if (size > block->line_size)
179  block->line_size = size;
180  }
181  MakeRowFromBlobs(block->line_size, &blob_it, &row_it);
182  // Fit an LMS line to the rows.
183  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward())
184  fit_lms_line(row_it.data());
185  float gradient;
186  float fit_error;
187  // Compute the skew based on the fitted line.
188  compute_page_skew(blocks, gradient, fit_error);
189  return gradient;
190 }
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:737
BLOBNBOX_LIST blobs
Definition: blobbox.h:735
float MakeRowFromSubBlobs(TO_BLOCK *block, C_BLOB *blob, TO_ROW_IT *row_it)
Definition: makerow.cpp:137
float line_size
Definition: blobbox.h:748
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:276
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:738
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:296
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:739
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
float MakeRowFromSubBlobs ( TO_BLOCK *  block,
C_BLOB *  blob,
TO_ROW_IT *  row_it 
)

Definition at line 137 of file makerow.cpp.

137  {
138  // The blobs made from the children will go in the small_blobs list.
139  BLOBNBOX_IT bb_it(&block->small_blobs);
140  C_OUTLINE_IT ol_it(blob->out_list());
141  // Get the children.
142  ol_it.set_to_list(ol_it.data()->child());
143  if (ol_it.empty())
144  return 0.0f;
145  for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
146  // Deep copy the child outline and use that to make a blob.
147  C_OUTLINE* outline = C_OUTLINE::deep_copy(ol_it.data());
148  // The constructor from a list of outlines corrects the direction.
149  C_OUTLINE_LIST outlines;
150  C_OUTLINE_IT ol_it(&outlines);
151  ol_it.add_after_then_move(outline);
152  C_BLOB* blob = new C_BLOB(&outlines);
153  BLOBNBOX* bbox = new BLOBNBOX(blob);
154  bb_it.add_after_then_move(bbox);
155  }
156  // Now we can make a row from the blobs.
157  return MakeRowFromBlobs(block->line_size, &bb_it, row_it);
158 }
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:42
float line_size
Definition: blobbox.h:748
#define f(xc, yc)
Definition: imgscale.cpp:39
static C_OUTLINE * deep_copy(const C_OUTLINE *src)
Definition: coutln.h:169
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:738
void mark_repeated_chars ( TO_ROW *  row)

Definition at line 2760 of file makerow.cpp.

2760  {
2761  BLOBNBOX_IT box_it(row->blob_list()); // Iterator.
2762  int num_repeated_sets = 0;
2763  if (!box_it.empty()) {
2764  do {
2765  BLOBNBOX* bblob = box_it.data();
2766  int repeat_length = 0;
2767  if (bblob->flow() == BTFT_LEADER &&
2768  !bblob->joined_to_prev() && bblob->cblob() != NULL) {
2769  BLOBNBOX_IT test_it(box_it);
2770  for (test_it.forward(); !test_it.at_first(); test_it.forward()) {
2771  bblob = test_it.data();
2772  if (bblob->flow() != BTFT_LEADER)
2773  break;
2774  if (bblob->joined_to_prev() || bblob->cblob() == NULL) {
2775  tprintf("Cancelled repeat of length %d due to %s\n",
2776  repeat_length,
2777  bblob->joined_to_prev() ? "Joined" : "Null");
2778  repeat_length = 0;
2779  break;
2780  }
2781  ++repeat_length;
2782  }
2783  }
2784  if (repeat_length >= kMinLeaderCount) {
2785  num_repeated_sets++;
2786  for (; repeat_length > 0; box_it.forward(), --repeat_length) {
2787  bblob = box_it.data();
2788  bblob->set_repeated_set(num_repeated_sets);
2789  }
2790  if (!box_it.at_first())
2791  bblob->set_repeated_set(0);
2792  } else {
2793  box_it.forward();
2794  bblob->set_repeated_set(0);
2795  }
2796  } while (!box_it.at_first()); // until all done
2797  }
2798  row->set_num_repeated_sets(num_repeated_sets);
2799 }
void set_repeated_set(int set_id)
Definition: blobbox.h:242
C_BLOB * cblob() const
Definition: blobbox.h:245
#define NULL
Definition: host.h:144
void set_num_repeated_sets(int num_sets)
Definition: blobbox.h:612
const int kMinLeaderCount
Definition: makerow.cpp:107
bool joined_to_prev() const
Definition: blobbox.h:233
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
BlobTextFlowType flow() const
Definition: blobbox.h:272
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
OVERLAP_STATE most_overlapping_row ( TO_ROW_IT *  row_it,
TO_ROW *&  best_row,
float  top,
float  bottom,
float  rowsize,
BOOL8  testing_blob 
)

Definition at line 2598 of file makerow.cpp.

2605  {
2606  OVERLAP_STATE result; //result of tests
2607  float overlap; //of blob & row
2608  float bestover; //nearest row
2609  float merge_top, merge_bottom; //size of merged row
2610  ICOORD testpt; //testing only
2611  TO_ROW *row; //current row
2612  TO_ROW *test_row; //for multiple overlaps
2613  BLOBNBOX_IT blob_it; //for merging rows
2614 
2615  result = ASSIGN;
2616  row = row_it->data ();
2617  bestover = top - bottom;
2618  if (top > row->max_y ())
2619  bestover -= top - row->max_y ();
2620  if (bottom < row->min_y ())
2621  //compute overlap
2622  bestover -= row->min_y () - bottom;
2623  if (testing_blob) {
2624  tprintf ("Test blob y=(%g,%g), row=(%f,%f), overlap=%f\n",
2625  bottom, top, row->min_y (), row->max_y (), bestover);
2626  }
2627  test_row = row;
2628  do {
2629  if (!row_it->at_last ()) {
2630  row_it->forward ();
2631  test_row = row_it->data ();
2632  if (test_row->min_y () <= top && test_row->max_y () >= bottom) {
2633  merge_top =
2634  test_row->max_y () >
2635  row->max_y ()? test_row->max_y () : row->max_y ();
2636  merge_bottom =
2637  test_row->min_y () <
2638  row->min_y ()? test_row->min_y () : row->min_y ();
2639  if (merge_top - merge_bottom <= rowsize) {
2640  if (testing_blob) {
2641  tprintf ("Merging rows at (%g,%g), (%g,%g)\n",
2642  row->min_y (), row->max_y (),
2643  test_row->min_y (), test_row->max_y ());
2644  }
2645  test_row->set_limits (merge_bottom, merge_top);
2646  blob_it.set_to_list (test_row->blob_list ());
2647  blob_it.add_list_after (row->blob_list ());
2648  blob_it.sort (blob_x_order);
2649  row_it->backward ();
2650  delete row_it->extract ();
2651  row_it->forward ();
2652  bestover = -1.0f; //force replacement
2653  }
2654  overlap = top - bottom;
2655  if (top > test_row->max_y ())
2656  overlap -= top - test_row->max_y ();
2657  if (bottom < test_row->min_y ())
2658  overlap -= test_row->min_y () - bottom;
2659  if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
2660  result = REJECT;
2661  }
2662  if (overlap > bestover) {
2663  bestover = overlap; //find biggest overlap
2664  row = test_row;
2665  }
2666  if (testing_blob) {
2667  tprintf
2668  ("Test blob y=(%g,%g), row=(%f,%f), overlap=%f->%f\n",
2669  bottom, top, test_row->min_y (), test_row->max_y (),
2670  overlap, bestover);
2671  }
2672  }
2673  }
2674  }
2675  while (!row_it->at_last ()
2676  && test_row->min_y () <= top && test_row->max_y () >= bottom);
2677  while (row_it->data () != row)
2678  row_it->backward (); //make it point to row
2679  //doesn't overlap much
2680  if (top - bottom - bestover > rowsize * textord_overlap_x &&
2681  (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x)
2682  && result == ASSIGN)
2683  result = NEW_ROW; //doesn't overlap enough
2684  best_row = row;
2685  return result;
2686 }
float min_y() const
Definition: blobbox.h:533
void set_limits(float new_min, float new_max)
Definition: blobbox.h:594
bool textord_fix_makerow_bug
Definition: makerow.cpp:57
OVERLAP_STATE
Definition: makerow.h:30
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2694
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
double textord_overlap_x
Definition: makerow.cpp:82
Definition: makerow.h:33
integer coordinate
Definition: points.h:30
size_t top
Definition: tessarray.h:52
float max_y() const
Definition: blobbox.h:530
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
Definition: makerow.h:32
void pre_associate_blobs ( ICOORD  page_tr,
TO_BLOCK *  block,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 1965 of file makerow.cpp.

1970  {
1971 #ifndef GRAPHICS_DISABLED
1972  ScrollView::Color colour; //of boxes
1973 #endif
1974  BLOBNBOX *blob; //current blob
1975  BLOBNBOX *nextblob; //next in list
1976  TBOX blob_box;
1977  FCOORD blob_rotation; //inverse of rotation
1978  BLOBNBOX_IT blob_it; //iterator
1979  BLOBNBOX_IT start_it; //iterator
1980  TO_ROW_IT row_it = block->get_rows ();
1981 
1982 #ifndef GRAPHICS_DISABLED
1983  colour = ScrollView::RED;
1984 #endif
1985 
1986  blob_rotation = FCOORD (rotation.x (), -rotation.y ());
1987  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1988  //get blobs
1989  blob_it.set_to_list (row_it.data ()->blob_list ());
1990  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1991  blob_it.forward ()) {
1992  blob = blob_it.data ();
1993  blob_box = blob->bounding_box ();
1994  start_it = blob_it; //save start point
1995  // if (testing_on && textord_show_final_blobs)
1996  // {
1997  // tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n",
1998  // blob_box.left(),blob_box.bottom(),
1999  // blob_box.right(),blob_box.top(),
2000  // (void*)blob,blob_it.length());
2001  // }
2002  bool overlap;
2003  do {
2004  overlap = false;
2005  if (!blob_it.at_last ()) {
2006  nextblob = blob_it.data_relative(1);
2007  overlap = blob_box.major_x_overlap(nextblob->bounding_box());
2008  if (overlap) {
2009  blob->merge(nextblob); // merge new blob
2010  blob_box = blob->bounding_box(); // get bigger box
2011  blob_it.forward();
2012  }
2013  }
2014  }
2015  while (overlap);
2016  blob->chop (&start_it, &blob_it,
2017  blob_rotation,
2018  block->line_size * tesseract::CCStruct::kXHeightFraction *
2019  textord_chop_width);
2020  //attempt chop
2021  }
2022 #ifndef GRAPHICS_DISABLED
2023  if (testing_on && textord_show_final_blobs) {
2024  if (to_win == NULL)
2025  create_to_win(page_tr);
2026  to_win->Pen(colour);
2027  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
2028  blob_it.forward ()) {
2029  blob = blob_it.data ();
2030  blob_box = blob->bounding_box ();
2031  blob_box.rotate (rotation);
2032  if (!blob->joined_to_prev ()) {
2033  to_win->Rectangle (blob_box.left (), blob_box.bottom (),
2034  blob_box.right (), blob_box.top ());
2035  }
2036  }
2037  colour = (ScrollView::Color) (colour + 1);
2038  if (colour > ScrollView::MAGENTA)
2039  colour = ScrollView::RED;
2040  }
2041 #endif
2042  }
2043 }
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:402
void create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:49
EXTERN ScrollView * to_win
Definition: drawtord.cpp:40
void Pen(Color color)
Definition: scrollview.cpp:721
const TBOX & bounding_box() const
Definition: blobbox.h:208
float line_size
Definition: blobbox.h:748
double textord_chop_width
Definition: makerow.cpp:79
#define NULL
Definition: host.h:144
inT16 left() const
Definition: rect.h:67
Definition: rect.h:29
inT16 right() const
Definition: rect.h:74
static const double kXHeightFraction
Definition: ccstruct.h:35
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:601
bool joined_to_prev() const
Definition: blobbox.h:233
bool textord_show_final_blobs
Definition: makerow.cpp:50
void chop(BLOBNBOX_IT *start_it, BLOBNBOX_IT *blob_it, FCOORD rotation, float xheight)
Definition: blobbox.cpp:111
inT16 top() const
Definition: rect.h:53
void merge(BLOBNBOX *nextblob)
Definition: blobbox.cpp:83
Definition: points.h:189
void rotate(const FCOORD &vec)
Definition: rect.h:182
float y() const
Definition: points.h:212
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
float x() const
Definition: points.h:209
inT16 bottom() const
Definition: rect.h:60
int row_spacing_order ( const void *  item1,
const void *  item2 
)

Definition at line 2738 of file makerow.cpp.

2740  {
2741  //converted ptr
2742  TO_ROW *row1 = *(TO_ROW **) item1;
2743  //converted ptr
2744  TO_ROW *row2 = *(TO_ROW **) item2;
2745 
2746  if (row1->spacing < row2->spacing)
2747  return -1;
2748  else if (row1->spacing > row2->spacing)
2749  return 1;
2750  else
2751  return 0;
2752 }
float spacing
Definition: blobbox.h:628
int row_y_order ( const void *  item1,
const void *  item2 
)

Definition at line 2716 of file makerow.cpp.

2718  {
2719  //converted ptr
2720  TO_ROW *row1 = *(TO_ROW **) item1;
2721  //converted ptr
2722  TO_ROW *row2 = *(TO_ROW **) item2;
2723 
2724  if (row1->parallel_c () > row2->parallel_c ())
2725  return -1;
2726  else if (row1->parallel_c () < row2->parallel_c ())
2727  return 1;
2728  else
2729  return 0;
2730 }
float parallel_c() const
Definition: blobbox.h:551
BOOL8 segment_baseline ( TO_ROW *  row,
TO_BLOCK *  block,
inT32 &  segments,
inT32  xstarts[] 
)

Definition at line 2212 of file makerow.cpp.

2217  {
2218  BOOL8 needs_curve; //needs curved line
2219  int blobcount; //no of blobs
2220  int blobindex; //current blob
2221  int last_state; //above, on , below
2222  int state; //of current blob
2223  float yshift; //from baseline
2224  TBOX box; //blob box
2225  TBOX new_box; //new_it box
2226  float middle; //xcentre of blob
2227  //blobs
2228  BLOBNBOX_IT blob_it = row->blob_list ();
2229  BLOBNBOX_IT new_it = blob_it; //front end
2230  SORTED_FLOATS yshifts; //shifts from baseline
2231 
2232  needs_curve = FALSE;
2233  box = box_next_pre_chopped (&blob_it);
2234  xstarts[0] = box.left ();
2235  segments = 1;
2236  blobcount = row->blob_list ()->length ();
2237  if (textord_oldbl_debug)
2238  tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n",
2239  blobcount, box.left (), box.bottom ());
2240  if (blobcount <= textord_spline_medianwin
2241  || blobcount < textord_spline_minblobs) {
2242  blob_it.move_to_last ();
2243  box = blob_it.data ()->bounding_box ();
2244  xstarts[1] = box.right ();
2245  return FALSE;
2246  }
2247  last_state = 0;
2248  new_it.mark_cycle_pt ();
2249  for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {
2250  new_box = box_next_pre_chopped (&new_it);
2251  middle = (new_box.left () + new_box.right ()) / 2.0;
2252  yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
2253  //record shift
2254  yshifts.add (yshift, blobindex);
2255  if (new_it.cycled_list ()) {
2256  xstarts[1] = new_box.right ();
2257  return FALSE;
2258  }
2259  }
2260  for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++)
2261  box = box_next_pre_chopped (&blob_it);
2262  do {
2263  new_box = box_next_pre_chopped (&new_it);
2264  //get middle one
2265  yshift = yshifts[textord_spline_medianwin / 2];
2266  if (yshift > textord_spline_shift_fraction * block->line_size)
2267  state = 1;
2268  else if (-yshift > textord_spline_shift_fraction * block->line_size)
2269  state = -1;
2270  else
2271  state = 0;
2272  if (state != 0)
2273  needs_curve = TRUE;
2274  // tprintf("State=%d, prev=%d, shift=%g\n",
2275  // state,last_state,yshift);
2276  if (state != last_state && blobcount > textord_spline_minblobs) {
2277  xstarts[segments++] = box.left ();
2278  blobcount = 0;
2279  }
2280  last_state = state;
2281  yshifts.remove (blobindex - textord_spline_medianwin);
2282  box = box_next_pre_chopped (&blob_it);
2283  middle = (new_box.left () + new_box.right ()) / 2.0;
2284  yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
2285  yshifts.add (yshift, blobindex);
2286  blobindex++;
2287  blobcount++;
2288  }
2289  while (!new_it.cycled_list ());
2290  if (blobcount > textord_spline_minblobs || segments == 1) {
2291  xstarts[segments] = new_box.right ();
2292  }
2293  else {
2294  xstarts[--segments] = new_box.right ();
2295  }
2296  if (textord_oldbl_debug)
2297  tprintf ("Made %d segments on row at (%d,%d)\n",
2298  segments, box.right (), box.bottom ());
2299  return needs_curve;
2300 }
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
Definition: blobbox.cpp:614
float line_m() const
Definition: blobbox.h:542
void add(float value, inT32 key)
Definition: sortflts.cpp:30
float line_size
Definition: blobbox.h:748
unsigned char BOOL8
Definition: host.h:113
EXTERN bool textord_oldbl_debug
Definition: oldbasel.cpp:40
inT16 left() const
Definition: rect.h:67
float line_c() const
Definition: blobbox.h:545
Definition: rect.h:29
#define FALSE
Definition: capi.h:28
inT16 right() const
Definition: rect.h:74
int textord_spline_medianwin
Definition: makerow.cpp:67
void remove(inT32 key)
Definition: sortflts.cpp:55
int textord_spline_minblobs
Definition: makerow.cpp:66
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
double textord_spline_shift_fraction
Definition: makerow.cpp:72
#define TRUE
Definition: capi.h:27
inT16 bottom() const
Definition: rect.h:60
void separate_underlines ( TO_BLOCK * block,
float  gradient,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 1892 of file makerow.cpp.

1895  { // correct orientation
1896  BLOBNBOX *blob; // current blob
1897  C_BLOB *rotated_blob; // rotated blob
1898  TO_ROW *row; // current row
1899  float length; // of g_vec
1900  TBOX blob_box;
1901  FCOORD blob_rotation; // inverse of rotation
1902  FCOORD g_vec; // skew rotation
1903  BLOBNBOX_IT blob_it; // iterator
1904  // iterator
1905  BLOBNBOX_IT under_it = &block->underlines;
1906  BLOBNBOX_IT large_it = &block->large_blobs;
1907  TO_ROW_IT row_it = block->get_rows();
1908  int min_blob_height = static_cast<int>(textord_min_blob_height_fraction *
1909  block->line_size + 0.5);
1910 
1911  // length of vector
1912  length = sqrt(1 + gradient * gradient);
1913  g_vec = FCOORD(1 / length, -gradient / length);
1914  blob_rotation = FCOORD(rotation.x(), -rotation.y());
1915  blob_rotation.rotate(g_vec); // undoing everything
1916  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1917  row = row_it.data();
1918  // get blobs
1919  blob_it.set_to_list(row->blob_list());
1920  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
1921  blob_it.forward()) {
1922  blob = blob_it.data();
1923  blob_box = blob->bounding_box();
1924  if (blob_box.width() > block->line_size * textord_underline_width) {
1925  ASSERT_HOST(blob->cblob() != NULL);
1926  rotated_blob = crotate_cblob (blob->cblob(),
1927  blob_rotation);
1928  if (test_underline(
1929  testing_on && textord_show_final_rows,
1930  rotated_blob, static_cast<inT16>(row->intercept()),
1931  static_cast<inT16>(
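1932  block->line_size *
1933  (tesseract::CCStruct::kXHeightFraction +
1934  tesseract::CCStruct::kAscenderFraction / 2.0f)))) {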
1935  under_it.add_after_then_move(blob_it.extract());
1936  if (testing_on && textord_show_final_rows) {
1937  tprintf("Underlined blob at:");
1938  rotated_blob->bounding_box().print();
1939  tprintf("Was:");
1940  blob_box.print();
1941  }
1942  } else if (CountOverlaps(blob->bounding_box(), min_blob_height,
1943  row->blob_list()) >
1944  textord_max_blob_overlaps) {
1945  large_it.add_after_then_move(blob_it.extract());
1946  if (testing_on && textord_show_final_rows) {
1947  tprintf("Large blob overlaps %d blobs at:",
1948  CountOverlaps(blob_box, min_blob_height,
1949  row->blob_list()));
1950  blob_box.print();
1951  }
1952  }
1953  delete rotated_blob;
1954  }
1955  }
1956  }
1957 }
bool textord_show_final_rows
Definition: makerow.cpp:49
float intercept() const
Definition: blobbox.h:560
C_BLOB * cblob() const
Definition: blobbox.h:245
const TBOX & bounding_box() const
Definition: blobbox.h:208
float line_size
Definition: blobbox.h:748
double textord_min_blob_height_fraction
Definition: makerow.cpp:90
#define NULL
Definition: host.h:144
inT16 width() const
Definition: rect.h:104
Definition: rect.h:29
int textord_max_blob_overlaps
Definition: makerow.cpp:69
static const double kXHeightFraction
Definition: ccstruct.h:35
C_BLOB * crotate_cblob(C_BLOB *blob, FCOORD rotation)
Definition: blobbox.cpp:560
BOOL8 test_underline(BOOL8 testing_on, C_BLOB *blob, inT16 baseline, inT16 xheight)
Definition: blkocc.cpp:55
static const double kAscenderFraction
Definition: ccstruct.h:36
double textord_underline_width
Definition: makerow.cpp:88
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:41
void rotate(const FCOORD vec)
Definition: ipoints.h:471
Definition: points.h:189
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:739
float y() const
Definition: points.h:212
TO_ROW_LIST * get_rows()
Definition: blobbox.h:676
BLOBNBOX_LIST underlines
Definition: blobbox.h:736
#define ASSERT_HOST(x)
Definition: errcode.h:84
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:571
void print() const
Definition: rect.h:263
float x() const
Definition: points.h:209

Variable Documentation

const int kMinLeaderCount = 5

Definition at line 107 of file makerow.cpp.

const int kMinSize = 8

Definition at line 393 of file makerow.cpp.

const double kNoiseSize = 0.5

Definition at line 392 of file makerow.cpp.

double textord_ascheight_mode_fraction = 0.08

"Min pile height to make ascheight"

Definition at line 94 of file makerow.cpp.

double textord_ascx_ratio_max = 1.8

"Max cap/xheight"

Definition at line 98 of file makerow.cpp.

double textord_ascx_ratio_min = 1.25

"Min cap/xheight"

Definition at line 97 of file makerow.cpp.

bool textord_biased_skewcalc = TRUE

"Bias skew estimates with line length"

Definition at line 59 of file makerow.cpp.

double textord_chop_width = 1.5

"Max width before chopping"

Definition at line 79 of file makerow.cpp.

bool textord_debug_xheights = FALSE

"Test xheight algorithms"

Definition at line 58 of file makerow.cpp.

double textord_descheight_mode_fraction = 0.08

"Min pile height to make descheight"

Definition at line 96 of file makerow.cpp.

double textord_descx_ratio_max = 0.6

"Max desc/xheight"

Definition at line 100 of file makerow.cpp.

double textord_descx_ratio_min = 0.25

"Min desc/xheight"

Definition at line 99 of file makerow.cpp.

double textord_excess_blobsize = 1.3

"New row made if blob makes row this big"

Definition at line 86 of file makerow.cpp.

double textord_expansion_factor = 1.0

"Factor to expand rows by in expand_rows"

Definition at line 81 of file makerow.cpp.

bool textord_fix_makerow_bug = TRUE

"Prevent multiple baselines"

Definition at line 57 of file makerow.cpp.

bool textord_fix_xheight_bug = TRUE

"Use spline baseline"

Definition at line 56 of file makerow.cpp.

bool textord_heavy_nr = FALSE

"Vigorously remove noise"

Definition at line 45 of file makerow.cpp.

bool textord_interpolating_skew = TRUE

"Interpolate across gaps"

Definition at line 60 of file makerow.cpp.

double textord_linespace_iqrlimit = 0.2

"Max iqr/median for linespace"

Definition at line 77 of file makerow.cpp.

int textord_lms_line_trials = 12

"Number of line fits to do"

Definition at line 102 of file makerow.cpp.

int textord_max_blob_overlaps = 4

"Max number of blobs a big blob can overlap"

Definition at line 69 of file makerow.cpp.

double textord_min_blob_height_fraction = 0.75

"Min blob height/top to include blob top into xheight stats"

Definition at line 90 of file makerow.cpp.

int textord_min_blobs_in_row = 4

"Min blobs before gradient counted"

Definition at line 65 of file makerow.cpp.

double textord_min_linesize = 1.25

"* blob height for initial linesize"

Definition at line 84 of file makerow.cpp.

int textord_min_xheight = 10

"Min credible pixel xheight"

Definition at line 70 of file makerow.cpp.

double textord_minxh = 0.25

"fraction of linesize for min xheight"

Definition at line 83 of file makerow.cpp.

bool textord_new_initial_xheight = TRUE

"Use test xheight mechanism"

Definition at line 103 of file makerow.cpp.

double textord_occupancy_threshold = 0.4

"Fraction of neighbourhood"

Definition at line 87 of file makerow.cpp.

bool textord_old_baselines = TRUE

"Use old baseline algorithm"

Definition at line 54 of file makerow.cpp.

bool textord_old_xheight = FALSE

"Use old xheight algorithm"

Definition at line 55 of file makerow.cpp.

double textord_overlap_x = 0.5

"Fraction of linespace for good overlap"

Definition at line 82 of file makerow.cpp.

bool textord_parallel_baselines = TRUE

"Force parallel baselines"

Definition at line 52 of file makerow.cpp.

bool textord_show_expanded_rows = FALSE

"Display rows after expanding"

Definition at line 48 of file makerow.cpp.

bool textord_show_final_blobs = FALSE

"Display blob bounds after pre-ass"

Definition at line 50 of file makerow.cpp.

bool textord_show_final_rows = FALSE

"Display rows after final fitting"

Definition at line 49 of file makerow.cpp.

bool textord_show_initial_rows = FALSE

"Display row accumulation"

Definition at line 46 of file makerow.cpp.

bool textord_show_parallel_rows = FALSE

"Display page correlated rows"

Definition at line 47 of file makerow.cpp.

double textord_skew_ile = 0.5

"Ile of gradients for page skew"

Definition at line 75 of file makerow.cpp.

double textord_skew_lag = 0.01

"Lag for skew on row accumulation"

Definition at line 76 of file makerow.cpp.

int textord_skewsmooth_offset = 2

"For smooth factor"

Definition at line 61 of file makerow.cpp.

int textord_skewsmooth_offset2 = 1

"For smooth factor"

Definition at line 62 of file makerow.cpp.

int textord_spline_medianwin = 6

"Size of window for spline segmentation"

Definition at line 67 of file makerow.cpp.

int textord_spline_minblobs = 8

"Min blobs in each spline segment"

Definition at line 66 of file makerow.cpp.

double textord_spline_outlier_fraction = 0.1

"Fraction of line spacing for outlier"

Definition at line 74 of file makerow.cpp.

double textord_spline_shift_fraction = 0.02

"Fraction of line spacing for quad"

Definition at line 72 of file makerow.cpp.

bool textord_straight_baselines = FALSE

"Force straight baselines"

Definition at line 53 of file makerow.cpp.

bool textord_test_landscape = FALSE

"Tests refer to land/port"

Definition at line 51 of file makerow.cpp.

int textord_test_x = -1

"coord of test pt"

Definition at line 63 of file makerow.cpp.

int textord_test_y = -1

"coord of test pt"

Definition at line 64 of file makerow.cpp.

double textord_underline_width = 2.0

"Multiple of line_size for underline"

Definition at line 88 of file makerow.cpp.

double textord_width_limit = 8

"Max width of blobs to make rows"

Definition at line 78 of file makerow.cpp.

double textord_xheight_error_margin = 0.1

"Accepted variation"

Definition at line 101 of file makerow.cpp.

double textord_xheight_mode_fraction = 0.4

"Min pile height to make xheight"

Definition at line 92 of file makerow.cpp.