#!/usr/bin/ruby.ruby2.5 
# encoding: UTF-8

######################################################################
#
# PDFBeads -- convert scanned images to a single PDF file
# Version 1.0
#
# Unlike other PDF creation tools, this utility attempts to implement
# the approach typically used for DjVu books. Its key feature is
# separating scanned text (typically black, but indexed images with
# a small number of colors are also accepted) from halftone images
# placed into a background layer.
#
# Copyright (C) 2010 Alexey Kryukov (amkryukov@gmail.com).
# All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#######################################################################

require 'optparse'
require 'iconv'
require 'time'

require 'pdfbeads'
include PDFBeads

# Document-level settings. Each value is filled in by the command-line
# option noted beside it and the hash is later passed to PDFBuilder.new.
pdfargs = {
  :labels     => nil,          # -L: page label specification
  :toc        => nil,          # -C: text file describing the outline
  :pagelayout => 'OneColumn',  # -P: default viewer page layout
  :meta       => nil,          # -M: text file with document metadata
  :textpdf    => nil,          # -T: OCR-produced PDF for the text layer
  :delfiles   => false,        # -d: delete intermediate image files
  :debug      => false,        # -D: debugging-friendly PDF output
  :rtl        => false         # -R: right-to-left reading direction
}

# Per-page image processing settings; the hash is later passed to
# PageDataProvider.new.
pageargs = {
  :threshold       => 1,       # -t: binarization threshold (1..255)
  :maxcolors       => 4,       # -x: max palette size split into masks
  :st_resolution   => 0,       # -r: forced foreground mask resolution
  :bg_resolution   => 300,     # -B: background image resolution
  :bg_format       => 'JP2',   # -b: 'JP2', 'JPG' or 'PNG'
  :st_format       => 'JBIG2', # -m: 'JBIG2' or 'G4'
  :pages_per_dict  => 15,      # -p: pages per shared JBIG2 dictionary
  :force_update    => false,   # -f: always rewrite subsidiary images
  :force_grayscale => false    # -g: convert backgrounds to grayscale
}

# Output destination; the literal string 'STDOUT' is the default used when
# no -o option is given.
outpath = 'STDOUT'

# Build the command-line parser. Every handler just records the parsed
# value into pdfargs, pageargs or outpath; values that fail a range check
# are ignored so the default stays in effect.
parser = OptionParser.new do |opts|
  opts.set_summary_width(24)
  opts.set_summary_indent("  ")

  opts.banner = "Usage: pdfbeads [options] [files to process] > out.pdf"

  opts.separator "\n"
  opts.separator "PDF file properties:\n"

  opts.on("-C", "--toc TOCFILE",
                "Build PDF outline dictionary from a text file") do |tocfile|
    pdfargs[:toc] = tocfile
  end
  opts.on("-L", "--labels LSPEC",
                "Specify page labels for user-friendly page numbering") do |lspec|
    pdfargs[:labels] = lspec
  end
  opts.on("-M", "--meta METAFILE",
                "Take metadata for the PDF file from a text file") do |metafile|
    pdfargs[:meta] = metafile
  end
  opts.on("-P", "--pagelayout LAYOUT",
                ['SinglePage', 'OneColumn', 'TwoColumnLeft', 'TwoColumnRight', 'TwoPageLeft', 'TwoPageRight'],
                "Specify the default page layout for PDF viewer, where",
                "LAYOUT is `SinglePage', `OneColumn', `TwoColumnLeft',",
                "`TwoColumnRight', `TwoPageLeft', or `TwoPageRight'") do |pagelayout|
    pdfargs[:pagelayout] = pagelayout
  end
  opts.on("-R", "--right-to-left",
                "Set the flag indicating that the preferred reading",
                "direction for the resulting PDF file is right to left") do |rtl|
    pdfargs[:rtl] = rtl
  end
  opts.on("-T", "--text-pdf PDFFILE",
                "Specify a PDF file produced by passing the same set",
                "of files to an OCR program. Pdfbeads will use that file",
                "to generate the hidden text layer for its PDF output.") do |pdffile|
    # NOTE(review): $has_pdfreader is not set in this script; presumably
    # the pdfbeads library sets it when pdf/reader loads -- confirm.
    if $has_pdfreader
      pdfargs[:textpdf] = pdffile
    else
      $stderr.puts( "Warning: the pdf/reader extension is not available." )
      $stderr.puts( "\tthe -T/--text-pdf option is ignored." )
    end
  end

  opts.separator "\n"
  opts.separator "Image encoding and compression options:\n"

  opts.on("-f", "--force-update",
                "Always write subsidiary image files even if a file",
                "with the same name is already found on the disk") do |f|
    pageargs[:force_update] = f
  end
  opts.on("-m", "--mask-compression FORMAT", ['JBIG2', 'jbig2', 'G4', 'g4', 'Group4', 'CCITTFax'],
                "Compression method for foreground text mask in PDF",
                "pages (JBIG2 or G4). JBIG2 is used by default, unless",
                "the encoder is not available" ) do |format|
    # 'jbig2' is an accepted spelling, so the comparison must ignore case;
    # every other accepted value selects G4.
    pageargs[:st_format] = ( format.upcase == 'JBIG2' ) ? 'JBIG2' : 'G4'
  end
  opts.on("-p", "--pages-per-dict NUM",
                "Generate one shared JBIG2 dictionary per NUM pages.",
                "This option is only applied when JBIG2 compression",
                "is used. Default value is #{pageargs[:pages_per_dict]}") do |p|
    pval = p.to_i
    pageargs[:pages_per_dict] = pval if ( pval >= 0 )
  end
  opts.on("-r", "--force-resolution DPI",
                "Set resolution for foreground mask images to the",
                "specified value (in pixels per inch). Note that the",
                "image is not actually resampled.") do |dpi|
    pageargs[:st_resolution] = dpi.to_f
  end
  opts.on("-t", "--threshold VAL",
                "Set binarization threshold for mixed images. Valid",
                "values are between 1 and 255. 1 is used by default,",
                "as the input files are assumed to be preprocessed",
                "with ScanTailor (http://scantailor.sourceforge.net)") do |t|
    tval = t.to_i
    pageargs[:threshold] = tval if ( (1..255).include? tval )
  end
  opts.on("-x", "--max-colors NUM",
                "If pdfbeads finds an indexed file with NUM or",
                "less colors, then it will attempt to split it into",
                "several bitonal images and encode them all into the",
                "PDF page mask. Otherwise the file is treated just",
                "like a normal greyscale or color image. Default",
                "value is #{pageargs[:maxcolors]}") do |num|
    cval = num.to_i
    pageargs[:maxcolors] = cval if ( (2..255).include? cval )
  end

  opts.separator "\n"
  opts.separator "The following options are only applied when pdfbeads attempts to split"
  opts.separator "a mixed source image into text mask and background layer:\n"

  opts.on("-b", "--bg-compression FORMAT",
                ['JP2', 'JPX', 'J2K', 'JPEG2000', 'JPG', 'JPEG', 'LOSSLESS', 'PNG', 'DEFLATE'],
                "Compression method for background images. Acceptable",
                "values are JP2|JPX|JPEG2000, JPG|JPEG or PNG|LOSSLESS.",
                "JP2 is used by default, unless this format is not",
                "supported by the available ImageMagick version" ) do |format|
    # Collapse the accepted aliases onto the three canonical format names.
    case format.upcase
    when 'JP2', 'JPX', 'J2K', 'JPEG2000'
      pageargs[:bg_format] = 'JP2'
    when 'JPG', 'JPEG'
      pageargs[:bg_format] = 'JPG'
    else
      pageargs[:bg_format] = 'PNG'
    end
  end
  opts.on("-B", "--bg-resolution DPI",
                "Set resolution for background images (300dpi default)" ) do |dpi|
    pageargs[:bg_resolution] = dpi.to_f
  end
  opts.on("-g", "--grayscale",
                "When separating text from background, always convert",
                "background images to grayscale") do |g|
    pageargs[:force_grayscale] = g
  end

  opts.separator "\n"
  opts.separator "General options:\n"

  opts.on("-d", "--delete",
                "Delete intermediate image files used to create PDF") do |d|
    pdfargs[:delfiles] = d
  end
  opts.on("-o", "--output FILE",
                "Print output to a file instead of STDOUT") do |f|
    outpath = f
  end
  opts.on("-D", "--debug",
                "Simplify debugging the PDF output by making the hidden",
                "text layer visible and using uncompressed page streams") do |dbg|
    pdfargs[:debug] = dbg
  end
  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

# Consume the recognised options; whatever remains in ARGV afterwards is
# the list of input files.
parser.parse!(ARGV)

# Input selection: use the file names left on the command line, or every
# entry of the current directory when none were given. The list is sorted
# in both cases.
input_files = ( ARGV.empty? ? Dir.glob("*") : ARGV ).sort

page_set = PageDataProvider.new( input_files,pageargs )
if page_set.length == 0
  $stderr.puts "pdfbeads: no pages to process"
else
  # When JBIG2 was requested but jbig2Encode returns a false value, switch
  # the mask format to G4 (presumably the encoder is unavailable or
  # failed -- confirm against PageDataProvider).
  if pageargs[:st_format].eql?( 'JBIG2' ) && !page_set.jbig2Encode()
    pageargs[:st_format] = 'G4'
  end

  builder = PDFBuilder.new( pdfargs )
  builder.process( page_set,pageargs[:st_format] )
  builder.output( outpath )
end
