#! /bin/bash
# vim: set filetype=bash:

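# pdfidiff: Create a new PDF file showing the differences between two specified
# PDF files.  Relies on poppler's pdftocairo (or ghostscript when -g is given),
# img2pdf, imagdiff, tempname, pngrecolor and pngstrip (or opt-png when -o is
# given), and (optionally) GNU parallel.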

# Copyright (C) 2018-2020 by Brian Lindholm.  This file is part of the
# littleutils utility set.
#
# The pdfidiff utility is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later version.
#
# The pdfidiff utility is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# the littleutils.  If not, see <https://www.gnu.org/licenses/>.

# get a valid temporary directory and set up traps
# (tempname is an external helper; with -D it is presumably asked to create a
# directory and print its path -- TODO confirm against its documentation)
TMPFOLDER=$(tempname -D pdfidiff_$$) || exit 99
# on HUP, INT, QUIT, PIPE, or TERM: remove the work folder and report failure
trap 'rm -rf -- "${TMPFOLDER}" ; exit 1' 1 2 3 13 15
# on any exit: remove the work folder only.  There is deliberately no "exit"
# here: exiting with a fixed status inside the EXIT trap would replace the
# script's real exit status and turn every "exit 1" error path into success.
trap 'rm -rf -- "${TMPFOLDER}"' 0

# defaults for every tunable; the option loop below may override them
DEVICE=pnggray
DPI=300
ENGINE=poppler
FUZZ=1
MODE=2
OPTIMIZE=n
USE_PARALLEL=y
QUIET=n

# print the version banner and the usage summary on stdout
show_usage() {
  cat <<'EOF'
pdfidiff 1.2.3
usage: pdfidiff [-h(elp)] [-g(hostscript)] [-f fuzz_dist] [-m mode]
         [-q(uiet)] [-r resolution_DPI] [-o(ptimize)] [-s(ingle_threaded)]
         old.pdf new.pdf diff.pdf
       mode 1 = gray, 2 = light gray, 3 = dark gray, 4 = color,
            5 = stretched color
EOF
}

# get command-line options
while getopts f:ghm:oqr:sv flag; do
  case $flag in
    f) FUZZ=${OPTARG} ;;
    g) ENGINE=ghostscript ;;
    m) MODE=${OPTARG} ;;
    o) OPTIMIZE=y ;;
    q) QUIET=y ;;
    r) DPI=${OPTARG} ;;
    s) USE_PARALLEL=n ;;
    v) QUIET=n ;;
    # explicit help request: usage, then a successful exit
    h) show_usage
       exit 0 ;;
    # anything getopts rejects: same usage text, but a failing exit
    *) show_usage
       exit 1 ;;
  esac
done
# drop the parsed options so that $1, $2, $3 are the three file arguments
shift $((OPTIND - 1))
# double-check parameters
# MODE must be an integer from 1 to 5.  Anything else -- including empty or
# non-numeric input, which would make a plain "[ -lt ]" test error out and
# leave the bad value in place -- silently falls back to mode 2.
if [[ "$MODE" =~ ^[0-9]+$ ]] && (( 10#$MODE >= 1 && 10#$MODE <= 5 )); then
  MODE=$((10#$MODE))   # normalize e.g. "03" to "3" for the string matches below
else
  MODE=2
fi
# fall back to serial processing when no "parallel" command is found
if ! command -v parallel > /dev/null; then
  USE_PARALLEL=n
fi
# compute ancillary parameters
# DPI must be a positive integer and is capped at 2400; invalid values fall
# back to 300 so that DPM below is always defined.
if [[ "$DPI" =~ ^[0-9]+$ ]] && (( 10#$DPI > 0 )); then
  DPI=$((10#$DPI))
  if (( DPI > 2400 )); then
    DPI=2400
  fi
else
  DPI=300
fi
# dots per meter: one inch is 254/10000 of a meter
DPM=$((DPI*10000/254))
# modes 1-3 render grayscale pages, modes 4-5 render full color; DEVICE is the
# ghostscript output device and CAIROOPT the matching pdftocairo switch
case "$MODE" in
  1|2|3)
    DEVICE=pnggray
    CAIROOPT='-gray'
    ;;
  4|5)
    DEVICE=png16m
    CAIROOPT=''
    ;;
esac

# make sure that input files and targets are specified
if [ ! -r "$1" ]; then
  echo "pdfidiff error: old input file ${1} is missing"
  exit 1
fi
if [ ! -r "$2" ]; then
  echo "pdfidiff error: new input file ${2} is missing"
  exit 1
fi
if [ "X$3" = 'X' ]; then
  echo "pdfidiff error: output file ${3} is unspecified"
  exit 1
fi
# resolve everything to absolute paths now, because the script changes into
# the temporary folder before any of these files are used ("--" guards against
# names that begin with a dash)
OLDFILE=$(realpath -- "$1") || exit 1
NEWFILE=$(realpath -- "$2") || exit 1
# stop here if the output path cannot be resolved, rather than rendering every
# page first and only failing at the final PDF-writing step
if ! OUTPUTFILE=$(realpath -- "$3"); then
  echo "pdfidiff error: cannot resolve path of output file ${3}"
  exit 1
fi

# change to temporary working directory; abort if that fails, because every
# later step reads and writes page images via relative names and globs and
# must not do so in the caller's directory
pushd "$TMPFOLDER" > /dev/null || exit 1

# render both PDFs into per-page PNG files named old-<suffix>.png and
# new-<suffix>.png inside the current (temporary) directory
case "$QUIET" in
  n)
    echo 'pdfidiff message:  -BEGIN-'
    echo "pdfidiff message: splitting (old) $1 and (new) $2 into pages..."
    ;;
esac
# NOTE(review): with pdftocairo the page-number suffix is chosen by the tool;
# the later old/new pairing assumes both runs produce matching suffixes --
# confirm for documents whose page counts differ in number of digits.
case "$ENGINE" in
  ghostscript)
    if [ "$USE_PARALLEL" != 'n' ]; then
      # one gs job per document; :::+ links 'old' with OLDFILE and 'new' with NEWFILE
      parallel -s 10000 "gs -q -dBATCH -dSAFER -dNOPAUSE -sDEVICE=${DEVICE} -r${DPI} -sBandListStorage=memory -sOutputFile={1}-%05d.png -f {2}" \
        ::: 'old' 'new' :::+ "$OLDFILE" "$NEWFILE"
    else
      gs -q -dBATCH -dSAFER -dNOPAUSE -sDEVICE=${DEVICE} -r${DPI} -sBandListStorage=memory -sOutputFile=old-%05d.png -f "$OLDFILE"
      gs -q -dBATCH -dSAFER -dNOPAUSE -sDEVICE=${DEVICE} -r${DPI} -sBandListStorage=memory -sOutputFile=new-%05d.png -f "$NEWFILE"
    fi
    ;;
  *)
    # every other engine value uses pdftocairo; CAIROOPT is left unquoted on
    # purpose so that an empty value contributes no argument at all
    if [ "$USE_PARALLEL" != 'n' ]; then
      parallel -s 10000 "pdftocairo -png ${CAIROOPT} -r ${DPI} {1} {2}" ::: "$OLDFILE" "$NEWFILE" :::+ 'old' 'new'
    else
      pdftocairo -png ${CAIROOPT} -r ${DPI} "$OLDFILE" old
      pdftocairo -png ${CAIROOPT} -r ${DPI} "$NEWFILE" new
    fi
    ;;
esac

# count image files
#######################################
# Create an empty diff-<suffix>.png placeholder for every page image found in
# the current directory, and warn about pages that exist on one side only.
# Globals:   OLDFILE, NEWFILE (read, used in the warning text only)
# Arguments: none
# Outputs:   warnings on stdout
#######################################
pair_page_images() {
  local page
  for page in old-*.png ; do
    # an unmatched glob leaves the literal pattern behind; skip it instead of
    # creating a bogus placeholder named "diff-*.png"
    [ -e "$page" ] || continue
    touch "diff${page#old}"
    if [ ! -r "new${page#old}" ]; then
      echo "pdfidiff warning: $page from $OLDFILE has no counterpart"
    fi
  done
  for page in new-*.png ; do
    [ -e "$page" ] || continue
    # pages present on the new side only still need a placeholder
    if [ ! -r "diff${page#new}" ]; then
      touch "diff${page#new}"
    fi
    if [ ! -r "old${page#new}" ]; then
      echo "pdfidiff warning: $page from $NEWFILE has no counterpart"
    fi
  done
}
pair_page_images

# run imagdiff on every old/new page pair, overwriting the matching
# diff-<suffix>.png placeholder with the result
case "$QUIET" in
  n) echo 'pdfidiff message: calculating difference between old and new pages...' ;;
esac
if [ "$USE_PARALLEL" != 'n' ]; then
  # with --plus, {#diff} is the input name with its leading "diff" removed
  parallel -s 10000 --plus "imagdiff -f ${FUZZ} -m ${MODE} old{#diff} new{#diff} {}" ::: diff-*.png
else
  for delta in diff-*.png ; do
    suffix=${delta#diff}
    imagdiff -f ${FUZZ} -m ${MODE} old${suffix} new${suffix} ${delta}
  done
fi

# shrink the delta pages before they are combined into the output PDF
if [ "$OPTIMIZE" != 'y' ]; then
  # default path: colorspace reduction, then alpha channel stripping.  Each
  # tool writes to a scratch file, which replaces the page only when it is
  # non-empty.
  case "$QUIET" in
    n) echo 'pdfidiff message: attempting colorspace reduction on delta pages...' ;;
  esac
  if [ "$USE_PARALLEL" != 'n' ]; then
    parallel -s 10000 --plus 'pngrecolor -q -n {} rc{#diff} ; if [ -s rc{#diff} ]; then mv rc{#diff} {} ; fi' ::: diff-*.png
  else
    for page in diff-*.png ; do
      recolored=rc${page#diff}
      pngrecolor -q -n ${page} ${recolored}
      if [ -s ${recolored} ]; then
        mv ${recolored} ${page}
      fi
    done
  fi
  case "$QUIET" in
    n) echo 'pdfidiff message: stripping alpha channel and setting DPI on delta pages...' ;;
  esac
  if [ "$USE_PARALLEL" != 'n' ]; then
    # double quotes here so that ${DPM} is expanded before parallel runs
    parallel -s 10000 --plus "pngstrip -a -r ${DPM} {} st{#diff} ; if [ -s st{#diff} ]; then mv st{#diff} {} ; fi" ::: diff-*.png
  else
    for page in diff-*.png ; do
      stripped=st${page#diff}
      pngstrip -a -r ${DPM} ${page} ${stripped}
      if [ -s ${stripped} ]; then
        mv ${stripped} ${page}
      fi
    done
  fi
else
  # -o was given: hand every delta page to opt-png instead
  case "$QUIET" in
    n) echo 'pdfidiff message: attempting filesize optimization on delta pages...' ;;
  esac
  if [ "$USE_PARALLEL" != 'n' ]; then
    parallel -s 10000 opt-png -q -r ${DPI} ::: diff-*.png
  else
    opt-png -q -r ${DPI} diff-*.png
  fi
fi

# convert delta image files to PDF
if [ "$QUIET" = 'n' ]; then
  echo "pdfidiff message: combining delta pages files into (output) $3..."
fi
img2pdf -D --engine=internal -o "$OUTPUTFILE" diff-*.png
# remember whether the output PDF was actually written, so that the script
# does not report success after a failed final step
STATUS=$?

# clean up afterwards
if [ "$QUIET" = 'n' ]; then
  echo 'pdfidiff message:  -END-'
fi
popd > /dev/null
rm -rf -- "${TMPFOLDER}"
# the work folder is already gone, so drop the EXIT trap (which would
# otherwise get to decide the final status) and report img2pdf's result
trap - 0
exit "$STATUS"
