#!/usr/local/bin/perl
###########################################
# pdfsplit 
# 2012, Mike Schilli <cpan@perlmeister.com>
###########################################
use strict;
use warnings;
use Getopt::Std;
use Pod::Usage;
use Log::Log4perl qw(:easy);
use Sysadm::Install qw( :all );
use File::Basename;

# Parse the command line. -d names the directory the chunks are
# written to. Unknown options print the usage message and exit.
getopts( "d:", \my %opts ) or pod2usage();

Log::Log4perl->easy_init( $DEBUG );

# Target upper bound for the size of a single chunk, in bytes.
my $max_size   = 1024*1024*10;

# If no more than this many pages would remain after a chunk, they
# are folded into that chunk instead of forming a small trailing one.
my $grace_size = 15;

my $pdftk = bin_find "pdftk";

my( $path ) = @ARGV;

if( !defined $path ) {
    pod2usage( "No pdf specified" );
}

# Fail with a clear message instead of handing undef to tap() later.
if( !defined $pdftk ) {
    die "Can't find pdftk in PATH";
}

my $pages = nof_pages( $path );

# Check before logging, so a failed lookup doesn't interpolate undef.
if( !defined $pages ) {
    die "Can't determine number of pages";
}

if( $pages < 1 ) {
    die "$path contains no pages";
}

DEBUG "Pages total: $pages";

my $file_size = -s $path;

# Guards the division below against an empty or unreadable file.
if( !$file_size ) {
    die "Can't determine size of $path";
}

# Estimate how many pages fit into $max_size, based on the average
# page size of the whole document.
my $pages_per_chunk = int( $max_size / ( $file_size / $pages ) );

# An average page larger than $max_size yields 0 here; a chunk still
# needs at least one page or the splitting loop can't advance.
if( $pages_per_chunk < 1 ) {
    WARN "Average page exceeds $max_size bytes, using one page per chunk";
    $pages_per_chunk = 1;
}

DEBUG "Pages per chunk: $pages_per_chunk";

# Walk the document front to back and collect the page range
# ("first-last", 1-based, inclusive) for every output file.
my $page   = 1;
my @chunks = ();

# "<=" rather than "<": with "<" a one-page document never enters the
# loop and no output is produced at all.
while( $page <= $pages ) {
    my $last_page = $page + $pages_per_chunk - 1;

    # A chunk always holds at least one page. Without this, a
    # $pages_per_chunk below 1 would leave $page unchanged and the
    # loop would never terminate.
    if( $last_page < $page ) {
        $last_page = $page;
    }

    # Don't run past the end of the document.
    if( $last_page > $pages ) {
        $last_page = $pages;
    }

    # If at most $grace_size pages would be left over, append them
    # to this chunk rather than creating a tiny final one.
    if( $last_page + $grace_size >= $pages ) {
        $last_page = $pages;
    }

    push @chunks, "$page-$last_page";
    $page = $last_page + 1;
}

# Width of the zero-padded serial number, so that output files sort
# in chunk order (e.g. 01..12 for twelve chunks).
my $digits = length scalar @chunks;

my $serial = 1;

for my $chunk ( @chunks ) {

    # Keep the counter numeric and format a separate label, instead
    # of overwriting the counter with a padded string and relying on
    # string increment.
    my $label  = sprintf "%0${digits}d", $serial;
    my $suffix = "-$label-$chunk";

    # Insert the suffix before the file extension. If the input has
    # no extension, append it, so the output name can never be the
    # same as the input file's name.
    my $outfile = basename $path;
    $outfile =~ s/(\.[^.]+)$/$suffix$1/ or $outfile .= $suffix;

    if( exists $opts{ d } ) {
        $outfile = "$opts{ d }/$outfile";
    }

    # Let pdftk extract the page range; raise_error makes tap() die
    # if the command fails.
    tap( { raise_error => 1 }, $pdftk, "A=$path", "cat",
         "A$chunk", "output", $outfile );

    $serial++;
}

###########################################
sub samples {
###########################################
    # Draws $nof_samples random integers, each computed as
    # 1 + int(rand($max)), and returns them in ascending numeric
    # order. Duplicates are possible.
    my( $nof_samples, $max ) = @_;

    my @draws = map { 1 + int( rand( $max ) ) } 1 .. $nof_samples;

    return sort { $a <=> $b } @draws;
}

###########################################
sub nof_pages {
###########################################
    # Runs "$pdftk $file dump_data" and returns the number captured
    # from the "NumberOfPages: N" line of its output. Logs an error
    # and returns undef if the command exits non-zero or the output
    # contains no such line.
    my( $file ) = @_;

    my( $stdout, $stderr, $rc ) = tap $pdftk, $file, "dump_data";

    if( $rc == 0 ) {
        return $1 if $stdout =~ /NumberOfPages: (\d+)/;

        ERROR "dump_data returned unknown format";
    }
    else {
        ERROR "$pdftk can't get metadata";
    }

    return undef;
}

__END__

=head1 NAME

    pdfsplit - Split a PDF into chunks

=head1 SYNOPSIS

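    pdfsplit [-d output_dir] foo.pdf

    # Writes one file per chunk, named
    # <name>-<serial>-<first page>-<last page>.<ext>, for example:
    foo-1-1-40.pdf
    foo-2-41-80.pdf
    foo-3-81-130.pdf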

=head1 DESCRIPTION

pdfsplit breaks pdf files into chunks no larger than a predefined size.

=head1 LEGALESE

Copyright 2012 by Mike Schilli, all rights reserved.
This program is free software, you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 AUTHOR

2012, Mike Schilli <cpan@perlmeister.com>
