#############################
# RssMaker -- Generate a RSS
#         feed of a web page
# Mike Schilli, 2004
# (m@perlmeister.com)
#############################
package RssMaker;

use warnings;
use strict;

use LWP::UserAgent;
use HTTP::Request::Common;
use XML::RSS;
use HTML::Entities
         qw(decode_entities);
use URI::URL;
use HTTP::Date;
use DateTime;
use HTML::TreeBuilder;
use Log::Log4perl qw(:easy);

#############################
sub make {
#############################
  my (%o) = @_;

  $o{url}
    || LOGDIE "url missing";
  $o{title}
    || LOGDIE
    "title missing";
  $o{output}   ||= "out.rdf";
  $o{filter}   ||= sub { 1 };
  $o{encoding} ||= 'utf-8';

  my $ua =
    LWP::UserAgent->new();

  INFO "Fetching $o{url}";
  my $resp =
    $ua->request(
    GET $o{url} );

  LOGDIE "Error fetching ",
         "$o{url}: ", $resp->message()
      if $resp->is_error();

  my $http_time =
    $resp->header(
    'last-modified');

  $http_time ||=
    time2str( time() );

  INFO "Last modified: ",
       $http_time;

  my $mtime =
    str2time($http_time);

  my $isotime =
    DateTime->from_epoch(
    epoch => $mtime);

  DEBUG "Last modified:",
        $isotime;

  my $rss =
    XML::RSS->new(
    encoding =>
      $o{encoding} );

  $rss->channel(
    title => $o{title},
    link  => $o{url},
    dc    => {
      date => $isotime . "Z"
    },
  );

  foreach(exlinks(
      $resp->content(),
      $o{url})) {

    my ($lurl, $text) = @$_;

    $text =
      decode_entities($text);

    if ($o{filter}->(
            $lurl, $text)) {

      INFO "Adding rss ",
      "entry: $text $lurl";

      $rss->add_item(
        title => $text,
        link  => $lurl);
    }
  }

  INFO "Saving output in ",
       "$o{output}";
  $rss->save( $o{output} )
    or LOGDIE "Cannot write",
       " to", " $o{output}";
}

#############################
sub exlinks {
#############################
  my ($html, $base_url) = @_;

  my @links = ();

  my $tree =
    HTML::TreeBuilder->new();

  $tree->parse($html)
    or return ();

  for(@{$tree->extract_links(
        'a')}) {
    my ($link, $element,
        $attr, $tag) = @$_;

    next
      unless $attr eq "href";

    my $uri =
      URI->new_abs( $link,
      $base_url );

    next
      unless length $element
      ->as_trimmed_text();

    push @links,
      [
      URI->new_abs(
        $link, $base_url
      ),
      $element
        ->as_trimmed_text()
      ];
  }

  return @links;
}

1;
