###########################################
package RssMaker;
###########################################
# Mike Schilli, 2004 (m@perlmeister.com)
###########################################
use warnings;
use strict;

use LWP::UserAgent;
use HTTP::Request::Common;
use XML::RSS;
use HTML::Entities qw(decode_entities);
use URI::URL;
use HTTP::Date;
use DateTime;
use HTML::TreeBuilder;
use Log::Log4perl qw(:easy);

###########################################
sub make {
###########################################
  my(%o) = @_;

  $o{url}      ||  LOGDIE "url missing";
  $o{title}    ||  LOGDIE "title missing";
  $o{output}   ||= "out.rdf";
  $o{filter}   ||= sub { 1 };
  $o{encoding} ||= 'utf-8';

  my $ua = LWP::UserAgent->new();

  INFO "Fetching $o{url}";
  my $resp = $ua->request(GET $o{url});
  
  LOGDIE "Fetching $o{url} failed" if 
    $resp->is_error();

  my $http_time = 
            $resp->header('last-modified');
  INFO "Last modified: $http_time";

  my $mtime   = str2time($http_time);
  my $isotime = DateTime->from_epoch(
                          epoch => $mtime);
  DEBUG "Last modified: $isotime";

  my $rss = XML::RSS->new(
    encoding => $o{encoding});

  $rss->channel(
    title => $o{title},
    link  => $o{url},
    dc    => { date => $isotime . "Z"},
  );

  foreach(exlinks($resp->content(), 
                  $o{url})) {

    my($lurl, $text) = @$_;
      
    $text = decode_entities($text);

    if($o{filter}->($lurl, $text)) {
      INFO "Adding rss entry: $text $lurl";
      $rss->add_item(
        title => $text,
        link  => $lurl,
      );
    }
  }

  INFO "Saving output in $o{output}";
  $rss->save($o{output}) or 
      die "Cannot write to $o{output}";
}

###########################################
sub exlinks {
###########################################
  my($html, $base_url) = @_;

  my @links = ();

  my $tree = HTML::TreeBuilder->new();

  $tree->parse($html) or return ();

  for(@{$tree->extract_links('a')}) {
      my($link, $element, $attr, 
         $tag) = @$_;
    
      next unless $attr eq "href";

      my $uri = URI->new_abs($link, 
                             $base_url);
      next unless 
        length $element->as_trimmed_text();

      push @links, 
           [URI->new_abs($link, $base_url),
            $element->as_trimmed_text()];
    }

    return @links;
}

1;
