# (C) 2003 Dan Lenski <moxfyre@geocities.com>
#

package WWW::Google::News;

use strict;
use CGI::Util qw(escape);
use HTTP::Request::Common;
use LWP::UserAgent;
use XML::RSS;
use POSIX qw(strftime);

1;

# Constructor.  May be invoked on the class name or on an existing instance
# (in which case the instance's class is reused).  The new object carries its
# own LWP::UserAgent (10 second timeout, fixed agent string) and four slots --
# results, url, query, lastbuilddate -- that all start out undefined.
sub new
{
    my ($invocant) = @_;
    my $package = ref($invocant) || $invocant;

    my %state = (
        ua            => LWP::UserAgent->new(timeout => 10,
                                             agent => "Elinks (0.3; Unix)"),
        results       => undef,
        url           => undef,
        query         => undef,
        lastbuilddate => undef,
    );

    return bless \%state, $package;
}

# Accessor: returns whatever is currently stored in the object's "results"
# slot (undef on a fresh object; search() stores an array reference there).
sub results
{
    my ($self) = @_;
    return $self->{results};
}

# Renders the stored search results as an RSS 0.91 document and returns it
# as a string.  The channel title and description are built from the stored
# query, the channel link is the stored search URL, and pubDate is the stored
# lastbuilddate.  Each stored result contributes one item (title, link, and
# its "desc" field as the item description).
sub resultsRSS
{
    my ($self) = @_;

    my $heading = "Google News Search: $self->{query}";

    my $feed = XML::RSS->new(version => '0.91');
    $feed->channel(
        title       => $heading,
        link        => $self->{url},
        description => $heading,
        pubDate     => $self->{lastbuilddate},
        webMaster   => 'moxfyre@geocities.com',
    );

    # A for loop (not map) is used deliberately: iterating tolerates an
    # undefined results slot under "strict refs".
    for my $story ( @{ $self->{results} } ) {
        $feed->add_item(
            title       => $story->{title},
            link        => $story->{link},
            description => $story->{desc},
        );
    }

    return $feed->as_string;
}
    
# search($query, $num)
#
# Fetches a Google News result page for $query and scrapes the items out of
# its HTML.
#
#   $query - search terms; URL-escaped before being placed in the request.
#   $num   - optional value for the "num" request parameter; the parameter
#            is left out entirely when $num is undefined.
#
# Side effects: records the query, the request URL and a GMT timestamp on
# the object, and replaces the object's stored results.  The stored results
# are cleared before the request so that a failed search never leaves the
# previous query's items behind under the new query.
#
# Returns a reference to an array of hashes with the keys link, title,
# source, when and desc (source and when are undef for items that lack the
# optional source/time line).  Returns undef if the HTTP request was not
# successful.  A page that was fetched but contains no recognisable items
# yields a reference to an empty array.
sub search
{
    my $self = shift;
    my ($query, $num) = @_;

    ##############################################
    # Screen-scraping code begins

    my $url = "http://news.google.com/news?scoring=d&hl=en&q="
        . (defined $query ? escape($query) : '');
    $url .= "&num=" . escape($num) if defined $num;

    # One result item: the headline link, an optional "source - when" line,
    # then the description.  Captures: link, title, source, when, desc.
    my $item_re =
        qr(<a\ href=([^>]+)>(.+?)</a><br>
           (?: <font\ size=-1><font\ color=green>([^<]+)</font>
               &nbsp;-&nbsp;([^<]+)</font><br> )?
           <font\ size=-1>(.*?)</font><br>)sx;

    $self->{query} = $query;
    $self->{url} = $url;
    $self->{results} = undef;
    # The time comes from gmtime, so the zone is spelled out literally;
    # %Z would print the local zone name next to a UTC time.
    $self->{lastbuilddate} = strftime('%a, %d %b %Y %H:%M:%S GMT', gmtime);

    # get Google news page
    my $response = $self->{ua}->request(GET($url));
    return undef unless $response->is_success;

    # Take the capture from the match's own return value rather than from
    # $1, which would hold a stale value if the page has no <div> wrapper.
    my ($content) = $response->content =~ m|<div>(.*)</div>|s;
    $content = '' unless defined $content;

    my $results = $self->{results} = [];
    while ($content =~ /$item_re/g) {
        my ($link, $title, $source, $when, $desc) = ($1,$2,$3,$4,$5);

        # Strip markup (including tags that span a line break), flatten
        # newlines and trim surrounding whitespace.
        foreach my $text ($title, $desc) {
            $text =~ s[<[^>]+>][]g;
            $text =~ tr[\n][ ];
            $text =~ s/^\s+//;
            $text =~ s/\s+$//;
        }

        push @$results, { link => $link,
                          title => $title,
                          source => $source,
                          when => $when,
                          desc => $desc };
    }

    # Screen-scraping code ends
    ##############################################

    return $results;
}