#!/sw/bin/perl
#######################
# By: Ventz Petkov    #
# Date: 9-01-05       #
# Scrape Yahoo Movies #
#######################

use warnings;
use strict;

use WWW::Mechanize;
use XML::RSS::SimpleGen;

# Scraper state: the browser object, the site root, and the path/query prefix
# that is joined with the root and a movie id to form each item link.
my $mech = WWW::Mechanize->new;
my $url  = q{http://movies.yahoo.com/};
my $base = q{shop?d=hv&cf=info&id=};

# Start the RSS channel (link, title, description) and set its metadata.
rss_new( $url, q{Yahoo Movies!}, q{Yahoo Top 10 Movie List - AMC & Lowell's} );
rss_language('en');
rss_webmaster('rss@lists.ccs.neu.edu');
rss_weekly();

# Fetch the front page while identifying as a desktop browser.
$mech->agent_alias( 'Mac Safari' );
$mech->get( $url );

# Depending on the WWW::Mechanize version and its autocheck setting, get()
# may return normally on an HTTP error.  Check explicitly so an error page is
# never parsed as if it were the movie listing.
die "Could not fetch $url: " . $mech->res->status_line . "\n"
    unless $mech->success;

my $page = $mech->content;

# Isolate the markup between the "top boxoffice" marker comments.  If the
# markers are missing (for example after a page redesign) stop here rather
# than scanning the whole page, which would pick up unrelated links.
my ($box_office) =
    $page =~ m{<!-- top boxoffice -->(.*)<!-- /top boxoffice -->}s
    or die "Could not find the 'top boxoffice' section in $url\n";

# Walk the section line by line: strip table markup, print a plain-text
# listing to STDOUT, and add one RSS item per movie link.  Splitting on
# \r?\n keeps stray carriage returns out of the output on CRLF pages.
for my $line ( split /\r?\n/, $box_office ) {
    $line =~ s/<(.?)t(.?)>//g;    # short table tags: <tr>, <td>, </tr>, </td>, ...
    $line =~ s/<td (.*)>//g;      # <td ...>; greedy, removes through the last '>' on the line
    $line =~ s/.*<b>Top Box Office<\/b>(.*)<\/font>.*/Top Box Office: $1\n/;
    $line =~ s/(.*)&nbsp;.*/$1/g; # cut everything from the last &nbsp; onward

    # Collect every movie link on the line.  The id stops at the closing
    # quote and the title at the first </A>, so several links on one line
    # each become their own item instead of only the last one surviving.
    my @listing;
    while ( $line =~ m{<A HRef="/shop\?d=hv&cf=info&id=([^"]*)"[^>]*>(.*?)</A>}g ) {
        # Copy the captures immediately; any later match would clobber them.
        my ( $id, $title ) = ( $1, $2 );
        my $link = "$url$base$id";
        rss_item( $link, $title );
        push @listing, "$title\n($link)\n\n";
    }
    $line = join '', @listing if @listing;

    $line =~ s/<a(.*)>//g;        # leftover lowercase anchor tags (greedy, as above)
    next if $line eq "";
    print $line;
}

# Do not overwrite an existing feed with an empty one when scraping found
# nothing.
die "No movie links found in the 'top boxoffice' section of $url\n"
    unless rss_item_count();

rss_save('yahoomovies.rss');
