#!/usr/bin/perl -w
use strict;
use IO::File;
use POSIX qw(strftime);
require LWP::UserAgent;
# Utility Functions

# One HTTP client shared by every download this script performs.
my $ua = LWP::UserAgent->new();

# get_page($url, $file_to_store)
#
# Sends a GET request for $url through the shared user agent.
# $file_to_store is passed straight through as the second argument of
# LWP::UserAgent::request; callers that only want the page text omit it,
# callers that want the body saved on disk pass a file name.
#
# Returns the response content when the request succeeds.  When it fails,
# the error is printed (formatted as HTML) on standard output and 0 is
# returned.
sub get_page {
    my ($url, $file_to_store) = @_;

    my $response =
        $ua->request(HTTP::Request->new(GET => $url), $file_to_store);

    # Failure path first: report the problem and hand back a false value.
    unless ($response->is_success) {
        print $response->error_as_HTML;
        return 0;
    }

    return $response->content();
}
# Per-site state.  'url' is the root of the site; a 'page' entry holding the
# site's front page is added by the fetch loop over @sites before any parser
# is called.
my %dilbert = ( 'url' => 'http://www.dilbert.com' );
my %ucomics = ( 'url' => 'http://www.ucomics.com' );

# A parser returns (url of the strip, url of the img) for a comic

# Comics that must be looked up under a different label than the name used
# in this script.
my %alt_name = ( 'Dilbert' => 'Dilbert.com' );

# dilbert_parser($comic)
#
# Finds $comic (or its %alt_name alias) in the <OPTION> list of the
# dilbert.com front page, downloads the page that option points to and
# extracts the strip image from it.
#
# Returns (strip page URL, image URL), both absolute.  Returns an empty
# list when the comic is not listed or no image tag matches.
sub dilbert_parser {
    my ($comic) = @_;

    my $label = defined($alt_name{$comic}) ? $alt_name{$comic} : $comic;

    # \Q...\E makes the name match literally: names such as "B.C." and
    # "Dilbert.com" contain dots that would otherwise match any character.
    my ($url) = $dilbert{page} =~ /OPTION VALUE="(\S*)">\s*\Q$label\E/i;
    $url or return;

    # Links on the front page may be site-relative.
    $url = $dilbert{url} . $url if $url !~ /^http/i;

    # get_page returns 0 on failure, which simply fails the match below.
    my $buf = get_page($url);
    $buf =~ /IMG\s*SRC=\S*["'](\S*\d\d\d\d\d\d\d+\.(?:gif|jpg))["'][^>]*alt/i
        or return;

    my $img = $1;
    $img = $dilbert{url} . $img if $img !~ /^http/i;

    return ($url, $img);
}
# ucomics_parser($comic)
#
# Finds $comic in the <OPTION> list of the ucomics.com front page, downloads
# the page that option points to and extracts the strip image from it.
#
# Returns (strip page URL, image URL).  Returns an empty list when the comic
# is not listed or no image tag matches.
sub ucomics_parser {
    my ($comic) = @_;

    # \Q...\E makes the name match literally, so regex metacharacters in a
    # comic name cannot widen or break the pattern.
    my ($url) = $ucomics{page} =~ /OPTION VALUE="(\S*)">\s*\Q$comic\E/i;
    $url or return;

    # Links on the front page may be site-relative.
    $url = $ucomics{url} . $url if $url !~ /^http/i;

    # get_page returns 0 on failure, which simply fails the match below.
    my $buf = get_page($url);
    $buf =~ /IMG\s*SRC=\S*"(\S*\/\S+\d\d\d+\.(?:gif|jpg))/i or return;

    # The image URL ends up in a page served from another host, so a
    # relative path is made absolute the same way dilbert_parser does it.
    my $img = $1;
    $img = $ucomics{url} . $img if $img !~ /^http/i;

    return ($url, $img);
}
# Sites whose front page has to be downloaded before the parsers can run.
my @sites = (\%dilbert, \%ucomics);

# Flat, ordered list of (comic name, parser) pairs.  The order here is the
# order in which the strips appear on the generated page.
my @comics = (
    'Dilbert'          => \&dilbert_parser,
    'Garfield'         => \&ucomics_parser,
    'Peanuts'          => \&dilbert_parser,
    'Betty'            => \&dilbert_parser,
    'Nancy'            => \&dilbert_parser,
    'Calvin & Hobbes'  => \&ucomics_parser,
    'FoxTrot'          => \&ucomics_parser,
    'Get Fuzzy'        => \&dilbert_parser,
    'Frank and Ernest' => \&dilbert_parser,
    'Ziggy'            => \&ucomics_parser,
    'Herman'           => \&dilbert_parser,
    'Non Sequitur'     => \&ucomics_parser,
    'Reality Check'    => \&dilbert_parser,
    'B.C.'             => \&dilbert_parser,
    'PC and Pixel'     => \&dilbert_parser,
    'The Born Loser'   => \&dilbert_parser,
    'Doonesbury'       => \&ucomics_parser,
    'Wizard of Id'     => \&dilbert_parser,
    'Shoe'             => \&ucomics_parser,
    'Cathy'            => \&ucomics_parser,
    #'Charlie',
    #'Ballard Street',
    'Speed Bump'       => \&dilbert_parser,
    'Jump Start'       => \&dilbert_parser,
    'Pickles'          => \&dilbert_parser,
    #'The 5th Wave',
);

# Download each site's front page once.  If a download fails, store a
# placeholder so the parsers still have a string to match against.
for my $site (@sites) {
    $site->{page} = get_page($site->{url}) || "NONE\n";
}
# Every path used from here on is relative to the web root.
chdir(($^O =~ /Win32/) ? "d:/public_html" : "/home/public_html")
    or die "Could not chdir: $!";

my $time = time();

# Today's archive page is written only once: when it already exists, $file
# stays undefined and every archive-only step further down is skipped.
my $date = strftime("%m%d%Y", localtime($time));
my $archive_page = "comics-archive/comics-$date.html";
my $file;
unless (-e $archive_page) {
    # Three-argument open keeps the mode separate from the file name and
    # creates the handle in $file directly; no IO::File object is needed.
    open($file, '>', $archive_page)
        or die "could not open $archive_page: $!";
}
# HTML accumulated for the live page; it is written to comics.html at the
# end of the script.
my $html_str = "";

# output(@pieces)
#
# Interpolates its arguments into one string and adds that string to both
# destinations: the in-memory live page and, when an archive file was opened
# for today, the archive page.
sub output {
    my $text = "@_";

    $html_str .= $text;
    print {$file} $text if $file;
}
# Page header: title, an "expires" meta tag dated in the past, the
# generation time and the opening tag of the table that holds the strips.
my $now = localtime($time);
output join '',
    '<html> <title> Comic Strips</title> <body bgcolor=white>',
    '<meta http-equiv="expires" content="Sun, 13 Aug 2000 16:30:00 PDT">',
    '<br> <br>',
    '<center><large><b> Comic Strips</b><large> (Updated daily. This page: ',
    "$now PDT)",
    "<br></center>\n",
    '<table width=100% cellpadding=0 cellspacing=0>';
# Names of the comics for which no image could be found on this run.
my @missing_comics = ();

# @comics is a flat list of (name, parser) pairs, so step through it two
# elements at a time.
for (my $pos = 0; $pos < @comics; $pos += 2) {
    my ($comic, $fn) = @comics[$pos, $pos + 1];

    my ($url, $img) = $fn->($comic);
    #print "XXXX: $url $img\n";

    # No image: remember the comic as missing and move on to the next one.
    unless ($img) {
        push @missing_comics, $comic;
        next;
    }

    # One table row per strip: the name, then the image linking to its page.
    output join '',
        '<tr> <td width=10%> </td> <td> ',
        qq{<STRONG> $comic </strong> <br> <a href="$url">},
        qq{<img src="$img" alt="$comic" border=0></a>},
        "<br><br></td> </tr>\n";

    # Images are saved locally only on the run that creates today's archive.
    next unless $file;

    my $dir = "comics-archive-images/$comic";
    mkdir($dir);

    # Last path component of the image URL, with the run of digits in front
    # of the extension removed; the date prefix identifies the copy instead.
    my $img_name = (split(/\//, $img))[-1];
    $img_name =~ s/\d+\././;
    get_page($img, "$dir/$date-$img_name");
}
# Include Jaimie Hollenback's comic as he requested.  This row is appended
# to $html_str directly, so it appears on the live page only and not in the
# archive copy.
$html_str .= "<tr> <td width=10\%> </td> <td> ".
    '<strong> Kim & Jason </strong> <br> '.
    '<a href="http://www.kimandjason.com">'.
    '<img src="http://www.kimandjason.com/stuff/kj_today.gif" border=0'.
    ' alt="(c) Copyright by Jason Kotecki. www.KimandJason.com"></a>'.
    "<br><br></td> </tr>\n";

# Close the table and list any comic that could not be found.
output "</table> <br> <br> ".
    ((@missing_comics) ? "Missing: @missing_comics <br>\n" : '');

# Archive pages are named after their date; work out the neighbouring days
# once and reuse them for all navigation links.
my $prev_date = strftime("%m%d%Y", localtime($time - 86400));
my $next_date = strftime("%m%d%Y", localtime($time + 86400));

# The live page links into the archive directory ...
$html_str .= "<a href=\"comics-archive/comics-".
    $prev_date.
    ".html\"> Yesterday's page</a>. ";

# ... while an archive page links to its siblings in the same directory.
if ($file) {
    print $file "<a href=\"comics-". $next_date.
        ".html\"> Next</a> <a href=\"comics-".
        $prev_date. ".html\"> Prev</a>";
}

output "<br> <a href=\"http://www.angadi.org/comics.pl\">Source Code</a>. ".
    '<a href="mailto:raghuangadi@yahoo.com">Mail me</a> any comics you want included here. <br>'.
    "<em> <a href=\"http://www.angadi.org/\">Raghu Angadi</a> </em>".
    "</body> </html>\n";

# The archive page is complete.  Closing it explicitly flushes the buffer
# and surfaces write errors (for example a full disk) that would otherwise
# be lost when the script exits.
if ($file) {
    close($file) or die "Could not close $archive_page: $!";
}

# Write the live page with a lexical handle and three-argument open, and
# check every step so a truncated page is reported instead of ignored.
open(my $out, '>', 'comics.html') or die "Could not open comics.html: $!";
print {$out} $html_str or die "Could not write comics.html: $!";
close($out) or die "Could not close comics.html: $!";