#!/usr/bin/perl -w
#
# Fetch the comic strips listed in @Ignatz::Conf::comics and write them,
# together with an index.html generated from a Text::Template file, into a
# per-day directory under $Ignatz::Conf::base_dir.
#
# Options:
#   -d DATE   fetch for DATE (anything Date::Parse::str2time understands)
#             instead of today; the "current" symlink is left untouched
#   -v        print debug messages
#
# Each entry of @Ignatz::Conf::comics is a hash reference.  Keys read here:
#   name          label used in debug output
#   filename      name of the image file written into the output directory
#   days          optional; the strip is skipped unless this string contains
#                 the weekday digit from localtime (0 = Sunday)
#   img_template  optional strftime pattern giving the image URI directly
#   front_page    page to scrape (also used as Referer with img_template)
#   today_page    strftime pattern for a dated page
#   load_today    if true, scrape today_page rather than front_page
#   img_regex     pattern matched against the page; $1 is the image URI
#   ad_regex      if true, the (currently empty) 'fetch_ad' state is invoked
#
# Keys written here: html_uri, today_uri, img_uri, err_reason.

use strict;
use warnings;

use lib "/home/decklin/ignatz/";
use Ignatz::Conf;

package Ignatz;

use Date::Parse;
use Getopt::Std;
use HTTP::Request;
use POE qw(Component::Client::HTTP);
use POSIX qw(strftime);
use Text::Template;
use URI;

# Upper bound on redirect hops followed per strip, so that a redirect loop
# cannot keep a session (and therefore the whole run) alive forever.
use constant MAX_REDIRECTS => 5;

our %opts;

my @comics_ok;
my @comics_err;
my @comics_skipped;
my $start_time;
my $output_dir;

# --------------------------------------------------------------------

getopts('d:v', \%opts);

if (defined $opts{d}) {
    # str2time returns undef for input it cannot parse; without this check
    # the run would silently fall back to the epoch.
    $start_time = str2time($opts{d});
    defined $start_time
        or die "can't parse date: $opts{d}\n";
} else {
    $start_time = time();
}

POE::Component::Client::HTTP->spawn(
    Agent   => $Ignatz::Conf::user_agent,
    Alias   => 'ua',
    Timeout => 60,
);

$output_dir = strftime("$Ignatz::Conf::base_dir/%Y-%m-%d",
                       localtime($start_time));

-d $output_dir or mkdir $output_dir or die "can't create dir: $!";

# Only a run for "today" moves the "current" symlink.
unless (defined $opts{d}) {
    my $link = $Ignatz::Conf::cur_symlink_name;

    # On the very first run there is nothing to remove yet; -l also catches
    # a dangling symlink, for which -e is false.
    if ((-l $link) || (-e $link)) {
        unlink($link) or die "can't clean symlink: $!";
    }
    symlink($output_dir, $link) or die "can't make symlink: $!";
}

# --------------------------------------------------------------------

# Weekday of the run (0 = Sunday); the same for every strip.
my $dow = (localtime($start_time))[6];

for my $strip (@Ignatz::Conf::comics) {
    if (defined $strip->{days} && $strip->{days} !~ m/$dow/) {
        debug("skipping: $strip->{name}\n");
        push @comics_skipped, $strip;
    } else {
        comic_init_session($strip);
    }
}

# Returns once every session has finished.
$poe_kernel->run();

my $h_tmpl = Text::Template->new(
    SOURCE => $Ignatz::Conf::h_tmpl_file,
) or die "can't load template: $Text::Template::ERROR";

my $page = $h_tmpl->fill_in(
    HASH => {
        start_time     => $start_time,
        comics_ok      => \@comics_ok,
        comics_err     => \@comics_err,
        comics_skipped => \@comics_skipped,
    },
);
defined $page
    or die "can't fill in template: $Text::Template::ERROR";

open my $out, '>', "$output_dir/index.html"
    or die "can't open output page: $!";
print {$out} $page
    or die "can't write output page: $!";
close $out
    or die "can't close output page: $!";

exit 0;

# --------------------------------------------------------------------

# Create one POE session for $strip.  The strip hash is stored in the
# session heap, together with a counter of redirects followed so far.
sub comic_init_session {
    my ($strip) = @_;

    POE::Session->create(
        inline_states => {
            _start     => \&comic_sess_start,
            _stop      => \&comic_sess_stop,
            fetch_html => \&comic_fetch_html,
            recv_html  => \&comic_recv_html,
            fetch_img  => \&comic_fetch_img,
            recv_img   => \&comic_recv_img,
            fetch_ad   => \&comic_fetch_ad,
            recv_ad    => \&comic_recv_ad,
        },
        heap => {
            strip     => $strip,
            redirects => 0,
        },
    );
    return;
}

# Record $strip as failed with a human-readable $reason.
sub comic_fail {
    my ($strip, $reason) = @_;

    $strip->{err_reason} = $reason;
    push @comics_err, $strip;
    return;
}

# Write $data to $path as raw bytes.  Returns nothing on success, or an
# error message on failure.
sub save_image {
    my ($path, $data) = @_;

    open my $fh, '>', $path or return "can't open img: $!";
    binmode $fh;    # image data must not go through newline translation
    print {$fh} $data or return "can't write img: $!";
    close $fh or return "can't close img: $!";
    return;
}

# _start: with an img_template the image URI is known up front, so go
# straight to the image; otherwise the HTML page has to be scraped first.
sub comic_sess_start {
    my ($kernel, $heap) = @_[KERNEL, HEAP];
    my $strip = $heap->{strip};

    debug("sess_start: $strip->{name}: started\n");

    if ($strip->{img_template}) {
        $strip->{html_uri} = $strip->{front_page};
        $strip->{img_uri}  = strftime($strip->{img_template},
                                      localtime($start_time));
        $kernel->yield('fetch_img');
    } else {
        $kernel->yield('fetch_html');
    }
    return;
}

# _stop: debug trace only.
sub comic_sess_stop {
    my $heap  = $_[HEAP];
    my $strip = $heap->{strip};

    debug("sess_stop: $strip->{name}: stopped\n");
    return;
}

# fetch_html: request the page to scrape.  The dated page is used when a
# date was given with -d or the strip sets load_today; otherwise the front
# page is used.
sub comic_fetch_html {
    my ($kernel, $heap) = @_[KERNEL, HEAP];
    my $strip = $heap->{strip};

    $strip->{today_uri} = strftime($strip->{today_page},
                                   localtime($start_time));

    $strip->{html_uri} = (defined $opts{d} || $strip->{load_today})
        ? $strip->{today_uri}
        : $strip->{front_page};

    debug("fetch_html: $strip->{name}: spawning req: $strip->{html_uri}\n");

    my $req = HTTP::Request->new(GET => $strip->{html_uri});
    $kernel->post('ua', 'request', 'recv_html', $req);
    return;
}

# recv_html: handle the response for the scraped page.  ARG0 is the request
# packet and ARG1 the response packet; element 0 of each is the
# HTTP::Request / HTTP::Response object.
sub comic_recv_html {
    my ($kernel, $heap) = @_[KERNEL, HEAP];
    my ($request_packet, $response_packet) = @_[ARG0, ARG1];

    my $strip = $heap->{strip};
    my $req   = $request_packet->[0];
    my $resp  = $response_packet->[0];

    debug("recv_html: $strip->{name}: " . $resp->status_line() . "\n");

    if ($resp->is_redirect) {
        # Redirects are followed by hand.
        # NOTE(review): POE::Component::Client::HTTP documents a
        # FollowRedirects spawn option -- confirm the installed version
        # supports it before replacing this branch.
        my $location = $resp->header("Location");

        if (!defined $location) {
            # 3xx without a target (e.g. 304): nothing to follow.
            debug("recv_html: $strip->{name}: http error\n");
            comic_fail($strip, "Fetching HTML: " . $resp->status_line());
        } elsif (++$heap->{redirects} > MAX_REDIRECTS) {
            debug("recv_html: $strip->{name}: too many redirects\n");
            comic_fail($strip, "Fetching HTML: too many redirects");
        } else {
            # Location may be relative; resolve it against the URI of the
            # request that produced this response.
            my $target = URI->new_abs($location, $req->uri);
            $kernel->post('ua', 'request', 'recv_html',
                          HTTP::Request->new(GET => $target));
        }
    } elsif ($resp->is_success) {
        # defined $1 guards against a pattern without a (participating)
        # capture group.
        if ($resp->content =~ $strip->{img_regex} && defined $1) {
            # Resolve against the page actually served, which differs from
            # html_uri after a redirect.
            $strip->{img_uri} = URI->new_abs($1, $req->uri);
            debug("recv_html: $strip->{name}: img_regex matched\n");
            $kernel->yield('fetch_img');
        } else {
            debug("recv_html: $strip->{name}: img_regex failed\n");
            comic_fail($strip, "Image RE failed to match");
        }
    } else {
        debug("recv_html: $strip->{name}: http error\n");
        comic_fail($strip, "Fetching HTML: " . $resp->status_line());
    }
    return;
}

# fetch_img: request the image, sending the page URI as Referer.
sub comic_fetch_img {
    my ($kernel, $heap) = @_[KERNEL, HEAP];
    my $strip = $heap->{strip};

    debug("fetch_img: $strip->{name}: spawning req: $strip->{img_uri}\n");

    my $req = HTTP::Request->new(GET => $strip->{img_uri});
    $req->header(Referer => $strip->{html_uri});
    $kernel->post('ua', 'request', 'recv_img', $req);
    return;
}

# recv_img: save the image into the output directory.  A failure to save is
# recorded against this strip only, so the other strips and the index page
# are still produced.
sub comic_recv_img {
    my ($kernel, $heap) = @_[KERNEL, HEAP];
    my $response_packet = $_[ARG1];

    my $strip = $heap->{strip};
    my $resp  = $response_packet->[0];

    debug("recv_img: $strip->{name}: " . $resp->status_line() . "\n");

    unless ($resp->is_success) {
        comic_fail($strip, "Fetching image: " . $resp->status_line());
        return;
    }

    my $error = save_image("$output_dir/$strip->{filename}", $resp->content);
    if (defined $error) {
        debug("recv_img: $strip->{name}: $error\n");
        comic_fail($strip, "Saving image: $error");
        return;
    }

    push @comics_ok, $strip;
    $kernel->yield('fetch_ad') if $strip->{ad_regex};
    return;
}

# fetch_ad: not implemented.  The disabled sketch below is kept for
# reference.
sub comic_fetch_ad {
#    if ($strip->{html_resp}->content =~ $strip->{ad_regex}) {
#        $strip->{ad_uri} = URI->new_abs($1, $strip->{today_uri});
#        debug("Found ad: $strip->{ad_uri}\n");
#        debug("Faked clickthrough on ad: 'foo'\n");
#    } else {
#        debug("Error: Couldn't match ad regex\n");
#    }
}

# recv_ad: not implemented.
sub comic_recv_ad {
}

# --------------------------------------------------------------------

# Print the arguments when -v was given.
sub debug {
    print @_ if $opts{v};
}