root/trunk/plagger/lib/Plagger/Plugin/Aggregator/Gungho.pm

Revision 1940 (checked in by daisuke, 2 years ago)

properly convert HTTP::Response to URI::Fetch::Response so Aggregator::Simple understands it. This requires the latest Gungho from svn (as of this writing)

  • Property svn:keywords set to Id
Line 
1 # $Id$
2 #
3 #
4 #
5
# Response handler plugged into Gungho by Plagger::Plugin::Aggregator::Gungho.
# It inherits the no-op behaviour of Gungho::Handler::Null and adds feed
# handling on top of it in handle_response().
package Plagger::Plugin::Aggregator::Gungho::Handler;
use strict;
use base qw(Gungho::Handler::Null);

# gungho_plugin: the aggregator plugin instance that owns this handler.
# aggregate() in Plagger::Plugin::Aggregator::Gungho assigns it before the
# crawler is run.
__PACKAGE__->mk_accessors('gungho_plugin');
11
# TO_URI_FETCH_RESPONSE($response)
#
# Plain function (not a method): wraps the response object handed over by
# Gungho in a freshly created URI::Fetch::Response and returns that wrapper.
#
# The URI::Fetch status is derived as follows, first match wins:
#   * the previous response in the chain was a 301  -> URI_MOVED_PERMANENTLY
#   * this response is a 410                        -> URI_GONE
#   * this response is a 304                        -> URI_NOT_MODIFIED
#   * everything else (error codes included)        -> URI_OK
sub TO_URI_FETCH_RESPONSE {
    my $response = shift;

    my $fetched = URI::Fetch::Response->new();
    $fetched->http_status($response->code);
    $fetched->http_response($response);

    my $status;
    if (   $response->previous
        && $response->previous->code == HTTP::Status::RC_MOVED_PERMANENTLY())
    {
        $status = URI::Fetch::URI_MOVED_PERMANENTLY();
    }
    elsif ($response->code == HTTP::Status::RC_GONE()) {
        $status = URI::Fetch::URI_GONE();
    }
    elsif ($response->code == HTTP::Status::RC_NOT_MODIFIED()) {
        $status = URI::Fetch::URI_NOT_MODIFIED();
    }
    else {
        $status = URI::Fetch::URI_OK();
    }
    $fetched->status($status);

    # The getters below are intentionally left inline as setter arguments:
    # they are evaluated in list context there, and moving them into scalar
    # variables would change what gets passed on.
    $fetched->etag($response->header('ETag'));
    $fetched->last_modified($response->header('Last-Modified'));
    $fetched->uri($response->request->uri);
    $fetched->content($response->content);
    $fetched->content_type($response->content_type);

    return $fetched;
}
33
# handle_response($c, $req, $res)
#
# Invoked with the Gungho context, the request that was sent and the response
# that came back. After letting the parent handler see the response, it runs
# feed discovery on it and then does one of three things:
#   * discovered URL equals the requested URL -> pass the content to the
#     plugin's handle_feed(), together with the 'feed' note on the request;
#   * a different feed URL was discovered     -> clone the request, point the
#     clone at that URL and send it through the plugin's Gungho instance;
#   * nothing was discovered                  -> return nothing.
sub handle_response {
    my ($self, $c, $req, $res) = @_;

    # Convert first, then give the parent class its turn with the raw response.
    my $fetched = TO_URI_FETCH_RESPONSE($res);
    $self->next::method($c, $req, $res);

    my $plugin     = $self->gungho_plugin;
    my $url        = $req->url;
    my $discovered = Plagger::FeedParser->discover($fetched);

    # The requested URL is itself the feed.
    return $plugin->handle_feed($url, \$fetched->content, $req->notes('feed'))
        if $url eq $discovered;

    # No feed could be found for this response.
    return unless $discovered;

    # The feed lives at another URL: fetch that one with a copy of the request.
    my $follow_up = $req->clone;
    $follow_up->uri($discovered);
    return $plugin->gungho->send_request($follow_up);
}
57
# Aggregator plugin that collects feed requests and fetches them through
# Gungho, reusing the feed handling of Plagger::Plugin::Aggregator::Simple.
package Plagger::Plugin::Aggregator::Gungho;
use strict;
use base qw(Plagger::Plugin::Aggregator::Simple);
use Gungho;
use Gungho::Request;

# gungho:   the Gungho instance created by aggregate()
# requests: array reference of Gungho::Request objects queued by accumulate()
__PACKAGE__->mk_accessors(qw(gungho requests));
65
# register($context)
#
# Hooks this plugin into the given context: accumulate() runs on
# 'customfeed.handle' and aggregate() on 'aggregator.finalize'. The request
# queue is then initialised to an empty list.
sub register {
    my $self    = shift;
    my $context = shift;

    # Kept as an ordered list so the hooks are passed in the written order.
    my @hooks = (
        'customfeed.handle'   => \&accumulate,
        'aggregator.finalize' => \&aggregate,
    );
    $context->register_hook($self, @hooks);

    return $self->requests([]);
}
76
# accumulate($context, $args)
#
# 'customfeed.handle' callback. $args->{feed} is the feed to fetch. Feeds
# whose URL does not start with http:// or https:// (case-insensitive) are
# skipped with an empty return. For all others a GET request carrying the
# feed as its 'feed' note is appended to the queue, and the result of that
# push (the new queue length) is returned.
sub accumulate {
    my $self    = shift;
    my $context = shift;
    my $args    = shift;

    my $feed = $args->{feed};
    my $url  = $feed->url;
    if ($url !~ m{^https?://}i) {
        return;
    }

    my $request = Gungho::Request->new(GET => $url);
    $request->notes(feed => $feed);
    $context->log(info => "Fetch $url");

    my $queue = $self->requests;
    return push @$queue, $request;
}
89
# aggregate($context)
#
# 'aggregator.finalize' callback. Creates a Gungho instance that uses the
# 'Simple' provider and this module's Handler class, hands the queued
# requests to the provider, empties the plugin's own queue, links the handler
# back to this plugin and runs the crawler. Returns whatever run() returns.
# The context argument is not used.
sub aggregate {
    my $self = shift;

    my %config = (
        provider => { module => 'Simple' },
        handler  => { module => '+Plagger::Plugin::Aggregator::Gungho::Handler' },
    );
    my $engine = Gungho->new(\%config);
    $self->gungho($engine);

    # Transfer the queue to the provider, then start over with an empty one.
    $engine->provider->requests($self->requests);
    $engine->provider->has_requests(1);
    $self->requests([]);

    # The handler needs the plugin to deliver feeds and to send new requests.
    $engine->handler->gungho_plugin($self);

    return $engine->run;
}
110
111 1;
112
113 __END__
114
115 =head1 NAME
116
117 Plagger::Plugin::Aggregator::Gungho - Go Gungho!
118
119 =head1 SYNOPSIS
120
121   - module: Aggregator::Gungho
122
123 =head1 DESCRIPTION
124
125 [06 Apr 2007] Gungho is, as of this writing, an extremely new crawler framework.
126 Beware of bugs! Please report them to the author. I'll be happy to apply
127 patches or fix problems.
128
129 =head1 AUTHOR
130
131 Copyright (c) 2007 Daisuke Maki E<lt>daisuke@endeworks.jpE<gt>
132
133 =cut
134
Note: See TracBrowser for help on using the browser.