root/trunk/plagger/lib/Plagger/Plugin/Filter/FetchEnclosure/Xango.pm

Revision 2048 (checked in by otsune, 3 years ago)

Added "no critic" comment to pass t/99-perlcritic. (via http://gist.github.com/10825 )
fix bug. (via http://d.hatena.ne.jp/h3x/20081104/1225811113 )

  • Property svn:keywords set to Id
Line 
1 # $Id$
2 #
3 # Copyright (c) 2006 Daisuke Maki <dmaki@cpan.org>
4 # All rights reserved.
5
6 package Plagger::Plugin::Filter::FetchEnclosure::Xango;
7 use strict;
8 use base qw(Plagger::Plugin::Filter::FetchEnclosure);
# Define Xango::DEBUG (always returning 1) before Xango::Broker::Push is
# loaded by the "use" line that follows this block.
# NOTE(review): presumably Xango consults this sub to switch on its debug
# output -- confirm against the Xango sources.
BEGIN {
    sub Xango::DEBUG {    ## no critic (ProhibitNestedSubs)
        return 1;
    }
}
12 use Xango::Broker::Push;
13
# register($self, $context)
#
# Plugin entry point. Builds the argument list for the Xango push broker
# (defaults first, then anything in the "xango_args" config so the config
# wins), remembers the broker alias on the plugin, spawns the crawler session
# and the broker, and finally hooks enqueue() and fetch() into the
# 'update.entry.fixup' and 'update.fixup' phases.
#
# Config keys read: timeout (default 10), xango_args, use_cache (default 1
# when the key is absent), max_redirect (default 3).
sub register {
    my ($self, $context) = @_;
    my $conf = $self->conf;

    my @http_args = (
        Agent   => "Plagger/$Plagger::VERSION (http://plagger.org/)",
        Timeout => $conf->{timeout} || 10,
    );
    my $overrides   = $conf->{xango_args} || {};
    my %broker_args = (
        Alias        => 'xgbroker',
        HandlerAlias => 'xghandler',
        HttpCompArgs => \@http_args,
        %$overrides,
    );

    # enqueue() posts jobs to this alias later on.
    $self->{xango_alias} = $broker_args{Alias};

    # An explicit false use_cache must be honoured, hence exists() and not ||.
    my $use_cache = exists $conf->{use_cache} ? $conf->{use_cache} : 1;

    Plagger::Plugin::Filter::FetchEnclosure::Xango::Crawler->spawn(
        Plugin      => $self,
        BrokerAlias => $broker_args{Alias},
        UseCache    => $use_cache,
        MaxRedirect => $conf->{max_redirect} || 3,
    );
    Xango::Broker::Push->spawn(%broker_args);

    $context->register_hook(
        $self,
        'update.entry.fixup' => \&enqueue,
        'update.fixup'       => \&fetch,
    );
}
39
# enqueue($self, $context, $args)
#
# Hook for 'update.entry.fixup'. For every enclosure of $args->{entry} this
# makes sure the per-feed download directory (conf "dir" + the feed's id_safe)
# exists, works out the local target path, and posts a Xango::Job for the
# enclosure URL to the broker session stored in $self->{xango_alias}.
#
# When the "fake_referer" config is set, the entry permalink is attached to
# the job as the "referer" argument so the crawler can send it as the
# Referer header.
sub enqueue {
    my ($self, $context, $args) = @_;

    my $entry = $args->{entry};

    for my $enclosure ($entry->enclosures) {
        my $feed_dir = File::Spec->catfile($self->conf->{dir}, $args->{feed}->id_safe);
        unless (-e $feed_dir && -d _) {
            $context->log(info => "mkdir $feed_dir");

            # Report the failure instead of ignoring it; the job is still
            # queued so later stages get a chance to create the directory.
            mkdir($feed_dir, 0777)
                or $context->log(warn => "mkdir $feed_dir failed: $!");
        }

        my $path = File::Spec->catfile($feed_dir, $enclosure->filename);
        $context->log(info => "fetch " . $enclosure->url . " to " . $path);

        my %job_args;
        if ($self->conf->{fake_referer}) {
            $context->log(debug => "Sending Referer: " . $entry->permalink);
            $job_args{referer} = $entry->permalink;
        }

        my $job = Xango::Job->new(
            uri       => URI->new($enclosure->url),
            redirect  => 0,
            path      => $path,
            enclosure => $enclosure,
            %job_args,    # optional extras (referer) must reach the job
        );

        POE::Kernel->post($self->{xango_alias}, 'enqueue_job', $job);
    }
}
69
# fetch()
#
# Hook for 'update.fixup': hands control to the POE kernel so the jobs queued
# by enqueue() are processed. Returns whatever POE::Kernel->run returns.
sub fetch {
    return POE::Kernel->run;
}
71
72 package Plagger::Plugin::Filter::FetchEnclosure::Xango::Crawler;
73 use strict;
74 use POE;
75 use File::Path qw(mkpath);
76 use File::Basename qw(dirname);
77
# apply_policy state: unconditionally answers with a true value (1),
# whatever arguments it is given.
sub apply_policy {
    return 1;
}
# spawn($class, %args)
#
# Creates the POE session that acts as the Xango handler. The named arguments
# Plugin, UseCache, BrokerAlias and MaxRedirect are copied into the session
# heap (as PLUGIN, USE_CACHE, BROKER_ALIAS and MaxRedirect), and the package's
# own subs are registered as the session states. Returns the value of
# POE::Session->create.
sub spawn {
    my ($class, %args) = @_;

    my %heap = (
        PLUGIN       => $args{Plugin},
        USE_CACHE    => $args{UseCache},
        BROKER_ALIAS => $args{BrokerAlias},
        MaxRedirect  => $args{MaxRedirect},
    );
    my @states = qw(_start _stop apply_policy prep_request handle_response);

    return POE::Session->create(
        heap           => \%heap,
        package_states => [ $class => \@states ],
    );
}
95
# _start state: registers the alias 'xghandler' for this session with the
# POE kernel and returns the result of alias_set.
sub _start {
    my $kernel = $_[KERNEL];
    return $kernel->alias_set('xghandler');
}
# _stop state: intentionally does nothing.
sub _stop {
    return;
}
# prep_request state
#
# Receives the job in ARG0 and the outgoing request object in ARG1 and
# decorates the request before it is sent:
#   * when the heap's USE_CACHE flag is set and the plugin cache holds an
#     entry for the job URI, If-Modified-Since / If-None-Match are added
#     from the cached LastModified / ETag values;
#   * when the job carries a "referer" note, it is sent as the Referer
#     header. This is independent of the cache setting.
sub prep_request {
    my $job  = $_[ARG0];
    my $req  = $_[ARG1];
    my $heap = $_[HEAP];

    if ($heap->{USE_CACHE}) {
        my $cached = $heap->{PLUGIN}->cache->get($job->uri);
        if ($cached) {
            $req->if_modified_since($cached->{LastModified})
                if $cached->{LastModified};
            $req->header('If-None-Match', $cached->{ETag})
                if $cached->{ETag};
        }
    }

    # Must not be skipped when caching is disabled.
    my $referer = $job->notes('referer');
    $req->header(Referer => $referer) if $referer;

    return;
}
116
# handle_response state
#
# Receives the finished job in ARG0 and reads its "http_response" note.
#
#   * Redirects (301, 302, 303, 307, 308) with an absolute http(s) Location
#     are re-queued on the broker as a new job that keeps the path, enclosure
#     and referer notes and carries an incremented "redirect" counter. At most
#     MaxRedirect (from the heap) redirects are followed; the limit is only
#     applied when another redirect would be followed, so a successful final
#     response is never thrown away.
#   * Other unsuccessful responses are ignored.
#   * Successful responses are written (binary mode) to the job's "path";
#     the enclosure then gets local_path and, when the server sent one, the
#     Content-Length as its length. If USE_CACHE is set, ETag and
#     Last-Modified are stored in the plugin cache under the job URI.
#
# Failures to create the directory or write the file are logged at "warn"
# level and abort processing of this job.
sub handle_response {
    my $job    = $_[ARG0];
    my $heap   = $_[HEAP];
    my $plugin = $heap->{PLUGIN};

    my $r = $job->notes('http_response');
    return unless $r;

    if ($r->code =~ /^30[12378]$/) {
        my $redirect = ($job->notes('redirect') || 0) + 1;
        if ($redirect > $heap->{MaxRedirect}) {
            $plugin->log(warn => "too many redirects, giving up on " . $job->uri);
            return;
        }

        # Scalar assignment: header() would return a list in list context.
        my $url = $r->header('location');
        return unless defined $url && $url =~ m!^https?://!i;

        my %job_args = (
            uri       => URI->new($url),
            redirect  => $redirect,
            path      => $job->notes('path'), # TODO: rewrite path with the new URL? respect Content-Disposition?
            enclosure => $job->notes('enclosure'),
        );
        my $referer = $job->notes('referer');
        $job_args{referer} = $referer if $referer;

        $_[KERNEL]->post($heap->{BROKER_ALIAS}, 'enqueue_job', Xango::Job->new(%job_args));
        return;
    }

    return unless $r->is_success;

    my $local_path = $job->notes('path');

    my $dir = dirname($local_path);
    unless (-d $dir) {
        # The legacy mkpath interface croaks on failure, so trap it and
        # judge success by the directory actually being there and writable.
        eval { mkpath([$dir], 0, 0777) };
        my $err = $@;
        unless (-d $dir && -w _) {
            $plugin->log(warn => "failed to create directory $dir: " . ($err || $!));
            return;
        }
    }

    my $fh;
    unless (open($fh, ">", $local_path)) {
        $plugin->log(warn => "failed to open $local_path for writing: $!");
        return;
    }

    # Enclosures are typically binary (audio/video); avoid newline translation.
    binmode $fh;

    my $written = print {$fh} $r->content;
    my $closed  = close($fh);    # buffered write errors surface here
    unless ($written && $closed) {
        $plugin->log(warn => "failed to write $local_path: $!");
        return;
    }

    my $enclosure = $job->notes('enclosure');
    $enclosure->local_path($local_path);

    # Fix length if it's broken
    my $length = $r->header('Content-Length');
    $enclosure->length($length) if $length;

    if ($heap->{USE_CACHE}) {
        # Force scalar context: a missing header yields an empty list in list
        # context, which would shift the key/value pairs of this hash.
        my $etag          = $r->header('ETag');
        my $last_modified = $r->header('Last-Modified');
        $plugin->cache->set(
            $job->uri,
            {
                ETag         => $etag,
                LastModified => $last_modified,
            },
        );
    }

    return;
}
175
176 1;
177
178 1;
Note: See TracBrowser for help on using the browser.