root/trunk/plagger/lib/Plagger/Plugin/Filter/FetchEnclosure/ParallelUA.pm

Revision 1734 (checked in by miyagawa, 2 years ago)

added Test::Spelling and fixed typoes

Line 
1 package Plagger::Plugin::Filter::FetchEnclosure::ParallelUA;
2 use strict;
3 use base qw(Plagger::Plugin::Filter::FetchEnclosure);
4
5 use LWP::Parallel::UserAgent;
6 use HTTP::Request;
7 use Plagger::Cookies;
8
# Hook this plugin into the Plagger run.
#
# Registers three handlers on $context:
#   update.entry.fixup -> enqueue      (called per entry)
#   update.fixup       -> fetch        (called once entries are queued)
#   plugin.init        -> plugin_init  (builds the user agent)
sub register {
    my $self    = shift;
    my $context = shift;

    # Keep the hook list in a named array so the call itself stays short;
    # the pairs are passed in the same order as before.
    my @hooks = (
        'update.entry.fixup' => \&enqueue,
        'update.fixup'       => \&fetch,
        'plugin.init'        => \&plugin_init,
    );

    $context->register_hook($self, @hooks);
}
18
# Build the parallel user agent and keep it in $self->{ua}.
#
# Plugin config keys read here:
#   concurrency           - passed to max_hosts (defaults to 10 when unset/false)
#   max_requests_per_host - passed to max_req   (defaults to 2 when unset/false)
#
# If the global user_agent config has a "cookies" entry, a cookie jar created
# by Plagger::Cookies is attached to the agent.
sub plugin_init {
    my ($self) = @_;

    my $ua = LWP::Parallel::UserAgent->new;
    $self->{ua} = $ua;

    $ua->max_hosts( $self->conf->{concurrency} || 10 );
    $ua->max_req( $self->conf->{max_requests_per_host} || 2 );

    # Cookie settings live in the global user_agent section, not in this
    # plugin's own config.
    my $ua_conf = Plagger->context->conf->{user_agent};
    return unless $ua_conf->{cookies};

    $ua->cookie_jar( Plagger::Cookies->create($ua_conf->{cookies}) );
}
30
# Queue a download for every enclosure of one entry.
#
# Arguments (as passed by the update.entry.fixup hook):
#   $self    - this plugin; reads conf->{dir} and conf->{fake_referer},
#              uses $self->{ua} and fills $self->{callback}
#   $context - used for logging only
#   $args    - hash ref with {entry} and {feed}
#
# For each enclosure the target path is <dir>/<feed id_safe>/<filename>.
# Enclosures whose declared length matches an existing file of the same size
# are skipped.  Everything else is registered with the parallel user agent,
# and a callback keyed by the enclosure URL is stored in $self->{callback}
# for fetch() to run once the response has arrived.
sub enqueue {
    my($self, $context, $args) = @_;

    my @enclosures = $args->{entry}->enclosures;
    return unless @enclosures;

    # This file never loads File::Spec itself; make sure it is available
    # instead of relying on another module having pulled it in.
    require File::Spec;

    # TODO: do all of this in the base class ::Command
    # The feed directory is the same for every enclosure of the entry, so it
    # is resolved (and created) once, before the loop.
    my $feed_dir = File::Spec->catdir($self->conf->{dir}, $args->{feed}->id_safe);
    unless (-d $feed_dir) {
        $context->log(info => "mkdir $feed_dir");
        unless (mkdir $feed_dir, 0777) {
            my $mkdir_error = $!;    # save before the -d test can overwrite it
            # Another process may have created it in the meantime; only a
            # directory that is still missing is an error.
            unless (-d $feed_dir) {
                $context->log(error => "mkdir $feed_dir failed: $mkdir_error");
                return;
            }
        }
    }

    for my $enclosure (@enclosures) {
        my $path = File::Spec->catfile($feed_dir, $enclosure->filename);

        if ($enclosure->length && -e $path && -s _ == $enclosure->length) {
            # TODO: if-none-match
            $context->log(debug => $enclosure->url . " is already stored in $path");
            next;
        }

        $context->log(info => "fetch " . $enclosure->url . " to " . $path);

        my $req = HTTP::Request->new(GET => $enclosure->url);

        if ($self->conf->{fake_referer}) {
            $context->log(debug => "Sending Referer: " . $args->{entry}->permalink);
            $req->header('Referer' => $args->{entry}->permalink);
        }

        # register() returns nothing on success and an error response
        # object when the request could not be queued.
        if (my $failure = $self->{ua}->register($req, $path)) {
            $context->log(error => "failed to queue " . $enclosure->url . ": "
                                 . $failure->code . " " . $failure->message);
            next;
        }

        $self->{callback}->{$enclosure->url} = sub {
            my $response = shift;

            if ($response->code =~ /^[23]/) {
                # Length and local path are only recorded when the server
                # reported a Content-Length.
                if (my $length = $response->header('Content-Length')) {
                    $enclosure->length($length);
                    $enclosure->local_path($path);
                }
            } else {
                # xxx
            }
        };
    }
}
74
# Wait for all queued requests to finish and run the per-enclosure callbacks.
#
# $self->{ua}->wait returns a hash ref of entries; for each one the
# response is taken and, if a callback was stored under the URL of the
# response's request, that callback is invoked with the response.
#
# NOTE(review): the lookup key is the URL of the request attached to the
# response; if that differs from the URL used when the callback was stored
# (e.g. after a redirect), no callback runs - confirm this is intended.
sub fetch {
    my $self    = shift;
    my $context = shift;

    $context->log(debug => "wait for responses from Parallel UA ...");

    my $finished = $self->{ua}->wait;

    foreach my $entry (values %{$finished}) {
        my $response = $entry->response;

        # Entries without a stored callback are simply ignored.
        my $callback = $self->{callback}->{ $response->request->url }
            or next;

        $callback->($response);
    }
}
89
90 1;
91
92 __END__
93
94 =head1 NAME
95
96 Plagger::Plugin::Filter::FetchEnclosure::ParallelUA - Fetch enclosures using Parallel UA
97
98 =head1 SYNOPSIS
99
100   - module: Filter::FetchEnclosure::ParallelUA
101     config:
102       dir: /path/to/download
103       concurrency: 5
104       max_requests_per_host: 2
105
106 =head1 DESCRIPTION
107
108 This plugin uses L<LWP::Parallel::UserAgent> to download enclosures from multiple hosts in parallel.
109
110 =head1 AUTHOR
111
112 Tatsuhiko Miyagawa
113
114 =head1 SEE ALSO
115
116 L<Plagger>, L<LWP::Parallel::UserAgent>
117
118 =cut
Note: See TracBrowser for help on using the browser.