| 1 |
package Plagger::Plugin::Filter::FetchEnclosure::ParallelUA; |
|---|
| 2 |
use strict; |
|---|
| 3 |
use base qw(Plagger::Plugin::Filter::FetchEnclosure); |
|---|
| 4 |
|
|---|
| 5 |
use LWP::Parallel::UserAgent; |
|---|
| 6 |
use HTTP::Request; |
|---|
| 7 |
use Plagger::Cookies; |
|---|
| 8 |
|
|---|
| 9 |
# Subscribes this plugin to the hooks it needs:
#   update.entry.fixup - queue the enclosures of each entry (enqueue)
#   update.fixup       - wait for the queued downloads to finish (fetch)
#   plugin.init        - build the parallel user agent (plugin_init)
sub register {
    my ($self, $context) = @_;

    # Kept as an ordered list so the hooks are handed over in this order.
    my @hooks = (
        'update.entry.fixup' => \&enqueue,
        'update.fixup'       => \&fetch,
        'plugin.init'        => \&plugin_init,
    );

    $context->register_hook($self, @hooks);
}
|---|
| 18 |
|
|---|
| 19 |
# Hook for 'plugin.init': creates the LWP::Parallel::UserAgent stored in
# $self->{ua} and applies the plugin configuration to it.
#
#   concurrency           -> max_hosts (10 when unset or false)
#   max_requests_per_host -> max_req   (2 when unset or false)
#
# When the global 'user_agent' configuration has a 'cookies' entry, a cookie
# jar built from it is attached to the user agent as well.
sub plugin_init {
    my ($self) = @_;

    my $ua = $self->{ua} = LWP::Parallel::UserAgent->new;

    my $host_limit = $self->conf->{concurrency} || 10;
    $ua->max_hosts($host_limit);

    my $per_host_limit = $self->conf->{max_requests_per_host} || 2;
    $ua->max_req($per_host_limit);

    my $ua_conf = Plagger->context->conf->{user_agent};
    $ua->cookie_jar( Plagger::Cookies->create($ua_conf->{cookies}) )
        if $ua_conf->{cookies};
}
|---|
| 30 |
|
|---|
| 31 |
# Hook for 'update.entry.fixup': queues a download on the parallel user agent
# for every enclosure of the entry and remembers a per-URL callback that
# fetch() invokes once the response has arrived.
#
#   $self    - plugin instance; reads conf->{dir} and conf->{fake_referer},
#              uses $self->{ua} and fills $self->{callback}
#   $context - used for logging
#   $args    - hash ref holding the current 'entry' and 'feed'
#
# Enclosures whose file already exists with the advertised length are skipped.
sub enqueue {
    my($self, $context, $args) = @_;

    my @enclosures = $args->{entry}->enclosures;
    return unless @enclosures;

    # Every enclosure of this feed goes into the same directory, so prepare
    # it once instead of once per enclosure.
    my $feed_dir = File::Spec->catfile($self->conf->{dir}, $args->{feed}->id_safe);
    unless (-e $feed_dir && -d _) {
        $context->log(info => "mkdir $feed_dir");
        # Re-check with -d so a directory created concurrently is not an error.
        unless (mkdir($feed_dir, 0777) || -d $feed_dir) {
            $context->log(error => "mkdir $feed_dir failed: $!");
            return;
        }
    }

    for my $enclosure (@enclosures) {
        my $url  = $enclosure->url;
        my $path = File::Spec->catfile($feed_dir, $enclosure->filename);

        # Nothing to do when a file of the advertised size is already there.
        if ($enclosure->length && -e $path && -s _ == $enclosure->length) {
            $context->log(debug => "$url is already stored in $path");
            next;
        }

        $context->log(info => "fetch $url to $path");

        my $req = HTTP::Request->new(GET => $url);

        if ($self->conf->{fake_referer}) {
            my $referer = $args->{entry}->permalink;
            $context->log(debug => "Sending Referer: " . $referer);
            $req->header('Referer' => $referer);
        }

        # register() returns an error response when the request could not be
        # queued and nothing on success (per the LWP::Parallel::UserAgent
        # synopsis); $path is passed so the content is saved to that file.
        if (my $failure = $self->{ua}->register($req, $path)) {
            $context->log(error => "can't queue $url: " . $failure->status_line);
            next;
        }

        $self->{callback}->{$url} = sub {
            my $response = shift;

            unless ($response->code =~ /^[23]/) {
                $context->log(error => "fetch $url failed: " . $response->status_line);
                return;
            }

            # Expose the local copy even when the server sent no
            # Content-Length (e.g. chunked responses); only the length
            # correction depends on that header.
            $enclosure->local_path($path);
            if (my $length = $response->header('Content-Length')) {
                $enclosure->length($length);
            }
        };
    }
}
|---|
| 74 |
|
|---|
| 75 |
# Hook for 'update.fixup': blocks until the parallel user agent has finished
# the queued requests, then passes each response to the callback stored in
# $self->{callback} under the URL of the response's request. Responses without
# a matching callback are ignored.
sub fetch {
    my ($self, $context) = @_;

    $context->log(debug => "wait for responses from Parallel UA ...");
    my $finished = $self->{ua}->wait;

    foreach my $done (values %{$finished}) {
        my $res     = $done->response;
        my $handler = $self->{callback}->{ $res->request->url };
        next unless $handler;

        $handler->($res);
    }
}
|---|
| 89 |
|
|---|
| 90 |
1; |
|---|
| 91 |
|
|---|
| 92 |
__END__ |
|---|
| 93 |
|
|---|
| 94 |
=head1 NAME |
|---|
| 95 |
|
|---|
| 96 |
Plagger::Plugin::Filter::FetchEnclosure::ParallelUA - Fetch enclosures using Parallel UA |
|---|
| 97 |
|
|---|
| 98 |
=head1 SYNOPSIS |
|---|
| 99 |
|
|---|
| 100 |
- module: Filter::FetchEnclosure::ParallelUA |
|---|
| 101 |
config: |
|---|
| 102 |
dir: /path/to/download |
|---|
| 103 |
concurrency: 5 |
|---|
| 104 |
max_requests_per_host: 2 |
|---|
| 105 |
|
|---|
| 106 |
=head1 DESCRIPTION |
|---|
| 107 |
|
|---|
| 108 |
This plugin uses L<LWP::Parallel::UserAgent> to download enclosures from multiple hosts in parallel.
|---|
| 109 |
|
|---|
| 110 |
=head1 AUTHOR |
|---|
| 111 |
|
|---|
| 112 |
Tatsuhiko Miyagawa |
|---|
| 113 |
|
|---|
| 114 |
=head1 SEE ALSO |
|---|
| 115 |
|
|---|
| 116 |
L<Plagger>, L<LWP::Parallel::UserAgent> |
|---|
| 117 |
|
|---|
| 118 |
=cut |
|---|