root/trunk/plagger/lib/Plagger/Plugin/Filter/FetchEnclosure/Curl.pm

Revision 1734 (checked in by miyagawa, 2 years ago)

added Test::Spelling and fixed typos

Line 
1 package Plagger::Plugin::Filter::FetchEnclosure::Curl;
2 use strict;
3 use base qw(Plagger::Plugin::Filter::FetchEnclosure);
4
5 use POE;
6 use POE::Session;
7 use POE::Wheel::Run;
8
# Subscribe this plugin to the two hooks it implements:
#   update.entry.fixup - enqueue() is invoked with each entry
#   update.fixup       - fetch() is invoked once to run the queued work
sub register {
    my ($self, $context) = @_;

    # An array (not a hash) keeps the hook/callback pairs in a fixed order.
    my @hooks = (
        'update.entry.fixup', \&enqueue,
        'update.fixup',       \&fetch,
    );

    $context->register_hook($self, @hooks);
}
17
# Hook for 'update.entry.fixup': queue one curl download per enclosure.
#
# Arguments:
#   $self    - this plugin; reads $self->conf->{dir} and ->{fake_referer}
#   $context - Plagger context; used for logging and for the global
#              user_agent configuration (cookie settings)
#   $args    - hash ref carrying the current {entry} and {feed}
#
# For every enclosure of the entry that is not already stored at its target
# path with the advertised length, a POE session is created that runs curl
# through POE::Wheel::Run.  Nothing is downloaded here; the sessions only
# make progress once the POE kernel is run.  When curl exits successfully
# the enclosure's local_path and length are updated from the file on disk.
sub enqueue {
    my($self, $context, $args) = @_;

    my @enclosures = $args->{entry}->enclosures;
    return unless @enclosures;

    # TODO: do all of this in the base class ::Command
    my $feed_dir = File::Spec->catfile($self->conf->{dir}, $args->{feed}->id_safe);
    unless (-d $feed_dir) {
        $context->log(info => "mkdir $feed_dir");
        unless (mkdir $feed_dir, 0777) {
            # Save $! before the -d test below overwrites it.
            my $error = "$!";
            # Another process may have created the directory meanwhile;
            # only give up when it really is not there.
            unless (-d $feed_dir) {
                $context->log(error => "mkdir $feed_dir failed: $error");
                return;
            }
        }
    }

    # The referer and the cookie file are the same for every enclosure of
    # this entry, so work them out once up front.
    my $referer;
    if ($self->conf->{fake_referer}) {
        $referer = $args->{entry}->permalink;
    }

    my $cookies;
    my $conf = $context->conf->{user_agent} || {};
    if ($conf->{cookies}) {
        my $cookie_jar = Plagger::Cookies->create($conf->{cookies});
        # Only a Mozilla-style jar is handed to curl, via its file name.
        if ($cookie_jar->isa('HTTP::Cookies::Mozilla')) {
            $cookies = $cookie_jar->{file};
        }
    }

    for my $enclosure (@enclosures) {
        my $path = File::Spec->catfile($feed_dir, $enclosure->filename);

        if ($enclosure->length && -e $path && -s _ == $enclosure->length) {
            # TODO: if-none-match
            $context->log(debug => $enclosure->url . " is already stored in $path");
            next;
        }

        $context->log(info => "fetch " . $enclosure->url . " to " . $path);

        if ($self->conf->{fake_referer}) {
            $context->log(debug => "Sending Referer: " . $referer);
        }
        if ($cookies) {
            $context->log(debug => "Using cookie file $cookies");
        }

        # TODO: max connections per domain to respect RFC
        POE::Session->create(
            inline_states => {
                _start => sub {
                    my $wheel = POE::Wheel::Run->new(
                        Program => [
                            'curl',
                            $enclosure->url,
                            '--output', $path,
                            # xxx resume
                            '--retry', 5,
                            '--location',
                            ($referer ? ('--referer', $referer) : ()),
                            ($cookies ? ('--cookie', $cookies) : ())
                        ],
                        StderrEvent => 'stderr',
                        ErrorEvent => 'wheel_close',
                        CloseEvent => 'wheel_close',
                    );
                    $_[HEAP]->{wheel} = $wheel;

                    # Ask to be told when curl exits: its exit status is
                    # the reliable completion signal, unlike scraping the
                    # progress meter from stderr.
                    $_[KERNEL]->sig_child($wheel->PID, 'child_exit');
                },
                stderr => sub {
                    # curl writes its progress meter and errors to stderr.
                    $context->log(debug => $_[ARG0]);
                },
                wheel_close => sub {
                    delete $_[HEAP]->{wheel};
                },
                child_exit => sub {
                    # ARG2 carries the child's wait status ($?).
                    my $status = $_[ARG2];
                    if ($status == 0 && -e $path) {
                        my $length = -s $path;
                        $enclosure->local_path($path);
                        $enclosure->length($length);
                        $context->log(info => "Download to $path is done [$length]");
                    }
                    else {
                        $context->log(error => "curl failed to fetch " . $enclosure->url
                            . " (exit status " . ($status >> 8) . ")");
                    }
                },
            },
        );
    }
}
92
# Hook for 'update.fixup': run the POE kernel, logging an info message
# before it starts and another once POE::Kernel->run has returned.
sub fetch {
    Plagger->context->log('info', "Start downloading files using curl.");
    POE::Kernel->run();
    Plagger->context->log('info', "w00t! Downloading finished.");
}
98
99 1;
100
101 __END__
102
103 =head1 NAME
104
105 Plagger::Plugin::Filter::FetchEnclosure::Curl - Fetch enclosures using curl
106
107 =head1 SYNOPSIS
108
109   - module: Filter::FetchEnclosure::Curl
110     config:
111       dir: /path/to/download
112       concurrency: 5
113       max_requests_per_host: 2
114
115 =head1 DESCRIPTION
116
117 This plugin uses the curl command to download enclosure files.
118
119 =head1 AUTHOR
120
121 Tatsuhiko Miyagawa
122
123 =head1 SEE ALSO
124
125 L<Plagger>
126
127 =cut
Note: See TracBrowser for help on using the browser.