root/branches/feature-server/plagger/lib/Plagger/Plugin/Filter/HEADEnclosureMetadata.pm

Revision 856 (checked in by miyagawa, 3 years ago)

merge from trunk to plagger-server for Enclosures support and such. Sorry for the big commit

Line 
1 package Plagger::Plugin::Filter::HEADEnclosureMetadata;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use Plagger::UserAgent;
6
# Plugin entry point: subscribes filter() to the 'update.entry.fixup' hook
# on the given Plagger context.
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = (
        'update.entry.fixup' => \&filter,
    );

    $ctx->register_hook($plugin, %hooks);
}
14
# Hook handler for 'update.entry.fixup'.
#
# Walks the enclosures of $args->{entry}. Enclosures that already have a
# length, or that have no URL, are left alone. For the rest, metadata is
# looked up through the plugin cache (keyed by URL, kept for '1 day'), with
# fetch_metadata() as the callback that produces a missing value, and the
# enclosure's length, type and filename are updated from it.
sub filter {
    my ($self, $context, $args) = @_;

    ENCLOSURE:
    foreach my $enc ($args->{entry}->enclosures) {
        next ENCLOSURE if $enc->length;
        next ENCLOSURE unless $enc->url;

        my $url  = $enc->url;
        my $meta = $self->cache->get_callback(
            $url,
            sub { $self->fetch_metadata($enc->url) },
            '1 day',
        );

        if (!$meta) {
            $context->log(error => "Can't get metadata from " . $url);
            next ENCLOSURE;
        }

        if (my $length = $meta->{length}) {
            $enc->length($length);
            $context->log(info => "Set length of " . $url . ": $length");
        }

        # The fetched type wins when the enclosure has none, or when it
        # differs from the current one and is not a text/* type.
        my $type = $meta->{type};
        my $wants_type = $type
            && (!$enc->type || ($type !~ m!^text/! && $enc->type ne $type));
        if ($wants_type) {
            $enc->type($type);
            $context->log(info => "Set type of " . $url . ": $type");
        }

        if (my $filename = $meta->{filename}) {
            $enc->filename($filename);
            $context->log(info => "Set filename of " . $url . ": $filename");
        }
    }
}
50
# Sends a HEAD request to $url and collects enclosure metadata from the
# response headers.
#
# Returns nothing when the response is an error. Otherwise returns a hash
# reference with the keys 'length' (Content-Length), 'type' (Content-Type)
# and 'filename' (taken from Content-Disposition); each value is undef when
# the corresponding header does not provide it.
sub fetch_metadata {
    my ($self, $url) = @_;

    Plagger->context->log(debug => "sending HEAD to $url");

    my $response = Plagger::UserAgent->new->request(
        HTTP::Request->new(HEAD => $url)
    );
    return if $response->is_error;

    my %meta;
    $meta{'length'}   = _header($response, 'Content-Length');
    $meta{'type'}     = _header($response, 'Content-Type');
    $meta{'filename'} = _filename($response);

    return \%meta;
}
68
# Returns the value of header $header from response $res with everything
# from the first ';' onwards removed, or undef when the header is missing or
# false.
#
# Deliberately uses "return undef" rather than a bare "return":
# fetch_metadata() calls this inside a hash constructor, where an empty list
# would shift the following key/value pairs.
sub _header {
    my ($response, $name) = @_;

    my $raw = $response->header($name);
    return undef unless $raw;

    (my $bare = $raw) =~ s/;.*?$//;
    return $bare;
}
76
# Extracts the suggested filename from the Content-Disposition header of
# response $res (any object with a header($name) method).
#
# Handles both the quoted form (filename="my file.mp3", which may contain
# spaces) and the bare token form (filename=file.mp3, ending at whitespace
# or ';'). The parameter name is matched case-insensitively and optional
# whitespace around ';' and '=' is accepted. The extended "filename*="
# parameter is not interpreted.
#
# Returns the filename, or undef when the header is absent or carries no
# non-empty filename. Always returns exactly one value ("return undef", not
# a bare "return"): fetch_metadata() calls this inside a hash constructor,
# where an empty list would shift the following key/value pairs.
#
# NOTE: the value is supplied by the remote server; callers that use it to
# build a local path should sanitize it first.
sub _filename {
    my $res = shift;

    my $value = $res->header('Content-Disposition');
    return undef unless defined $value && length $value;

    my $filename;
    if ($value =~ /;\s*filename\s*=\s*"((?:[^"\\]|\\.)*)"/i) {
        # Quoted string: take everything between the quotes, spaces included.
        $filename = $1;
    }
    elsif ($value =~ /;\s*filename\s*=\s*([^\s;]+)/i) {
        # Bare token: stop at whitespace or the next parameter separator.
        $filename = $1;
    }

    return undef unless defined $filename && length $filename;
    return $filename;
}
85
86 1;
87
88 __END__
89
90 =head1 NAME
91
92 Plagger::Plugin::Filter::HEADEnclosureMetadata - Fetch enclosure metadata by sending HEAD request(s)
93
94 =head1 SYNOPSIS
95
96   - module: Filter::HEADEnclosureMetadata
97     config:
98       dir: /path/to/files
99
100 =head1 DESCRIPTION
101
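102 This plugin sends a HEAD request to the URL of each enclosure that has no length yet and fills in the enclosure's length, type and filename from the Content-Length, Content-Type and Content-Disposition response headers. Fetched metadata is cached for one day.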
103
104 =head1 TODO
105
106 =over 4
107
108 =item Support asynchronous download using POE
109
110 =back
111
112 =head1 AUTHOR
113
114 Tatsuhiko Miyagawa
115
116 =head1 SEE ALSO
117
118 L<Plagger>
119
120 =cut
121
Note: See TracBrowser for help on using the browser.