root/trunk/plagger/lib/Plagger/Plugin/Publish/Feed.pm

Revision 2053 (checked in by miyagawa, 7 months ago)

don't use XML::Feed::RSS

Line 
1 package Plagger::Plugin::Publish::Feed;
2
3 use strict;
4 use base qw( Plagger::Plugin );
5
6 use XML::Feed;
7 use XML::Feed::Entry;
8 use XML::RSS::LibXML;
9 use File::Spec;
# Try to load both names the RSS backend of XML::Feed may be published under.
# Each require gets its own eval: inside a single eval block a failure of the
# first require would abort the block and the second module would never be
# attempted. Load failures are deliberately ignored (best effort).
eval { require XML::Feed::RSS };
eval { require XML::Feed::Format::RSS };

# Make XML::Feed build RSS through XML::RSS::LibXML, whichever backend class
# is in use. The RSS add_enclosure hack in this file constructs
# XML::RSS::LibXML::MagicElement objects.
# NOTE(review): presumably the backend modules initialise PREFERRED_PARSER
# when they are loaded, which is why this runs after the requires -- confirm
# against the installed XML::Feed.
$XML::Feed::Format::RSS::PREFERRED_PARSER = $XML::Feed::RSS::PREFERRED_PARSER = "XML::RSS::LibXML";
15
# Plugin entry point: asks the context to autoload the
# Filter::FloatingDateTime plugin and registers this plugin's two hooks.
#
#   $plugin - this plugin instance
#   $ctx    - context object providing autoload_plugin() and register_hook()
sub register {
    my ($plugin, $ctx) = @_;

    my %dependency = (module => 'Filter::FloatingDateTime');
    $ctx->autoload_plugin(\%dependency);

    # hook name => handler, in registration order
    my @hooks = (
        'publish.feed' => \&publish_feed,
        'plugin.init'  => \&plugin_init,
    );
    $ctx->register_hook($plugin, @hooks);
}
25
# Hook for 'plugin.init': validates the output directory and fills in
# configuration defaults.
#
#   $self    - plugin instance; configuration is read through $self->conf
#   $context - context object; $context->error() is used to report failures
#   $args    - hook arguments (unused)
#
# Reports an error through $context->error when the 'dir' setting is missing
# or the directory cannot be created.
sub plugin_init {
    my($self, $context, $args) = @_;

    my $conf = $self->conf;

    # check dir: without this guard an unset 'dir' would only surface as a
    # confusing "mkdir : ..." failure from the mkdir below
    my $dir = $conf->{dir};
    unless (defined $dir && length $dir) {
        $context->error("Publish::Feed: config 'dir' is required");
    }

    # create the directory when it does not exist yet (single level only;
    # mkdir does not create missing parent directories)
    unless (-e $dir && -d _) {
        mkdir $dir, 0755 or $context->error("mkdir $dir: $!");
    }

    # full content is on unless the user configured it explicitly
    unless (exists $conf->{full_content}) {
        $conf->{full_content} = 1;
    }
}
39
# Hook for 'publish.feed': converts the feed in $args->{feed} into an
# XML::Feed document (Atom unless the 'format' setting says otherwise) and
# writes it as UTF-8 into the configured directory.
#
#   $self    - plugin instance; configuration is read through $self->conf
#   $context - context object; used for logging and error reporting
#   $args    - hook arguments; $args->{feed} is the feed to publish
#
# Config keys read: format, taguri_base, full_content, filename, dir.
# Failures to open, write or close the output file are reported through
# $context->error.
sub publish_feed {
    my($self, $context, $args) = @_;

    my $conf = $self->conf;
    my $f = $args->{feed};
    my $feed_format = $conf->{format} || 'Atom';

    # generate feed
    my $feed = XML::Feed->new($feed_format);
    $feed->title($f->title);
    $feed->link($f->link);
    $feed->modified(Plagger::Date->now);
    $feed->generator("Plagger/$Plagger::VERSION");
    $feed->description($f->description);
    $feed->copyright($f->meta->{copyright}) if $f->meta->{copyright};
    # NOTE(review): the guard tests primary_author but the value passed on is
    # $f->author -- confirm this asymmetry is intended.
    $feed->author( $self->make_author($f->author, $feed_format) )
        if $f->primary_author;

    # domain part of the tag: URIs used as feed/entry ids; falls back to the
    # local host name when not configured
    my $taguri_base = $conf->{taguri_base} || do {
        require Sys::Hostname;
        Sys::Hostname::hostname();
    };

    if ($feed_format eq 'Atom') {
        $feed->{atom}->id("tag:$taguri_base,2006:" . $f->id); # XXX what if id is empty?
    }

    # add entry
    for my $e ($f->entries) {
        my $entry = XML::Feed::Entry->new($feed_format);
        $entry->title($e->title);
        $entry->link($e->permalink);
        $entry->summary($e->body_text) if defined $e->body;

        # hack to bypass XML::Feed Atom 0.3 crufts (type="text/html")
        if ($conf->{full_content} && defined $e->body) {
            if ($feed_format eq 'RSS') {
                $entry->content($e->body);
            } else {
                $entry->{entry}->content($e->body->utf8);
            }
        }

        # all tags go into a single space-separated category value
        my $tags = $e->tags;
        $entry->category(join(' ', @$tags)) if @$tags;

        # the entry date serves as both issued and modified time
        my $date = $e->date;
        if ($date) {
            $entry->issued($date);
            $entry->modified($date);
        }

        if ($feed_format eq 'RSS') {
            # same "address (name)" form as the feed-level author; built by
            # the shared helper instead of repeating its logic here
            $entry->author( $self->make_author($e->author, $feed_format) );
        } else {
            # per-entry author only when the feed itself carries none
            unless ($feed->author) {
                $entry->author($e->author || 'nobody');
            }
        }

        $entry->id("tag:$taguri_base,2006:" . $e->id);

        if ($e->has_enclosure) {
            # only enclosures that have a URL and are not inline are published
            for my $enclosure (grep { defined $_->url && !$_->is_inline } $e->enclosures) {
                $entry->add_enclosure({
                    url    => $enclosure->url,
                    length => $enclosure->length,
                    type   => $enclosure->type,
                });

                # RSS 2.0 by spec doesn't allow multiple enclosures
                last if $feed_format eq 'RSS';
            }
        }

        $feed->add_entry($entry);
    }

    # generate file path
    my $tmpl = '%i.' . ($feed_format eq 'RSS' ? 'rss' : 'atom');
    my $file = Plagger::Util::filename_for($f, $conf->{filename} || $tmpl);
    my $filepath = File::Spec->catfile($conf->{dir}, $file);

    $context->log(info => "save feed for " . $f->link . " to $filepath");

    my $xml = $feed->as_xml;
    # the handle below encodes on output, so make sure we hold characters,
    # not already-encoded bytes
    utf8::decode($xml) unless utf8::is_utf8($xml);
    open my $output, ">:utf8", $filepath or $context->error("$filepath: $!");
    # buffered write errors (e.g. disk full) may only show up at print or
    # close time, so both are checked instead of silently reporting success
    print {$output} $xml or $context->error("write $filepath: $!");
    close $output or $context->error("close $filepath: $!");
}
128
# Formats an author value for the given feed format.
#
#   $self        - plugin instance (not used)
#   $author      - author name; may be undef
#   $feed_format - 'RSS' selects the RSS form, anything else the plain form
#
# Returns, for RSS, the placeholder address 'nobody@example.com', followed by
# the name in parentheses when $author is true. For every other format it
# returns $author itself when defined, otherwise 'nobody'.
sub make_author {
    my ($self, $author, $feed_format) = @_;

    # non-RSS formats take the bare name
    unless ($feed_format eq 'RSS') {
        return $author if defined $author;
        return 'nobody';
    }

    # RSS: "address (name)", the name part being optional
    my @parts = ('nobody@example.com');
    push @parts, "($author)" if $author;
    return join ' ', @parts;
}
140
# XXX okay, this is a hack until XML::Feed is updated
#
# Installs add_enclosure() under both Atom entry class names. The method
# takes ($entry, $enclosure), where $enclosure is a hash reference with the
# keys url, type and length, and attaches a rel="enclosure" link built from
# them to the wrapped entry object in $entry->{entry}.
{
    my $atom_add_enclosure = sub {
        my ($feed_entry, $spec) = @_;

        my $atom_link = XML::Atom::Link->new;

        # setter name => value, applied in this order
        my @settings = (
            [ rel    => 'enclosure'     ],
            [ type   => $spec->{type}   ],
            [ href   => $spec->{url}    ],
            [ length => $spec->{length} ],
        );
        for my $setting (@settings) {
            my ($setter, $value) = @$setting;
            $atom_link->$setter($value);
        }

        $feed_entry->{entry}->add_link($atom_link);
    };

    *XML::Feed::Entry::Format::Atom::add_enclosure =
    *XML::Feed::Entry::Atom::add_enclosure = $atom_add_enclosure;
}
152
# Installs add_enclosure() under both RSS entry class names. The method takes
# ($entry, $enclosure), where $enclosure is a hash reference with the keys
# url, type and length, and stores them as the attributes of an
# XML::RSS::LibXML::MagicElement in the 'enclosure' slot of the wrapped item
# ($entry->{entry}), replacing any value already there.
{
    my $rss_add_enclosure = sub {
        my ($feed_entry, $spec) = @_;

        # copy exactly the three attributes an enclosure element carries
        my %attributes;
        @attributes{qw( url type length )} = @{$spec}{qw( url type length )};

        my $element = XML::RSS::LibXML::MagicElement->new(
            attributes => \%attributes,
        );
        $feed_entry->{entry}{enclosure} = $element;
    };

    *XML::Feed::Entry::Format::RSS::add_enclosure =
    *XML::Feed::Entry::RSS::add_enclosure = $rss_add_enclosure;
}
164
165 1;
166
167 __END__
168
=head1 NAME
170
171 Plagger::Plugin::Publish::Feed - republish RSS/Atom feeds
172
173 =head1 SYNOPSIS
174
175   - module: Publish::Feed
176     config:
177       format: RSS
178       dir: /home/yoshiki/plagger/feed
179       filename: my_%t.rss
180
181 =head1 CONFIG
182
183 =over 4
184
185 =item format
186
187 Specify the format of feed. C<Plagger::Plugin::Publish::Feed> supports
188 the following syndication feed formats:
189
190 =over 8
191
192 =item Atom (default)
193
194 =item RSS
195
196 =back
197
198 =item dir
199
200 Directory to save feed files in.
201
202 =item filename
203
Filename to be used to create feed files. It defaults to C<%i.rss> for
RSS and C<%i.atom> for Atom feeds. The following printf()-style
placeholders are supported:
207
208 =over 8
209
210 =item %u url
211
212 =item %l link
213
214 =item %t title
215
216 =item %i id
217
218 =back
219
220 =item full_content
221
Whether to publish each entry's full body as its content, in addition
to the plain-text summary. Defaults to 1.
223
224 =item taguri_base
225
Domain name to use as the Tag URI base for feed and entry IDs. Optional;
if it's not set, the host name returned by L<Sys::Hostname> is used.
228
229 =back
230
231 =head1 AUTHOR
232
233 Tatsuhiko Miyagawa
234
235 =head1 CONTRIBUTORS
236
237 Yoshiki Kurihara
238
239 Gosuke Miyashita
240
241 =head1 SEE ALSO
242
243 L<Plagger>, L<XML::Feed>
244
245 =cut
Note: See TracBrowser for help on using the browser.