root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/Script.pm

Revision 1974 (checked in by miyagawa, 9 months ago)

use URI::http so it can be loaded from YAML output

Line 
1 package Plagger::Plugin::CustomFeed::Script;
2 use strict;
3 use base qw( Plagger::Plugin::Aggregator::Simple );
4
5 use URI;
6 use URI::Escape;
7 use URI::http; # for autoloading
8 use YAML::Syck;
9
10 use Plagger::Plugin::Aggregator::Simple;
11 use Plagger::Plugin::CustomFeed::Debug;
12
# Plugin registration entry point: subscribes this plugin's handle() sub
# to the 'customfeed.handle' hook on the given context.
#
#   $self    - the plugin instance being registered
#   $context - object providing register_hook()
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = ('customfeed.handle' => \&handle);
    $ctx->register_hook($plugin, %hooks);
}
20
# Hook callback for 'customfeed.handle'.
#
# Claims the feed only when its URL uses the "script" scheme; in that case
# the feed is passed to aggregate() and 1 is returned. For every other
# scheme nothing is done and an empty return value is produced.
sub handle {
    my ($plugin, $ctx, $hook_args) = @_;

    my $scheme = URI->new($hook_args->{feed}->url)->scheme;
    return unless $scheme eq 'script';

    $plugin->aggregate($ctx, $hook_args);
    return 1;
}
31
# Run the command named by a script: feed URL and build a feed from what it
# prints on STDOUT.
#
#   $self    - the plugin instance
#   $context - object providing log()
#   $args    - hook arguments; $args->{feed}->url is the script: URL
#
# The output is treated as RSS/Atom when it starts with an XML declaration
# (optionally preceded by a UTF-8 byte order mark); otherwise it is loaded as
# YAML and handed to Plagger::Plugin::CustomFeed::Debug::aggregate with the
# loaded data temporarily installed as $self->{conf}.
#
# Returns 1 on success. Returns nothing (after logging an error) when the
# command cannot be started, exits unsuccessfully, or the YAML path dies.
sub aggregate {
    my($self, $context, $args) = @_;

    # Everything after "script:" is the command line. An optional leading
    # "//" is dropped so both script:/path and script://path work.
    my $script = URI->new($args->{feed}->url)->opaque;
    $script =~ s!^//!!;
    $script = URI::Escape::uri_unescape($script); # to support script://python.exe foo.py

    $context->log(debug => "Executing '$script'");

    # NOTE(review): the command string is executed as-is via qx(), which may
    # involve the shell. That is the purpose of this plugin, but it assumes
    # feed URLs come from trusted configuration - confirm against callers.
    my $output = qx($script);

    # Capture both immediately; later calls may overwrite $? and $!.
    my($status, $os_error) = ($?, "$!");

    if ($status == -1) {
        # -1 means the command could not be started at all.
        $context->log(error => "Error happened while executing '$script': failed to start: $os_error");
        return;
    }
    if ($status) {
        # Low 7 bits hold the terminating signal, the high byte the exit code.
        my $signal = $status & 127;
        my $reason = $signal
            ? "killed by signal $signal"
            : "exit status " . ($status >> 8);
        $context->log(error => "Error happened while executing '$script': $reason");
        return;
    }

    # qx() returns raw bytes, so a UTF-8 BOM shows up as EF BB BF in front of
    # the XML declaration; accept it so such output is not mistaken for YAML.
    if ($output =~ /\A(?:\xEF\xBB\xBF)?<\?xml/) {
        $context->log(debug => "Looks like output is RSS/Atom");
        $self->SUPER::handle_feed($args->{feed}->url, \$output, $args->{feed});
    } else {
        # The block returns true on success so that a failure is detected
        # even if $@ gets reset before we look at it.
        my $ok = eval {
            local $YAML::Syck::ImplicitUnicode = 1;
            my $feed = YAML::Syck::Load($output);
            $context->log(debug => "Looks like output is YAML");
            # Temporarily present the loaded YAML as this plugin's config for
            # the duration of the CustomFeed::Debug aggregate call.
            local $self->{conf} = $feed;
            $self->Plagger::Plugin::CustomFeed::Debug::aggregate($context, $args);
            1;
        };
        unless ($ok) {
            my $error = $@ || 'unknown error';
            $context->log(error => "Failed to parse as YAML. Can't determine output format of $script: $error");
            return;
        }
    }

    return 1;
}
66
67 1;
68 __END__
69
70 =head1 NAME
71
72 Plagger::Plugin::CustomFeed::Script - Script support for Plagger
73
74 =head1 SYNOPSIS
75
76   - module: Subscription::Config
77     config:
78       feed:
79         - script:/path/to/script.rb
80         - script:/path/to/scrape.py
81   - module: CustomFeed::Script
82
83 =head1 DESCRIPTION
84
85 This plugin executes an arbitrary script specified in the subscription
86 with the I<script:> URI scheme, then parses the script's STDOUT to
87 create a feed.
88
89 The output from the script can be either an Atom/RSS feed or YAML in a format
90 compatible with the one used in CustomFeed::Debug. This means
91 you can reuse your I<something2rss> script used for NetNewsWire or
92 similar tools, and you can even write your scraper code in other
93 languages like Python/Ruby.
94
95 This plugin auto-detects if the output is XML or YAML.
96
97 =head1 AUTHOR
98
99 Tatsuhiko Miyagawa
100
101 =head1 SEE ALSO
102
103 L<Plagger>
104
105 =cut
Note: See TracBrowser for help on using the browser.