| 1 |
package Plagger::Plugin::CustomFeed::Script; |
|---|
| 2 |
use strict; |
|---|
| 3 |
use base qw( Plagger::Plugin::Aggregator::Simple ); |
|---|
| 4 |
|
|---|
| 5 |
use URI; |
|---|
| 6 |
use URI::Escape; |
|---|
| 7 |
use URI::http; |
|---|
| 8 |
use YAML::Syck; |
|---|
| 9 |
|
|---|
| 10 |
use Plagger::Plugin::Aggregator::Simple; |
|---|
| 11 |
use Plagger::Plugin::CustomFeed::Debug; |
|---|
| 12 |
|
|---|
| 13 |
# Hook this plugin into the Plagger context.
#
# Registers handle() for the 'customfeed.handle' hook and passes through
# whatever register_hook() returns.
sub register {
    my $self    = shift;
    my $context = shift;

    my %hook_for = ('customfeed.handle' => \&handle);

    return $context->register_hook($self, %hook_for);
}
|---|
| 20 |
|
|---|
| 21 |
# 'customfeed.handle' hook callback.
#
# Claims the feed only when its URL uses the "script" scheme: in that case
# the feed is passed to aggregate() and 1 is returned. For any other scheme
# nothing is done and an empty return is given.
sub handle {
    my ($self, $context, $args) = @_;

    my $uri = URI->new($args->{feed}->url);
    return unless $uri->scheme eq 'script';

    $self->aggregate($context, $args);
    return 1;
}
|---|
| 31 |
|
|---|
| 32 |
# Run the command named by a "script:" feed URL and hand its STDOUT on as a
# feed.
#
# Arguments:
#   $self    - this plugin instance
#   $context - object providing log(level => message)
#   $args    - hash ref; $args->{feed} must provide url()
#
# The command line is the opaque part of the URL with a leading "//" removed
# and percent-escapes decoded. Output starting with "<?xml" is passed to the
# parent class' handle_feed(); anything else is parsed as YAML and, when it
# is a mapping, fed to CustomFeed::Debug's aggregate() as a temporary conf.
#
# Returns 1 on success. On failure an error is logged and an empty return
# is given.
sub aggregate {
    my ($self, $context, $args) = @_;

    my $script = URI->new($args->{feed}->url)->opaque;
    $script =~ s!^//!!;
    $script = URI::Escape::uri_unescape($script);

    # NOTE(security): the decoded URL is executed by the shell verbatim.
    # That is this plugin's purpose, but it means "script:" URLs must only
    # come from trusted subscription sources.
    $context->log(debug => "Executing '$script'");
    my $output = qx($script);

    # Snapshot both immediately; later calls may overwrite $? and $!.
    my ($status, $os_error) = ($?, "$!");
    if ($status) {
        # $? is a raw wait status: -1 means the command could not be
        # started, the low 7 bits hold a terminating signal, and the exit
        # code lives in the high byte.
        my $reason
            = $status == -1 ? "failed to execute: $os_error"
            : $status & 127 ? 'died with signal ' . ($status & 127)
            :                 'exited with status ' . ($status >> 8);
        $context->log(error => "Error happened while executing '$script': $reason");
        return;
    }

    if ($output =~ /^<\?xml/) {
        $context->log(debug => "Looks like output is RSS/Atom");
        $self->SUPER::handle_feed($args->{feed}->url, \$output, $args->{feed});
        return 1;
    }

    # Rely on the eval's own true return value instead of inspecting $@
    # afterwards, so a clobbered $@ cannot hide a failure.
    my $ok = eval {
        local $YAML::Syck::ImplicitUnicode = 1;
        my $feed = YAML::Syck::Load($output);

        # Empty output, a bare scalar or a list cannot serve as a conf
        # hash; reject them here with a clear reason.
        die "output is not a YAML mapping\n" unless ref $feed eq 'HASH';

        $context->log(debug => "Looks like output is YAML");

        # Swap the parsed document in as this plugin's conf only for the
        # duration of the delegated call.
        local $self->{conf} = $feed;
        $self->Plagger::Plugin::CustomFeed::Debug::aggregate($context, $args);
        1;
    };
    if (!$ok) {
        my $error = $@ || 'unknown error';
        $context->log(error => "Failed to parse as YAML. Can't determine output format of $script: $error");
        return;
    }

    return 1;
}
|---|
| 66 |
|
|---|
| 67 |
1; |
|---|
| 68 |
__END__ |
|---|
| 69 |
|
|---|
| 70 |
=head1 NAME |
|---|
| 71 |
|
|---|
| 72 |
Plagger::Plugin::CustomFeed::Script - Script support for Plagger |
|---|
| 73 |
|
|---|
| 74 |
=head1 SYNOPSIS |
|---|
| 75 |
|
|---|
| 76 |
- module: Subscription::Config |
|---|
| 77 |
config: |
|---|
| 78 |
feed: |
|---|
| 79 |
- script:/path/to/script.rb |
|---|
| 80 |
- script:/path/to/scrape.py |
|---|
| 81 |
- module: CustomFeed::Script |
|---|
| 82 |
|
|---|
| 83 |
=head1 DESCRIPTION |
|---|
| 84 |
|
|---|
| 85 |
This plugin executes an arbitrary script specified in the subscription with the |
|---|
| 86 |
I<script:> URI protocol, then parses the STDOUT from the script to |
|---|
| 87 |
create a feed. |
|---|
| 88 |
|
|---|
| 89 |
The output from the script can be either an Atom/RSS feed or a YAML format |
|---|
| 90 |
which is compatible with the one used in CustomFeed::Debug. This means |
|---|
| 91 |
you can reuse your I<something2rss> script used for NetNewsWire or |
|---|
| 92 |
similar tools, and you can even write your scraper code in other |
|---|
| 93 |
languages like Python/Ruby. |
|---|
| 94 |
|
|---|
| 95 |
This plugin auto-detects if the output is XML or YAML. |
|---|
| 96 |
|
|---|
| 97 |
=head1 AUTHOR |
|---|
| 98 |
|
|---|
| 99 |
Tatsuhiko Miyagawa |
|---|
| 100 |
|
|---|
| 101 |
=head1 SEE ALSO |
|---|
| 102 |
|
|---|
| 103 |
L<Plagger> |
|---|
| 104 |
|
|---|
| 105 |
=cut |
|---|