root/branches/feature-server/plagger/lib/Plagger/Plugin/CustomFeed/Simple.pm

Revision 856 (checked in by miyagawa, 3 years ago)

merge from trunk to plagger-server for Enclosures support and such. Sorry for the big commit

Line 
1 package Plagger::Plugin::CustomFeed::Simple;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use Encode;
6 use HTML::TokeParser;
7 use Plagger::UserAgent;
8 use Plagger::Util qw( decode_content extract_title );
9
# Plugin entry point: subscribes this plugin's handle() sub to the
# 'customfeed.handle' hook on the given context.
#
#   $self    - the plugin instance being registered
#   $context - object providing register_hook()
#
# Returns whatever register_hook() returns.
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = ('customfeed.handle' => \&handle);
    $ctx->register_hook($plugin, %hooks);
}
17
# Hook callback for 'customfeed.handle'.
#
# Looks at the feed's meta data for a true 'follow_link' value. When one is
# present it is stored in $args->{match} and the work is delegated to
# aggregate(), whose return value is passed through. Otherwise nothing is
# done and an empty return is issued.
#
#   $self    - the plugin instance
#   $context - passed through to aggregate() untouched
#   $args    - hash ref; $args->{feed} must provide meta()
sub handle {
    my ($self, $context, $args) = @_;

    my $follow_link = $args->{feed}->meta->{follow_link};
    return unless $follow_link;

    $args->{match} = $follow_link;
    return $self->aggregate($context, $args);
}
28
# Fetch the feed's page and turn every matching anchor into a feed entry.
#
#   $self    - the plugin instance (also handed to the user agent's fetch())
#   $context - used for logging (->log) and for publishing the resulting
#              feed (->update->add)
#   $args    - hash ref; $args->{feed}->url is the page to fetch and
#              $args->{match} is the pattern applied to each anchor's href
#
# The new feed takes its title from the fetched page and its link from the
# page URL. Each entry carries the anchor text as title and the absolute
# link URL; a link is added only once per page.
#
# Returns 1 after the feed has been added, or an empty return when the HTTP
# response is an error (the failure is logged).
sub aggregate {
    my($self, $context, $args) = @_;

    my $url = $args->{feed}->url;
    $context->log(info => "GET $url");

    my $agent = Plagger::UserAgent->new;
    my $res = $agent->fetch($url, $self);

    if ($res->http_response->is_error) {
        $context->log(error => "GET $url failed: " . $res->status_line);
        return;
    }

    my $content = decode_content($res);
    my $title   = extract_title($content);

    my $feed = Plagger::Feed->new;
    $feed->title($title);
    $feed->link($url);

    my $re = $args->{match};

    my %seen;
    my $parser = HTML::TokeParser->new(\$content);
    while (my $token = $parser->get_tag('a')) {
        # get_tag() returns [$tag, \%attr, \@attrseq, $text]; element 0 is
        # the tag name, never the 'S' type marker used by get_token(), so
        # only the href needs checking. Anchors without an href (such as
        # <a name="...">) cannot be followed and are skipped.
        my $href = $token->[1]->{href};
        next unless defined $href && $href =~ /$re/;

        # '[IMG]' is the placeholder HTML::TokeParser substitutes for an
        # image without alt text, i.e. the link has no usable title.
        my $text = $parser->get_trimmed_text('/a');
        next if !$text || $text eq '[IMG]';

        # Resolve relative links against the page URL and de-duplicate on
        # the absolute form.
        my $link = URI->new_abs($href, $url);
        next if $seen{$link->as_string}++;

        my $entry = Plagger::Entry->new;
        $entry->title($text);
        $entry->link($link);
        $feed->add_entry($entry);

        $context->log(debug => "Add $href ($text)");
    }

    $context->update->add($feed);

    return 1;
}
75
76 1;
77
78 __END__
79
80 =head1 NAME
81
82 Plagger::Plugin::CustomFeed::Simple - Simple way to create title and link only custom feeds
83
84 =head1 SYNOPSIS
85
86   - module: Subscription::Config
87     config:
88       feed:
89         - url: http://sportsnavi.yahoo.co.jp/index.html
90           meta:
91             follow_link: /headlines/
92
93   - module: CustomFeed::Simple
94
95 =head1 DESCRIPTION
96
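97 For every subscribed feed whose meta data has a C<follow_link> pattern, this plugin fetches the page at the feed URL and builds a feed from the links on that page whose href matches the pattern; each entry carries only the link text as its title and the absolute link URL.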
98 =head1 AUTHOR
99
100 Tatsuhiko Miyagawa
101
102 =head1 SEE ALSO
103
104 L<Plagger>
105
106 =cut
107
108
109
110 1;
Note: See TracBrowser for help on using the browser.