root/branches/feature-server/plagger/lib/Plagger/Plugin/Filter/StripRSSAd.pm

Revision 856 (checked in by miyagawa, 4 years ago)

merge from trunk to plagger-server for Enclosures support and such. Sorry for the big commit

  • Property svn:keywords set to Id Revision
Line 
1 package Plagger::Plugin::Filter::StripRSSAd;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use DirHandle;
6
# Plugin initialisation.
#
# Forwards all arguments to the parent class's init(), asks the Plagger
# context to autoload the Filter::BloglinesContentNormalize plugin, and then
# loads every pattern file through load_patterns().
sub init {
    my ($self, @args) = @_;

    $self->SUPER::init(@args);

    my $context = Plagger->context;
    $context->autoload_plugin('Filter::BloglinesContentNormalize');

    $self->load_patterns;
}
13
# Load every pattern file found in this plugin's assets directory.
#
# Directory entries are visited in sorted order.  An entry is accepted only
# when it is a plain file and its name consists solely of word characters,
# hyphens and dots.  Each accepted entry is handed to load_pattern() as
# (full path, bare file name).
#
# A directory that cannot be opened is reported through
# Plagger->context->error.
sub load_patterns {
    my $self = shift;

    my $assets = $self->assets_dir;
    my $handle = DirHandle->new($assets)
        or Plagger->context->error("$assets: $!");

    # Build the complete candidate list before loading anything.
    my @candidates;
    for my $name (sort $handle->read) {
        my $path = File::Spec->catfile($assets, $name);
        next unless -f $path;
        next unless $name =~ /^[\w\-\.]+$/;
        push @candidates, [ $path, $name ];
    }

    for my $candidate (@candidates) {
        $self->load_pattern(@$candidate);
    }
}
24
# Load one pattern file, picking the loader from the file extension.
#
#   $file - path of the pattern file
#   $base - bare file name, passed through unchanged to the loader
#
# A path ending in ".yaml" is handled by load_yaml(); anything else is
# handled by load_regexp().  A debug line naming the file is logged first.
sub load_pattern {
    my ($self, $file, $base) = @_;

    Plagger->context->log(debug => "loading $file");

    my $loader = $file =~ /\.yaml$/ ? 'load_yaml' : 'load_regexp';
    $self->$loader($file, $base);
}
36
# Load a pattern file whose whole content is a single regular expression.
#
#   $file - path of the pattern file
#   $base - bare file name, stored as the pattern's "site" label
#
# The file is read in full, one trailing newline is removed, and the text is
# compiled with qr//.  The result is appended to $self->{pattern} as
# { site => $base, re => <compiled regexp> }.
#
# A file that cannot be opened is reported through Plagger->context->error.
sub load_regexp {
    my ($self, $file, $base) = @_;

    # Three-argument open takes the path literally.  The two-argument form
    # would strip leading/trailing whitespace from it and interpret mode
    # characters such as '>' or '|'.
    open my $fh, '<', $file or Plagger->context->error("$file: $!");
    my $re = join '', <$fh>;
    close $fh;
    chomp($re);

    push @{ $self->{pattern} }, { site => $base, re => qr/$re/ };
}
46
# Load a pattern definition from a YAML file.
#
#   $file - path of the YAML file
#   $base - bare file name, used as the default "site" label
#
# The top-level document must be a mapping.  Its keys are merged over
# { site => $base } (so the file may override "site") and the resulting hash
# is appended to $self->{pattern}.
#
# Both a failed load and a document that is not a mapping are reported
# through Plagger->context->error; nothing is appended in either case.
sub load_yaml {
    my ($self, $file, $base) = @_;

    # Detect failure from the eval's own completion flag rather than from the
    # truth of the loaded value, so a false document is not mistaken for a
    # parse error with an empty reason.
    my $pattern;
    my $loaded = eval { $pattern = YAML::LoadFile($file); 1 };
    unless ($loaded) {
        Plagger->context->error("$file: $@");
        return;
    }

    # Anything but a mapping cannot be merged into a pattern hash.
    unless (ref($pattern) eq 'HASH') {
        Plagger->context->error("$file: top-level YAML document must be a mapping");
        return;
    }

    push @{ $self->{pattern} }, { site => $base, %$pattern };
}
55
# Hook registration: subscribes update() to the 'update.entry.fixup' hook on
# the supplied context.
sub register {
    my ($self, $context) = @_;

    my %hooks = ('update.entry.fixup' => \&update);
    $context->register_hook($self, %hooks);
}
63
# Hook handler for 'update.entry.fixup': strips ads from one entry.
#
#   $args->{entry} - the entry being processed (body and link are used)
#   $args->{feed}  - the feed the entry belongs to (used for delete_entry)
#
# Every loaded pattern is applied in order:
#
#   * A pattern with "re" removes each match from the body.  When the regexp
#     has a first capture group that matched, the captured text is kept in
#     place of the match.
#   * A pattern with "condition" is Perl source that is evaluated; when it is
#     true and the pattern's "strip" flag is set, the entry is deleted from
#     the feed.
#
# The (possibly modified) body is written back to the entry at the end.
sub update {
    my($self, $context, $args) = @_;
    my $body = $args->{entry}->body;

    PATTERN:
    for my $pattern (@{ $self->{pattern} }) {
        if (my $re = $pattern->{re}) {
            if ($body =~ s!$re!defined($1) ? $1 : ''!egs) {
                Plagger->context->log(info => "Stripped $pattern->{site} Ad on " . $args->{entry}->link);
            }
        } elsif (my $cond = $pattern->{condition}) {
            # Expose the current body to the condition as $args->{body} for
            # the duration of this branch only.
            local $args->{body} = $body;

            # NOTE(security): string eval runs arbitrary code taken from the
            # pattern file, with $self, $context, $args and $body in scope.
            # Pattern files must therefore come from a trusted location.
            my $is_ad = eval $cond;
            if ($@) {
                Plagger->context->log(error => "Error evaluating $cond: $@");
            } elsif ($is_ad && $pattern->{strip}) {
                $args->{feed}->delete_entry($args->{entry});
                Plagger->context->log(info => "Stripped Ad entry " . $args->{entry}->link);

                # The entry is gone: applying further patterns would only
                # delete and log it again.
                last PATTERN;
            }
        }
    }

    $args->{entry}->body($body);
}
86
87 1;
88
89 __END__
90
91 =head1 NAME
92
93 Plagger::Plugin::Filter::StripRSSAd - Strip RSS Ads from feed content
94
95 =head1 SYNOPSIS
96
97   - module: Filter::StripRSSAd
98
99 =head1 DESCRIPTION
100
101 This plugin strips context-based RSS ads, such as Google AdSense or
102 rssad.jp, from feed content. It uses quick regular expressions to strip
103 the image and map tags.
104
105 =head1 AUTHOR
106
107 Tatsuhiko Miyagawa, Masahiro Nagano
108
109 =head1 SEE ALSO
110
111 L<Plagger>
112
113 =cut
Note: See TracBrowser for help on using the browser.