Changeset 595

Show
Ignore:
Timestamp:
04/15/06 17:34:57
Author:
miyagawa
Message:

StripRSSAd now loads pattern files from assets dir. No need to update code anymore!
Fixes #179

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/plagger/lib/Plagger/Plugin/Filter/StripRSSAd.pm

    r594 r595  
    22use strict; 
    33use base qw( Plagger::Plugin ); 
     4 
     5use DirHandle; 
    46 
    57sub init { 
     
    79    $self->SUPER::init(@_); 
    810    Plagger->context->autoload_plugin('Filter::BloglinesLinkAttrStripper'); 
     11    $self->load_patterns(); 
     12} 
     13 
     14sub load_patterns { 
     15    my $self = shift; 
     16 
     17    my $dir = $self->assets_dir; 
     18    my $dh = DirHandle->new($dir) or Plagger->context->error("$dir: $!"); 
     19    for my $file (grep -f $_->[0] && $_->[1] =~ /^[\w\-]+$/, 
     20                  map [ File::Spec->catfile($dir, $_), $_ ], sort $dh->read) { 
     21        $self->load_pattern(@$file); 
     22    } 
     23} 
     24 
     25sub load_pattern { 
     26    my($self, $file, $base) = @_; 
     27 
     28    Plagger->context->log(debug => "loading $file"); 
     29 
     30    open my $fh, $file or Plagger->context->error("$file: $!"); 
     31    my $re = join '', <$fh>; 
     32    chomp($re); 
     33 
     34    push @{$self->{pattern}}, { site => $base, re => qr/$re/ }; 
    935} 
    1036 
     
    2652    my($self, $body, $link) = @_; 
    2753 
    28     # rssad.jp 
    29     my $count = $body =~ s!<br clear="all" /><a href="http://rss\.rssad\.jp/rss/ad/.*?" target="_blank".*?><img .*? src="http://rss\.rssad\.jp/rss/img/.*?" border="0"/></a><br.*?>!!; 
    30     Plagger->context->log(debug => "Stripped rssad.jp ad on $link") if $count; 
    31  
    32     # plaza.rakuten.co.jp 
    33     $count = $body =~ s!<br clear?=all /><br><SMALL>\n(?:<SCRIPT LANGUAGE="Javascript">\n<\!--\nfunction random\(\).*?infoseek.*?RssPlaza.*</SCRIPT>)?\n<NOSCRIPT>.*?infoseek.*?RssPlaza.*?</NOSCRIPT>\n</SMALL>!!s; 
    34     Plagger->context->log(debug => "Stripped plaza.rakuten ad on $link") if $count; 
    35  
    36     # Google AdSense for Feeds 
    37     $count = $body =~ s!<p><map name="google_ad_map_\d+\-\d+"><area.*?></map><img usemap="#google_ad_map_\d+-\d+" border="0" src="http://imageads\.googleadservices\.com/pagead/ads\?.*?" /></p>!!; 
    38  
    39     # Google AdSense for Feeds, part 2. 
    40     $count += $body =~ s!<table [^>]*>\n\s*(?:<tr>\n\s*<td><(?:defanged-)?span[^>]*> <br[^>]*></(?:defanged-)?span></td>\n\s*</tr>\s*\n\s*)?<tr>\n\s*<td><a href="http://imageads\.googleadservices\.com/pagead/imgclick/[^"]*"[^>]*>\n<img [^>]* src="http://imageads\.googleadservices\.com/pagead/ads\?[^"]*" / ?></a></td>\n\s*</tr>\n\s*<tr>\n\s*<td><div align="right">(?:<font [^>]*>)?<a href="http://www\.google\.com/ads_by_google\.html" [^>]*>Ads by Google</a>(?:</font>)?</div></td>\n\s*</tr>\n\s*</table>!!s; 
    41  
    42     Plagger->context->log(debug => "Stripped Google AdSense for feeds on $link") if $count; 
    43  
    44     # Pheedo ads 
    45     $count = $body =~ s!<br /><br />\n<a href="http://www\.pheedo\.com/click\.phdo\?feedUrl=.*?"*?><img border="0" src="http://www\.pheedo\.com/img\.phdo\?feedUrl=.*?" /></a>!!; 
    46     Plagger->context->log(debug => "Stripped Pheedo Ads on $link") if $count; 
    47  
    48     # FeedBurner ads 
    49     $count = $body =~ s!<p><a href="http://feeds\.feedburner\.(?:com|jp)/~a/[\w/]+\?a=\w+"[^>]*><img src="http://feeds\.feedburner\.(?:com|jp)/~a/[\w/]+\?i=\w+" border="0"></img></a></p>!!; 
    50     Plagger->context->log(debug => "Stripped FeedBurner Ads on $link") if $count; 
    51  
    52     # seesaa.net affiliate link 
    53     $count = $body =~ s!<a href="http://www\.seesaa\.jp/afr\.pl\?.*?"[^>]*class="affiliate-link"[^>]*>([^<]+)</a>!$1!g; 
    54     Plagger->context->log(debug => "Stripped Seesaa Ads on $link") if $count; 
    55  
    56     # NPR valueclick ads 
    57     $count = $body =~ s!<p>\s*<a href="http://ads\.npr\.valueclick\.net/redirect\?host=hs.*?" target="_top">\s*<img border="0" .*? src="http://ads\.npr\.valueclick\.net/cycle\?host=hs.*?" />\s*</a>!!g; 
    58     Plagger->context->log(debug => "Stripped valueclick ads on $link") if $count; 
     54    for my $pattern (@{ $self->{pattern} }) { 
     55        my $re = $pattern->{re}; 
     56        if (my $count = $body =~ s!$re!!g) { 
     57            Plagger->context->log(debug => "Stripped $pattern->{site} Ad on $link"); 
     58        } 
     59    } 
    5960 
    6061    $body;