| | 11 | $self->load_patterns(); |
|---|
| | 12 | } |
|---|
| | 13 | |
|---|
# Load every ad-stripping pattern file found in the plugin's assets directory.
#
# Each regular file directly inside $self->assets_dir whose name consists
# solely of word characters and hyphens is handed to $self->load_pattern as
# (full path, bare file name), in sorted name order.  Subdirectories and
# names containing any other character (dots, whitespace, ...) are ignored.
#
# Reports through Plagger->context->error when the directory cannot be opened.
sub load_patterns {
    my $self = shift;

    my $dir = $self->assets_dir;
    my $dh = DirHandle->new($dir) or Plagger->context->error("$dir: $!");

    for my $name (sort $dh->read) {
        # Anchor with \A...\z: a trailing "$" would also accept a name that
        # ends in a newline, and such a name must not reach load_pattern.
        next unless $name =~ /\A[\w\-]+\z/;

        my $path = File::Spec->catfile($dir, $name);
        next unless -f $path;

        $self->load_pattern($path, $name);
    }

    return;
}
|---|
| | 24 | |
|---|
| | 25 | sub load_pattern { |
|---|
| | 26 | my($self, $file, $base) = @_; |
|---|
| | 27 | |
|---|
| | 28 | Plagger->context->log(debug => "loading $file"); |
|---|
| | 29 | |
|---|
| | 30 | open my $fh, $file or Plagger->context->error("$file: $!"); |
|---|
| | 31 | my $re = join '', <$fh>; |
|---|
| | 32 | chomp($re); |
|---|
| | 33 | |
|---|
| | 34 | push @{$self->{pattern}}, { site => $base, re => qr/$re/ }; |
|---|
| 28 | | # rssad.jp |
|---|
| 29 | | my $count = $body =~ s!<br clear="all" /><a href="http://rss\.rssad\.jp/rss/ad/.*?" target="_blank".*?><img .*? src="http://rss\.rssad\.jp/rss/img/.*?" border="0"/></a><br.*?>!!; |
|---|
| 30 | | Plagger->context->log(debug => "Stripped rssad.jp ad on $link") if $count; |
|---|
| 31 | | |
|---|
| 32 | | # plaza.rakuten.co.jp |
|---|
| 33 | | $count = $body =~ s!<br clear?=all /><br><SMALL>\n(?:<SCRIPT LANGUAGE="Javascript">\n<\!--\nfunction random\(\).*?infoseek.*?RssPlaza.*</SCRIPT>)?\n<NOSCRIPT>.*?infoseek.*?RssPlaza.*?</NOSCRIPT>\n</SMALL>!!s; |
|---|
| 34 | | Plagger->context->log(debug => "Stripped plaza.rakuten ad on $link") if $count; |
|---|
| 35 | | |
|---|
| 36 | | # Google AdSense for Feeds |
|---|
| 37 | | $count = $body =~ s!<p><map name="google_ad_map_\d+\-\d+"><area.*?></map><img usemap="#google_ad_map_\d+-\d+" border="0" src="http://imageads\.googleadservices\.com/pagead/ads\?.*?" /></p>!!; |
|---|
| 38 | | |
|---|
| 39 | | # Google AdSense for Feeds, part 2. |
|---|
| 40 | | $count += $body =~ s!<table [^>]*>\n\s*(?:<tr>\n\s*<td><(?:defanged-)?span[^>]*> <br[^>]*></(?:defanged-)?span></td>\n\s*</tr>\s*\n\s*)?<tr>\n\s*<td><a href="http://imageads\.googleadservices\.com/pagead/imgclick/[^"]*"[^>]*>\n<img [^>]* src="http://imageads\.googleadservices\.com/pagead/ads\?[^"]*" / ?></a></td>\n\s*</tr>\n\s*<tr>\n\s*<td><div align="right">(?:<font [^>]*>)?<a href="http://www\.google\.com/ads_by_google\.html" [^>]*>Ads by Google</a>(?:</font>)?</div></td>\n\s*</tr>\n\s*</table>!!s; |
|---|
| 41 | | |
|---|
| 42 | | Plagger->context->log(debug => "Stripped Google AdSense for feeds on $link") if $count; |
|---|
| 43 | | |
|---|
| 44 | | # Pheedo ads |
|---|
| 45 | | $count = $body =~ s!<br /><br />\n<a href="http://www\.pheedo\.com/click\.phdo\?feedUrl=.*?"*?><img border="0" src="http://www\.pheedo\.com/img\.phdo\?feedUrl=.*?" /></a>!!; |
|---|
| 46 | | Plagger->context->log(debug => "Stripped Pheedo Ads on $link") if $count; |
|---|
| 47 | | |
|---|
| 48 | | # FeedBurner ads |
|---|
| 49 | | $count = $body =~ s!<p><a href="http://feeds\.feedburner\.(?:com|jp)/~a/[\w/]+\?a=\w+"[^>]*><img src="http://feeds\.feedburner\.(?:com|jp)/~a/[\w/]+\?i=\w+" border="0"></img></a></p>!!; |
|---|
| 50 | | Plagger->context->log(debug => "Stripped FeedBurner Ads on $link") if $count; |
|---|
| 51 | | |
|---|
| 52 | | # seesaa.net affiliate link |
|---|
| 53 | | $count = $body =~ s!<a href="http://www\.seesaa\.jp/afr\.pl\?.*?"[^>]*class="affiliate-link"[^>]*>([^<]+)</a>!$1!g; |
|---|
| 54 | | Plagger->context->log(debug => "Stripped Seesaa Ads on $link") if $count; |
|---|
| 55 | | |
|---|
| 56 | | # NPR valueclick ads |
|---|
| 57 | | $count = $body =~ s!<p>\s*<a href="http://ads\.npr\.valueclick\.net/redirect\?host=hs.*?" target="_top">\s*<img border="0" .*? src="http://ads\.npr\.valueclick\.net/cycle\?host=hs.*?" />\s*</a>!!g; |
|---|
| 58 | | Plagger->context->log(debug => "Stripped valueclick ads on $link") if $count; |
|---|
| | 54 | for my $pattern (@{ $self->{pattern} }) { |
|---|
| | 55 | my $re = $pattern->{re}; |
|---|
| | 56 | if (my $count = $body =~ s!$re!!g) { |
|---|
| | 57 | Plagger->context->log(debug => "Stripped $pattern->{site} Ad on $link"); |
|---|
| | 58 | } |
|---|
| | 59 | } |
|---|