| 1 |
package Plagger::Plugin::Filter::2chRSSContent; |
|---|
| 2 |
use strict; |
|---|
| 3 |
use base qw( Plagger::Plugin ); |
|---|
| 4 |
|
|---|
| 5 |
# Plugin entry point: subscribes this plugin to the hooks it handles.
#
#   $self    - the plugin instance
#   $context - object providing register_hook()
#
# The only hook used is 'update.entry.fixup', which is dispatched to filter().
# The return value is whatever register_hook() returns.
sub register {
    my $self    = shift;
    my $context = shift;

    # Hook name => callback table handed to the context in one call.
    my @hooks = (
        'update.entry.fixup' => \&filter,
    );

    $context->register_hook($self, @hooks);
}
|---|
| 12 |
|
|---|
| 13 |
# Callback for the 'update.entry.fixup' hook.
#
#   $self    - the plugin instance
#   $context - object providing log()
#   $args    - hash ref; $args->{entry} and $args->{feed} are used
#
# Only entries whose link matches a 2ch thread URL are touched. For those:
#
#   * If the body starts with a "name:YYYY/MM/DD(..) HH:MM:SS[.cc] [ID:xxx]"
#     header, the header is removed from the body and its pieces are stored
#     as the entry's date (Asia/Tokyo) and author.
#   * Otherwise, if the title looks like "123-" or the body contains the 2ch
#     ad footer, the entry is deleted from the feed.
sub filter {
    my($self, $context, $args) = @_;

    my $entry = $args->{entry};
    return unless $entry->link =~ m!\.2ch\.net/test/read\.cgi|rss\.s2ch\.net/test(\.cgi)?/\-/!;

    my $body = $entry->body;
    if ($body && $body =~ s!^([^:]*):(\d{4}/\d\d/\d\d)\(.*?\) (\d\d:\d\d:\d\d)(?:\.\d\d)? (ID:\S+)? ?!!) {
        my($from, $day, $time, $id) = ($1, $2, $3, $4);

        # The ID: group is optional in the pattern above, so $id may be undef.
        # Use an empty string for logging so no undef is interpolated.
        my $id_label = defined $id ? $id : '';

        # The pattern only checks digit counts, so the header may still not be
        # a real date (e.g. month 13). NOTE(review): assumes strptime returns
        # undef when parsing fails -- confirm against Plagger::Date. Guarding
        # here avoids calling set_time_zone on a non-object.
        my $date = Plagger::Date->strptime('%Y/%m/%d %H:%M:%S', "$day $time");
        if (defined $date) {
            $date->set_time_zone('Asia/Tokyo');
            $context->log(info => "Normalize 2ch rss body $id_label on $date");
            $entry->date($date);
        }
        else {
            $context->log(info => "Normalize 2ch rss body $id_label (unparsable date $day $time)");
        }

        # Join name and ID only when both are present, so a missing ID does
        # not leave a trailing space after the name.
        my $author = ($from && defined $id) ? "$from $id"
                   : $from                  ? $from
                   :                          $id;
        $entry->author($author);
        $entry->body($body);
    }
    elsif ($entry->title =~ /^\d+\-$/
        || ($body && $body =~ m!http://www\.2ch\.net/ad\.html *powerd by Big-Server\.!)) {
        # Not a real post: drop it from the feed entirely.
        $context->log(info => "Strip 2ch bogus entry " . $entry->title);
        $args->{feed}->delete_entry($entry);
    }
}
|---|
| 35 |
|
|---|
| 36 |
1; |
|---|
| 37 |
|
|---|
| 38 |
__END__ |
|---|
| 39 |
|
|---|
| 40 |
=head1 NAME |
|---|
| 41 |
|
|---|
| 42 |
Plagger::Plugin::Filter::2chRSSContent - Normalize 2ch RSS content body |
|---|
| 43 |
|
|---|
| 44 |
=head1 SYNOPSIS |
|---|
| 45 |
|
|---|
| 46 |
- module: Filter::2chRSSContent |
|---|
| 47 |
|
|---|
| 48 |
=head1 DESCRIPTION |
|---|
| 49 |
|
|---|
| 50 |
This plugin normalizes the body of 2ch RSS entries: it extracts the
per-item date, sets the ID: field as the author, and strips bogus entries.
|---|
| 52 |
|
|---|
| 53 |
=head1 AUTHOR |
|---|
| 54 |
|
|---|
| 55 |
Tatsuhiko Miyagawa |
|---|
| 56 |
|
|---|
| 57 |
=head1 SEE ALSO |
|---|
| 58 |
|
|---|
| 59 |
L<Plagger>, L<Plagger::Plugin::Filter::StripRSSAd> |
|---|
| 60 |
|
|---|
| 61 |
=cut |
|---|