package Plagger::Plugin::CustomFeed::2chSearch;
use strict;
use base qw( Plagger::Plugin );
use Encode;
use HTML::Entities;
use Plagger::UserAgent;
use Plagger::Util qw( decode_content );
# Hook registration: asks the context to deliver 'customfeed.handle'
# events to this plugin's handle() callback.
#
#   $plugin - the plugin instance being registered
#   $ctx    - the context object that owns the hook table
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = ( 'customfeed.handle' => \&handle );

    $ctx->register_hook($plugin, %hooks);
}
# Callback for the 'customfeed.handle' hook.
#
# When the feed URL is a find.2ch.net index.php search whose query string
# contains TYPE=BODY, the feed is handed to aggregate() and 1 is returned.
# For any other URL nothing is done and an empty return is given.
sub handle {
    my ($self, $context, $args) = @_;

    my $feed_url = $args->{feed}->url;
    return unless $feed_url =~ m!^http://find\.2ch\.net/index\.php\?.*TYPE=BODY!;

    $self->aggregate($context, $args);
    return 1;
}
# Fetch the search result page behind the feed URL, scrape every hit out of
# it and add one Plagger::Entry per hit to the feed.
#
#   $self    - the plugin instance; passed to the user agent's fetch() and
#              used to call find_entry()
#   $context - context object used for logging and for publishing the feed
#   $args    - hook arguments; $args->{feed} is the feed that gets filled in
#
# Returns early (empty) when the fetch fails; otherwise the feed is handed
# to $context->update->add.
sub aggregate {
    my($self, $context, $args) = @_;

    my $feed = $args->{feed};
    my $url  = $feed->url;
    $context->log(info => "GET $url");

    my $agent = Plagger::UserAgent->new;
    # NoNetwork is given in seconds (one hour); see "FREQUENCY FOR SEARCHES"
    # in the POD of this file for the reason.
    my $res = $agent->fetch($url, $self, { NoNetwork => 60 * 60 });

    if (!$res->status && $res->is_error) {
        # status is false on this path by construction, so it carries no
        # information; report the HTTP status code of the response instead.
        $context->log(error => "GET $url failed: " . $res->http_status);
        return;
    }

    my $content = decode_content($res);

    # The search keyword comes from the STR query parameter, which this code
    # decodes as EUC-JP.
    my %query = URI->new($url)->query_form;
    my $query = decode("euc-jp", $query{STR});

    $feed->title( decode("utf-8", "2ch 検索: ") . $query );
    $feed->link($url);

    # NOTE(review): as it stands this pattern has four capture groups while
    # @keys below names five fields, so $5 (date) would always be undef and
    # the timestamp would land in 'body' -- confirm the pattern text is intact.
    my $re = decode('utf-8', <<'RE');
(.*?) \((\d+)\) - .*? - @.*?(.*?)
.*?鯖 / 最新:(\d{4}/\d\d/\d\d \d\d:\d\d) - .*?
RE

    # Normalise line endings so the newlines inside the pattern can match.
    $content =~ s/\r\n/\n/g;

    my @keys = qw( link title count body date );
    my $date_format = "%Y/%m/%d %H:%M";

    while ($content =~ /$re/gs) {
        my $data;
        @{$data}{@keys} = ($1, $2, $3, $4, $5);

        $data->{date} = Plagger::Date->strptime($date_format, $data->{date});
        # Only adjust time zones when the timestamp actually parsed; a single
        # unparsable date must not abort the whole feed.
        if ($data->{date}) {
            $data->{date}->set_time_zone('Asia/Tokyo'); # set floating datetime
            $data->{date}->set_time_zone(Plagger->context->conf->{timezone} || 'local');
        }

        # May replace link and date with those of the individual post.
        $self->find_entry($data, $agent, $query);

        my $entry = Plagger::Entry->new;
        $entry->title($data->{title});
        $entry->link( URI->new_abs($data->{link}, $url) );
        $entry->date($data->{date}) if $data->{date};
        $entry->body( munge_body($data->{body}) );

        $feed->add_entry($entry);
    }

    $context->update->add($feed);
}
# mess with 2ch dat to find the actual entry, Ugggh
#
# Narrow a search hit, whose link covers a range of posts, down to the single
# newest post in that range whose body contains the search keyword.
#
#   $self  - the plugin instance; passed to the user agent's fetch()
#   $data  - hash ref describing the hit; on success 'link' is rewritten to
#            point at the matching post and 'date' is replaced by that post's
#            timestamp when it can be parsed
#   $agent - user agent object used to fetch the thread's dat file
#   $query - search keyword, matched literally and case-insensitively
#
# Leaves $data untouched when the keyword is empty, the link is not of the
# read.cgi/<board>/<thread>/<from>-<to> form, the fetch fails, or no post in
# the range matches. The return value carries no meaning.
sub find_entry {
    my($self, $data, $agent, $query) = @_;

    # An empty pattern has special meaning to the regex engine (it reuses the
    # last successful pattern), so there is nothing sensible to search for.
    return unless defined $query && length $query;

    my $link = $data->{link};
    return unless defined $link;

    # http://pc7.2ch.net/test/read.cgi/mac/1149563958/1-100
    #  => http://pc7.2ch.net/mac/dat/1149563958.dat
    my($server, $board, $thread, $from, $to) =
        $link =~ m!^http://(\w+)\.2ch\.net/test/read\.cgi/([^/]+)/(\d+)/(\d+)-(\d+)!
        or return;

    my $dat = "http://$server.2ch.net/$board/dat/$thread.dat";
    Plagger->context->log(debug => "GET $dat to find true entry link");

    my $res = $agent->fetch($dat, $self);
    if (!$res->status && $res->is_error) {
        Plagger->context->log(error => "GET $dat failed: " . $res->http_status);
        return;
    }

    my $content = decode('shift_jis', $res->content);
    my @lines = split /\r?\n/, $content;

    # The keyword is user-supplied text, not a pattern: quote it so that
    # metacharacters (e.g. "C++") neither break nor alter the match.
    my $matcher = qr/\Q$query\E/i;

    # if it links to 101-200, search from 200 to 101 to find the newest one
    for my $id ( reverse ($from .. $to) ) {
        next if $id < 1;                  # post numbers are 1-based
        my $line = $lines[$id-1] or next;

        # One post per line, fields separated by "<>"; field 2 is read as the
        # timestamp and field 3 as the post body.
        my @fields = split /<>/, $line;
        next unless defined $fields[3] && $fields[3] =~ $matcher;

        Plagger->context->log(info => "found entry on $id");

        # xxx I could update other metadata, but leave it for EntryFullText ...
        $data->{link} = "http://$server.2ch.net/test/read.cgi/$board/$thread/$id";

        if (defined $fields[2]
            && $fields[2] =~ m!^(\d{4}/\d\d/\d\d)\(.*?\) (\d\d:\d\d:\d\d)!) {
            my $date = Plagger::Date->strptime("%Y/%m/%d %H:%M:%S", "$1 $2");
            # Keep the previous date if this one cannot be parsed.
            if ($date) {
                $date->set_time_zone('Asia/Tokyo'); # set floating datetime
                $date->set_time_zone(Plagger->context->conf->{timezone} || 'local');
                $data->{date} = $date;
            }
        }
        return;
    }

    return;
}
# Prepare the scraped body text of a hit for use as an entry body: run the
# substitution below over it, then return it with HTML entities decoded.
sub munge_body {
    my ($text) = @_;

    # NOTE(review): as written, every match is replaced by its own captured
    # text, so this substitution leaves $text unchanged -- confirm the
    # intended pattern.
    $text =~ s!(.*?)!$1!g;

    return decode_entities($text);
}
1;
__END__
=head1 NAME
Plagger::Plugin::CustomFeed::2chSearch - Custom feed for 2ch Search with Moritapo
=head1 SYNOPSIS
  global:
    user_agent:
      cookies: /path/to/cookies.txt
  plugins:
    - module: Subscription::Config
      config:
        feed:
          - http://find.2ch.net/index.php?BBS=2ch&TYPE=BODY&STR=Plagger&COUNT=10
    - module: CustomFeed::2chSearch
=head1 DESCRIPTION
This plugin creates a custom feed off of 2ch search
L<http://find.2ch.net/>. Since 2ch search requires Moritapo to search
by fulltext, this plugin also requires a valid login cookie set in the
global I<user_agent> config.
=head1 FREQUENCY FOR SEARCHES
By default, this plugin doesn't search more than once an hour,
to save your money (Moritapo). If you want to reduce search
frequency further (like once a day), consider using
L to trigger Subscription::Config for it.
=head1 AUTHOR
Tatsuhiko Miyagawa
=head1 SEE ALSO
L<Plagger>, L<http://find.2ch.net/>
=cut