root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/Frepa/FriendDiary.pm

Revision 1712 (checked in by nipotan, 2 years ago)

treated for site renewal of frepa

Line 
1 package Plagger::Plugin::CustomFeed::Frepa::FriendDiary;
2 use strict;
3 use warnings;
4 use HTML::Entities;
5 use Encode;
6
# Feed title for this plugin: a fixed Japanese string literal, returned exactly
# as it is stored in this source file.
sub title {
    return 'フレ友の日記';
}
8
# URL of the page that get_list() fetches and scrapes.
sub start_url {
    return 'http://www.frepa.livedoor.com/home/friend_blog/';
}
10
# Fetch the page at start_url() and scrape one record per listed diary entry.
#
# Arguments:
#   $self - invocant; must provide start_url() and _list_regexp()
#   $mech - user agent object offering get(), success() and content()
#           (presumably a WWW::Mechanize instance -- confirm against caller)
#
# Returns a list of hash references with the keys link, subject, name,
# user_link and time (formatted "YYYY/MM/DD hh:mm").  The list is empty when
# the request does not succeed or when nothing on the page matches.
sub get_list {
    my ($self, $mech) = @_;

    my @entries;
    $mech->get($self->start_url);
    return @entries unless $mech->success;

    # The page is decoded from EUC-JP; the pattern is decoded as well so that
    # both sides of the match are character strings.
    my $page    = decode('euc-jp', $mech->content);
    my $pattern = decode('utf-8', $self->_list_regexp());
    my $row_re  = qr/$pattern/is;

    # Scalar-context /g match: each iteration resumes after the previous row.
    while ($page =~ /$row_re/g) {
        # Copy the captures into lexicals before doing anything else.
        my ($year, $month, $day, $hour, $minute, @raw) =
            ($1, $2, $3, $4, $5, $6, $7, $8, $9);

        # Captures 6-9 are taken from HTML, so their entities are decoded.
        my ($link, $subject, $user_link, $name) =
            map { decode_entities($_) } @raw;

        push @entries, +{
            link      => $link,
            subject   => $subject,
            name      => $name,
            user_link => $user_link,
            time      => "$year/$month/$day $hour:$minute",
        };
    }

    return @entries;
}
35
# Fetch a single diary page and extract its subject and body markup.
#
# Arguments:
#   $self - invocant; must provide _detail_regexp()
#   $link - URL of the diary entry to fetch
#   $mech - user agent object offering get(), success() and content()
#           (presumably a WWW::Mechanize instance -- confirm against caller)
#
# Returns a hash reference with the keys subject and description, or an empty
# hash reference when the request does not succeed or the page does not match.
#
# NOTE(review): unlike get_list(), the captured strings are returned without
# passing through decode_entities -- confirm that this is intended.
sub get_detail {
    my ($self, $link, $mech) = @_;

    $mech->get($link);
    return +{} unless $mech->success;

    my $page    = decode('euc-jp', $mech->content);
    my $pattern = decode('utf-8', $self->_detail_regexp);

    # Captures 1-5 (the date and time fields) are matched but not used here.
    if ($page =~ m/$pattern/is) {
        return +{ subject => $6, description => $7 };
    }

    return +{};
}
51
# Pattern source (a plain string, not a compiled regex) describing one row of
# the diary list page.  Line breaks are part of the pattern and the string ends
# with a newline.  Capture groups:
#   1-5  year, month, day, hour, minute
#   6    entry link (must contain "/blog/show")
#   7    text of the entry link
#   8    link that follows in parentheses
#   9    text of that second link
sub _list_regexp {
    return join(
        "\n",
        '<tr>',
        '<th>(\d\d\d\d)\.(\d\d)\.(\d\d) (\d\d):(\d\d)</th>',
        '<td><span class="frepablog">',
        '<a href="([^"]+?/blog/show[^"]+?)">(.*?)</a>\(<a href="([^"]+?)"(?: rel="popup")?>([^"]+?)</a>\)</span>.*?',
        '',    # empty last element yields the trailing newline
    );
}
60
# Pattern source (a plain string, not a compiled regex) describing the entry
# container of a single diary page.  Line breaks are part of the pattern and
# the string ends with a newline.  Capture groups:
#   1-5  year, month, day, hour, minute
#   6    contents of the <h3> element
#   7    contents of the "blogbox" div, up to and including its last </p>
sub _detail_regexp {
    return join(
        "\n",
        '<div class="blogcontainer">',
        '<div class="date"><h4>(\d\d\d\d)\.(\d\d)\.(\d\d)<br />(\d\d):(\d\d)</h4></div>',
        '<div class="blogbody">',
        '\s*<h3>(.*?)</h3>',
        '\s*<div class="blogbox">(.*?</p>)</div>',
        '\s*</div>',
        '<div class="brclear"></div>',
        '',    # empty last element yields the trailing newline
    );
}
72
73 1;
Note: See TracBrowser for help on using the browser.