Changeset 458
- Timestamp:
- 04/01/06 18:30:48
- trunk/plagger (modified) (previous)
- trunk/plagger/lib/Plagger/Plugin/CustomFeed/Frepa (added)
- trunk/plagger/lib/Plagger/Plugin/CustomFeed/Frepa.pm (modified) (11 diffs)
- trunk/plagger/lib/Plagger/Plugin/CustomFeed/Frepa/FriendDiary.pm (added)
- trunk/plagger/lib/Plagger/Plugin/CustomFeed/Frepa/FriendStatus.pm (added)
- trunk/plagger/lib/Plagger/Plugin/CustomFeed/Frepa/RecentComment.pm (added)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/plagger/lib/Plagger/Plugin/CustomFeed/Frepa.pm
r447 r458 6 6 use Encode; 7 7 use Time::HiRes; 8 use UNIVERSAL::require; 9 use WWW::Mechanize; 8 10 9 11 sub plugin_id { … … 13 15 14 16 sub register { 15 my ($self, $context) = @_;17 my ($self, $context) = @_; 16 18 $context->register_hook( 17 19 $self, … … 21 23 22 24 sub load { 23 my($self, $context) = @_; 24 $self->{frepa} = Plagger::Plugin::CustomFeed::Frepa::Mechanize->new($self); 25 my ($self, $context) = @_; 26 27 $self->{mech} = WWW::Mechanize->new(cookie_jar => $self->cache->cookie_jar); # enbug??? 28 $self->{mech}->agent_alias( "Windows IE 6" ); 25 29 26 30 my $feed = Plagger::Feed->new; … … 30 34 31 35 sub aggregate { 32 my ($self, $context, $args) = @_;33 34 unless ($self-> {frepa}->login) {35 $context->log(error => "Login to frepa failed.");36 my ($self, $context, $args) = @_; 37 38 unless ($self->login(livedoor_id => $self->conf->{livedoor_id}, password => $self->conf->{password})) { 39 $context->log(error => "Login to frepa failed."); 36 40 return; 37 41 } 38 42 39 43 $context->log(info => 'Login to frepa succeeded.'); 44 45 my $feed_type = $self->conf->{feed_type} or $context->error("feed_type is missing"); 46 for my $plugin (@$feed_type) { 47 my $plugin = (ref $self || $self) . "::$plugin"; 48 $plugin->use or $context->error($@); 49 $self->aggregate_by_plugin($context, $plugin, $args); 50 } 51 } 52 53 sub aggregate_by_plugin { 54 my ($self, $context, $plugin, $args) = @_; 55 40 56 41 57 my $feed = Plagger::Feed->new; 42 58 $feed->type('frepa'); 43 $feed->title( 'フレパ最新日記');44 $feed->link( 'http://frepa.jp/home/friend_blog/');59 $feed->title($plugin->title); 60 $feed->link($plugin->start_url); 45 61 46 62 my $format = DateTime::Format::Strptime->new(pattern => '%Y/%m/%d %H:%M'); 47 63 48 my @msgs = $ self->{frepa}->get_new_friend_diary;64 my @msgs = $plugin->get_list($self->{mech}); 49 65 my $items = $self->conf->{fetch_items} || 20; 50 66 … … 55 71 56 72 my $entry = Plagger::Entry->new; 57 $entry->title( decode('euc-jp', $msg->{subject}));73 $entry->title($msg->{subject}); 58 74 $entry->link($msg->{link}); 59 $entry->author( decode('euc-jp', $msg->{name}));75 $entry->author($msg->{name}); 60 76 $entry->date( Plagger::Date->parse($format, $msg->{time}) ); 61 77 62 if ($self->conf->{fetch_body} && !$blocked ) {78 if ($self->conf->{fetch_body} && !$blocked and $plugin->can('get_detail')) { 63 79 $context->log(info => "Fetch body from $msg->{link}"); 64 80 my $item = $self->cache->get_callback( … … 66 82 sub { 67 83 Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 ); 68 $ self->{frepa}->get_view_diary($msg->{link});84 $plugin->get_detail($msg->{link}, $self->{mech}); 69 85 }, 70 86 "1 hour", 71 87 ); 72 88 if ($item) { 73 my $body = decode('euc-jp', $item->{description});89 my $body = $item->{description}; 74 90 $body =~ s!<br>!<br />!g; 75 91 $entry->body($body); 76 $entry->title( decode('euc-jp', $item->{subject})); # replace with full title92 $entry->title($item->{subject}); # replace with full title 77 93 } else { 78 94 $context->log(warn => "Fetch body failed. You might be blocked?"); … … 85 101 if ($item && $item->{image} !~ /no_photo/) { 86 102 $entry->icon({ 87 title => decode('euc-jp', $item->{name}),103 title => $item->{name}, 88 104 url => $item->{image}, 89 105 link => $msg->{user_link}, … … 104 120 $self->cache->get_callback( 105 121 "icon-$url", 106 sub { $self-> {frepa}->get_top($url) },122 sub { $self->get_top($url) }, 107 123 '1 day', 108 124 ); 109 125 } 110 126 111 package Plagger::Plugin::CustomFeed::Frepa::Mechanize;112 113 use strict;114 use WWW::Mechanize;115 116 sub new {117 my $class = shift;118 my $plugin = shift;119 120 my $mech = WWW::Mechanize->new(cookie_jar => $plugin->cache->cookie_jar);121 $mech->agent_alias( "Windows IE 6" );122 123 bless {124 mecha => $mech,125 livedoor_id => $plugin->conf->{livedoor_id},126 password => $plugin->conf->{password},127 start_url => 'http://www.frepa.livedoor.com/',128 }, $class;129 }130 131 127 sub login { 132 128 my $self = shift; 133 134 my $res = $self->{mecha}->get($self->{start_url}); 135 return 0 unless $self->{mecha}->success; 136 137 if ($self->{mecha}->content =~ /loginside/) { 129 my %args = @_; 130 131 my $start_url = 'http://www.frepa.livedoor.com/'; 132 my $res = $self->{mech}->get($start_url); 133 return 0 unless $self->{mech}->success; 134 135 if ($self->{mech}->content =~ /loginside/) { 138 136 Plagger->context->log(debug => "cookie not found. logging in"); 139 $self->{mech a}->submit_form(137 $self->{mech}->submit_form( 140 138 fields => { 141 livedoor_id => $ self->{livedoor_id},142 password => $ self->{password},139 livedoor_id => $args{livedoor_id}, 140 password => $args{password}, 143 141 auto_login => 'on', 144 142 }, 145 143 ); 146 $self->{mech a}->submit;147 return 0 unless $self->{mech a}->success;148 return 0 if $self->{mech a}->content =~ /loginside/;144 $self->{mech}->submit; 145 return 0 unless $self->{mech}->success; 146 return 0 if $self->{mech}->content =~ /loginside/; 149 147 } 150 148 151 149 return 1; 152 }153 154 sub get_new_friend_diary {155 my $self = shift;156 157 my @msgs = ();158 my $res = $self->{mecha}->follow_link(url_regex => qr{/friend_blog/});159 return @msgs unless $self->{mecha}->success;160 161 my $html = $self->{mecha}->content;162 my $reg = $self->list_regexp();163 while ($html =~ m|$reg|igs) {164 my $time = "$1/$2/$3 $4:$5";165 my ($link, $subject, $user_link, $name) =166 ($self->unescape($6), $self->unescape($7), $self->unescape($8), $self->unescape($9));167 168 push(@msgs, +{169 link => $link,170 subject => $subject,171 name => $name,172 user_link => $user_link,173 time => $time,174 });175 }176 return @msgs;177 }178 179 sub get_view_diary {180 my $self = shift;181 my $link = shift;182 183 my $item = {};184 my $res = $self->{mecha}->get($link);185 return $item unless $self->{mecha}->success;186 187 my $html = $self->{mecha}->content;188 my $reg = $self->detail_regexp();189 if ($html =~ m|$reg|is) {190 $item = +{ subject => $6, description => $7};191 }192 193 return $item;194 150 } 195 151 … … 199 155 200 156 my $item = {}; 201 my $res = $self->{mech a}->get($link);202 return $item unless $self->{mech a}->success;203 204 my $html = $self->{mecha}->content;205 206 chomp( my $re = $self->top_re);157 my $res = $self->{mech}->get($link); 158 return $item unless $self->{mech}->success; 159 160 my $html = decode('euc-jp', $self->{mech}->content); 161 162 chomp( my $re = decode('utf-8', $self->top_re) ); 207 163 if ($html =~ /$re/s) { 208 164 $item->{image} = $1; … … 211 167 212 168 return $item; 213 }214 215 sub unescape {216 my $self = shift;217 my $str = shift;218 my %unescaped = ('amp' => '&', 'quot' => '"', 'gt' => '>', 'lt' => '<', 'nbsp' => ' ', 'apos' => "'", 'copy' => '(c)');219 my $re_target = join('|', keys(%unescaped));220 $str =~ s/&($re_target|#x([0-9a-z]+));/defined($unescaped{$1}) ? $unescaped{$1} : defined($2) ? chr(hex($2)) : "&$1;"/ige;221 return $str;222 }223 224 sub list_regexp {225 return <<'RE';226 <tr class="bgwhite">227 <td width="1%" style="padding:5px 30px;" nowrap><small>(\d\d\d\d)\.(\d\d)\.(\d\d) (\d\d):(\d\d)</small></td>228 <td width="99%"><img src="/img/icon/diary_fp.gif" border="0" alt=".*?" title=".*?">229 <small>230 231 232 233 <a href="([^"]+?/blog/show[^"]+?)">(.*?)</a>.*?234 <a href="([^"]+?)"(?: rel="popup")?>([^"]+?)</a>.*?235 RE236 }237 238 sub detail_regexp {239 return <<'RE';240 <td width="105" valign="top" rowspan="3" class="bg2 blogline1" nowrap><small>(\d\d\d\d)\.(\d\d)\.(\d\d)<br>(\d\d):(\d\d)</small></td>241 <td width="445" class="bg2 blogline3 blogcell"><small><strong>(.*?)</strong></small></td>242 </tr>243 <tr>244 <td class="bgwhite blogline2" style="line-height:115%;border-bottom:1px solid #fff;"><small>(.*?)</small></td>245 </tr>246 247 </table>248 RE249 ;250 169 } 251 170 … … 275 194 fetch_body: 1 276 195 show_icon: 1 196 feed_type: 197 - FriendStatus 198 - RecentComment 277 199 278 200 =head1 DESCRIPTION
