root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/MixiScraper.pm

Revision 2062 (checked in by charsbar, 1 year ago)

::CustomFeed?::MixiScraper?: add ' 00:00' before parsing datetime if necessary (spotted by id:mad-capone)

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
Line 
1 package Plagger::Plugin::CustomFeed::MixiScraper;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use DateTime::Format::Strptime;
6 use WWW::Mixi::Scraper;
7 use Time::HiRes;
8
# Feed type table, keyed by the names accepted in the feed_type option.
# Each entry holds:
#   title      - title given to the generated feed
#   get_list   - WWW::Mixi::Scraper method called to obtain the item list
#   get_detail - present only for types whose body can be fetched
#   icon       - present only for types whose links carry the user id;
#                names the query parameter that holds it
our $MAP = {
    # Types with both a detail page and an icon.
    FriendDiary => {
        title => 'マイミク最新日記',
        get_list => 'new_friend_diary',
        get_detail => 'view_diary',
        icon => 'owner_id',
    },
    MyDiary => {
        title => 'ミクシィ日記',
        get_list => 'list_diary',
        get_detail => 'view_diary',
        icon => 'owner_id',
    },

    # Types with a detail page only (the icon cannot be obtained).
    Message => {
        title => 'ミクシィメッセージ受信箱',
        get_list => 'list_message',
        get_detail => 'view_message',
    },
    Calendar => {
        title => 'ミクシィカレンダー',
        get_list => 'show_calendar',
        get_detail => 'view_event',
    },
    BBS => {
        title => 'コミュニティ最新書き込み',
        get_list => 'new_bbs',
        get_detail => 'view_bbs',
    },

    # Type with an icon only.
    Log => {
        title => 'ミクシィ足跡',
        get_list => 'show_log',
        icon => 'id',
    },

    # Neither the icon nor the body can be obtained.
    RecentComment => {
        title => 'ミクシィ最近のコメント一覧',
        get_list => 'list_comment',
    },
};
49
# Identifier of this plugin instance: the plugin's class id and the
# configured account email, joined by a hyphen.
sub plugin_id {
    my ($plugin) = @_;
    return join '-', $plugin->class_id, $plugin->conf->{email};
}
54
# Register this plugin's hooks with the Plagger context: load() is run on
# the 'subscription.load' hook.
sub register {
    my ($self, $context) = @_;

    my %hooks = ('subscription.load' => \&load);
    return $context->register_hook($self, %hooks);
}
62
# 'subscription.load' hook: create the WWW::Mixi::Scraper client, keep it in
# $self->{mixi}, and subscribe a feed whose aggregator calls aggregate().
sub load {
    my ($self, $context) = @_;

    my $cookie_jar = $self->cookie_jar;
    unless (ref($cookie_jar) eq 'HTTP::Cookies') {
        # A foreign cookie jar means email/password need not be configured,
        # so fill in fake values when they are missing.
        $self->conf->{email}    ||= 'plagger@localhost';
        $self->conf->{password} ||= 'pl4gg5r';
    }

    my %scraper_args = (
        email      => $self->conf->{email},
        password   => $self->conf->{password},
        cookie_jar => $cookie_jar,
        mode       => $self->conf->{mode},
    );
    $self->{mixi} = WWW::Mixi::Scraper->new(%scraper_args);

    my $aggregator = sub { $self->aggregate(@_) };
    my $feed       = Plagger::Feed->new;
    $feed->aggregator($aggregator);
    $context->subscription->add($feed);
}
84
# Aggregator callback: build the feed(s) for every configured feed type.
#
# Arguments:
#   $context - Plagger context
#   $args    - aggregator arguments, passed through unchanged
#
# The feed_type option may be an array ref of type names or a single type
# name given as a plain scalar; it defaults to FriendDiary.  An unknown type
# is reported through $context->error.  The BBS type is handed to
# aggregate_bbs_feed() when split_bbs_feed is set; everything else goes to
# aggregate_feed().
sub aggregate {
    my ($self, $context, $args) = @_;

    # Dereferencing a plain string as an array is fatal under strict refs,
    # so wrap a scalar value into a one-element list.
    my $feed_type = $self->conf->{feed_type} || ['FriendDiary'];
    my @types = ref($feed_type) eq 'ARRAY' ? @$feed_type : ($feed_type);

    for my $type (@types) {
        $context->error("$type not found") unless $MAP->{$type};

        my $method = ($type eq 'BBS' && $self->conf->{split_bbs_feed})
                   ? 'aggregate_bbs_feed'
                   : 'aggregate_feed';
        $self->$method($context, $type, $args);
    }
}
97
# Build one feed for the given feed type and add it to the context's update.
#
# The item list comes from the scraper method named by get_list in $MAP.
# At most fetch_items entries (20 when unset) are added; FriendDiary items
# whose link has a 'url' query parameter (external blogs) are skipped and do
# not count toward that limit.
sub aggregate_feed {
    my ($self, $context, $type, $args) = @_;

    my $feed = Plagger::Feed->new;
    $feed->type('mixi');
    $feed->title($MAP->{$type}{title});

    my $list_method = $MAP->{$type}{get_list};
    my @msgs        = $self->{mixi}->$list_method->parse;
    my $limit       = $self->conf->{fetch_items} || 20;
    $self->log(info => 'fetch ' . scalar(@msgs) . ' entries');

    $feed->link($self->{mixi}->{mech}->uri);

    # Reset the "blocked" flag before add_entry() starts fetching pages.
    $self->{blocked} = 0;

    my $added = 0;
    for my $msg (@msgs) {
        # external blog
        if ($type eq 'FriendDiary' && $msg->{link}->query_param('url')) {
            next;
        }
        last if $added >= $limit;
        $added++;

        $self->add_entry($context, $type, $feed, $msg);
    }

    $context->update->add($feed);
}
123
# Build a separate feed for every item of the list and add each one to the
# context's update.
#
# The feed title is the item subject without a trailing "(N)"; the feed link
# is the item link without its comment_count parameter.  At most fetch_items
# items (20 when unset) are processed.
sub aggregate_bbs_feed {
    my ($self, $context, $type, $args) = @_;

    my $list_method = $MAP->{$type}{get_list};
    my @msgs        = $self->{mixi}->$list_method->parse;
    my $limit       = $self->conf->{fetch_items} || 20;
    $self->log(info => 'fetch ' . scalar(@msgs) . ' entries');

    # Reset the "blocked" flag before add_entry() starts fetching pages.
    $self->{blocked} = 0;

    my $processed = 0;
    for my $msg (@msgs) {
        # external blog
        if ($type eq 'FriendDiary' && $msg->{link}->query_param('url')) {
            next;
        }
        last if $processed >= $limit;
        $processed++;

        my $subject = $msg->{subject};
        $subject =~ s/\(\d+\)$//;

        my $link = $msg->{link};
        $link =~ s/&comment_count=\d*//;

        my $feed = Plagger::Feed->new;
        $feed->type('mixi');
        $feed->title($subject);
        $feed->description($MAP->{$type}{title} . ': ' . $msg->{name});
        $feed->link($link);

        $self->add_entry($context, $type, $feed, $msg);

        $context->update->add($feed);
    }
}
151
# Parser for the 'YYYY-MM-DD HH:MM' timestamps handed to parse_date().
my $format = DateTime::Format::Strptime->new(pattern => '%Y-%m-%d %H:%M');

# Parse a timestamp string with Plagger::Date.
# Returns nothing (an empty list in list context) when $datetime is undefined.
sub parse_date {
    my ($self, $datetime) = @_;

    return if !defined $datetime;

    # Calendar doesn't have the %H:%M part (spotted by id:mad-capone), so
    # append midnight when the string does not end in a time.
    if ($datetime !~ /\d+:\d+$/) {
        $datetime = $datetime . ' 00:00';
    }

    return Plagger::Date->parse($format, $datetime);
}
163
# Convert one scraped list item into a Plagger::Entry and add it to $feed.
#
# Arguments:
#   $context - Plagger context, used for logging
#   $type    - feed type name, a key of $MAP
#   $feed    - feed that receives the entry and any comment entries
#   $msg     - hash ref of the list item; subject, link, name and time are
#              read, and subject is overwritten for the Log type
#
# Optional work, controlled by the plugin configuration:
#   show_icon     - fetch the user's outline and attach the icon
#   fetch_body    - fetch the detail page for the body, images and date
#   fetch_comment - also add the comments of the detail page as entries
# Fetched pages are cached for 12 hours.  Once a body fetch fails,
# $self->{blocked} is set and no further icons or bodies are fetched.
#
# Nothing is added to the feed when a body would be fetched for a
# view_enquete link.
sub add_entry {
    my ($self, $context, $type, $feed, $msg) = @_;

    if ($type eq 'Log') {
        $msg->{subject} = $msg->{time} . ' ' . $msg->{name};
    }

    my $entry = Plagger::Entry->new;
    $entry->title($msg->{subject});
    $entry->link($msg->{link});
    $entry->author($msg->{name});
    $entry->date( $self->parse_date($msg->{time}) );
    $entry->date->set_time_zone('Asia/Tokyo') if $entry->date;

    if ($self->conf->{show_icon} && !$self->{blocked} && defined $MAP->{$type}->{icon}) {
        my $owner_id = $msg->{link}->query_param($MAP->{$type}->{icon});

        if (defined $owner_id) {
            $context->log(info => "Fetch icon of id=$owner_id");

            my $item = $self->cache->get_callback(
                "outline-$owner_id",
                sub {
                    Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 );
                    my $outline = $self->{mixi}->show_friend->parse(id => $owner_id)->{outline};
                    return $outline;
                },
                '12 hours',
            );

            # Only attach an icon when the outline actually has an image.
            if ($item && defined $item->{image} && $item->{image} !~ /no_photo/) {
                # prefer smaller image
                my $image = $item->{image};
                $image =~ s/\.jpg$/s.jpg/;
                $entry->icon({
                    title => $item->{name},
                    url   => $image,
                    link  => $item->{link},
                });
            }
        }
        else {
            # Without an id there is nothing to look up; asking the scraper
            # for an undefined id would also share one cache key across all
            # such entries.
            $context->log(debug => "No user id in $msg->{link}; skip icon");
        }
    }

    my @comments;
    if ($self->conf->{fetch_body} && !$self->{blocked} && $msg->{link} =~ /view_/ && defined $MAP->{$type}->{get_detail}) {
        # view_enquete is not implemented and probably
        # won't be implemented as it seems redirected to
        # reply_enquete
        return if $msg->{link} =~ /view_enquete/;

        $context->log(info => "Fetch body from $msg->{link}");
        my $item = $self->cache->get_callback(
            "item-".$msg->{link},
            sub {
                Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 );
                # Assign to a scalar so parse() is called in scalar context.
                my $detail = $self->{mixi}->parse($msg->{link});
                return $detail;
            },
            '12 hours',
        );

        if ($item) {
            my $body = $item->{description};
            $body =~ s!(\r\n?|\n)!<br />!g;

            # Append every image as a linked thumbnail and as an inline enclosure.
            for my $image (@{ $item->{images} || [] }) {
                $body .= qq(<div><a href="$image->{link}"><img src="$image->{thumb_link}" style="border:0" /></a></div>);
                my $enclosure = Plagger::Enclosure->new;
                $enclosure->url($image->{thumb_link});
                $enclosure->auto_set_type;
                $enclosure->is_inline(1);
                $entry->add_enclosure($enclosure);
            }
            $entry->body($body);

            # The detail page's time replaces the one from the list; when it
            # is undefined parse_date() returns an empty list and the date
            # set above is left untouched.
            $entry->date( $self->parse_date($item->{time}) );
            $entry->date->set_time_zone('Asia/Tokyo') if $entry->date;

            if ($self->conf->{fetch_comment}) {
                for my $comment (@{ $item->{comments} || [] }) {
                    my $c = Plagger::Entry->new;
                    $c->title($entry->title . ': '. $comment->{subject});
                    $c->body($comment->{description});
                    $c->link($comment->{link});
                    $c->author($comment->{name});
                    $c->date( $self->parse_date($comment->{time}) );
                    $c->date->set_time_zone('Asia/Tokyo') if $c->date;
                    push @comments, $c;
                }
            }
        }
        else {
            $context->log(warn => "Fetch body failed. You might be blocked?");
            $self->{blocked}++;
        }
    }

    # The entry itself comes first, followed by its comments.
    $feed->add_entry($entry);
    for my $comment (@comments) {
        $feed->add_entry($comment);
    }
}
257
258 1;
259
260 __END__
261
262 =head1 NAME
263
264 Plagger::Plugin::CustomFeed::MixiScraper -  Custom feed for mixi.jp
265
266 =head1 SYNOPSIS
267
268     - module: CustomFeed::MixiScraper
269       config:
270         email: email@example.com
271         password: password
272         fetch_body: 1
273         fetch_comment: 0
274         show_icon: 1
275         feed_type:
276           - RecentComment
277           - FriendDiary
278           - Message
279
280 =head1 DESCRIPTION
281
282 This plugin fetches your friends' diary updates and other items (see
283 C<feed_type>) from mixi (L<http://mixi.jp/>) and creates custom feeds.
284
285 =head1 CONFIGURATION
286
287 =over 4
288
289 =item email, password
290
291 Credentials you need to log in to mixi.jp.
292
293 Note that you don't have to supply email and password if you set
294 global cookie_jar in your configuration file and the cookie_jar
295 contains a valid login session there, such as:
296
297   global:
298     user_agent:
299       cookies: /path/to/cookies.txt
300
301 See L<Plagger::Cookies> for details.
302
303 =item fetch_body
304
305 With this option set, this plugin fetches entry body HTML, not just a
306 link to the entry. Defaults to 0.
307
308 =item fetch_comment
309
310 With this option set, this plugin fetches entry's comments as well
311 (meaningless when C<fetch_body> is not set). Defaults to 0.
312
313 =item fetch_body_interval
314
315 Number of seconds to wait before each page fetch made for C<fetch_body>
316 or C<show_icon>, to avoid mixi.jp throttling. Defaults to 1.5.
317
318 =item show_icon
319
320 With this option set, this plugin fetches users' buddy icons from the
321 mixi.jp site, which makes the output HTML very user-friendly.
322
323 =item split_bbs_feed
324
325 With this option set, the BBS feed is split into one feed per item in the BBS list. Defaults to 0.
326
327 =item feed_type
328
329 With this option set, you can set the feed types.
330
331 Now supports: RecentComment, FriendDiary, Message, Log, MyDiary, Calendar, and BBS.
332
333 Default: FriendDiary.
334
335 =back
336
337 =head1 SCREENSHOT
338
339 L<http://blog.bulknews.net/mt/archives/plagger-mixi-icon.gif>
340
341 =head1 AUTHOR
342
343 Tatsuhiko Miyagawa, modified by Kenichi Ishigaki
344
345 =head1 SEE ALSO
346
347 L<Plagger>, L<WWW::Mixi::Scraper>
348
349 =cut
Note: See TracBrowser for help on using the browser.