root/trunk/plagger/lib/Plagger/Plugin/CustomFeed/MixiScraper.pm

Revision 2027 (checked in by charsbar, 5 months ago)

CustomFeed?::MixiScraper?: split_bbs_feed

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
Line 
1 package Plagger::Plugin::CustomFeed::MixiScraper;
2 use strict;
3 use base qw( Plagger::Plugin );
4
5 use DateTime::Format::Strptime;
6 use WWW::Mixi::Scraper;
7 use Time::HiRes;
8
# Table of supported feed types, keyed by the names accepted in the
# "feed_type" configuration option.  Per-type keys:
#   title      - title given to the generated feed
#   get_list   - name of the WWW::Mixi::Scraper method whose parse() result
#                supplies the list of entries
#   get_detail - when present, entries of this type may have their body
#                fetched (see fetch_body)
#   icon       - name of the query parameter in the entry link that holds
#                the user id used for the buddy-icon lookup (see show_icon)
our $MAP = {
    BBS => {
        title      => 'コミュニティ最新書き込み',
        get_list   => 'new_bbs',
        get_detail => 'view_bbs',
    },

    Calendar => {
        title      => 'ミクシィカレンダー',
        get_list   => 'show_calendar',
        get_detail => 'view_event',
    },

    FriendDiary => {
        title      => 'マイミク最新日記',
        get_list   => 'new_friend_diary',
        get_detail => 'view_diary',
        icon       => 'owner_id',
    },

    Log => {
        title    => 'ミクシィ足跡',
        get_list => 'show_log',
        icon     => 'id',
    },

    # No icon is available for messages.
    Message => {
        title      => 'ミクシィメッセージ受信箱',
        get_list   => 'list_message',
        get_detail => 'view_message',
    },

    MyDiary => {
        title      => 'ミクシィ日記',
        get_list   => 'list_diary',
        get_detail => 'view_diary',
        icon       => 'owner_id',
    },

    # Neither icon nor body is available for the recent-comment list.
    RecentComment => {
        title    => 'ミクシィ最近のコメント一覧',
        get_list => 'list_comment',
    },
};
49
# Returns the identifier of this plugin instance: the class id and the
# configured email address joined by a hyphen.
sub plugin_id {
    my ($plugin) = @_;

    my $class_id = $plugin->class_id;
    my $email    = $plugin->conf->{email};

    return "${class_id}-${email}";
}
54
# Registers this plugin's hooks with the Plagger context: load() is attached
# to the 'subscription.load' hook.
sub register {
    my ($plugin, $ctx) = @_;

    my @hooks = ( 'subscription.load' => \&load );

    $ctx->register_hook($plugin, @hooks);
}
62
# 'subscription.load' hook: creates the WWW::Mixi::Scraper client, stores it
# in $self->{mixi}, and adds a feed to the subscription whose aggregator
# calls back into aggregate().
sub load {
    my ($self, $context) = @_;

    my $jar  = $self->cookie_jar;
    my $conf = $self->conf;

    unless (ref($jar) eq 'HTTP::Cookies') {
        # A foreign cookie jar means email/password need not be configured,
        # so fill in placeholder credentials where none were given.
        my %placeholder = (
            email    => 'plagger@localhost',
            password => 'pl4gg5r',
        );
        $conf->{$_} ||= $placeholder{$_} for keys %placeholder;
    }

    $self->{mixi} = WWW::Mixi::Scraper->new(
        email      => $conf->{email},
        password   => $conf->{password},
        cookie_jar => $jar,
    );

    my $run_aggregate = sub { $self->aggregate(@_) };

    my $subscribed = Plagger::Feed->new;
    $subscribed->aggregator($run_aggregate);
    $context->subscription->add($subscribed);
}
83
# Aggregator callback: walks the configured feed types (just 'FriendDiary'
# when none are configured) and builds the feed(s) for each one.  A type
# missing from $MAP is reported through $context->error.  'BBS' is handed to
# aggregate_bbs_feed() when split_bbs_feed is set; everything else goes to
# aggregate_feed().
sub aggregate {
    my ($self, $context, $args) = @_;

    my $types = $self->conf->{feed_type} || ['FriendDiary'];

    for my $type (@$types) {
        $context->error("$type not found") if !$MAP->{$type};

        my $handler =
            ($type eq 'BBS' && $self->conf->{split_bbs_feed})
            ? 'aggregate_bbs_feed'
            : 'aggregate_feed';

        $self->$handler($context, $type, $args);
    }
}
96
# Builds one feed for $type: fetches the list through the scraper method
# named in $MAP, turns at most fetch_items (default 20) list items into
# entries via add_entry(), and adds the feed to the context's update.
# $args is accepted but not used here.
sub aggregate_feed {
    my ($self, $context, $type, $args) = @_;

    my $feed = Plagger::Feed->new;
    $feed->type('mixi');
    $feed->title($MAP->{$type}{title});

    my $list_method = $MAP->{$type}{get_list};
    my @listed      = $self->{mixi}->$list_method->parse;
    my $limit       = $self->conf->{fetch_items} || 20;
    $self->log(info => 'fetch ' . scalar(@listed) . ' entries');

    # The feed links to whatever page the scraper's mech ended up on.
    $feed->link($self->{mixi}->{mech}->uri);

    # Start every feed with a clean "blocked" flag for add_entry().
    $self->{blocked} = 0;

    my $taken = 0;
    for my $listed_item (@listed) {
        # Friend diaries whose link carries a 'url' parameter are external
        # blogs; skip them without counting them against the limit.
        next if $type eq 'FriendDiary' && $listed_item->{link}->query_param('url');

        last if $taken >= $limit;
        $taken++;

        $self->add_entry($context, $type, $feed, $listed_item);
    }

    $context->update->add($feed);
}
122
# Variant of aggregate_feed() used when split_bbs_feed is set: every list
# item (at most fetch_items, default 20) becomes a feed of its own, titled
# after the item's subject, and each such feed is added to the context's
# update.  $args is accepted but not used here.
sub aggregate_bbs_feed {
    my ($self, $context, $type, $args) = @_;

    my $list_method = $MAP->{$type}{get_list};
    my @topics      = $self->{mixi}->$list_method->parse;
    my $limit       = $self->conf->{fetch_items} || 20;
    $self->log(info => 'fetch ' . scalar(@topics) . ' entries');

    # Start with a clean "blocked" flag for add_entry().
    $self->{blocked} = 0;

    my $taken = 0;
    for my $topic (@topics) {
        # external blog
        next if $type eq 'FriendDiary' && $topic->{link}->query_param('url');

        last if $taken >= $limit;
        $taken++;

        # Feed title: the subject without a trailing "(N)" counter.
        my $subject = $topic->{subject};
        $subject =~ s/\(\d+\)$//;

        # Feed link: the topic link without its comment_count parameter.
        my $link = $topic->{link};
        $link =~ s/&comment_count=\d*//;

        my $topic_feed = Plagger::Feed->new;
        $topic_feed->type('mixi');
        $topic_feed->title($subject);
        $topic_feed->description($MAP->{$type}{title} . ': ' . $topic->{name});
        $topic_feed->link($link);

        $self->add_entry($context, $type, $topic_feed, $topic);

        $context->update->add($topic_feed);
    }
}
150
# Parser for the "YYYY-MM-DD HH:MM" timestamps found in scraped items.
my $format = DateTime::Format::Strptime->new(pattern => '%Y-%m-%d %H:%M');

# Builds a Plagger::Entry from one scraped list item and appends it to $feed,
# followed by one entry per comment when comments were fetched.
#
# Parameters:
#   $context - Plagger context, used for logging
#   $type    - feed type name, a key of $MAP
#   $feed    - feed the new entries are added to
#   $msg     - scraped list item; its subject, link, name and time are read
#
# Configuration read: show_icon, fetch_body, fetch_comment,
# fetch_body_interval (seconds slept before each remote fetch, default 1.5).
#
# Dates are parsed with $format and placed in the Asia/Tokyo time zone.
#
# $self->{blocked} is incremented when a body fetch yields nothing; while it
# is non-zero no further icon or body fetches are attempted.
#
# Returns early, adding nothing to $feed, when the body would be fetched and
# the link points at view_enquete.
sub add_entry {
    my ($self, $context, $type, $feed, $msg) = @_;

    my $want_body = $self->conf->{fetch_body}
        && !$self->{blocked}
        && $msg->{link} =~ /view_/
        && defined $MAP->{$type}->{get_detail};

    # view_enquete is not implemented and probably won't be implemented as it
    # seems redirected to reply_enquete.  Bail out before any icon lookup so
    # no remote fetch is spent on an entry that is dropped anyway.
    return if $want_body && $msg->{link} =~ /view_enquete/;

    my $entry = Plagger::Entry->new;
    $entry->title($msg->{subject});
    $entry->link($msg->{link});
    $entry->author($msg->{name});
    $entry->date( Plagger::Date->parse($format, $msg->{time}) );
    $entry->date->set_time_zone('Asia/Tokyo') if $entry->date;

    if ($self->conf->{show_icon} && !$self->{blocked} && defined $MAP->{$type}->{icon}) {
        my $owner_id = $msg->{link}->query_param($MAP->{$type}->{icon});

        # Without a user id in the link there is no profile to look up.
        if (defined $owner_id) {
            $context->log(info => "Fetch icon of id=$owner_id");

            my $outline = $self->cache->get_callback(
                "outline-$owner_id",
                sub {
                    Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 );
                    my $fetched = $self->{mixi}->show_friend->parse(id => $owner_id)->{outline};
                    $fetched;
                },
                '12 hours',
            );

            # Attach an icon only when the profile has a real image URL.
            if ($outline && defined $outline->{image} && $outline->{image} !~ /no_photo/) {
                # prefer smaller image
                my $image = $outline->{image};
                $image =~ s/\.jpg$/s.jpg/;
                $entry->icon({
                    title => $outline->{name},
                    url   => $image,
                    link  => $outline->{link},
                });
            }
        }
    }

    my @comments;
    if ($want_body) {
        $context->log(info => "Fetch body from $msg->{link}");
        my $item = $self->cache->get_callback(
            "item-".$msg->{link},
            sub {
                Time::HiRes::sleep( $self->conf->{fetch_body_interval} || 1.5 );
                my $fetched = $self->{mixi}->parse($msg->{link});
                $fetched;
            },
            '12 hours',
        );
        if ($item) {
            my $body = $item->{description};
            $body =~ s!(\r\n?|\n)!<br />!g;
            for my $image (@{ $item->{images} || [] }) {
                $body .= qq(<div><a href="$image->{link}"><img src="$image->{thumb_link}" style="border:0" /></a></div>);
                my $enclosure = Plagger::Enclosure->new;
                $enclosure->url($image->{thumb_link});
                $enclosure->auto_set_type;
                $enclosure->is_inline(1);
                $entry->add_enclosure($enclosure);
            }
            $entry->body($body);

            # Prefer the detail page's timestamp, but keep the date taken
            # from the list when the detail page has no parsable time.
            my $detail_date = Plagger::Date->parse($format, $item->{time});
            if ($detail_date) {
                $detail_date->set_time_zone('Asia/Tokyo');
                $entry->date($detail_date);
            }

            if ($self->conf->{fetch_comment}) {
                for my $comment (@{ $item->{comments} || [] }) {
                    my $c = Plagger::Entry->new;
                    $c->title($entry->title . ': '. $comment->{subject});
                    $c->body($comment->{description});
                    $c->link($comment->{link});
                    $c->author($comment->{name});
                    $c->date( Plagger::Date->parse($format, $comment->{time}) );
                    $c->date->set_time_zone('Asia/Tokyo') if $c->date;
                    push @comments, $c;
                }
            }
        } else {
            $context->log(warn => "Fetch body failed. You might be blocked?");
            $self->{blocked}++;
        }
    }

    # The entry itself goes first, then its comments in scraped order.
    $feed->add_entry($entry);
    for my $comment ( @comments ) {
        $feed->add_entry($comment);
    }
}
242
243 1;
244
245 __END__
246
247 =head1 NAME
248
249 Plagger::Plugin::CustomFeed::MixiScraper -  Custom feed for mixi.jp
250
251 =head1 SYNOPSIS
252
253     - module: CustomFeed::MixiScraper
254       config:
255         email: email@example.com
256         password: password
257         fetch_body: 1
258         fetch_comment: 0
259         show_icon: 1
260         feed_type:
261           - RecentComment
262           - FriendDiary
263           - Message
264
265 =head1 DESCRIPTION
266
This plugin fetches your friends' diary updates (or, depending on
C<feed_type>, other kinds of updates) from mixi (L<http://mixi.jp/>)
and creates a custom feed.
269
270 =head1 CONFIGURATION
271
272 =over 4
273
274 =item email, password
275
276 Credential you need to login to mixi.jp.
277
278 Note that you don't have to supply email and password if you set
279 global cookie_jar in your configuration file and the cookie_jar
280 contains a valid login session there, such as:
281
282   global:
283     user_agent:
284       cookies: /path/to/cookies.txt
285
286 See L<Plagger::Cookies> for details.
287
288 =item fetch_body
289
290 With this option set, this plugin fetches entry body HTML, not just a
291 link to the entry. Defaults to 0.
292
293 =item fetch_comment
294
295 With this option set, this plugin fetches entry's comments as well
296 (meaningless when C<fetch_body> is not set). Defaults to 0.
297
298 =item fetch_body_interval
299
Number of seconds to wait before each entry body (or buddy icon) page
is fetched, to avoid being throttled by mixi.jp. Defaults to 1.5.
302
=item show_icon

With this option set, this plugin fetches each user's buddy icon from
the mixi.jp site, which makes the output HTML very user-friendly.
Defaults to 0.
307
308 =item split_bbs_feed
309
With this option set, the BBS feed is split up so that each listed
topic becomes a feed of its own. Defaults to 0.
311
312 =item feed_type
313
314 With this option set, you can set the feed types.
315
Now supports: RecentComment, FriendDiary, Message, Log, MyDiary, Calendar, and BBS.
317
318 Default: FriendDiary.
319
320 =back
321
322 =head1 SCREENSHOT
323
324 L<http://blog.bulknews.net/mt/archives/plagger-mixi-icon.gif>
325
326 =head1 AUTHOR
327
328 Tatsuhiko Miyagawa, modified by Kenichi Ishigaki
329
330 =head1 SEE ALSO
331
332 L<Plagger>, L<WWW::Mixi::Scraper>
333
334 =cut
Note: See TracBrowser for help on using the browser.