root/trunk/plagger/assets/plugins/CustomFeed-Script/wowow-liga-espanola.pl

Revision 1786 (checked in by miyagawa, 2 years ago)

OMG wowow-liga site looks like maintained manually

  • Property svn:executable set to *
Line 
1 #!/usr/bin/perl -w
2 use strict;
3 use utf8;
4 use DateTime;
5 use DateTime::Format::W3CDTF;
6 use Encode;
7 use LWP::Simple ();
8 use HTML::TreeBuilder::XPath;
9 use URI;
10 use YAML;
11
# Scrape the WOWOW Liga Espanola top page and print the broadcast schedule
# to STDOUT as a YAML document with title, link and a list of entries.
# Each match contributes two entries (first airing and repeat), each with
# its own date and channel tag.
my $url = "http://www.wowow.co.jp/liga/contents/top.html";

# LWP::Simple::get returns undef on any failure; stop here instead of
# passing undef on to decode() and the HTML parser.
my $raw = LWP::Simple::get($url);
defined $raw or die "Failed to fetch $url\n";

my $html = decode('shift_jis', $raw);
my $tree = HTML::TreeBuilder::XPath->new;
$tree->parse($html);
$tree->eof;

my $feed = {
    title => 'WOWOW リーガ・エスパニョーラ番組表',
    link  => "http://www.wowow.co.jp/liga/",
};

# The page has no semantic markup, so the nodes are located by the layout
# attributes (table widths, image width, CSS classes) visible in the HTML.
my @teams = $tree->findnodes(q(//table[@width=573]/tr/td/img[@width=90]));
my @dates = $tree->findnodes(q(//table[@width=368]/tr/td[@class="date"]));
my @links = $tree->findnodes(q(//p[@class="cardview"]/a));

warn "No team logos found at $url; the page layout may have changed\n"
    unless @teams;

# Team logos come in pairs (one pair per match); each match consumes one
# link and two date cells.
while (@teams) {
    my ($t1, $t2) = splice(@teams, 0, 2);
    defined $t2
        or die "Odd number of team logos found at $url";

    my $link_node = shift @links;
    defined $link_node
        or die "Fewer match links than matches at $url";

    # Title and link are the same for both airings of a match.
    my $title = $t1->attr('alt') . ' vs ' . $t2->attr('alt');
    my $link  = URI->new_abs($link_node->attr('href'), $url)->as_string;

    # onair, repeat
    for my $airing (1 .. 2) {
        my $date_cell = shift @dates;
        defined $date_cell
            or die "Fewer date cells than expected (two per match) at $url";

        my ($date, $channel) = munge_datetime($date_cell);

        push @{ $feed->{entry} }, {
            title => $title,
            link  => $link,
            date  => $date,
            tags  => [ $channel ],
        };
    }
}

# All needed values have been copied out as plain strings; release the tree.
$tree->delete;

binmode STDOUT, ":utf8";
print YAML::Dump $feed;
44
# Convert one schedule cell into a timestamp and a channel label.
#
# Argument: a node object (as returned by findnodes) whose content() list
#           starts with a text node such as
#           "10月15日(日)深夜2:55 WOWOW/BS-5ch/191ch".
# Returns:  a two-element list: the W3CDTF-formatted date/time in the
#           Asia/Tokyo time zone, and the channel string captured by the
#           pattern (e.g. "191ch").
# Dies:     if no cell is given, the cell has no leading text, or the text
#           does not match the expected pattern.
sub munge_datetime {
    my $cell = shift;
    defined $cell
        or die "munge_datetime: no date cell given";

    # content() may return undef for an empty element, and the first child
    # may be a nested element rather than text; only plain text is usable.
    my $children = $cell->content;
    my $date = $children ? $children->[0] : undef;
    defined $date && !ref $date
        or die "munge_datetime: date cell does not start with text";

    # 10月15日(日)深夜2:55 WOWOW/BS-5ch/191ch
    $date =~ m!^\s*(\d{1,2})月(\d{1,2})日[(\(].*?[)\)]\s*(午前|午後|深夜)(\d{1,2}):(\d{2})\s*WOWOW.*?(\d{3}[cc][hh])!
        or die "No match: $date";
    my ($month, $day, $am_pm_midnight, $hour, $minute, $channel)
        = ($1, $2, $3, $4, $5, $6);

    # 午後 (PM): shift into the 24-hour clock.  The guard keeps a value
    # written as 午後12:xx at hour 12 instead of producing hour 24, which
    # DateTime->new rejects.
    $hour += 12 if $am_pm_midnight eq '午後' && $hour < 12;

    # The page gives no year; use the current year as seen in Tokyo so the
    # result is not off by one during the hours when UTC is still in the
    # previous year.
    # XXX: a listing that crosses the year boundary still gets the current
    # year.
    my $dt = DateTime->new(
        year      => DateTime->now( time_zone => 'Asia/Tokyo' )->year,
        month     => $month,
        day       => $day,
        hour      => $hour,
        minute    => $minute,
        time_zone => 'Asia/Tokyo',
    );

    # 深夜 (late night) times are listed under the preceding calendar day,
    # so the real date is one day later.
    $dt->add( days => 1 ) if $am_pm_midnight eq '深夜';

    return ( DateTime::Format::W3CDTF->format_datetime($dt), $channel );
}
66
Note: See TracBrowser for help on using the browser.