| 1 |
package Plagger::Plugin::Filter::GuessTimeZoneByDomain; |
|---|
| 2 |
use strict; |
|---|
| 3 |
use base qw( Plagger::Plugin ); |
|---|
| 4 |
|
|---|
| 5 |
use DateTime::TimeZone; |
|---|
| 6 |
use List::Util qw( first ); |
|---|
| 7 |
|
|---|
| 8 |
# Plugin entry point: attaches this plugin's callbacks to the given context.
#
# If the loaded DateTime::TimeZone does not provide names_in_country(),
# an error is logged and no hooks are registered.
sub register {
    my ($plugin, $ctx) = @_;

    if (!DateTime::TimeZone->can('names_in_country')) {
        $ctx->log(error => 'DateTime::TimeZone >= 0.51 is required.');
        return;
    }

    # Kept as an ordered list so the hooks are handed over in the same
    # sequence as before.
    my @hooks = (
        'update.entry.fixup' => \&update,
        'plugin.init'        => \&initialize,
    );

    $ctx->register_hook($plugin, @hooks);
}
|---|
| 22 |
|
|---|
| 23 |
# 'plugin.init' hook: prepares optional IP-based lookup and normalises the
# conflict policy.
#
# - use_ip_country: IP::Country::Fast is loaded unless this option is
#   defined and false. A failed load is tolerated; the plugin then simply
#   has no $self->{ip_country} object.
# - conflict_policy: anything other than 'cc' or 'ip' (including unset) is
#   replaced by 'cc'.
sub initialize {
    my ($self, $context, $args) = @_;

    my $conf = $self->conf;

    my $want_ip = $conf->{use_ip_country};
    if (!defined $want_ip || $want_ip) {
        eval { require IP::Country::Fast };
        if (!$@) {
            $self->{ip_country} = IP::Country::Fast->new;
        }
    }

    my %is_known_policy = (cc => 1, ip => 1);
    my $policy = $conf->{conflict_policy};
    $conf->{conflict_policy} = 'cc'
        unless $policy && $is_known_policy{$policy};
}
|---|
| 36 |
|
|---|
| 37 |
# Looks up the time zones of one country and returns the first one.
#
# $source is only used in the log line ("ccTLD" or "IP::Country").
# Returns undef when the country has more than three zones (too ambiguous
# to guess) or none at all; the latter case is still logged as "(undef)".
sub _guess_zone_in_country {
    my ($context, $source, $country_code) = @_;

    my @names = DateTime::TimeZone->names_in_country($country_code);
    return if @names > 3;

    $context->log(info => "guess by $source ($country_code): " . ($names[0] || '(undef)'));
    return $names[0];
}

# 'update.entry.fixup' hook: assigns a guessed time zone to an entry whose
# date is floating or UTC.
#
# Two guesses are made from the host of the entry's permalink:
#   cc - the two-letter country-code TLD of the host name
#   ip - the country reported by IP::Country::Fast for the host (only when
#        that object was set up; results are cached for one day per host)
# The conflict_policy setting decides which guess is preferred; the other
# one is used as a fallback. Nothing happens when the entry has no date,
# the date already carries a real zone, or the permalink has no host.
#
# NOTE(review): the POD example says a US-hosted site resolves to
# America/New_York, but countries with more than three zones are skipped
# here -- confirm which behaviour is intended.
sub update {
    my($self, $context, $args) = @_;

    my $date = $args->{entry}->date;
    return unless $date;

    my $current_zone = $date->time_zone;
    return unless $current_zone->is_floating || $current_zone->is_utc;

    my $uri = URI->new($args->{entry}->permalink);
    $uri->can('host') or return;

    # URIs without an authority part yield no host; there is nothing to
    # guess from in that case.
    my $host = $uri->host;
    return unless defined $host && length $host;

    my %result;

    # Only ASCII letters qualify as a ccTLD. \w would also accept digits,
    # turning the last octet of an IPv4 literal (e.g. "192.168.0.10") into
    # a bogus country code, and \z (unlike $) rejects a trailing newline.
    my ($cctld) = $host =~ /\.([A-Za-z]{2})\z/;
    if ($cctld) {
        $result{cc} = _guess_zone_in_country($context, 'ccTLD', $cctld);
    }

    if ($self->{ip_country}) {
        my $ccip = $self->cache->get_callback(
            $host,
            sub { $self->{ip_country}->inet_atocc($host) },
            '1 day',
        );
        if ($ccip) {
            $result{ip} = _guess_zone_in_country($context, 'IP::Country', $ccip);
        }
    }

    # Fall back to 'cc' when no policy is set, matching the documented
    # default.
    my $policy = $self->conf->{conflict_policy} || 'cc';
    my @cand = $policy eq 'cc' ? @result{qw(cc ip)} : @result{qw(ip cc)};

    my $tz = first { defined } @cand;
    if ($tz) {
        $context->log(info => "Use timezone $tz for $uri");
        $date->set_time_zone($tz);
    }
}
|---|
| 82 |
|
|---|
| 83 |
1; |
|---|
| 84 |
__END__ |
|---|
| 85 |
|
|---|
| 86 |
=head1 NAME |
|---|
| 87 |
|
|---|
| 88 |
Plagger::Plugin::Filter::GuessTimeZoneByDomain - Guess timezone by domains if datetime is floating or UTC |
|---|
| 89 |
|
|---|
| 90 |
=head1 SYNOPSIS |
|---|
| 91 |
|
|---|
| 92 |
- module: Filter::GuessTimeZoneByDomain |
|---|
| 93 |
|
|---|
| 94 |
=head1 DESCRIPTION |
|---|
| 95 |
|
|---|
| 96 |
This plugin guesses feed date timezone by domains, if dates are |
|---|
| 97 |
floating or UTC. It uses the mapping table from ISO 3166 country code to |
|---|
| 98 |
timezones available in the Olson database (hence it requires |
|---|
| 99 |
DateTime::TimeZone 0.51). |
|---|
| 100 |
|
|---|
| 101 |
Optionally, if you have the IP::Country module installed, this plugin also |
|---|
| 102 |
checks the country name which the host address is assigned to, instead |
|---|
| 103 |
of its domain name (ccTLD). |
|---|
| 104 |
|
|---|
| 105 |
For example, if the datetime is floating or UTC set in the feed of |
|---|
| 106 |
I<example.jp>, it is resolved to I<Asia/Tokyo> since its ccTLD is |
|---|
| 107 |
I<jp>. In the case of I<www.asahi.com>, ccTLD is null but the IP |
|---|
| 108 |
address is assigned to Japan, hence it is resolved to I<Asia/Tokyo> as |
|---|
| 109 |
well. |
|---|
| 110 |
|
|---|
| 111 |
=head1 CONFIG |
|---|
| 112 |
|
|---|
| 113 |
=over 4 |
|---|
| 114 |
|
|---|
| 115 |
=item conflict_policy |
|---|
| 116 |
|
|---|
| 117 |
conflict_policy: cc |
|---|
| 118 |
conflict_policy: ip |
|---|
| 119 |
|
|---|
| 120 |
I<conflict_policy> determines what to do if timezones guessed from 1) |
|---|
| 121 |
ccTLD and 2) country code from IP::Country don't match. I<cc> |
|---|
| 122 |
prioritizes ccTLD, and I<ip> prioritizes IP::Country. |
|---|
| 123 |
|
|---|
| 124 |
For example, I<http://www.sixapart.jp/> has a ccTLD I<jp>, but its |
|---|
| 125 |
host address is assigned to the United States (I<US>). In this case: |
|---|
| 126 |
|
|---|
| 127 |
conflict_policy timezone |
|---|
| 128 |
----------------------------------- |
|---|
| 129 |
cc Asia/Tokyo |
|---|
| 130 |
ip America/New_York |
|---|
| 131 |
|
|---|
| 132 |
(Note that US has multiple timezones but I<America/New_York> is used |
|---|
| 133 |
since this one is listed first in the Olson database.) |
|---|
| 134 |
|
|---|
| 135 |
Defaults to I<cc>. |
|---|
| 136 |
|
|---|
| 137 |
=back |
|---|
| 138 |
|
|---|
| 139 |
=head1 AUTHOR |
|---|
| 140 |
|
|---|
| 141 |
Tatsuhiko Miyagawa |
|---|
| 142 |
|
|---|
| 143 |
=head1 SEE ALSO |
|---|
| 144 |
|
|---|
| 145 |
L<Plagger>, L<Plagger::Plugin::Filter::FloatingDateTime>, L<DateTime::TimeZone> |
|---|
| 146 |
|
|---|
| 147 |
=cut |
|---|