Changeset 481
- Timestamp: 04/02/06 06:24:04
- Files:
- trunk/plagger/assets/plugins/filter-entryfulltext/asahi_com.pl (modified) (1 diff)
- trunk/plagger/assets/plugins/filter-entryfulltext/hatena_diary.pl (modified) (1 diff)
- trunk/plagger/assets/plugins/filter-entryfulltext/livedoorblog.pl (modified) (1 diff)
- trunk/plagger/assets/plugins/filter-entryfulltext/sixapart.pl (modified) (1 diff)
- trunk/plagger/assets/plugins/filter-entryfulltext/sportsnavi.pl (added)
- trunk/plagger/lib/Plagger/Date.pm (modified) (2 diffs)
- trunk/plagger/lib/Plagger/Plugin/CustomFeed/Simple.pm (added)
- trunk/plagger/lib/Plagger/Plugin/Filter/EntryFullText.pm (modified) (4 diffs)
- trunk/plagger/lib/Plagger/Plugin/Subscription/Config.pm (modified) (1 diff)
- trunk/plagger/lib/Plagger/Util.pm (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/plagger/assets/plugins/filter-entryfulltext/asahi_com.pl
r474  r481
   4     4    }
   5     5
   6        - sub extract_body {
         6  + sub extract {
   7     7        my($self, $args) = @_;
   8     8        ( $args->{content} =~ /<!-- Start of Kiji -->(.*)<!-- End of Kiji -->/s )[0];
trunk/plagger/assets/plugins/filter-entryfulltext/hatena_diary.pl
r474  r481
   4     4    }
   5     5
   6        - sub extract_body {
         6  + sub extract {
   7     7        my($self, $args) = @_;
   8     8        my $name = ( $args->{entry}->link =~ /\#([\w\-]+)$/ )[0];
trunk/plagger/assets/plugins/filter-entryfulltext/livedoorblog.pl
r474  r481
   4     4    }
   5     5
   6        - sub extract_body {
         6  + sub extract {
   7     7        my($self, $args) = @_;
   8     8
trunk/plagger/assets/plugins/filter-entryfulltext/sixapart.pl
r474  r481
   4     4    }
   5     5
   6        - sub extract_body {
         6  + sub extract {
   7     7        my($self, $args) = @_;
   8     8        my $body = ($args->{content} =~ m!<div class="entry-body-text">(.*?)</div>!s)[0];
trunk/plagger/lib/Plagger/Date.pm
r325  r481
   3     3    use base qw( DateTime );
   4     4
         5  + use Encode;
         6  + use DateTime::Format::Strptime;
   5     7    use UNIVERSAL::require;
   6     8
   …     …
  24    26
  25    27        bless $dt, $class;
        28  + }
        29  +
        30  + sub strptime {
        31  +     my($class, $pattern, $date) = @_;
        32  +     Encode::_utf8_on($pattern);
        33  +     my $format = DateTime::Format::Strptime->new(pattern => $pattern);
        34  +     $class->parse($format, $date);
  26    35    }
  27    36
trunk/plagger/lib/Plagger/Plugin/Filter/EntryFullText.pm
r478  r481
   7     7    use File::Spec;
   8     8    use List::Util qw(first);
         9  + use Plagger::Date; # for metadata in plugins
        10  + use Plagger::Util qw( decode_content );
   9    11
  10    12    use Plagger::UserAgent;
   …     …
  77    79        return if $res->http_response->is_error;
  78    80
  79        -     $args->{content} = $self->decode_content($res);
        81  +     $args->{content} = decode_content($res);
  80    82
  81    83        my @plugins = $handler ? ($handler) : @{ $self->{plugins} };
   …     …
  84    86            if ( $handler || $plugin->handle($args) ) {
  85    87                $context->log(debug => $args->{entry}->permalink . " handled by " . $plugin->site_name);
  86        -             my $body = $plugin->extract_body($args);
  87        -             if ($body) {
        88  +             my $data = $plugin->extract($args);
        89  +             $data = { body => $data } if $data && !ref $data;
        90  +             if ($data) {
  88    91                    $context->log(info => "Extract content succeeded on " . $args->{entry}->permalink);
  89        -                 $args->{entry}->body($body);
        92  +                 $args->{entry}->body($data->{body});
        93  +                 $args->{entry}->title($data->{title}) if $data->{title};
        94  +                 $args->{entry}->date($data->{date}) if $data->{date};
  90    95                    return 1;
  91    96                }
   …     …
 102   107    }
 103   108
 104        - # xxx make it Plagger::Entry's method so that other plugins can use
 105        - sub decode_content {
 106        -     my($self, $res) = @_;
 107        -     my $content = $res->content;
 108        -
 109        -     my $charset = ($res->http_response->content_type =~ /charset=([\w\-]+)/)[0];
 110        -     unless ($charset) {
 111        -         $charset = ( $content =~ m!<meta http-equiv="Content-Type" content=".*charset=([\w\-]+)"! )[0] || "utf-8";
 112        -     }
 113        -
 114        -     return decode($charset, $content);
 115        - }
 116        -
 117   109
 118   110    package Plagger::Plugin::Filter::EntryFullText::Site;
trunk/plagger/lib/Plagger/Plugin/Subscription/Config.pm
r430  r481
  28    28        $feed->link($config->{link}) if $config->{link};
  29    29        $feed->title($config->{title}) if $config->{title};
        30  +     $feed->meta($config->{meta}) if $config->{meta};
  30    31
  31    32        if (my $tags = $config->{tag}) {
trunk/plagger/lib/Plagger/Util.pm
r346  r481
   2     2    use strict;
   3     3    our @ISA = qw(Exporter);
   4        - our @EXPORT_OK = qw( strip_html dumbnail );
         4  + our @EXPORT_OK = qw( strip_html dumbnail decode_content extract_title );
   5     5
         6  + use Encode ();
   6     7    use List::Util qw(min);
   7     8    use HTML::Entities;
   …     …
  31    32    }
  32    33
        34  + sub decode_content {
        35  +     my $res = shift;
        36  +     my $content = $res->content;
        37  +
        38  +     my $charset = ($res->http_response->content_type =~ /charset=([\w\-]+)/)[0];
        39  +     unless ($charset) {
        40  +         $charset = ( $content =~ m!<meta http-equiv="Content-Type" content=".*charset=([\w\-]+)"! )[0] || "utf-8";
        41  +     }
        42  +
        43  +     return Encode::decode($charset, $content);
        44  + }
        45  +
        46  + sub extract_title {
        47  +     my $content = shift;
        48  +     my $title = ($content =~ m!<title>\s*(.*?)\s*</title>!s)[0] or return;
        49  +     HTML::Entities::decode($1);
        50  + }
        51  +
  33    52    1;
