Changeset 1538
- Timestamp:
- 08/20/06 22:08:25
- Files:
-
- branches/hackathon-summary/plagger/lib/Plagger.pm (modified) (1 diff)
- branches/hackathon-summary/plagger/lib/Plagger/Plugin/Aggregator/Simple.pm (modified) (1 diff)
- branches/hackathon-summary/plagger/lib/Plagger/Util.pm (modified) (2 diffs)
- branches/hackathon-summary/plagger/t/core/summarize.t (added)
- branches/hackathon-summary/plagger/t/core/summary.t (added)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
branches/hackathon-summary/plagger/lib/Plagger.pm
r1507 r1538
 274  274       }
 275  275
      276 +     return if $once;
 276  277       return @ret;
 277  278   }
branches/hackathon-summary/plagger/lib/Plagger/Plugin/Aggregator/Simple.pm
r1493 r1538
 136  136   $entry->feed_link($feed->link);
 137  137   $entry->id($e->id);
 138      - $entry->body(_u($e->content->body || $e->summary->body));
      138 + $entry->body($e->content->body || $e->summary->body);
      139 + if ($e->summary->body) {
      140 +     $entry->summary($e->summary->body);
      141 + } else {
      142 +     $entry->summary( Plagger::Util::summarize($entry->body) );
      143 + }
 139  144
 140  145   # enclosure support, to be added to XML::Feed
branches/hackathon-summary/plagger/lib/Plagger/Util.pm
r1389 r1538
   2    2   use strict;
   3    3   our @ISA = qw(Exporter);
   4      - our @EXPORT_OK = qw( strip_html dumbnail decode_content extract_title load_uri mime_type_of filename_for );
        4 + our @EXPORT_OK = qw( strip_html dumbnail decode_content extract_title load_uri mime_type_of filename_for summarize );
   5    5
   6    6   use Encode ();
   7    7   use List::Util qw(min);
   8    8   use HTML::Entities;
        9 + use HTML::Tagset;
   9   10   use MIME::Types;
  10   11   use MIME::Type;
       12 + use Plagger::Text;
  11   13
  12   14   our $Detector;
   …    …
 185  187   }
 186  188
      189 + sub summarize {
      190 +     my($text) = @_;
      191 +     $text = Plagger::Text->new_from_text($text) unless ref $text;
      192 +
      193 +     # give plugins a chance
      194 +     if (Plagger->context) {
      195 +         my $summarized = Plagger->context->run_hook_once('summarizer.summarize', { text => $text });
      196 +         return $summarized if defined $summarized;
      197 +     }
      198 +
      199 +     if ($text->is_html) {
      200 +         # HTML: grab first block paragraph, or until first <br />
      201 +         my $html = $text->data;
      202 +         if ($html =~ m|^\s*<(\w*)\s*[^>]*>(.*?)</\1>|s && $HTML::Tagset::isBodyElement{lc($1)}) {
      203 +             return "<$1>$2</$1>";
      204 +         } elsif ($html =~ m!^(.*?)<br\s*/?>!s) {
      205 +             return $1;
      206 +         } else {
      207 +             return $html;
      208 +         }
      209 +     } else {
      210 +         # text: substring with 255 bytes
      211 +         if (length($text) > 255) {
      212 +             return substr($text, 0, 255) . "...";
      213 +         } else {
      214 +             return $text;
      215 +         }
      216 +     }
      217 + }
      218 +
 187  219   1;
