Changeset 1538

Show
Ignore:
Timestamp:
08/20/06 22:08:25
Author:
miyagawa
Message:
  • If feed has summary (atom:summary or rss:description), add to $entry->summary
  • Added dumb Summarizer and summarizer.summarize support.
Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • branches/hackathon-summary/plagger/lib/Plagger.pm

    r1507 r1538  
    274274    } 
    275275 
     276    return if $once; 
    276277    return @ret; 
    277278} 
  • branches/hackathon-summary/plagger/lib/Plagger/Plugin/Aggregator/Simple.pm

    r1493 r1538  
    136136        $entry->feed_link($feed->link); 
    137137        $entry->id($e->id); 
    138         $entry->body(_u($e->content->body || $e->summary->body)); 
     138        $entry->body($e->content->body || $e->summary->body); 
     139        if ($e->summary->body) { 
     140            $entry->summary($e->summary->body); 
     141        } else { 
     142            $entry->summary( Plagger::Util::summarize($entry->body) ); 
     143        } 
    139144 
    140145        # enclosure support, to be added to XML::Feed 
  • branches/hackathon-summary/plagger/lib/Plagger/Util.pm

    r1389 r1538  
    22use strict; 
    33our @ISA = qw(Exporter); 
    4 our @EXPORT_OK = qw( strip_html dumbnail decode_content extract_title load_uri mime_type_of filename_for ); 
     4our @EXPORT_OK = qw( strip_html dumbnail decode_content extract_title load_uri mime_type_of filename_for summarize ); 
    55 
    66use Encode (); 
    77use List::Util qw(min); 
    88use HTML::Entities; 
     9use HTML::Tagset; 
    910use MIME::Types; 
    1011use MIME::Type; 
     12use Plagger::Text; 
    1113 
    1214our $Detector; 
     
    185187} 
    186188 
     189sub summarize { 
     190    my($text) = @_; 
     191    $text = Plagger::Text->new_from_text($text) unless ref $text; 
     192 
     193    # give plugins a chance 
     194    if (Plagger->context) { 
     195        my $summarized = Plagger->context->run_hook_once('summarizer.summarize', { text => $text }); 
     196        return $summarized if defined $summarized; 
     197    } 
     198 
     199    if ($text->is_html) { 
     200        # HTML: grab first block paragraph, or until first <br /> 
     201        my $html = $text->data; 
     202        if ($html =~ m|^\s*<(\w*)\s*[^>]*>(.*?)</\1>|s && $HTML::Tagset::isBodyElement{lc($1)}) { 
     203            return "<$1>$2</$1>"; 
     204        } elsif ($html =~ m!^(.*?)<br\s*/?>!s) { 
     205            return $1; 
     206        } else { 
     207            return $html; 
     208        } 
     209    } else { 
     210        # text: substring with 255 bytes 
     211        if (length($text) > 255) { 
     212            return substr($text, 0, 255) . "..."; 
     213        } else { 
     214            return $text; 
     215        } 
     216    } 
     217} 
     218 
    1872191;