root/branches/hackathon-summary/plagger/lib/Plagger/Plugin/Summary/Simple.pm

Revision 1623 (checked in by miyagawa, 2 years ago)

Summary::Simple: Fixed summary generator to skip div ... not sure if this is the right thing.

Line 
1 package Plagger::Plugin::Summary::Simple;
2 use strict;
3 use base qw( Plagger::Plugin );
4
# Plugin entry point: subscribes this plugin's summarize() callback to the
# 'summarizer.summarize' hook on the supplied context.
#
#   $self    - the plugin instance, passed through to register_hook
#   $context - object providing register_hook()
#
# Returns whatever $context->register_hook returns.
sub register {
    my ($self, $context) = @_;

    return $context->register_hook(
        $self,
        'summarizer.summarize' => \&summarize,
    );
}
12
# Hook callback for 'summarizer.summarize': derives a short summary from
# $args->{text}.
#
#   $self    - the plugin instance (unused here)
#   $context - the hook context (unused here)
#   $args    - hashref; $args->{text} is either a plain string or an object
#              providing is_html() and data()
#
# HTML input:
#   * Leading elements are unwrapped one at a time. The first one whose
#     (lower-cased) tag name is flagged in %HTML::Tagset::isBodyElement is
#     returned as "<tag>content</tag>" (attributes are dropped). <div> is
#     temporarily unflagged so a wrapping div is descended into, not returned.
#   * Otherwise everything before the first <br> is returned, or the whole
#     document when there is no <br>.
#
# Plain-text input:
#   * Text longer than 255 characters is cut to 255 characters plus "...";
#     shorter text is returned as-is (i.e. $text itself, not $text->data).
sub summarize {
    my ($self, $context, $args) = @_;

    my $text = $args->{text};
    $text = Plagger::Text->new_from_text($text) unless ref $text;

    if ($text->is_html) {
        # Scoped override: restored automatically when this sub returns.
        local $HTML::Tagset::isBodyElement{div} = 0;

        my $html = $text->data;

        # \w+\b pins the complete tag name. Without the boundary the name can
        # backtrack to a prefix (e.g. "<img ...>" treated as "<i" + "mg ..."),
        # pairing an unclosed tag with an unrelated "</i>" further on.
        # /i lets "<P>...</p>" pair up, in line with the lc() lookup below.
        while ($html =~ s{^\s*<(\w+)\b[^>]*>(.*?)</\1>}{$2}is) {
            # Copy the captures right away; they are overwritten by the next match.
            my ($tag, $inner) = ($1, $2);

            return "<$tag>$inner</$tag>"
                if $HTML::Tagset::isBodyElement{ lc $tag };
        }

        # No usable leading element: cut at the first line break instead.
        my $data = $text->data;
        if ($data =~ m{^(.*?)<br\s*/?>}is) {
            return $1;
        }
        return $data;
    }

    # Plain text. length/substr operate on characters, not bytes.
    # NOTE(review): this relies on $text stringifying to its content
    # (presumably via overloading in Plagger::Text) -- confirm.
    if (length($text) > 255) {
        return substr($text, 0, 255) . "...";
    }
    return $text;
}
43
44 1;
45 __END__
46
47 =head1 NAME
48
49 Plagger::Plugin::Summary::Simple - Default summary generator
50
51 =head1 SYNOPSIS
52
53   # this is not actually needed
54   - module: Summary::Simple
55
56 =head1 DESCRIPTION
57
58 Summary::Simple is a core plugin that does simple generation of summaries
59 using an HTML snippet extraction algorithm. This plugin is autoloaded
60 from Plagger core, and if you don't load any Summary plugins, or all of
61 your plugins decline to handle summary generation, Plagger falls back
62 to this plugin.
63
64 =head1 AUTHOR
65
66 Tatsuhiko Miyagawa
67
68 =head1 SEE ALSO
69
70 L<Plagger>
71
72 =cut
Note: See TracBrowser for help on using the browser.