root/trunk/plagger/lib/Plagger/Text.pm

Revision 1741 (checked in by miyagawa, 2 years ago)

merge from hackathon-summary

Line 
1 package Plagger::Text;
2 use strict;
3 use base qw( Class::Accessor::Fast );
4 __PACKAGE__->mk_accessors(qw( type data ));
5
6 use overload q("") => sub { $_[0]->data }, fallback => 1;
7
8 use HTML::Tagset;
9 use Plagger::Util;
10
11 sub new {
12     my($class, %param) = @_;
13     bless {%param}, $class;
14 }
15
16 sub new_from_text {
17     my($class, $text) = @_;
18
19     return unless defined $text;
20     utf8::decode($text) unless utf8::is_utf8($text);
21
22     my @tags = $text =~ m!<(\w+)\s?/?>!g;
23     my @unknown = grep !$HTML::Tagset::isKnown{$_}, @tags;
24     my $type;
25     if (@unknown > @tags / 2) {
26         $type = 'text';
27     } elsif (@tags || $text =~ m!&(?:amp|gt|lt|quot);!) {
28         $type = 'html';
29     } else {
30         $type = 'text';
31     }
32
33     bless { type => $type, data => $text }, $class;
34 }
35
36 sub is_html {
37     my $self = shift;
38     $self->type eq 'html';
39 }
40
41 sub is_text {
42     my $self = shift;
43     $self->type eq 'text';
44 }
45
46 sub html {
47     my $self = shift;
48     if ($self->is_html) {
49         return $self->data;
50     } else {
51         Plagger::Util::encode_xml($self->data);
52     }
53 }
54
55 sub plaintext {
56     my $self = shift;
57     if ($self->is_html) {
58         return Plagger::Util::strip_html($self->data);
59     } else {
60         return $self->data;
61     }
62 }
63
64 sub unicode { $_[0]->data }
65 sub utf8    { Encode::encode_utf8($_[0]->data) }
66 sub encode  { Encode::encode($_[1], $_[0]->data) }
67
68 sub serialize {
69     my $self = shift;
70     $self->data;
71 }
72
73 1;
Note: See TracBrowser for help on using the browser.