|
Revision 1741
(checked in by miyagawa, 2 years ago)
|
merge from hackathon-summary
|
| Line | |
|---|
| 1 |
package Plagger::Text; |
|---|
| 2 |
use strict; |
|---|
| 3 |
use base qw( Class::Accessor::Fast ); |
|---|
| 4 |
__PACKAGE__->mk_accessors(qw( type data )); |
|---|
| 5 |
|
|---|
| 6 |
use overload q("") => sub { $_[0]->data }, fallback => 1; |
|---|
| 7 |
|
|---|
| 8 |
use HTML::Tagset; |
|---|
| 9 |
use Plagger::Util; |
|---|
| 10 |
|
|---|
| 11 |
sub new { |
|---|
| 12 |
my($class, %param) = @_; |
|---|
| 13 |
bless {%param}, $class; |
|---|
| 14 |
} |
|---|
| 15 |
|
|---|
| 16 |
sub new_from_text { |
|---|
| 17 |
my($class, $text) = @_; |
|---|
| 18 |
|
|---|
| 19 |
return unless defined $text; |
|---|
| 20 |
utf8::decode($text) unless utf8::is_utf8($text); |
|---|
| 21 |
|
|---|
| 22 |
my @tags = $text =~ m!<(\w+)\s?/?>!g; |
|---|
| 23 |
my @unknown = grep !$HTML::Tagset::isKnown{$_}, @tags; |
|---|
| 24 |
my $type; |
|---|
| 25 |
if (@unknown > @tags / 2) { |
|---|
| 26 |
$type = 'text'; |
|---|
| 27 |
} elsif (@tags || $text =~ m!&(?:amp|gt|lt|quot);!) { |
|---|
| 28 |
$type = 'html'; |
|---|
| 29 |
} else { |
|---|
| 30 |
$type = 'text'; |
|---|
| 31 |
} |
|---|
| 32 |
|
|---|
| 33 |
bless { type => $type, data => $text }, $class; |
|---|
| 34 |
} |
|---|
| 35 |
|
|---|
| 36 |
sub is_html { |
|---|
| 37 |
my $self = shift; |
|---|
| 38 |
$self->type eq 'html'; |
|---|
| 39 |
} |
|---|
| 40 |
|
|---|
| 41 |
sub is_text { |
|---|
| 42 |
my $self = shift; |
|---|
| 43 |
$self->type eq 'text'; |
|---|
| 44 |
} |
|---|
| 45 |
|
|---|
| 46 |
sub html { |
|---|
| 47 |
my $self = shift; |
|---|
| 48 |
if ($self->is_html) { |
|---|
| 49 |
return $self->data; |
|---|
| 50 |
} else { |
|---|
| 51 |
Plagger::Util::encode_xml($self->data); |
|---|
| 52 |
} |
|---|
| 53 |
} |
|---|
| 54 |
|
|---|
| 55 |
sub plaintext { |
|---|
| 56 |
my $self = shift; |
|---|
| 57 |
if ($self->is_html) { |
|---|
| 58 |
return Plagger::Util::strip_html($self->data); |
|---|
| 59 |
} else { |
|---|
| 60 |
return $self->data; |
|---|
| 61 |
} |
|---|
| 62 |
} |
|---|
| 63 |
|
|---|
| 64 |
sub unicode { $_[0]->data } |
|---|
| 65 |
sub utf8 { Encode::encode_utf8($_[0]->data) } |
|---|
| 66 |
sub encode { Encode::encode($_[1], $_[0]->data) } |
|---|
| 67 |
|
|---|
| 68 |
sub serialize { |
|---|
| 69 |
my $self = shift; |
|---|
| 70 |
$self->data; |
|---|
| 71 |
} |
|---|
| 72 |
|
|---|
| 73 |
1; |
|---|