Changeset 1544
- Timestamp: 08/20/06 22:52:16
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
branches/hackathon-summary/plagger/Makefile.PL
r1351 r1544
  64   64           recommends('HTML::TreeBuilder::XPath'),
  65   65       ],
       66 +     'Better html to text formatter' => [
       67 +         -default => 1,
       68 +         recommends('HTML::TreeBuilder'),
       69 +         recommends('HTML::FormatText'),
       70 +     ],
  66   71   );
  67   72

branches/hackathon-summary/plagger/lib/Plagger/Util.pm
r1538 r1544
  28   28   sub strip_html {
  29   29       my $html = shift;
  30      -     $html =~ s/<[^>]*>//g;
  31      -     HTML::Entities::decode($html);
       30 +
       31 +     eval {
       32 +         require HTML::FormatText;
       33 +         require HTML::TreeBuilder;
       34 +     };
       35 +
       36 +     if ($@) {
       37 +         # dump stripper
       38 +         $html =~ s/<[^>]*>//g;
       39 +         return HTML::Entities::decode($html);
       40 +     }
       41 +
       42 +     my $tree = HTML::TreeBuilder->new;
       43 +     $tree->parse($html);
       44 +     $tree->eof;
       45 +
       46 +     my $formatter = HTML::FormatText->new(leftmargin => 0);
       47 +     my $text = $formatter->format($tree);
       48 +     $text =~ s/\s*$//s;
       49 +     $text;
  32   50   }
  33   51

branches/hackathon-summary/plagger/t/core/text.t
r1527 r1544
   1    1   use t::TestPlagger;
   2    2   plan tests => 1 * blocks;
        3 +
        4 + test_requires('HTML::FormatText');
        5 + test_requires('HTML::TreeBuilder');
   3    6
   4    7   filters { input => 'chomp', expected => 'yaml' };
   …    …
  53   56   --- expected
  54   57   type: html
  55      - plaintext: Hello World
       58 + plaintext: "Hello\nWorld"
  56   59
  57   60   === <wbr>
   …    …
  60   63   --- expected
  61   64   type: html
  62      - plaintext: Hello World
       65 + plaintext: Hello World
  63   66
  64   67   === Unknown Tags
