Changeset 504
- Timestamp:
- 04/02/06 15:47:35
- Files:
-
- trunk/plagger/Makefile.PL (modified) (1 diff)
- trunk/plagger/lib/Plagger/Util.pm (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/plagger/Makefile.PL
 r486 r504
   25   25
   26   26   features(
        27 +     'Better Encoding detection' => [
        28 +         -default => 1,
        29 +         recommends('Encode::Detect'),
        30 +     ],
   27   31       'POD Testing' => [
   28   32           -default => 0,
trunk/plagger/lib/Plagger/Util.pm
 r492 r504
    5    5
    6    6   use Encode ();
    7      - use Encode::Guess;
    8    7   use List::Util qw(min);
    9    8   use HTML::Entities;
         9 +
        10 + our $Detector;
        11 +
        12 + BEGIN {
        13 +     if ( eval { require Encode::Detect::Detector; 1 } ) {
        14 +         $Detector = sub { Encode::Detect::Detector::detect($_[0]) };
        15 +     } else {
        16 +         require Encode::Guess;
        17 +         $Detector = sub {
        18 +             my @guess = qw(utf-8 euc-jp shift_jis); # xxx japanese only?
        19 +             eval { guess_encoding($_[0], @guess)->name };
        20 +         };
        21 +     }
        22 + }
   10   23
   11   24   sub strip_html {
    …    …
   43   56       $charset ||= ( $content =~ m!<meta http-equiv="Content-Type" content=".*charset=([\w\-]+)"!i )[0];
   44   57
   45      -     # 3) if there's not still, try Guess
   46      -     # xxx it supports Japanese only
   47      -     my @guess = qw(utf-8 euc-jp shift_jis);
   48      -     $charset ||= eval { guess_encoding($content, @guess)->name };
        58 +     # 3) if there's not still, try Detector/Guess
        59 +     $charset ||= $Detector->($content);
   49   60
   50   61       # 4) falls back to UTF-8
