Changeset 487
- Timestamp: 04/02/06 11:41:30
- Files:
  - trunk/plagger/lib/Plagger/Util.pm (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/plagger/lib/Plagger/Util.pm
r481  r487
   5     5
   6     6    use Encode ();
         7  + use Encode::Guess;
   7     8    use List::Util qw(min);
   8     9    use HTML::Entities;
   …     …
  36    37    my $content = $res->content;
  37    38
        39  + # 1) get charset from HTTP Content-Type header
  38    40    my $charset = ($res->http_response->content_type =~ /charset=([\w\-]+)/)[0];
  39        - unless ($charset) {
  40        - $charset = ( $content =~ m!<meta http-equiv="Content-Type" content=".*charset=([\w\-]+)"! )[0] || "utf-8";
  41        - }
        41  +
        42  + # 2) if there's not, try META tag
        43  + $charset ||= ( $content =~ m!<meta http-equiv="Content-Type" content=".*charset=([\w\-]+)"!i )[0];
        44  +
        45  + # 3) if there's not still, try Guess
        46  + # xxx it supports Japanese only
        47  + my @guess = qw(utf-8 euc-jp shift_jis);
        48  + $charset = guess_encoding($content, @guess);
  42    49
  43    50    return Encode::decode($charset, $content);
