|
Revision 1620
(checked in by miyagawa, 2 years ago)
|
Back out the call to utf8::decode in the strip_html function. FeedParser now sets $XML::Atom::ForceUnicode = 1 globally rather than via local(), since this flag takes effect at runtime, whenever $entry->body or another element is accessed and XML::Atom reads its DOM node. Nasty.
|
| Line | |
|---|
| 1 |
package Plagger::FeedParser; |
|---|
| 2 |
use strict; |
|---|
| 3 |
|
|---|
| 4 |
use Feed::Find; |
|---|
| 5 |
use XML::Atom; |
|---|
| 6 |
use XML::Feed; |
|---|
| 7 |
use XML::Feed::RSS; |
|---|
| 8 |
$XML::Feed::RSS::PREFERRED_PARSER = "XML::RSS::LibXML"; |
|---|
| 9 |
$XML::Atom::ForceUnicode = 1; |
|---|
| 10 |
|
|---|
| 11 |
use Plagger::Util; |
|---|
| 12 |
|
|---|
| 13 |
# Parse raw feed content into a feed object via XML::Feed.
#
# Arguments:
#   $class       - invocant; not otherwise used here.
#   $content_ref - handed unchanged to XML::Feed->parse (callers in this
#                  file's ecosystem presumably pass a reference to the
#                  feed body -- confirm against callers).
#
# Returns whatever XML::Feed->parse returned on success.
# Croaks with "Parsing content failed: ..." when the parser dies or
# returns a false value; the detail is $@ if set, else XML::Feed->errstr.
sub parse {
    my($class, $content_ref) = @_;

    # croak is called by its fully-qualified name below; load Carp here so
    # the error path does not rely on another module having loaded it.
    require Carp;

    # XML::Liberal is optional. When version 0.10 or later can be loaded,
    # override LibXML globally and hold the returned value in a lexical
    # until this sub returns.
    # NOTE(review): presumably the returned value acts as a guard that
    # undoes the override when it goes out of scope -- confirm against
    # the XML::Liberal documentation.
    my $sweeper;
    if (eval { require XML::Liberal; 1 } && $XML::Liberal::VERSION >= 0.10) {
        $sweeper = XML::Liberal->globally_override('LibXML');
    }

    # A parser exception and a false return value are both failures.
    my $remote = eval { XML::Feed->parse($content_ref) }
        or Carp::croak("Parsing content failed: " . ($@ || XML::Feed->errstr));

    return $remote;
}
|---|
| 29 |
|
|---|
| 30 |
# Work out the feed URI for a fetched response.
#
# Arguments:
#   $self - invocant; used to call looks_like_feed.
#   $res  - response object; this sub calls content_type (inside eval),
#           http_response->content_type as a fallback, content and uri.
#
# Returns $res->uri when the response itself is a feed (by content type
# or by sniffing the body), otherwise the first feed found in the HTML by
# Feed::Find, or nothing (bare return) when no feed is found.
sub discover {
    my($self, $res) = @_;

    # First usable content type wins; default to text/xml.
    my $mime = eval { $res->content_type }
        || $res->http_response->content_type
        || "text/xml";

    $mime =~ s/;.*$//; # strip charset= cruft

    my $body = $res->content;

    # The response is itself a feed: known feed content type, or the body
    # looks like one.
    my $is_feed = $Feed::Find::IsFeed{$mime}
        || $self->looks_like_feed(\$body);
    return $res->uri if $is_feed;

    # Otherwise treat it as HTML and look for feeds advertised in it.
    $body = Plagger::Util::decode_content($res);
    my @candidates = Feed::Find->find_in_html(\$body, $res->uri);

    return unless @candidates;
    return $candidates[0];
}
|---|
| 52 |
|
|---|
| 53 |
# Sniff a document body for the opening tag of an RSS, RDF/RSS 1.0 or
# Atom-style feed.
#
# Arguments:
#   $self        - invocant; unused.
#   $content_ref - reference to the string to inspect.
#
# Returns the result of the pattern match: true when the content looks
# like a feed, false otherwise.
#
# The pattern accepts any whitespace after the tag name, single- or
# double-quoted namespace values, and an xmlns attribute that is not the
# first attribute on <feed>, so documents such as
#   <rss\n version="2.0">
#   <feed xml:lang="en" xmlns="http://www.w3.org/2005/Atom">
# are recognised as feeds.
sub looks_like_feed {
    my($self, $content_ref) = @_;

    return $$content_ref =~ m{
          <rss\s
        | <rdf:RDF\s+.*?xmlns=["']http://purl\.org/rss
        | <feed\s+(?:[^>]*?\s)?xmlns=["']
    }xs;
}
|---|
| 57 |
|
|---|
| 58 |
1; |
|---|