root/trunk/plagger/lib/Plagger/FeedParser.pm

Revision 2053 (checked in by miyagawa, 3 years ago)

don't use XML::Feed::RSS

Line 
1 package Plagger::FeedParser;
2 use strict;
3
4 use Feed::Find;
5 use XML::Atom;
6 use XML::Feed;
# Try to load both RSS format classes this code knows about.  Which of
# the two exists presumably depends on the installed XML::Feed release,
# so each require gets its own eval: with a single shared eval, a failure
# to load XML::Feed::RSS would abort the block and XML::Feed::Format::RSS
# would never be attempted.  Load failures are deliberately ignored.
eval { require XML::Feed::RSS };
eval { require XML::Feed::Format::RSS };

# Ask both RSS format classes to use XML::RSS::LibXML.  This is set after
# the load attempts above so that a module initialising its own default at
# load time cannot overwrite the value.
$XML::Feed::Format::RSS::PREFERRED_PARSER = $XML::Feed::RSS::PREFERRED_PARSER = "XML::RSS::LibXML";

# Global XML::Atom switch; see the XML::Atom documentation for its exact effect.
$XML::Atom::ForceUnicode = 1;
13
14 use Plagger::Util;
15
# Parse raw feed content with XML::Feed.
#
# Arguments:
#   $class       - invocant; not otherwise used by this sub
#   $content_ref - handed unchanged to XML::Feed->parse
#
# Returns the value produced by XML::Feed->parse.
# Croaks with "Parsing content failed: ..." when the parser dies or
# returns a false value; the message carries the exception text, or
# XML::Feed->errstr when there was no exception.
sub parse {
    my($class, $content_ref) = @_;

    # croak() is called fully qualified below, so load Carp here rather
    # than relying on some other module having pulled it in already.
    require Carp;

    # Override XML::LibXML with XML::Liberal when version 0.10 or later is
    # installed.  The eval ends in a true value so that success is judged
    # by its return value instead of by inspecting $@ afterwards.
    my $sweeper; # XML::Liberal >= 0.13; presumably a guard object that
                 # undoes the override when it goes out of scope - confirm
                 # against the XML::Liberal documentation.
    my $have_liberal = eval {
        require XML::Liberal;
        XML::Liberal->VERSION('0.10');   # dies when the module is too old
        1;
    };
    $sweeper = XML::Liberal->globally_override('LibXML') if $have_liberal;

    my $remote      = eval { XML::Feed->parse($content_ref) };
    my $parse_error = $@;   # capture immediately, before anything resets it

    unless ($remote) {
        Carp::croak("Parsing content failed: " . ($parse_error || XML::Feed->errstr));
    }

    return $remote;
}
32
# Work out the feed URI for a fetched response.
#
# Arguments:
#   $self - invocant; must provide looks_like_feed()
#   $res  - response object.  This sub calls content_type (inside an eval,
#           so it may be missing), http_response, content and uri on it,
#           and passes it to Plagger::Util::decode_content.
#
# Returns:
#   $res->uri when the response is itself a feed, judged either by its
#   media type being listed in %Feed::Find::IsFeed or by sniffing the body;
#   otherwise the first feed Feed::Find->find_in_html reports for the
#   decoded body; otherwise nothing (empty list / undef).
sub discover {
    my($self, $res) = @_;

    my $content_type = eval { $res->content_type } ||
                       $res->http_response->content_type ||
                       "text/xml";

    # Reduce the header value to a bare media type: drop parameters such
    # as charset=, trim surrounding whitespace and lowercase it.  Media
    # types are case-insensitive, so without this a value such as
    # "Application/Atom+XML" or "text/xml ; charset=utf-8" would miss the
    # %Feed::Find::IsFeed lookup below.
    $content_type =~ s/;.*$//s;
    $content_type =~ s/^\s+//;
    $content_type =~ s/\s+$//;
    $content_type = lc $content_type;

    my $content = $res->content;

    # The fetched document is a feed already.
    if ( $Feed::Find::IsFeed{$content_type} || $self->looks_like_feed(\$content) ) {
        return $res->uri;
    }

    # Otherwise treat the body as HTML and look for feeds it advertises.
    $content  = Plagger::Util::decode_content($res);
    my @feeds = Feed::Find->find_in_html(\$content, $res->uri);

    return $feeds[0] if @feeds;
    return;
}
55
# Cheap content sniffing: does the referenced text look like an RSS 2.0,
# RSS 1.0 (RDF) or Atom document?
#
# Arguments:
#   $self        - invocant; unused
#   $content_ref - reference to the scalar holding the document text
#
# Returns the result of the pattern match (true when a feed-like root
# element is found).  Returns nothing when no defined content is supplied.
sub looks_like_feed {
    my($self, $content_ref) = @_;

    # Nothing to sniff; also avoids dereferencing a non-reference.
    return unless ref $content_ref && defined $$content_ref;

    # Everything the earlier pattern matched still matches; in addition:
    #   * any whitespace (not only a space) may follow "<rss"
    #   * xmlns does not have to be the first attribute of <feed>
    #   * attribute values may use single or double quotes
    return $$content_ref =~ m{
          <rss\s
        | <rdf:RDF\s+.*?xmlns=["']http://purl\.org/rss
        | <feed\s[^>]*?xmlns=["']
    }xs;
}
60
61 1;
Note: See TracBrowser for help on using the browser.