Changeset 1937
- Timestamp:
- 03/14/07 17:09:49
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/plagger/lib/Plagger/Plugin/Aggregator/Xango.pm
r683 r1937 7 7 use strict; 8 8 use base qw( Plagger::Plugin::Aggregator::Simple ); 9 use Plagger::FeedParser; 10 use URI::Fetch; 11 use HTTP::Status; 9 12 use POE; 10 13 use Xango::Broker::Push; … … 19 22 Alias => 'xgbroker', 20 23 HandlerAlias => 'xghandler', 21 HttpCompArgs => [ Agent => "Plagger/$Plagger::VERSION (http://plagger.org/)", Timeout => $self->conf->{timeout} || 10 ], 24 HttpCompArgs => [ 25 Agent => $self->conf->{agent} || "Plagger/$Plagger::VERSION (http://plagger.org/)", 26 Timeout => $self->conf->{timeout} || 10 27 ], 22 28 %{$self->conf->{xango_args} || {}}, 23 29 ); … … 47 53 48 54 $context->log(info => "Fetch $url"); 49 POE::Kernel->post($self->{xango_alias}, 'enqueue_job', Xango::Job->new(uri => URI->new($url), redirect => 0)); 55 56 my $job = Xango::Job->new( 57 uri => URI->new($url), 58 redirect => 0, 59 is_original_request => 1 60 ); 61 POE::Kernel->post($self->{xango_alias}, 'enqueue_job', $job); 50 62 } 51 63 … … 115 127 return unless $url =~ m!^https?://!i; 116 128 $_[KERNEL]->post($_[HEAP]->{BROKER_ALIAS}, 'enqueue_job', Xango::Job->new(uri => URI->new($url), redirect => $redirect)); 117 return; 129 return; 130 } 131 132 if (! $r->is_success) { 133 Plagger->context->log(error => "Fetch for $url failed: " . $r->code); 134 return; 135 } 136 137 # P::P::A::Simple does this bit as the first thing when aggregate() 138 # gets called. But since we're going through Xango, we need to figure 139 # out if this is the "original" feed or not 140 141 if (! $job->notes('is_original_request')) { 142 $plugin->handle_feed($url, $r->content_ref); 118 143 } else { 119 return unless $r->is_success; 144 # If this is the original request, check if the content we've 145 # just fetched is a parsable feed. If not, refetch what's claimed 146 # to be the feed. 120 147 121 my $ct = $r->content_type; 122 if ( $Feed::Find::IsFeed{$ct} ) { 148 # XXX - Hack. 
P::F->discover likes to have URI::Fetch::Response 149 my $ufr = TO_URI_FETCH_RESPONSE( $r ); 150 my $feed_url = Plagger::FeedParser->discover($ufr); 151 if ($feed_url eq $url) { 123 152 $plugin->handle_feed($url, $r->content_ref); 124 } else { 125 my @feeds = Feed::Find->find_in_html($r->content_ref, $url); 126 if (@feeds) { 127 my $feed_url = $feeds[0]; 128 return unless $feed_url =~ m!^https?://!i; 153 } elsif($feed_url) { 154 # OMG we should alias Feed so it can be looked up with $feed_url, too 155 $plugin->{_url2feed}->{$feed_url} = $plugin->{_url2feed}->{$url}; 129 156 130 # OMG we should alias Feed so it can be looked up with $feed_url, too 131 $plugin->{_url2feed}->{$feed_url} = $plugin->{_url2feed}->{$url}; 132 133 $_[KERNEL]->post($_[HEAP]->{BROKER_ALIAS}, 'enqueue_job', Xango::Job->new(uri => URI->new($feed_url), redirect => $redirect)); 134 } 135 return; 157 $_[KERNEL]->post($_[HEAP]->{BROKER_ALIAS}, 'enqueue_job', Xango::Job->new(uri => URI->new($feed_url), redirect => $redirect)); 136 158 } 137 159 } … … 146 168 } 147 169 170 sub TO_URI_FETCH_RESPONSE 171 { 172 my ($r) = @_; 173 174 my $ufr = URI::Fetch::Response->new(); 175 $ufr->http_status($r->code); 176 $ufr->http_response($r); 177 $ufr->status( 178 $r->previous && $r->previous->code == &HTTP::Status::RC_MOVED_PERMANENTLY ? &URI::Fetch::URI_MOVED_PERMANENTLY : 179 $r->code == &HTTP::Status::RC_GONE ? &URI::Fetch::URI_GONE : 180 $r->code == &HTTP::Status::RC_NOT_MODIFIED ? &URI::Fetch::URI_NOT_MODIFIED : 181 &URI::Fetch::URI_OK 182 ); 183 $ufr->etag($r->header('ETag')); 184 $ufr->last_modified($r->header('Last-Modified')); 185 $ufr->uri($r->request->uri); 186 $ufr->content($r->content); 187 $ufr->content_type($r->content_type); 188 189 return $ufr; 190 } 191 148 192 1; 149 193
