Changeset 1683
- Timestamp:
- 09/15/06 00:19:30
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/plagger/lib/Plagger/Plugin/CustomFeed/Simple.pm
r1673 r1683 6 6 use HTML::TokeParser; 7 7 use HTML::ResolveLink; 8 use HTML::TreeBuilder::XPath; 8 9 use Plagger::UserAgent; 9 10 use Plagger::Util qw( decode_content extract_title ); … … 20 21 my($self, $context, $args) = @_; 21 22 22 if ( my $match = $args->{feed}->meta->{follow_link}) {23 if ( my $match = $args->{feed}->meta->{follow_link} || $args->{feed}->meta->{follow_xpath} ) { 23 24 $args->{match} = $match; 24 25 return $self->aggregate($context, $args); … … 45 46 my $title = extract_title($content); 46 47 47 my $resolver = HTML::ResolveLink->new(base => $url);48 $content = $resolver->resolve($content);49 50 48 my $feed = Plagger::Feed->new; 51 49 $feed->title($title); … … 54 52 my $re = $args->{match}; 55 53 56 my %seen; 57 my $parser = HTML::TokeParser->new(\$content); 58 while (my $token = $parser->get_tag('a')) { 59 next unless ($token->[1]->{href} || '') =~ /$re/; 54 if( $args->{feed}->meta->{follow_link} ) { 55 my $resolver = HTML::ResolveLink->new(base => $url); 56 $content = $resolver->resolve($content); 60 57 61 my $text = $parser->get_trimmed_text('/a'); 62 next if !$text || $text eq '[IMG]'; 58 my %seen; 59 my $parser = HTML::TokeParser->new(\$content); 60 while (my $token = $parser->get_tag('a')) { 61 next unless ($token->[1]->{href} || '') =~ /$re/; 63 62 64 my $url = URI->new_abs($token->[1]->{href}, $url);65 next if $seen{$url->as_string}++;63 my $text = $parser->get_trimmed_text('/a'); 64 next if !$text || $text eq '[IMG]'; 66 65 67 my $entry = Plagger::Entry->new; 68 $entry->title($text); 69 $entry->link($url); 70 $feed->add_entry($entry); 66 my $url = URI->new_abs($token->[1]->{href}, $url); 67 next if $seen{$url->as_string}++; 71 68 72 $context->log(debug => "Add $token->[1]->{href} ($text)"); 69 my $entry = Plagger::Entry->new; 70 $entry->title($text); 71 $entry->link($url); 72 $feed->add_entry($entry); 73 74 $context->log(debug => "Add $token->[1]->{href} ($text)"); 75 } 76 } 77 else { 78 my $tree = HTML::TreeBuilder::XPath->new; 79 $tree->parse($content); 80 $tree->eof; 81 82 for my $child ( $tree->findnodes($re || '//a') ) { 83 my $href = $child->attr('href') or next; 84 my $title = $child->attr('title') || $child->as_text; 85 86 my $entry = Plagger::Entry->new; 87 $entry->title($title); 88 $entry->link($href); 89 $feed->add_entry($entry); 90 91 $context->log(debug => "Add $href ($title)"); 92 } 73 93 } 74 94 … … 94 114 meta: 95 115 follow_link: /headlines/ 116 - url: http://d.hatena.ne.jp/antipop/20050628/1119966355 117 meta: 118 follow_xpath: //ul[@class="xoxo" or @class="subscriptionlist"]//a 96 119 97 120 - module: CustomFeed::Simple
