Changeset 1046

Show
Ignore:
Timestamp:
07/03/06 11:31:00
Author:
miyagawa
Message:
  • Planet: support URL based search. Fixes #286
  • Planet: support language based grouping of search engines. Fixes #288
  • start using Test::Base. ingy++
  • Plagger->bootstrap() now returns $context so that it can be used in test scripts.
  • Aggregator::Null now works.
  • Added $plugin->load_assets method to unify the asset-loading mechanism
Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • branches/refactoring-planet/plagger/Makefile.PL

    r1006 r1046  
    3131 
    3232build_requires(Test::More => 0.42); 
     33build_requires(Test::Base => 0.52); 
    3334 
    3435features( 
     
    277278tests 't/*.t t/*/*.t t/*/*/*.t'; 
    278279 
     280use_test_base; 
     281 
    279282auto_include; 
    280283auto_install; 
  • branches/refactoring-planet/plagger/lib/Plagger.pm

    r1037 r1046  
    88use File::Copy; 
    99use File::Basename; 
    10 use File::Find::Rule; 
     10use File::Find::Rule (); # don't import rule()! 
    1111use YAML; 
    1212use Storable; 
     
    6363    $self->rewrite_config if @{ $self->{rewrite_tasks} }; 
    6464    $self->run(); 
     65 
     66    $self; 
    6567} 
    6668 
  • branches/refactoring-planet/plagger/lib/Plagger/Plugin.pm

    r1037 r1046  
    1111 
    1212use FindBin; 
     13use File::Find::Rule (); 
    1314use File::Spec; 
    1415 
     
    138139} 
    139140 
     141sub load_assets { 
     142    my($self, $rule, $callback) = @_; 
     143 
     144    my $context = Plagger->context; 
     145 
     146    my $dir = $self->assets_dir; 
     147 
     148    # $rule isa File::Find::Rule 
     149    for my $file ($rule->in($dir)) { 
     150        $callback->($file); 
     151    } 
     152} 
     153 
    1401541; 
  • branches/refactoring-planet/plagger/lib/Plagger/Plugin/Aggregator/Null.pm

    r921 r1046  
    1414    my($self, $context, $args) = @_; 
    1515    $context->update->add($args->{feed}); 
     16    return 1; 
    1617} 
    1718 
  • branches/refactoring-planet/plagger/lib/Plagger/Plugin/Subscription/Planet.pm

    r1038 r1046  
    66use URI::Escape; 
    77 
    8 my @sites = ( 
    9     'http://www.blogpulse.com/rss?query=%s&sort=date&operator=and', 
    10     'http://www.feedster.jp/search/lang/all/%s', 
    11     'http://blogsearch.google.com/blogsearch_feeds?hl=en&q=%s&scoring=d&ie=utf-8&num=100&output=rss', 
    12     'http://blog-search.yahoo.co.jp/rss?p=%s:euc-jp', 
    13     'http://blog.goo.ne.jp/search/search.php?status=select&tg=all&st=time&dc=10&dp=all&bu=&ts=all&MT=%s:euc-jp&da=all&rss=1', 
    14     'http://www.namaan.net/rss?namaan=NAMA&ty=5&query=%s', 
    15     'http://bulkfeeds.net/app/search2.rdf?q=%s', 
    16     'http://rss.sf.livedoor.com/search?q=%s&sf=update_date&start=0', 
    17     'http://b.hatena.ne.jp/t/%s?mode=rss&sort=eid&threshold=1', 
    18     'http://d.hatena.ne.jp/keyworddiary/%s?mode=rss', 
    19     'http://feeds.technorati.com/feed/posts/tag/%s', 
    20     'http://bloglines.com/search?q=%s&ql=any&s=f&pop=l&news=m&n=100&format=rss', 
    21 ); 
    22  
    238sub load { 
    249    my($self, $context) = @_; 
     
    2712       $keyword = [ $keyword ] unless ref $keyword; 
    2813 
     14    my $lang = $self->conf->{lang} || 'default'; 
     15    $lang = [ $lang ] unless ref $lang; 
     16 
     17    $self->load_assets( 
     18        File::Find::Rule->file->name([ map "$_.yaml", @$lang ]), 
     19        sub { 
     20            my($file) = @_; 
     21            my $data = YAML::LoadFile($file); 
     22            push @{ $self->{engines} }, @{ $data->{engines} }; 
     23        }, 
     24    ); 
     25 
    2926    for my $kw (@$keyword) { 
    30         for my $site (@sites) { 
    31             my $site_url = $site; 
    32             $site_url =~ s{%s(?::([\w\-]+))?}{ 
    33                 my $query = $kw; 
    34                 if ($1) { 
    35                     Encode::from_to($query, "utf-8" => $1); 
    36                 } 
    37                 URI::Escape::uri_escape($query); 
    38             }eg; 
    39             push @{$self->conf->{feed}}, { url => $site_url } 
     27        for my $site (@{ $self->{engines} }) { 
     28            my $site_url = $site; # copy 
     29 
     30            # use eval ... die to skip if there's no url/keyword 
     31            eval { 
     32                $site_url =~ s{{([\w\-\:]+)}}{ 
     33                    my($key, $encoding) = split /:/, $1; 
     34 
     35                    my $data = $self->conf->{$key} or die "$key is not there"; 
     36                    if ($encoding && $encoding ne 'utf-8') { 
     37                        Encode::from_to($data, "utf-8" => $encoding); 
     38                    } 
     39 
     40                    my $chunk = URI::Escape::uri_escape($data); 
     41                    $chunk =~ s/%20/+/g; # hack 
     42                    $chunk; 
     43                }eg; 
     44                push @{$self->conf->{feed}}, { url => $site_url } 
     45            }; 
    4046        } 
    4147    } 
     
    5763    config: 
    5864      keyword: Plagger 
     65      lang: en 
    5966 
    6067=head1 DESCRIPTION 
     
    6976=item keyword 
    7077 
    71 The keyword to use as a query in web search engines. 
     78The keyword to use as a query in web search engines. Required. 
     79 
     80=item lang 
     81 
     82Language code used either 1) to select the list of search engines or 2) to be 
     83passed to the search query.  Optional. 
     84 
     85For example, technorati.jp will be added if you use I<ja>, while 
     86technorati.com will be added if you use I<en>. Default is to search 
     87everything. 
    7288 
    7389=back 
     90 
     91=head1 EXAMPLES 
     92 
     93  # search "Plagger" on default engines 
     94  - module: Subscription::Planet 
     95    config: 
     96      keyword: Plagger 
     97 
     98  # search "Pokemon" on Japanese search engines 
     99  - module: Subscription::Planet 
     100    config: 
     101      keyword: Pokemon 
     102      lang: ja 
     103 
     104  # search "Plagger" and pages linking to "http://plagger.org/" 
     105  - module: Subscription::Planet 
     106    config: 
     107      keyword: Plagger 
     108      url: http://plagger.org/ 
    74109 
    75110=head1 AUTHOR 
  • branches/refactoring-planet/plagger/t/plugins/Subscription-Planet/planet.t

    r1044 r1046  
    22use FindBin; 
    33use File::Spec; 
    4 use Test::More tests => 1; 
    54 
    6 use Plagger; 
    7 Plagger->bootstrap(config => \<<CONFIG); 
     5use t::TestPlagger; 
     6 
     7plan tests => 1 * blocks; 
     8 
     9run { 
     10    my $block = shift; 
     11    my $context = $block->input; 
     12    eval $block->expected; 
     13    fail $@ if $@; 
     14}; 
     15 
     16 
     17__END__ 
     18 
     19=== Test simple keyword 
     20--- input config 
    821global: 
    922  log: 
    1023    level: error 
     24  assets_path: $FindBin::Bin/../../../assets 
    1125plugins: 
    1226  - module: Subscription::Planet 
    1327    config: 
    1428      keyword: foo 
    15   - module: Aggregator::Test 
    16 CONFIG 
     29  - module: Aggregator::Null 
     30--- expected 
     31is $context->subscription->feeds->[0]->url, 'http://feeds.technorati.com/feed/posts/tag/foo', $block->name 
    1732 
    18 package Plagger::Plugin::Aggregator::Test; 
    19 use base qw( Plagger::Plugin ); 
     33=== Test keyword with space in it 
     34--- input config 
     35global: 
     36  log: 
     37    level: error 
     38  assets_path: $FindBin::Bin/../../../assets 
     39plugins: 
     40  - module: Subscription::Planet 
     41    config: 
     42      keyword: foo bar 
     43  - module: Aggregator::Null 
     44--- expected 
     45is $context->subscription->feeds->[0]->url, 'http://feeds.technorati.com/feed/posts/tag/foo+bar', $block->name; 
    2046 
    21 sub register { 
    22     my($self, $context) = @_; 
    23     $context->register_hook( 
    24         $self, 
    25         'customfeed.handle' => \&load, 
    26         'aggregator.finalize' => \&test, 
    27     ); 
    28 
     47=== Test multibyte keyword 
     48--- input config 
     49global: 
     50  log: 
     51    level: error 
     52  assets_path: $FindBin::Bin/../../../assets 
     53plugins: 
     54  - module: Subscription::Planet 
     55    config: 
     56      keyword: ぷらがー 
     57  - module: Aggregator::Null 
     58--- expected 
     59is $context->subscription->feeds->[0]->url, 'http://feeds.technorati.com/feed/posts/tag/%E3%81%B7%E3%82%89%E3%81%8C%E3%83%BC', $block->name; 
    2960 
    30 sub load { 
    31     my($self, $context, $args) = @_; 
    32     push @{$self->{feeds}}, $args->{feed}->url; 
    33     return 1; 
    34 
     61=== Test keyword and URL 
     62--- input config 
     63global: 
     64  log: 
     65    level: error 
     66  assets_path: $FindBin::Bin/../../../assets 
     67plugins: 
     68  - module: Subscription::Planet 
     69    config: 
     70      keyword: Plagger 
     71      url: http://plagger.org/ 
     72  - module: Aggregator::Null 
     73--- expected 
     74is $context->subscription->feeds->[-1]->url, "http://www.bloglines.com/search?q=bcite:http%3A%2F%2Fplagger.org%2F&ql=any&s=f&pop=n&news=m&n=100&format=rss", $block->name; 
    3575 
    36 sub test { 
    37     my $self = shift; 
    38     ::is $self->{feeds}[0], 'http://www.blogpulse.com/rss?query=foo&sort=date&operator=and'; 
    39 
     76=== Test lang=ja 
     77--- input config 
     78global: 
     79  log: 
     80    level: error 
     81  assets_path: $FindBin::Bin/../../../assets 
     82plugins: 
     83  - module: Subscription::Planet 
     84    config: 
     85      keyword: Plagger 
     86      lang: ja 
     87  - module: Aggregator::Null 
     88--- expected 
     89is $context->subscription->feeds->[0]->url, "http://www.feedster.jp/search/type/rss/Plagger", $block->name; 
     90 
     91=== Test lang=ja with euc-jp 
     92--- input config 
     93global: 
     94  log: 
     95    level: error 
     96  assets_path: $FindBin::Bin/../../../assets 
     97plugins: 
     98  - module: Subscription::Planet 
     99    config: 
     100      keyword: しょこたん 
     101      lang: ja 
     102  - module: Aggregator::Null 
     103--- expected 
     104is $context->subscription->feeds->[1]->url, "http://blog-search.yahoo.co.jp/rss?p=%A4%B7%A4%E7%A4%B3%A4%BF%A4%F3", $block->name; 
     105