| 1 |
package Plagger::Plugin::Search::KinoSearch; |
|---|
| 2 |
use strict; |
|---|
| 3 |
use base qw( Plagger::Plugin ); |
|---|
| 4 |
|
|---|
| 5 |
use Encode; |
|---|
| 6 |
use KinoSearch::Index::Term; |
|---|
| 7 |
use KinoSearch::InvIndexer; |
|---|
| 8 |
use KinoSearch::Searcher; |
|---|
| 9 |
use KinoSearch::Analysis::PolyAnalyzer; |
|---|
| 10 |
|
|---|
| 11 |
# Wire this plugin's handlers into the Plagger run.
#
# Asks $context to register four hooks on behalf of $self:
#   publish.entry   -> entry
#   plugin.init     -> initialize
#   plugin.finalize -> finalize
#   searcher.search -> search
#
# Returns whatever register_hook returns.
sub register {
    my($self, $context) = @_;

    # Hook name => handler pairs, in the order they are handed over.
    my @hooks = (
        'publish.entry'   => \&entry,
        'plugin.init'     => \&initialize,
        'plugin.finalize' => \&finalize,
        'searcher.search' => \&search,
    );

    $context->register_hook($self, @hooks);
}
|---|
| 21 |
|
|---|
| 22 |
# plugin.init hook: build the analyzer and open the inverted index.
#
# - Defaults the 'invindex' config value to the plugin cache's 'invindex'
#   path when nothing is configured.
# - Stores a PolyAnalyzer (an LCNormalizer followed by a Tokenizer) in
#   $self->{analyzer}; search() passes the same analyzer to its Searcher.
# - Stores an InvIndexer in $self->{indexer}. The index is created only when
#   the configured path does not exist yet.
# - Declares the five document fields; 'title' carries a boost of 3.
#
# Returns the value of the last spec_field call.
sub initialize {
    my($self, $context, $args) = @_;

    $self->conf->{invindex} ||= $self->cache->path_to('invindex');
    my $invindex = $self->conf->{invindex};

    my @stages = (
        KinoSearch::Analysis::LCNormalizer->new,
        KinoSearch::Analysis::Tokenizer->new,
    );
    $self->{analyzer} = KinoSearch::Analysis::PolyAnalyzer->new(
        analyzers => \@stages,
    );

    my $indexer = KinoSearch::InvIndexer->new(
        invindex => $invindex,
        create   => !-e $invindex,
        analyzer => $self->{analyzer},
    );
    $self->{indexer} = $indexer;

    # NOTE(review): 'link' is declared with the default field options, so it
    # presumably goes through the analyzer like the other fields, while
    # entry() deletes old documents by an exact term built from the whole
    # permalink. Confirm whether this field should be declared unanalyzed.
    $indexer->spec_field( name => 'link' );
    $indexer->spec_field( name => 'title', boost => 3 );
    $indexer->spec_field( name => $_ ) for qw( body date );

    return $indexer->spec_field( name => 'author' );
}
|---|
| 47 |
|
|---|
| 48 |
# publish.entry hook: add one entry to the inverted index, replacing any
# document previously indexed for the same permalink.
#
# $args->{entry} is the entry being published. Entries without a permalink
# are skipped, because the permalink is the key used to find the old copy.
#
# Stored fields: link (permalink), title, body (the entry's body_text), and,
# when present, date (formatted as W3CDTF) and author.
#
# Returns the value of the indexer's add_doc call, or nothing when the entry
# was skipped.
sub entry {
    my($self, $context, $args) = @_;

    my $entry     = $args->{entry};
    my $permalink = $entry->permalink;
    return unless $permalink;

    $context->log(info => "Going to index entry " . $permalink);

    my $indexer = $self->{indexer};

    # The permalink is declared and stored under the 'link' field, so the
    # delete term has to name that same field; a term on any other field
    # never matches and re-published entries would accumulate as duplicates.
    # NOTE(review): 'link' is declared in initialize with default options, so
    # its indexed terms may be tokenized pieces of the URL rather than the
    # whole permalink -- confirm that this exact-term delete matches.
    my $term = KinoSearch::Index::Term->new( link => $permalink );
    $indexer->delete_docs_by_term($term);

    my $doc = $indexer->new_doc;
    $doc->set_value( link  => $permalink );
    $doc->set_value( title => $entry->title );
    $doc->set_value( body  => $entry->body_text );

    if ( my $date = $entry->date ) {
        $doc->set_value( date => $date->format('W3CDTF') );
    }
    if ( my $author = $entry->author ) {
        $doc->set_value( author => $author );
    }

    return $indexer->add_doc($doc);
}
|---|
| 66 |
|
|---|
| 67 |
# plugin.finalize hook: call finish on the indexer created in initialize so
# the documents queued by entry() are written out.
#
# No query is run here; searching is served by the searcher.search hook.
# Returns nothing.
sub finalize {
    my($self, $context, $args) = @_;

    $self->{indexer}->finish;

    return;
}
|---|
| 73 |
|
|---|
| 74 |
# searcher.search hook: run $args->{query} against the inverted index and
# return the hits as a Plagger::Feed.
#
# The feed has type 'search:KinoSearch' and title "Search: <query>". Each hit
# becomes one Plagger::Entry; of link, title, body, date and author, only the
# fields that are defined in the hit are copied onto the entry.
sub search {
    my($self, $context, $args) = @_;

    my %searcher_opts = (
        invindex => $self->conf->{invindex},
        analyzer => $self->{analyzer},
    );
    my $searcher = KinoSearch::Searcher->new(%searcher_opts);

    my $query = $args->{query};

    my $feed = Plagger::Feed->new;
    $feed->type('search:KinoSearch');
    $feed->title("Search: $query");

    my @columns = qw( link title body date author );
    my $results = $searcher->search( query => $query );

    while ( my $row = $results->fetch_hit_hashref ) {
        my $entry = Plagger::Entry->new;

        # NOTE(review): 'date' is passed to the entry's date setter exactly as
        # stored (entry() stores a W3CDTF string) -- confirm the setter
        # accepts a plain string.
        for my $name ( grep { defined $row->{$_} } @columns ) {
            $entry->$name( $row->{$name} );
        }

        $feed->add_entry($entry);
    }

    return $feed;
}
|---|
| 98 |
|
|---|
| 99 |
1; |
|---|
| 100 |
|
|---|
| 101 |
__END__ |
|---|
| 102 |
|
|---|
| 103 |
=head1 NAME |
|---|
| 104 |
|
|---|
| 105 |
Plagger::Plugin::Search::KinoSearch - Index entries using KinoSearch |
|---|
| 106 |
|
|---|
| 107 |
=head1 SYNOPSIS |
|---|
| 108 |
|
|---|
| 109 |
- module: Search::KinoSearch |
|---|
| 110 |
config: |
|---|
| 111 |
invindex: /path/to/invindex |
|---|
| 112 |
|
|---|
| 113 |
=head1 DESCRIPTION |
|---|
| 114 |
|
|---|
| 115 |
This plugin stores feed entries in a KinoSearch inverted index. KinoSearch is a |
|---|
| 116 |
loose port of Lucene to Perl/C. |
|---|
| 117 |
|
|---|
| 118 |
=head1 AUTHOR |
|---|
| 119 |
|
|---|
| 120 |
Tatsuhiko Miyagawa |
|---|
| 121 |
|
|---|
| 122 |
=head1 SEE ALSO |
|---|
| 123 |
|
|---|
| 124 |
L<Plagger>, L<KinoSearch> |
|---|
| 125 |
|
|---|
| 126 |
=cut |
|---|