X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=lib%2FWebPAC%2FOutput%2FKinoSearch.pm;h=8129e9492181c5ce39d9a54fd5b21abef9de3285;hb=a400e5b81ce1e2af3b222875562e105330061157;hp=7f562a4afd82ab62808617025242d7207bfb8dac;hpb=459ddaad5ded79415230f0507cc2be86764a0d50;p=webpac2 diff --git a/lib/WebPAC/Output/KinoSearch.pm b/lib/WebPAC/Output/KinoSearch.pm index 7f562a4..8129e94 100644 --- a/lib/WebPAC/Output/KinoSearch.pm +++ b/lib/WebPAC/Output/KinoSearch.pm @@ -3,12 +3,21 @@ package WebPAC::Output::KinoSearch; use warnings; use strict; -use base qw/WebPAC::Common/; - -use KinoSearch::InvIndexer; -use KinoSearch::Analysis::PolyAnalyzer; +use base qw/WebPAC::Common WebPAC::Output Class::Accessor/; +__PACKAGE__->mk_accessors(qw( + path + database + encoding + clean + + index +)); + +use KinoSearch::Simple; +use File::Path; use Encode qw/from_to/; -use Data::Dumper; +use Data::Dump qw/dump/; +use Storable; =head1 NAME @@ -16,11 +25,11 @@ WebPAC::Output::KinoSearch - Create KinoSearch full text index =head1 VERSION -Version 0.01 +Version 0.05 =cut -our $VERSION = '0.01'; +our $VERSION = '0.05'; =head1 SYNOPSIS @@ -33,35 +42,25 @@ type C. Open KinoSearch index - my $est = new WebPAC::Output::KinoSearch( - index_path => '/path/to/invindex', - fields => qw/name of all filelds used/, + my $out = new WebPAC::Output::KinoSearch({ + path => '/path/to/invindex', database => 'demo', - label => 'node label', encoding => 'iso-8859-2', clean => 1, - ); + }); Options are: =over 4 -=item index_path +=item path path to KinoSearch index to use -=item fields - -name of all fields used in this index - =item database name of database from which data comes -=item label - -label for node (optional) - =item encoding character encoding of C if it's differenet than C @@ -70,129 +69,79 @@ index. =back +=head2 init + + $out->init; + =cut -sub new { - my $class = shift; - my $self = {@_}; - bless($self, $class); +sub init { + my $self = shift; my $log = $self->_get_logger; - #$log->debug("self: ", sub { Dumper($self) }); + #$log->debug("self: ", sub { dump($self) }); - foreach my $p (qw/index_path fields database/) { - $log->logdie("need $p") unless ($self->{$p}); + foreach my $p (qw/path database/) { + $log->logdie("need $p") unless ($self->$p); } - $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY'); +# $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY'); - $self->{encoding} ||= 'ISO-8859-2'; + $self->encoding( 'ISO-8859-2' ) unless $self->encoding; + + if ( ! -e $self->path ) { + mkpath $self->path || $log->logdie("can't create ", $self->path,": $!"); + $log->info("created ", $self->path); + } elsif ( $self->clean ) { + $log->info("removing existing ", $self->path); + rmtree $self->path || $log->logdie("can't remove ", $self->path,": $!"); + mkpath $self->path || $log->logdie("can't create ", $self->path,": $!"); + } - $log->info("using index $self->{index_path} with encoding $self->{encoding}"); + my $path = $self->path . '/' . $self->database; - my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' ); + $log->info("using index $path with encoding ", $self->encoding); - $self->{invindex} = KinoSearch::InvIndexer->new( - invindex => $self->{index_path}, - create => $self->{clean}, - analyzer => $analyzer, + my $index = KinoSearch::Simple->new( + path => $path, + language => 'en', ); - foreach my $f (@{ $self->{fields} }) { - $self->{invindex}->spec_field( - name => $f, -# boost => 10, - stored => 1, - indexed => 1, - vectorized => 0, - ); - } + $log->logdie("can't open $path: $!") unless $index; + + $self->index( $index ); - $self ? return $self : return undef; } =head2 add -Adds one entry to database. - - $est->add( - id => 42, - ds => $ds, - type => 'display', - text => 'optional text from which snippet is created', - ); - -This function will create entries in index using following URI format: - - C +Adds one entry -Each tag in C with specified C will create one -attribute and corresponding hidden text (used for search). + $out->add( 42, $ds ); =cut sub add { my $self = shift; - my $args = {@_}; + my ( $id, $ds ) = @_; my $log = $self->_get_logger; + $log->logdie("need id") unless defined $id; + $log->logdie("need ds") unless $ds; - my $database = $self->{'database'} || $log->logconfess('no database in $self'); - $log->logconfess('need invindex in object') unless ($self->{'invindex'}); - - foreach my $p (qw/id ds type/) { - $log->logdie("need $p") unless ($args->{$p}); - } - - my $type = $args->{'type'}; - my $id = $args->{'id'}; - - my $uri = "file:///$type/$database/$id"; - $log->debug("creating $uri"); + $log->debug("id: $id ds = ",dump($ds)); - my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )"); + my $hash = $self->ds_to_hash( $ds, 'search' ) || return; - sub add_value($$$$$) { - my ($self,$log,$doc,$n,$v) = @_; - return unless ($v); - eval { $doc->set_value($n, $self->convert($v) ) }; - $log->warn("can't insert: $n = $v") if ($@); - } - - add_value($self,$log,$doc, 'uri', $uri); - - $log->debug("ds = ", sub { Dumper($args->{'ds'}) } ); - - # filter all tags which have type defined - my @tags = grep { - ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} ) - } keys %{ $args->{'ds'} }; - - $log->debug("tags = ", join(",", @tags)); + $hash->{database} ||= $self->database; + $hash->{id} ||= $id; - return unless (@tags); + $log->debug("add( $id, ", sub { dump($ds) }," ) => ", sub { dump( $hash ) }); - foreach my $tag (@tags) { - - my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} }); - - next if (! $vals); - - $vals = $self->convert( $vals ) or - $log->logdie("can't convert '$vals' to UTF-8"); - - add_value($self, $log, $doc, $tag, $vals ); - } - - if (my $text = $args->{'text'}) { - add_value($self, $log, $doc, 'bodytext', $text ); - } - - #$log->debug("adding ", sub { $doc->dump_draft } ); - $self->{invindex}->add_doc($doc) || $log->warn("can't add document $uri"); + $self->index->add_doc( $hash ); return 1; } @@ -201,29 +150,17 @@ sub add { Close index - $index->finish; + $out->finish; =cut sub finish { my $self = shift; - $self->_get_logger()->info("finish index writing to disk"); - $self->{invindex}->finish; -} - -=head2 convert + my $log = $self->_get_logger(); - my $utf8_string = $self->convert('string in codepage'); - -=cut - -sub convert { - my $self = shift; + $log->info("dummy finish"); - my $text = shift || return; - from_to($text, $self->{encoding}, 'UTF-8'); - return $text; } =head1 AUTHOR @@ -232,7 +169,7 @@ Dobrica Pavlinusic, C<< >> =head1 COPYRIGHT & LICENSE -Copyright 2005 Dobrica Pavlinusic, All Rights Reserved. +Copyright 2005-2007 Dobrica Pavlinusic, All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.