From: Dobrica Pavlinusic Date: Tue, 30 Oct 2007 20:11:04 +0000 (+0000) Subject: r1373@llin: dpavlin | 2007-10-30 21:11:01 +0100 X-Git-Url: http://git.rot13.org/?p=webpac2;a=commitdiff_plain;h=082f53e42015c44d05b5fe2a1b753cd647a6e741;ds=sidebyside r1373@llin: dpavlin | 2007-10-30 21:11:01 +0100 use KinoSearch::Simple and convert to new Output API git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@914 07558da8-63fa-0310-ba24-9fe276d99e06 --- diff --git a/lib/WebPAC/Output/KinoSearch.pm b/lib/WebPAC/Output/KinoSearch.pm index 2c03ec6..1a87496 100644 --- a/lib/WebPAC/Output/KinoSearch.pm +++ b/lib/WebPAC/Output/KinoSearch.pm @@ -3,10 +3,18 @@ package WebPAC::Output::KinoSearch; use warnings; use strict; -use base qw/WebPAC::Common/; - -use KinoSearch::InvIndexer; -use KinoSearch::Analysis::PolyAnalyzer; +use base qw/WebPAC::Common WebPAC::Output Class::Accessor/; +__PACKAGE__->mk_accessors(qw( + path + database + encoding + clean + + index +)); + +use KinoSearch::Simple; +use File::Path; use Encode qw/from_to/; use Data::Dump qw/dump/; use Storable; @@ -17,11 +25,11 @@ WebPAC::Output::KinoSearch - Create KinoSearch full text index =head1 VERSION -Version 0.03 +Version 0.04 =cut -our $VERSION = '0.03'; +our $VERSION = '0.04'; =head1 SYNOPSIS @@ -34,35 +42,25 @@ type C. Open KinoSearch index - my $est = new WebPAC::Output::KinoSearch( - index_path => '/path/to/invindex', - fields => qw/name of all filelds used/, + my $est = new WebPAC::Output::KinoSearch({ + path => '/path/to/invindex', database => 'demo', - label => 'node label', encoding => 'iso-8859-2', clean => 1, - ); + }); Options are: =over 4 -=item index_path +=item path path to KinoSearch index to use -=item fields - -name of all fields used in this index - =item database name of database from which data comes -=item label - -label for node (optional) - =item encoding character encoding of C if it's differenet than C @@ -73,147 +71,66 @@ index. =cut -sub new { - my $class = shift; - my $self = {@_}; - bless($self, $class); +sub init { + my $self = shift; my $log = $self->_get_logger; #$log->debug("self: ", sub { dump($self) }); - foreach my $p (qw/index_path fields database/) { - $log->logdie("need $p") unless ($self->{$p}); + foreach my $p (qw/path database/) { + $log->logdie("need $p") unless ($self->$p); } - $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY'); - - $self->{encoding} ||= 'ISO-8859-2'; +# $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY'); - $self->{index_path} .= '/' . $self->{database}; + $self->encoding( 'ISO-8859-2' ) unless $self->encoding; - $self->{clean} = 1 if (! -e $self->{index_path} . '/segments'); + if ( ! -e $self->path ) { + mkpath $self->path || $log->logdie("can't create ", $self->path,": $!"); + $log->info("created ", $self->path); + } - $log->info("using", $self->{clean} ? ' new' : '', " index $self->{index_path} with encoding $self->{encoding}"); + my $path = $self->path . '/' . $self->database; - my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' ); + $log->info("using index $path with encoding ", $self->encoding); - $self->{invindex} = KinoSearch::InvIndexer->new( - invindex => $self->{index_path}, - create => $self->{clean}, - analyzer => $analyzer, + my $index = KinoSearch::Simple->new( + path => $path, + language => 'en', ); - my $fields_path = $self->{index_path} . '/fields.storable'; - $fields_path =~ s#//#/#g; - if (-e $fields_path) { - $self->{fields} = retrieve($fields_path) || - $log->warn("can't open $fields_path: $!"); - } else { - $log->error("This will be dummy run since no fields statistics are found!"); - $log->error("You will have to re-run indexing to get search results!"); - $self->{dummy_run} = 1; - } - $self->{fields_path} = $fields_path; - - foreach my $f (@{ $self->{fields} }) { - $self->{invindex}->spec_field( - name => $f, -# boost => 10, - stored => 1, - indexed => 1, - vectorized => 0, - ); - } + $log->logdie("can't open $path: $!") unless $index; + + $self->index( $index ); - $self ? return $self : return undef; } =head2 add -Adds one entry to database. - - $est->add( - id => 42, - ds => $ds, - type => 'display', - text => 'optional text from which snippet is created', - ); - -This function will create entries in index using following URI format: +Adds one entry - C - -Each tag in C with specified C will create one -attribute and corresponding hidden text (used for search). + $est->add( 42, $ds ); =cut sub add { my $self = shift; - my $args = {@_}; + my ( $id, $ds ) = @_; my $log = $self->_get_logger; + $log->logdie("need id") unless defined $id; + $log->logdie("need ds") unless $ds; - my $database = $self->{'database'} || $log->logconfess('no database in $self'); - $log->logconfess('need invindex in object') unless ($self->{'invindex'}); + $log->debug("id: $id ds = ",dump($ds)); - foreach my $p (qw/id ds type/) { - $log->logdie("need $p") unless ($args->{$p}); - } + my $hash = $self->ds_to_hash( $ds, 'search' ) || return; - my $type = $args->{'type'}; - my $id = $args->{'id'}; + warn "add( $id, ",dump($ds)," ) => ", dump( $hash ); - my $uri = "file:///$type/$database/$id"; - $log->debug("creating $uri"); - - my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )"); - - sub _add_value($$$$$) { - my ($self,$log,$doc,$n,$v) = @_; - return unless ($v); - - $self->{value_usage}->{$n}++; - return if ($self->{dummy_run}); - - eval { $doc->set_value($n, $self->convert($v) ) }; - $log->warn("can't insert: $n = $v") if ($@); - } - - _add_value($self,$log,$doc, 'uri', $uri); - - $log->debug("ds = ", sub { dump($args->{'ds'}) } ); - - # filter all tags which have type defined - my @tags = grep { - ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} ) - } keys %{ $args->{'ds'} }; - - $log->debug("tags = ", join(",", @tags)); - - return unless (@tags); - - foreach my $tag (@tags) { - - my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} }); - - next if (! $vals); - - $vals = $self->convert( $vals ) or - $log->logdie("can't convert '$vals' to UTF-8"); - - _add_value($self, $log, $doc, $tag, $vals ); - } - - if (my $text = $args->{'text'}) { - _add_value($self, $log, $doc, 'bodytext', $text ); - } - - #$log->debug("adding ", sub { $doc->dump_draft } ); - $self->{invindex}->add_doc($doc) || $log->warn("can't add document $uri"); + $self->index->add_doc( $hash ); return 1; } @@ -231,17 +148,7 @@ sub finish { my $log = $self->_get_logger(); - $log->info("finish index writing to disk"); - $self->{invindex}->finish; - - $log->info("writing value usage file"); - - # add fields from last run - map { $self->{value_usage}->{$_}++ } @{ $self->{fields} }; - - my @fields = keys %{ $self->{value_usage} }; - store \@fields, $self->{fields_path} || - $log->warn("can't write $self->{fields_path}: $!"); + $log->info("dummy finish"); } @@ -265,7 +172,7 @@ Dobrica Pavlinusic, C<< >> =head1 COPYRIGHT & LICENSE -Copyright 2005 Dobrica Pavlinusic, All Rights Reserved. +Copyright 2005-2007 Dobrica Pavlinusic, All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. diff --git a/t/5-output-kinosearch.t b/t/5-output-kinosearch.t new file mode 100755 index 0000000..03fb846 --- /dev/null +++ b/t/5-output-kinosearch.t @@ -0,0 +1,73 @@ +#!/usr/bin/perl -w + +use Test::More tests => 14; +use Test::Exception; +use Cwd qw/abs_path/; +use KinoSearch; +use File::Slurp; +use Data::Dump qw/dump/; +use blib; +use strict; + +BEGIN { +use_ok( 'WebPAC::Output::KinoSearch' ); +} + +my $debug = shift @ARGV; + +ok(my $abs_path = abs_path($0), "abs_path"); +$abs_path =~ s#/[^/]*$#/#; # +diag "abs_path: $abs_path"; +my $path = "$abs_path/kino/"; + +ok(my $out = new WebPAC::Output::KinoSearch({ + path => $path, + database => 'test', +}), "new"); + +ok( $out->init, 'init' ); + +my $ds = { + 'Source' => { + 'name' => 'Izvor: ', + 'search' => [ 'foo' ] + }, + 'ID' => { + 'search' => 'id', + }, + 'Array' => { + 'search' => [ qw/a1 a2 s3 a4 a5/ ], + }, +}; + +throws_ok { $out->add( ) } qr/need id/, 'add without params'; +throws_ok { $out->add( 42 ) } qr/need ds/, 'add without ds'; + +ok( $out->add( 42, $ds ), 'add 42' ); + +ok( $out->add( 99, { foo => { search => 'bar' } } ), 'add 99' ); + +ok( $out->add( 100, { foo => { search => [ qw/foo bar baz/ ] } } ), 'add 100' ); + +ok( -e $out->path, "created $path" ); + +ok( my $index = $out->index, 'have index' ); + +diag $out->path," eq ",$path; +cmp_ok( $out->path, 'eq', $path, 'path' ); + +my $query_string = 'foo'; + +my $total_hits = $index->search( + query => $query_string, + offset => 0, + num_wanted => 10, +); + +diag "Total hits: $total_hits\n"; +while ( my $hit = $index->fetch_hit_hashref ) { + diag dump($hit); +} + +ok( $out->finish, 'finish' ); +