From db74a57fa06de2166941f5b6549ca65915c5f893 Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Thu, 23 Apr 2009 13:20:51 +0000 Subject: [PATCH] r1802@llin: dpavlin | 2009-04-23 15:20:49 +0200 more changes to make indexing of WebPAC::Output::SWISH work (but not tests yet) git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1147 07558da8-63fa-0310-ba24-9fe276d99e06 --- lib/WebPAC/Output/SWISH.pm | 88 ++++++++++++++++---------------------- t/5-output-swish.t | 20 ++++----- vhost/ecas/config.yml | 2 +- 3 files changed, 49 insertions(+), 61 deletions(-) diff --git a/lib/WebPAC/Output/SWISH.pm b/lib/WebPAC/Output/SWISH.pm index 69c1fc7..60ce87f 100644 --- a/lib/WebPAC/Output/SWISH.pm +++ b/lib/WebPAC/Output/SWISH.pm @@ -3,12 +3,21 @@ package WebPAC::Output::SWISH; use warnings; use strict; -use base qw/WebPAC::Common/; +use lib 'lib'; + +use base qw/WebPAC::Common Class::Accessor/; +__PACKAGE__->mk_accessors(qw( + database + type + + index_path +)); use File::Path qw/make_path/; use Data::Dump qw/dump/; use YAML; + =head1 NAME WebPAC::Output::SWISH - Create swish-e full text index @@ -26,9 +35,9 @@ type C. =head2 new - my $out = new WebPAC::Output::SWISH( + my $out = new WebPAC::Output::SWISH({ database => 'demo', - ); + }); Options are: @@ -46,16 +55,14 @@ Name of database will be used to form URI of documents in index. our $dir = 'var/swish'; -sub new { - my $class = shift; - my $self = {@_}; - bless($self, $class); +sub init { + my $self = shift; my $log = $self->_get_logger; - #$log->debug("self: ", sub { dump($self) }); + my $database = $self->database || $log->logdie("need database"); - my $database = $self->{database} || $log->logdie("need database"); + make_path $dir if ! -e $dir; my $path = "$dir/$database.conf"; @@ -80,7 +87,7 @@ UndefinedXMLAttributes auto IndexFile $dir/$database # Croatian ISO-8859-2 characters to unaccented equivalents -TranslateCharacters ¹©ðÐèÈæƾ® ssddcccczz +#TranslateCharacters ¹©ðÐèÈæƾ® ssddcccczz # disable output @@ -91,10 +98,12 @@ DEFAULT_SWISH_CONF close($conf) || die "can't write config $path: $!"; - $self->{_swish_index_path} = "$dir/$database"; + $self->index_path( "$dir/$database" ); my $swish = "swish-e -S prog -c $path"; - open( $self->{_swish_fh}, '-|', $swish ) || die "can't open pipe to $swish: $!"; + open( $self->{_swish_fh}, '|-', $swish ) || die "can't open pipe to $swish: $!"; + + $log->info( "created $path ", -s $path, " bytes for ", $self->index_path ); $self->{stats} = {}; @@ -105,27 +114,9 @@ DEFAULT_SWISH_CONF my $path = $out->index_path; -=cut - -sub index_path { $_[0]->{_swish_index_path} }; - =head2 add -Adds one entry to database. - - $out->add( - id => 42, - ds => $ds, - type => 'display', - text => 'optional text from which snippet is created', - ); - -This function will create entries in index using following URI format: - - C - -Each tag in C with specified C will create one -attribute and corresponding hidden text (used for search). + $out->add( 42, $ds ); =cut @@ -133,29 +124,21 @@ my %escape = ('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"'); my $escape_re = join '|' => keys %escape; sub add { - my $self = shift; - - my $args = {@_}; + my ($self,$id,$ds) = @_; my $log = $self->_get_logger; + $log->debug("id: $id ds = ",sub { dump($ds) }); - my $database = $self->{'database'} || $log->logconfess('no database in $self'); - $log->logconfess('need db in object') unless ($self->{'db'}); + my $database = $self->database || $log->logconfess('no database in $self'); - foreach my $p (qw/id ds type/) { - $log->logdie("need $p") unless ($args->{$p}); - } - - my $type = $args->{'type'}; - my $id = $args->{'id'}; - - my $uri = "$database/$id"; + my $uri = $self->database . "/$id"; $log->debug("creating $uri"); # filter all tags which have type defined + my $type = $self->type || 'search'; my @tags = grep { - ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} ) - } keys %{ $args->{'ds'} }; + ref($ds->{$_}) eq 'HASH' && defined( $ds->{$_}->{$type} ) + } keys %{ $ds }; $log->debug("tags = ", join(",", @tags)); @@ -165,7 +148,10 @@ sub add { foreach my $tag (@tags) { - my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} }); + my $r = ref $ds->{$tag}->{$type}; + die "tag $tag type $type not ARRAY but '$r' = ",dump( $ds->{$tag}->{$type} ) unless $r eq 'ARRAY'; + + my $vals = join(" ", @{ $ds->{$tag}->{$type} }); next if ! $vals; @@ -179,10 +165,10 @@ sub add { my $len = length($xml); - my $fh = $self->{_swish_fh}; + my $fh = $self->{_swish_fh} || die "_swish_fh missing"; - print $fh "Path-Name: $uri\nContent-Length: $len\nDocument-Type: XML\n\n$xml"; -# || die "can't add $uri: $@\n$xml\n"; + print $fh "Path-Name: $uri\nContent-Length: $len\nDocument-Type: XML\n\n$xml" or + die "can't add $uri: $@\n$xml"; return 1; } @@ -201,6 +187,8 @@ sub finish { YAML::DumpFile( $path, $self->{stats} ); $log->info("created $path ", -s $path, " bytes"); $log->debug( dump( $self->{stats} ) ); + + close( $self->{_swish_fh} ) || die "can't close index ", $self->index_path, ": $!"; } =head1 AUTHOR diff --git a/t/5-output-swish.t b/t/5-output-swish.t index 2747732..f9e1bb9 100755 --- a/t/5-output-swish.t +++ b/t/5-output-swish.t @@ -6,17 +6,20 @@ use blib; use Test::More tests => 35; BEGIN { -use_ok( 'WebPAC::Test' ); -use_ok( 'WebPAC::Output::SWISH' ); -use_ok( 'SWISH::API' ); + use lib 'lib'; + use_ok( 'WebPAC::Test' ); + use_ok( 'WebPAC::Output::SWISH' ); + use_ok( 'SWISH::API' ); } my $path = "$abs_path/kino/"; -ok(my $out = new WebPAC::Output::SWISH( +ok(my $out = new WebPAC::Output::SWISH({ database => 'test', %LOG -), "new"); +}), "new"); + +ok( $out->init, 'init' ); my $ds = { 'Source' => { @@ -24,7 +27,7 @@ my $ds = { 'search' => [ 'tko zna' ] }, 'ID' => { - 'search' => 'id', + 'search' => [ 'id' ], }, 'Array' => { 'search' => [ qw/a1 a2 s3 a4 a5/ ], @@ -34,9 +37,6 @@ my $ds = { }, }; -throws_ok { $out->add( ) } qr/need id/, 'add without params'; -throws_ok { $out->add( 42 ) } qr/need ds/, 'add without ds'; - ok( $out->add( 42, $ds ), 'add 42' ); my @strange = ( qw/èajðinica odma¹æivanje ¾abokreèina ¹uma/ ); @@ -53,7 +53,7 @@ sub test_search { my $swish = SWISH::API->new( $out->index_path ); $swish->abort_last_error if $swish->Error; - my $results = $swish->search( $query_string ); + my $results = $swish->query( $query_string ); my $total_hits = $results->hits; diff --git a/vhost/ecas/config.yml b/vhost/ecas/config.yml index 216db23..e45de32 100644 --- a/vhost/ecas/config.yml +++ b/vhost/ecas/config.yml @@ -1,7 +1,7 @@ # Encoding of this file must be UTF-8! vhost: - databases: 'Zbirke' + inputs: 'Zbirke' no_results: 'nema rezultata' -- 2.20.1