1 package WebPAC::Output::KinoSearch;
6 use base qw/WebPAC::Common/;
8 use KinoSearch::InvIndexer;
9 use KinoSearch::Analysis::PolyAnalyzer;
10 use Encode qw/from_to/;
16 WebPAC::Output::KinoSearch - Create KinoSearch full text index
24 our $VERSION = '0.03';
28 Create full text index using KinoSearch index from data with
37 my $est = new WebPAC::Output::KinoSearch(
38 index_path => '/path/to/invindex',
39 fields => [ qw/name of all fields used/ ],
41 label => 'node label',
42 encoding => 'iso-8859-2',
52 path to KinoSearch index to use
56 names of all fields used in this index
60 name of database from which data comes
64 label for node (optional)
68 character encoding of C<data_structure> if it's different from C<ISO-8859-2>
69 (and it probably is). This encoding will be converted to C<UTF-8> for
# Constructor body (the enclosing sub header is not among the visible lines).
# Validates required arguments, opens or creates the inverted index, loads the
# field list saved by a previous run and registers each field with the indexer.
81 my $log = $self->_get_logger;
83 #$log->debug("self: ", sub { Dumper($self) });
# index_path, fields and database are mandatory; logdie on the first one that
# is missing or false.
85 foreach my $p (qw/index_path fields database/) {
86 $log->logdie("need $p") unless ($self->{$p});
# fields must be an array reference, not a flat list.
89 $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY');
# Default source encoding when the caller did not supply one.
91 $self->{encoding} ||= 'ISO-8859-2';
# No 'segments' file under index_path: force creation of a new index.
93 $self->{clean} = 1 if (! -e $self->{index_path} . '/segments');
95 $log->info("using", $self->{clean} ? ' new' : '', " index $self->{index_path} with encoding $self->{encoding}");
# NOTE(review): analyzer language is fixed to 'en' and is not configurable
# through the constructor arguments seen here.
97 my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );
99 $self->{invindex} = KinoSearch::InvIndexer->new(
100 invindex => $self->{index_path},
101 create => $self->{clean},
102 analyzer => $analyzer,
# Field names seen in a previous run are kept in <index_path>/fields.storable;
# the substitution collapses any doubled slash in the path.
105 my $fields_path = $self->{index_path} . '/fields.storable';
106 $fields_path =~ s#//#/#g;
# When the file exists, its contents REPLACE the caller-supplied field list.
# NOTE(review): if retrieve() returns false, the return value of $log->warn is
# what ends up in $self->{fields}, and the foreach below dereferences it.
# NOTE(review): no visible `use` line imports retrieve() (Storable) -- confirm
# it is loaded in the lines not shown.
107 if (-e $fields_path) {
108 $self->{fields} = retrieve($fields_path) ||
109 $log->warn("can't open $fields_path: $!");
# Presumably the else branch (its opening line is not visible): without saved
# field statistics the run is marked as a dummy run -- TODO confirm.
111 $log->error("This will be dummy run since no fields statistics are found!");
112 $log->error("You will have to re-run indexing to get search results!");
113 $self->{dummy_run} = 1;
# Remember the path so the field list can be written back later.
115 $self->{fields_path} = $fields_path;
# Declare every known field to the indexer (spec_field arguments not visible).
117 foreach my $f (@{ $self->{fields} }) {
118 $self->{invindex}->spec_field(
# Returns the object, or undef if $self is false.
127 $self ? return $self : return undef;
133 Adds one entry to database.
139 text => 'optional text from which snippet is created',
142 This function will create entries in the index using the following URI format:
144 C<file:///type/database%20name/000>
146 Each tag in C<data_structure> with specified C<type> will create one
147 attribute and corresponding hidden text (used for search).
# Body of the add-document routine (the enclosing sub header is not among the
# visible lines). Builds one index document from $args->{ds}, keyed by a
# file:/// URI, and hands it to the indexer.
156 my $log = $self->_get_logger;
# The message is single-quoted, so '$self' is logged literally.
158 my $database = $self->{'database'} || $log->logconfess('no database in $self');
159 $log->logconfess('need invindex in object') unless ($self->{'invindex'});
# id, ds and type are mandatory.
# NOTE(review): this is a truthiness test, so a legitimate id of 0 is rejected.
161 foreach my $p (qw/id ds type/) {
162 $log->logdie("need $p") unless ($args->{$p});
165 my $type = $args->{'type'};
166 my $id = $args->{'id'};
# NOTE(review): components are interpolated as-is; the POD example shows
# "database%20name", but nothing here escapes spaces -- confirm intent.
168 my $uri = "file:///$type/$database/$id";
169 $log->debug("creating $uri");
171 my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )");
# Helper: record that field $n was used and, unless this is a dummy run, store
# the converted value $v in $doc. It receives $self, $log and $doc explicitly.
# A failing set_value is downgraded to a warning.
173 sub add_value($$$$$) {
174 my ($self,$log,$doc,$n,$v) = @_;
# Usage is counted before the dummy_run check, so dummy runs still collect
# field names.
177 $self->{value_usage}->{$n}++;
178 return if ($self->{dummy_run});
180 eval { $doc->set_value($n, $self->convert($v) ) };
181 $log->warn("can't insert: $n = $v") if ($@);
184 add_value($self,$log,$doc, 'uri', $uri);
# NOTE(review): Dumper is not imported by any visible `use` line -- confirm.
186 $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
188 # filter all tags which have type defined
190 ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
191 } keys %{ $args->{'ds'} };
193 $log->debug("tags = ", join(",", @tags));
# NOTE(review): returning here skips add_doc below, so a record with no
# matching tags is never added even though its 'uri' value was already set.
195 return unless (@tags);
197 foreach my $tag (@tags) {
# All values of this tag for the requested type, joined with single spaces.
199 my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
# NOTE(review): $vals is converted here and add_value() calls convert() on it
# again, so tag values pass through the encoding conversion twice, while 'uri'
# and 'bodytext' pass through once. Also, the assignment happens before the
# `or`, so the logdie message prints the already-overwritten (false) $vals.
203 $vals = $self->convert( $vals ) or
204 $log->logdie("can't convert '$vals' to UTF-8");
206 add_value($self, $log, $doc, $tag, $vals );
# Optional free text is stored under the 'bodytext' field.
209 if (my $text = $args->{'text'}) {
210 add_value($self, $log, $doc, 'bodytext', $text );
213 #$log->debug("adding ", sub { $doc->dump_draft } );
214 $self->{invindex}->add_doc($doc) || $log->warn("can't add document $uri");
# Body of the index-finalising routine (the enclosing sub header is not among
# the visible lines). Flushes the index, then saves the list of field names
# seen so far to $self->{fields_path}.
230 my $log = $self->_get_logger();
232 $log->info("finish index writing to disk");
233 $self->{invindex}->finish;
235 $log->info("writing value usage file");
237 # add fields from last run
# Merges the field names held in $self->{fields} into the usage hash so they
# are kept in the saved list. NOTE(review): map is used in void context purely
# for its side effect.
238 map { $self->{value_usage}->{$_}++ } @{ $self->{fields} };
240 my @fields = keys %{ $self->{value_usage} };
# NOTE(review): `||` binds tighter than the comma, so this parses as
# store(\@fields, ($self->{fields_path} || $log->warn(...))). The warning can
# only fire when fields_path is false, never when store() fails; a
# low-precedence `or` (or parenthesised store(...)) would test store's result.
241 store \@fields, $self->{fields_path} ||
242 $log->warn("can't write $self->{fields_path}: $!");
248 my $utf8_string = $self->convert('string in codepage');
# Body of the encoding-conversion helper (sub header and return statement are
# not among the visible lines; presumably $self was shifted off @_ on a line
# not shown -- TODO confirm).
# `|| return` bails out for any false argument: undef, the empty string and
# the string "0" all come back as an empty return.
255 my $text = shift || return;
# Encode::from_to rewrites $text in place, from $self->{encoding} to UTF-8.
256 from_to($text, $self->{encoding}, 'UTF-8');
262 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
264 =head1 COPYRIGHT & LICENSE
266 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
268 This program is free software; you can redistribute it and/or modify it
269 under the same terms as Perl itself.
273 1; # End of WebPAC::Output::KinoSearch