1 package WebPAC::Output::KinoSearch;
6 use base qw/WebPAC::Common/;
8 use KinoSearch::InvIndexer;
9 use KinoSearch::Analysis::PolyAnalyzer;
10 use Encode qw/from_to/;
15 WebPAC::Output::KinoSearch - Create KinoSearch full text index
23 our $VERSION = '0.01';
27 Create full text index using KinoSearch index from data with
36 my $est = new WebPAC::Output::KinoSearch(
37 index_path => '/path/to/invindex',
38 fields => [ qw/name of all fields used/ ],
40 label => 'node label',
41 encoding => 'iso-8859-2',
51 path to KinoSearch index to use
55 name of all fields used in this index
59 name of database from which data comes
63 label for node (optional)
67 character encoding of C<data_structure> if it's different than C<ISO-8859-2>
68 (and it probably is). This encoding will be converted to C<UTF-8> for
# Constructor body: validates options, then creates the KinoSearch indexer and
# registers the configured fields on it.
# NOTE(review): the enclosing sub header and the lines that build/bless $self
# are not visible in this excerpt -- confirm against the full file.
80 my $log = $self->_get_logger;
82 #$log->debug("self: ", sub { Dumper($self) });
# index_path, fields and database are mandatory; logdie is called for any of
# them that is missing or false.
84 foreach my $p (qw/index_path fields database/) {
85 $log->logdie("need $p") unless ($self->{$p});
# fields has to be an array reference, not a flat list.
88 $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY');
# Fall back to ISO-8859-2 when the caller gave no (or a false) encoding.
90 $self->{encoding} ||= 'ISO-8859-2';
92 $log->info("using index $self->{index_path} with encoding $self->{encoding}");
# The analyzer language is hard-coded to 'en' here; it is not derived from
# any constructor option.
94 my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );
# The indexer is stored on the object as {invindex}; the caller's 'clean'
# option is passed through as the indexer's 'create' argument.
96 $self->{invindex} = KinoSearch::InvIndexer->new(
97 invindex => $self->{index_path},
98 create => $self->{clean},
99 analyzer => $analyzer,
# Call spec_field on the indexer once per configured field name.
# NOTE(review): the spec_field arguments are not visible in this excerpt.
102 foreach my $f (@{ $self->{fields} }) {
103 $self->{invindex}->spec_field(
# Returns $self when it is true, undef otherwise.
112 $self ? return $self : return undef;
118 Adds one entry to database.
124 text => 'optional text from which snippet is created',
127 This function will create entries in the index using the following URI format:
129 C<file:///type/database%20name/000>
131 Each tag in C<data_structure> with specified C<type> will create one
132 attribute and corresponding hidden text (used for search).
# Body of the method that adds one document to the index.
# NOTE(review): the sub header and the lines that unpack $self and $args are
# not visible in this excerpt -- confirm against the full file.
141 my $log = $self->_get_logger;
# Both a database name and an indexer must already be present on the object.
143 my $database = $self->{'database'} || $log->logconfess('no database in $self');
144 $log->logconfess('need invindex in object') unless ($self->{'invindex'});
# id, ds and type must all be present and true in the arguments.
# NOTE(review): this is a truthiness test, so an id of 0 would be rejected --
# confirm that is intended.
146 foreach my $p (qw/id ds type/) {
147 $log->logdie("need $p") unless ($args->{$p});
150 my $type = $args->{'type'};
151 my $id = $args->{'id'};
# The URI is built by plain interpolation of type, database and id; no
# escaping is applied on this line.
153 my $uri = "file:///$type/$database/$id";
154 $log->debug("creating $uri");
156 my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )");
# Helper: passes $v through $self->convert and stores the result in $doc
# under field name $n. An exception from set_value/convert is caught by eval
# and only reported through $log->warn.
# NOTE(review): this named sub is declared between statements of the
# surrounding method body; named subs are package-scoped in Perl, and the
# ($$$$$) prototype only affects parsing of calls -- consider a private
# method instead.
158 sub add_value($$$$$) {
159 my ($self,$log,$doc,$n,$v) = @_;
161 eval { $doc->set_value($n, $self->convert($v) ) };
162 $log->warn("can't insert: $n = $v") if ($@);
# The URI itself is stored as the 'uri' field of the document.
165 add_value($self,$log,$doc, 'uri', $uri);
167 $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
169 # filter all tags which have type defined
171 ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
172 } keys %{ $args->{'ds'} };
174 $log->debug("tags = ", join(",", @tags));
# Return early when no tag carries data for the requested type.
# NOTE(review): this return happens before add_doc, so the document created
# above (with its 'uri' field) is not added in that case.
176 return unless (@tags);
178 foreach my $tag (@tags) {
# All values of this tag for the requested type are joined into a single
# space-separated string.
180 my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
# NOTE(review): $vals is converted here and then converted again inside
# add_value, so the encoding conversion appears to be applied twice --
# confirm. Also, when convert returns false $vals has already been
# overwritten, so the logdie message cannot show the original value.
184 $vals = $self->convert( $vals ) or
185 $log->logdie("can't convert '$vals' to UTF-8");
187 add_value($self, $log, $doc, $tag, $vals );
# Optional free text is stored under the 'bodytext' field.
190 if (my $text = $args->{'text'}) {
191 add_value($self, $log, $doc, 'bodytext', $text );
194 #$log->debug("adding ", sub { $doc->dump_draft } );
# A false return from add_doc is only logged as a warning.
195 $self->{invindex}->add_doc($doc) || $log->warn("can't add document $uri");
# Logs an info message and then calls finish on the stored indexer.
# NOTE(review): the enclosing sub header is not visible in this excerpt.
211 $self->_get_logger()->info("finish index writing to disk");
212 $self->{invindex}->finish;
217 my $utf8_string = $self->convert('string in codepage');
# Returns immediately when the argument is missing or false (this includes
# the strings '' and '0').
# NOTE(review): the sub header, the $self unpacking and the return statement
# are not visible in this excerpt.
224 my $text = shift || return;
# Encode::from_to converts $text in place from the configured encoding
# ($self->{encoding}) to UTF-8.
225 from_to($text, $self->{encoding}, 'UTF-8');
231 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
233 =head1 COPYRIGHT & LICENSE
235 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
237 This program is free software; you can redistribute it and/or modify it
238 under the same terms as Perl itself.
242 1; # End of WebPAC::Output::KinoSearch