1 package WebPAC::Output::KinoSearch;
6 use base qw/WebPAC::Common/;
8 use KinoSearch::InvIndexer;
9 use KinoSearch::Analysis::PolyAnalyzer;
10 use Encode qw/from_to/;
15 WebPAC::Output::KinoSearch - Create KinoSearch full text index
23 our $VERSION = '0.01';
27 Create full text index using KinoSearch index from data with
36 my $est = WebPAC::Output::KinoSearch->new(
37 index_path => '/path/to/invindex',
38 fields => [ qw/name of all fields used/ ],
40 label => 'node label',
41 encoding => 'iso-8859-2',
51 path to KinoSearch index to use
55 names of all fields used in this index (must be an array reference)
59 name of database from which data comes
63 label for node (optional)
67 character encoding of C<data_structure> if it's different from C<ISO-8859-2>
68 (and it probably is). This encoding will be converted to C<UTF-8> for
80 my $log = $self->_get_logger;
82 #$log->debug("self: ", sub { Dumper($self) });
84 foreach my $p (qw/index_path fields database/) {
85 $log->logdie("need $p") unless ($self->{$p});
88 $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY');
90 $self->{encoding} ||= 'ISO-8859-2';
92 $log->info("using index $self->{index_path} with encoding $self->{encoding}");
94 my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );
96 $self->{invindex} = KinoSearch::InvIndexer->new(
97 invindex => $self->{index_path},
98 create => $self->{clean},
99 analyzer => $analyzer,
102 foreach my $f (@{ $self->{fields} }) {
103 $self->{invindex}->spec_field(
112 $self ? return $self : return undef;
118 Adds one entry (document) to the index.
124 text => 'optional text from which snippet is created',
127 This function will create entries in the index using the following URI format:
129 C<file:///type/database%20name/000>
131 Each tag in C<data_structure> with specified C<type> will create one
132 attribute and corresponding hidden text (used for search).
141 my $log = $self->_get_logger;
143 my $database = $self->{'database'} || $log->logconfess('no database in $self');
144 $log->logconfess('need invindex in object') unless ($self->{'invindex'});
146 foreach my $p (qw/id ds type/) {
147 $log->logdie("need $p") unless ($args->{$p});
150 my $type = $args->{'type'};
151 my $id = $args->{'id'};
153 my $uri = "file:///$type/$database/$id";
154 $log->debug("creating $uri");
156 my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )");
157 eval { $doc->set_value('uri', $self->convert($uri) ) };
159 $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
161 # filter all tags which have type defined
163 ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
164 } keys %{ $args->{'ds'} };
166 $log->debug("tags = ", join(",", @tags));
168 return unless (@tags);
170 foreach my $tag (@tags) {
172 my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
176 $vals = $self->convert( $vals ) or
177 $log->logdie("can't convert '$vals' to UTF-8");
179 eval { $doc->set_value( $tag, $vals ) };
182 my $text = $args->{'text'};
184 $text = $self->convert( $text ) or
185 $log->logdie("can't convert '$text' to UTF-8");
186 eval { $doc->set_value( bodytext => $text ) };
189 #$log->debug("adding ", sub { $doc->dump_draft } );
190 $self->{invindex}->add_doc($doc) || $log->warn("can't add document $uri");
198 my $utf8_string = $self->convert('string in codepage');
205 my $text = shift || return;
206 from_to($text, $self->{encoding}, 'UTF-8');
212 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
214 =head1 COPYRIGHT & LICENSE
216 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
218 This program is free software; you can redistribute it and/or modify it
219 under the same terms as Perl itself.
223 1; # End of WebPAC::Output::KinoSearch