1 package WebPAC::Output::SWISH;
6 use base qw/WebPAC::Common/;
8 use File::Path qw/make_path/;
9 use Data::Dump qw/dump/;
14 WebPAC::Output::SWISH - Create swish-e full text index
18 our $VERSION = '0.01';
22 Create full text index using swish-e indexer from data with
29 my $out = new WebPAC::Output::SWISH(
39 name of database from which data comes
43 Name of database will be used to form URI of documents in index.
47 our $dir = 'var/swish';
54 my $log = $self->_get_logger;
56 #$log->debug("self: ", sub { dump($self) });
58 my $database = $self->{database} || $log->logdie("need database");
60 my $path = "$dir/$database.conf";
62 open(my $conf, '>', $path) || die "can't open $path: $!";
64 print $conf <<"DEFAULT_SWISH_CONF";
65 # swish-e config file for $database
69 # input file definition
73 MetaNames xml swishdocpath
76 #XMLClassAttributes type
77 UndefinedMetaTags auto
78 UndefinedXMLAttributes auto
80 IndexFile $dir/$database
82 # Croatian ISO-8859-2 characters to unaccented equivalents
83 TranslateCharacters ¹©ðÐèÈæƾ® ssddcccczz
92 close($conf) || die "can't write config $path: $!";
94 $self->{_swish_index_path} = "$dir/$database";
96 my $swish = "swish-e -S prog -c $path";
97 open( $self->{_swish_fh}, '-|', $swish ) || die "can't open pipe to $swish: $!";
101 $self ? return $self : return undef;
106 my $path = $out->index_path;
110 sub index_path { $_[0]->{_swish_index_path} };
114 Adds one entry to database.
120 text => 'optional text from which snippet is created',
123 This function will create entries in index using following URI format:
125 C<file:///type/database%20name/000>
127 Each tag in C<data_structure> with specified C<type> will create one
128 attribute and corresponding hidden text (used for search).
132 my %escape = ('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"');
133 my $escape_re = join '|' => keys %escape;
140 my $log = $self->_get_logger;
142 my $database = $self->{'database'} || $log->logconfess('no database in $self');
143 $log->logconfess('need db in object') unless ($self->{'db'});
145 foreach my $p (qw/id ds type/) {
146 $log->logdie("need $p") unless ($args->{$p});
149 my $type = $args->{'type'};
150 my $id = $args->{'id'};
152 my $uri = "$database/$id";
153 $log->debug("creating $uri");
155 # filter all tags which have type defined
157 ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
158 } keys %{ $args->{'ds'} };
160 $log->debug("tags = ", join(",", @tags));
162 return unless (@tags);
166 foreach my $tag (@tags) {
168 my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
172 $vals =~ s/($escape_re)/$escape{$1}/gs;
173 $xml .= qq{<$tag><![CDATA[$vals]]></$tag>};
175 $self->{stats}->{attr}->{$tag}++;
178 $xml .= qq{</xml>\n};
180 my $len = length($xml);
182 my $fh = $self->{_swish_fh};
184 print $fh "Path-Name: $uri\nContent-Length: $len\nDocument-Type: XML\n\n$xml";
185 # || die "can't add $uri: $@\n$xml\n";
192 Dump attributes used on disk
198 my $log = $self->_get_logger();
200 my $path = $dir . '/' . $self->{database} . '.yaml';
201 YAML::DumpFile( $path, $self->{stats} );
202 $log->info("created $path ", -s $path, " bytes");
203 $log->debug( dump( $self->{stats} ) );
208 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
210 =head1 COPYRIGHT & LICENSE
212 Copyright 2004-2009 Dobrica Pavlinusic, All Rights Reserved.
214 This program is free software; you can redistribute it and/or modify it
215 under the same terms as Perl itself.