+++ /dev/null
-out.xml
-run.sh
-swish_isis.index*
-foo*
-bar*
-log
ver=`date +%Y%m%d`
dist:
- rcs2log -h rot13.org > ChangeLog
+ svn log -v > ChangeLog
rm -Rf webpac-$(ver)
mkdir webpac-$(ver)
- cvs-files.pl | cpio -pvd webpac-$(ver)/
+ svn ls -R | cpio -pvd webpac-$(ver)/
tar cfvz ../webpac-$(ver).tar.gz webpac-$(ver)/
rm -Rf webpac-$(ver)
my $ON_PAGE =$cfg_global->val('webpac', 'on_page') || 10;
my $MIN_WILDCARD =$cfg_global->val('webpac', 'min_wildcard') || 1;
my $TEMPLATE =$cfg_global->val('webpac', 'template');
-my $UNAC_FILTER =$cfg_global->val('global', 'unac_filter');
+my $UNAC_FILTER =$cfg_global->val('global', 'my_unac_filter');
my $BASE_PATH =$cfg_global->val('webpac', 'base_path');
# for pager
my $pages_per_set = $cfg_global->val('webpac', 'pages_per_set') || 10;
+Text::Iconv->raise_error(0); # Conversion errors return undef instead of raising exceptions
+
+my $from_utf8 = Text::Iconv->new('UTF8', $CHARSET);
if ($UNAC_FILTER) {
require $UNAC_FILTER;
+} else {
+ # Pass-through fallback for the case when no filter file is
+ # configured: ignores the charset and returns the string unchanged.
+ # NOTE(review): a named sub is compiled regardless of which branch
+ # runs, so this definition exists even when $UNAC_FILTER is set --
+ # confirm it cannot shadow the my_unac_string loaded by require.
+ sub WebPac::my_unac_string {
+ my ($charset, $string) = (@_);
+ return $string;
+ }
}
-Text::Iconv->raise_error(0); # Conversion errors raise exceptions
-
-my $from_utf8 = Text::Iconv->new('UTF8', $CHARSET);
-
# use path from cgi script to support templates in subdirs
sub url_ex {
my $q = shift || die "suff2file needs CGI object!";
while (my $search = shift @param_vals) {
my $s;
# remove accents
- $search = unac_string($CHARSET,$search);
+ $search = my_unac_string($CHARSET,$search);
while ($search =~ s/\s*("[^"]+")\s*/ /) {
$s .= "$1 ";
}
use Getopt::Std;
use Data::Dumper;
use XML::Simple;
-use Text::Unaccent 1.02; # 1.01 won't compile on my platform,
use Text::Iconv;
use Config::IniFiles;
use Encode;
my $config_file = $0;
$config_file =~ s/\.pl$/.conf/;
-$config_file = $ARGV[0] if (-f $ARGV[0]);
+$config_file = $ARGV[0] if ($ARGV[0] && -f $ARGV[0]);
die "FATAL: can't find configuration file '$config_file'" if (! -e $config_file);
my $config;
$swish_data =~ s/ +/ /g;
$swish_data =~ s/ +$//g;
- $xml .= xmlify($field."_swish", unac_string($codepage,$swish_data));
+ $xml .= xmlify($field."_swish", my_unac_string($codepage,$swish_data));
}
my $swish_exact_data = $cache->{swish_exact_data}->{$field}->[$page];
# add delimiters before and after word.
# That is required to produce exact match
- $xml .= xmlify($field."_swish_exact", unac_string($codepage,$swish_exact_data));
+ $xml .= xmlify($field."_swish_exact", my_unac_string($codepage,$swish_exact_data));
}
my $idel = $cache->{index_delimiter}->{$field};
$swish_data =~ s/ +/ /g;
$swish_data =~ s/ +$//g;
- $xml .= xmlify($field."_swish", unac_string($codepage,$swish_data));
+ $xml .= xmlify($field."_swish", my_unac_string($codepage,$swish_data));
}
if ($swish_exact_data) {
# add delimiters before and after word.
# That is required to produce exact match
- $xml .= xmlify($field."_swish_exact", unac_string($codepage,$swish_exact_data));
+ $xml .= xmlify($field."_swish_exact", my_unac_string($codepage,$swish_exact_data));
}
}
}
my $show_progress = $cfg_global->val('global', 'show_progress');
-my $unac_filter = $cfg_global->val('global', 'unac_filter');
-if ($unac_filter) {
- require $unac_filter;
+# Load the optional character filter named by my_unac_filter in the
+# [global] section; calls to my_unac_string() elsewhere in this script
+# rely on either the filter file or the fallback below defining it.
+my $my_unac_filter = $cfg_global->val('global', 'my_unac_filter');
+if ($my_unac_filter) {
+ print STDERR "using $my_unac_filter to filter characters for search\n";
+ require $my_unac_filter;
+} else {
+ print STDERR "### fallback to default my_unac_string!\n";
+ # String eval defines the pass-through sub at run time, so it is
+ # created only when no filter file was loaded (a plain named sub
+ # would be compiled unconditionally).
+ eval q{
+ sub main::my_unac_string($$) {
+ my ($charset, $string) = (@_);
+ return $string;
+ }
+ };
}
foreach my $database ($cfg->Sections) {
my $out = "";
foreach (@_) {
# tr/^~]}\|[{@`/ÈèÆæÐ𩹮¾/; # B1.002:1982
- tr/^\~]}\|[{@\`/ÈèÆæðй©¾®/; # Crolist croascii
+ tr/^~]}\\|[{@`/ÈèÆæðЩ¹®¾/; # Crolist croascii
# Crolist alternative encoding
s/ÏC/È/g;
s/Ïc/è/g;
+++ /dev/null
-# Alternative implementation for unac_string which supports charasters in
-# Croatian language which isn't really accented (ð) but needs to be coverted
-# to unaccented equivalent (d)
-
-sub unac_string($$) {
- my $charset = shift || return;
- my $string = shift || return;
-# $string = Text::Unaccent::unac_string($charset,$string);
-# $string =~ tr/ðÐ/dD/;
- $string =~ tr/èæ¾¹ðÈÆ®©Ð/cczsdCCZSD/;
- return $string;
-}
-
-1;
# display progress bar indicator (default is no)
show_progress=1
- # optional alternative Text::Unaccent filter
- unac_filter = /data/webpac-hidra/filter/unac_string_croatian.pm
+ # Filter characters before feeding them to swish. If you don't use
+ # this file, the implementation will fall back to passing the
+ # original charset through unchanged, and if you have anything other
+ # than plain 7-bit ASCII in your data, your words will end up split
+ # in the index on 8-bit characters and you won't be able to find them!
+ my_unac_filter = /data/webpac/my_unac_string.pm
[webpac]
# path to template html files
--- /dev/null
+# Alternative implementation of unac_string which supports characters in
+# the Croatian language which aren't really accented (ð) but need to be
+# converted to their unaccented equivalent (d)
+
+use Text::Unaccent 1.02; # 1.01 won't compile on my platform,
+
+# my_unac_string($charset, $string)
+#
+# Strip accents from $string (encoded in $charset) using
+# Text::Unaccent's unac_string, then map the Croatian letters ð/Ð,
+# which unac_string is not relied on to handle, to d/D.
+#
+# Always returns exactly one scalar so that the call can be used safely
+# inside an argument list. An undefined or empty string, or a missing
+# charset, returns the input unchanged (same as the pass-through
+# fallback used when no filter is configured).
+sub my_unac_string($$) {
+	my ($charset, $string) = @_;
+	# test definedness/length, not truth, so that "0" is still processed
+	return $string unless (defined($string) && length($string));
+	# without a charset there is nothing unac_string could convert from
+	return $string unless ($charset);
+	$string = unac_string($charset,$string);
+	$string =~ tr/ðÐ/dD/;
+	return $string;
+}
+
+1;