projects
/
webpac
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
bug fix when transferring fields with multiple values (e.g. checkboxes) to
[webpac]
/
all2xml.pl
diff --git
a/all2xml.pl
b/all2xml.pl
index
0ffee07
..
c3388f1
100755
(executable)
--- a/
all2xml.pl
+++ b/
all2xml.pl
@@
-5,7
+5,6
@@
use OpenIsis;
use Getopt::Std;
use Data::Dumper;
use XML::Simple;
use Getopt::Std;
use Data::Dumper;
use XML::Simple;
-use Text::Unaccent 1.02; # 1.01 won't compile on my platform,
use Text::Iconv;
use Config::IniFiles;
use Encode;
use Text::Iconv;
use Config::IniFiles;
use Encode;
@@
-17,6
+16,7
@@
$|=1;
my $config_file = $0;
$config_file =~ s/\.pl$/.conf/;
my $config_file = $0;
$config_file =~ s/\.pl$/.conf/;
+$config_file = $ARGV[0] if ($ARGV[0] && -f $ARGV[0]);
die "FATAL: can't find configuration file '$config_file'" if (! -e $config_file);
my $config;
die "FATAL: can't find configuration file '$config_file'" if (! -e $config_file);
my $config;
@@
-168,6
+168,8
@@
sub data2xml {
($s,$se,$d,$i) = (0,1,0,0);
} elsif (lc($type) =~ /^lookup/) {
($s,$se,$d,$i,$il) = (0,1,0,0,1);
($s,$se,$d,$i) = (0,1,0,0);
} elsif (lc($type) =~ /^lookup/) {
($s,$se,$d,$i,$il) = (0,1,0,0,1);
+ } elsif ($type) {
+ print STDERR "WARNING: unknown type: $type\n";
}
return ($s,$se,$d,$i,$il);
}
}
return ($s,$se,$d,$i,$il);
}
@@
-525,7
+527,7
@@
sub data2xml {
$swish_data =~ s/ +/ /g;
$swish_data =~ s/ +$//g;
$swish_data =~ s/ +/ /g;
$swish_data =~ s/ +$//g;
- $xml .= xmlify($field."_swish", unac_string($codepage,$swish_data));
+ $xml .= xmlify($field."_swish",
my_
unac_string($codepage,$swish_data));
}
my $swish_exact_data = $cache->{swish_exact_data}->{$field}->[$page];
}
my $swish_exact_data = $cache->{swish_exact_data}->{$field}->[$page];
@@
-535,7
+537,7
@@
sub data2xml {
# add delimiters before and after word.
# That is required to produce exact match
# add delimiters before and after word.
# That is required to produce exact match
- $xml .= xmlify($field."_swish_exact", unac_string($codepage,$swish_exact_data));
+ $xml .= xmlify($field."_swish_exact",
my_
unac_string($codepage,$swish_exact_data));
}
my $idel = $cache->{index_delimiter}->{$field};
}
my $idel = $cache->{index_delimiter}->{$field};
@@
-568,7
+570,7
@@
sub data2xml {
$swish_data =~ s/ +/ /g;
$swish_data =~ s/ +$//g;
$swish_data =~ s/ +/ /g;
$swish_data =~ s/ +$//g;
- $xml .= xmlify($field."_swish", unac_string($codepage,$swish_data));
+ $xml .= xmlify($field."_swish",
my_
unac_string($codepage,$swish_data));
}
if ($swish_exact_data) {
}
if ($swish_exact_data) {
@@
-577,7
+579,7
@@
sub data2xml {
# add delimiters before and after word.
# That is required to produce exact match
# add delimiters before and after word.
# That is required to produce exact match
- $xml .= xmlify($field."_swish_exact", unac_string($codepage,$swish_exact_data));
+ $xml .= xmlify($field."_swish_exact",
my_
unac_string($codepage,$swish_exact_data));
}
}
}
}
}
}
@@
-614,9
+616,18
@@
$index = new index_DBI(
my $show_progress = $cfg_global->val('global', 'show_progress');
my $show_progress = $cfg_global->val('global', 'show_progress');
-my $unac_filter = $cfg_global->val('global', 'unac_filter');
-if ($unac_filter) {
- require $unac_filter;
+my $my_unac_filter = $cfg_global->val('global', 'my_unac_filter');
+if ($my_unac_filter) {
+ print STDERR "using $my_unac_filter to filter characters for search\n";
+ require $my_unac_filter;
+} else {
+ print STDERR "### fallback to default my_unac_string!\n";
+ eval q{
+ sub main::my_unac_string($$) {
+ my ($charset, $string) = (@_);
+ return $string;
+ }
+ };
}
foreach my $database ($cfg->Sections) {
}
foreach my $database ($cfg->Sections) {
@@
-628,6
+639,10
@@
foreach my $database ($cfg->Sections) {
my $lookup_file = $cfg -> val($database, 'lookup_newfile'); # optional
if ($lookup_file) {
#tie %lhash, 'GDBM_File', $lookup_file, &GDBM_NEWDB, 0644;
my $lookup_file = $cfg -> val($database, 'lookup_newfile'); # optional
if ($lookup_file) {
#tie %lhash, 'GDBM_File', $lookup_file, &GDBM_NEWDB, 0644;
+ if (! -e $lookup_file) {
+ open(LOOKUP, "> $lookup_file") || die "can't create $lookup_file': $!";
+ close(LOOKUP);
+ }
tie %lhash, 'TDB_File', $lookup_file, TDB_CLEAR_IF_FIRST, O_RDWR, 0644;
print STDERR "creating lookup file '$lookup_file'\n";
# delete memory cache for lookup file
tie %lhash, 'TDB_File', $lookup_file, TDB_CLEAR_IF_FIRST, O_RDWR, 0644;
print STDERR "creating lookup file '$lookup_file'\n";
# delete memory cache for lookup file
@@
-918,6
+933,10
@@
__END__
all2xml.pl - read various file formats and dump XML for SWISH-E
all2xml.pl - read various file formats and dump XML for SWISH-E
+=head1 SYNOPSIS
+
+ $ all2xml.pl [test.conf]
+
=head1 DESCRIPTION
This command will read ISIS data file using OpenIsis perl module, MARC
=head1 DESCRIPTION
This command will read ISIS data file using OpenIsis perl module, MARC
@@
-926,6
+945,9
@@
create one XML file for usage with I<SWISH-E> indexer. Despite its name,
this script B<isn't general xml generator> from isis files (isis already
has something like that). Output of this script is tailor-made for SWISH-E.
this script B<isn't general xml generator> from isis files (isis already
has something like that). Output of this script is tailor-made for SWISH-E.
+If no configuration file is specified, it will use the default one called
+C<all2xml.conf>.
+
=head1 BUGS
Documentation is really lacking. However, in true Open Source spirit, source
=head1 BUGS
Documentation is really lacking. However, in true Open Source spirit, source