From d0ecbc408bf94fb0a9dd2ef74ffa5e9aeadad7ec Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Sun, 4 Apr 2004 22:11:13 +0000 Subject: [PATCH] updated branches to HEAD git-svn-id: file:///home/dpavlin/private/svn/webpac/branches/hidra@303 13eb9ef6-21d5-0310-b721-a9d68796d827 --- all2xml.pl | 8 ++++++++ doc/exact_match.txt | 27 +++++++++++++++++++++++++++ parse_format.pm | 2 +- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/all2xml.pl b/all2xml.pl index 0ffee07..860560b 100755 --- a/all2xml.pl +++ b/all2xml.pl @@ -17,6 +17,7 @@ $|=1; my $config_file = $0; $config_file =~ s/\.pl$/.conf/; +$config_file = $ARGV[0] if (-f $ARGV[0]); die "FATAL: can't find configuration file '$config_file'" if (! -e $config_file); my $config; @@ -918,6 +919,10 @@ __END__ all2xml.pl - read various file formats and dump XML for SWISH-E +=head1 SYNOPSIS + + $ all2xml.pl [test.conf] + =head1 DESCRIPTION This command will read ISIS data file using OpenIsis perl module, MARC @@ -926,6 +931,9 @@ create one XML file for usage with I indexer. Dispite it's name, this script B from isis files (isis allready has something like that). Output of this script is tailor-made for SWISH-E. +If no configuration file is specified, it will use default one called +C<all2xml.conf>. + =head1 BUGS Documentation is really lacking. However, in true Open Source spirit, source diff --git a/doc/exact_match.txt b/doc/exact_match.txt index d749552..2dbb277 100644 --- a/doc/exact_match.txt +++ b/doc/exact_match.txt @@ -27,3 +27,30 @@ on TitleAndResponsibility, you would use: + +What are bit-masks? + +Bit-mask is usage of one byte (8 bits) as 8 separate bits with its own +meaning (this is simplification, but bear with me for now). + +So, 1 = 2^0, thus it's bit 1. With analogy, 2=2^1 and 3=2^0+2^1.
+So, for 1-3 we use two bits and have: + +number bits +1 01 (just begin bit set) +2 10 (just end bit set) +3 11 (begin and end bit set) + +Thus, with two bits (and values 1-3) we can express should we exact match from +beginning, end or both. For wild-card match, we use additional bit 3 (2^2 = 4) +so we have: + +number bits exact match +1 001 begin +2 010 end +3 011 begin+end +4 100 (not used) +5 101 (4+1) begin+wild-card +6 110 (4+2) end+wild-card +7 111 (4+3) begin+end+wild-card + diff --git a/parse_format.pm b/parse_format.pm index d12a94c..e5b23a6 100644 --- a/parse_format.pm +++ b/parse_format.pm @@ -224,7 +224,7 @@ sub parse_excel_format { } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) { $prefix .= $1 if ($display); } else { - print STDERR "unparsed format: $format\n"; + #print STDERR "unparsed format: $format\n"; $prefix .= $format; $format = ""; } -- 2.20.1