updated branches to head

author Dobrica Pavlinusic <dpavlin@rot13.org>

Mon, 28 Feb 2005 10:43:38 +0000 (10:43 +0000)

committer Dobrica Pavlinusic <dpavlin@rot13.org>

Mon, 28 Feb 2005 10:43:38 +0000 (10:43 +0000)
author Dobrica Pavlinusic <dpavlin@rot13.org>
Mon, 28 Feb 2005 10:43:38 +0000 (10:43 +0000)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Mon, 28 Feb 2005 10:43:38 +0000 (10:43 +0000)
diff --git a/WebPac.pm b/WebPac.pm

index 8c2e88e..4af84d8 100644 (file)
--- a/WebPac.pm
+++ b/WebPac.pm
@@ -182,14 +182,17 @@ sub make_pager_vars {
         my $tmpl = shift @_;
         my @persist_vars = @_;
         my $hidden_vars = '';
+       my $hidden_search = '';
         foreach my $v (@persist_vars) {
                 foreach my $val ($q->param($v)) {
                         next if (! $val || $val eq '');
                         $hidden_vars .= '<input type="hidden" name="'.$v.'" value="'.$val.'"/>'."\n";
+                       $hidden_search .= '<input type="hidden" name="'.$v.'" value="'.$val.'"/>'."\n" if ($v ne "rm");
                 }
         }
  
         $tmpl->param('PAGER_HIDDEN', $hidden_vars);
+       $tmpl->param('SEARCH_HIDDEN', $hidden_search);
         $tmpl->param('PAGER_JAVASCRIPT', qq#
  <SCRIPT LANGUAGE="Javascript">
  <!-- Begin
@@ -224,6 +227,12 @@ sub show_results_list {
  
         my $q = $self->query();
  
+       # submit was reset?
+       if ($q->param('reset')) {
+               $q->delete_all;
+               return $self->show_search_form();
+       }
+
         # load template for this page
  
         my @s_arr;      # all queries are located here
@@ -264,6 +273,8 @@ sub show_results_list {
                 push @url_params_persist,"f$i=".$q->url_param("f$i") if ($persist);
  
                 foreach my $v ($q->url_param("v$i")) {
+                       # escape quotes so that phrase search work
+                       $v =~ s/"/%22/g;
                         push @url_params,"v$i=$v";
                         push @url_params_persist,"v$i=$v" if ($persist);
                 }
diff --git a/all2xml.pl b/all2xml.pl

index 1c5123b..c2d258c 100755 (executable)
--- a/all2xml.pl
+++ b/all2xml.pl
@@ -11,6 +11,7 @@ use Encode;
  #use GDBM_File;
  use Fcntl;     # for O_RDWR
  use TDB_File;
+use Carp;
  
  $|=1;
  
@@ -62,7 +63,7 @@ my %type2tag = (
         'isis' => 'isis',
         'excel' => 'column',
         'marc' => 'marc',
-       'feed' => 'feed'
+       'feed' => 'feed',
  );
  
  my $cache;     # for cacheing
@@ -112,6 +113,10 @@ sub data2xml {
                 $cache->{tags_by_order} = \@sorted_tags;
         }
  
+       if (! @sorted_tags) {
+               print STDERR "WARNING: no tags for this type found in import_xml file!\n";
+       }
+
         # lookup key
         my $lookup_key;
  
@@ -220,7 +225,7 @@ sub data2xml {
  
                 my ($swish,$display);
  
-               my $tag = $type2tag{$type} || die "can't find which tag to use for type $type";
+               my $tag = $cfg->val($database, 'import_xml_tag') || $type2tag{$type} || die "can't find which tag to use for type $type";
  
                 # is this field page-by-page?
                 my $iterate_by_page = $config->{indexer}->{$field}->{iterate_by_page};
@@ -662,13 +667,22 @@ foreach my $database ($cfg->Sections) {
                 print STDERR "opening lookup file '$lookup_file'\n";
         }
  
-print STDERR "reading ./import_xml/$type.xml\n";
+       my $import_xml_type = $cfg->val($database, 'import_xml_file') || $type;
+       my $import_xml_file = "./import_xml/$import_xml_type.xml";
+
+       if (! -r $import_xml_file) {
+               print STDERR "ERROR: file $import_xml_file not readable skipping!\n";
+               next;
+       }
+
+       print STDERR "reading $import_xml_file\n";
  
         # extract just type basic
         my $type_base = $type;
         $type_base =~ s/_.+$//g;
  
-       $config=XMLin("./import_xml/$type.xml", ForceArray => [ $type2tag{$type_base}, 'config', 'format' ], ForceContent => 1 );
+       my $tag = $cfg->val($database, 'import_xml_tag') || $type2tag{$type_base} || die "can't find which tag to use for type $type";
+       $config=XMLin($import_xml_file, ForceArray => [ $tag, 'config', 'format' ], ForceContent => 1 );
  
         # helper for progress bar
         sub fmt_time {
@@ -730,7 +744,6 @@ print STDERR "reading ./import_xml/$type.xml\n";
         }
  
         # now read database
-print STDERR "using: $type...\n";
  
         # erase cache for tags by order in this database
         delete $cache->{tags_by_order};
@@ -927,6 +940,72 @@ print STDERR "using: $type...\n";
                 }
                 # close lookup
                 untie %lhash if (%lhash);
+
+       } elsif ($type_base eq "dbf") {
+
+               my $dbf_file = $cfg -> val($database, 'dbf_file') || die "$database doesn't have 'dbf_file' defined!";
+               my $dbf_codepage = $cfg -> val($database, 'dbf_codepage') || die "$database doesn't have 'dbf_codepage' defined!";
+               my $dbf_mapping = $cfg -> val($database, 'dbf_mapping') || die "$database doesn't have 'dbf_mapping' defined!";
+
+               $import2cp = Text::Iconv->new($dbf_codepage,$codepage);
+               require XBase;
+               my $db = new XBase $dbf_file;
+
+               if (! $db) {
+                       print STDERR "ERROR: can't read DBF database: $dbf_file, skipping...\n";
+                       next;
+               }
+
+               my $max_rowid = $db->last_record;
+
+               print STDERR "Reading database: $dbf_file [$max_rowid rows]\n";
+
+               my %dbf2iso;
+               foreach my $m (split(/[\n\r]+/,$dbf_mapping)) {
+                       my ($col,$fld) = split(/\s+/,$m,2);
+                       $dbf2iso{$col} = $fld;
+               }
+
+#print STDERR "## dbf2iso: ",Dumper(\%dbf2iso),"\n## /dbf2iso\n";
+
+               # bad, bad...
+               require "to_hash.pm";
+
+               foreach my $row_id (0 .. $max_rowid) {
+                       my $dbf_row = $db->get_record_as_hash($row_id);
+                       if ($dbf_row) {
+
+#print STDERR "## dbf_row: ",Dumper($dbf_row),"\n## /dbf_row\n";
+                               # apply mapping from config file
+                               # all unspecified records will get _ in
+                               # front of them - _DELETE will be __DELETE
+                               my $rec;
+                               map { 
+                                       my $new_fld = $dbf2iso{$_} || '_'.$_;
+                                       my $data = $dbf_row->{$_};
+                                       push @{ $rec->{$new_fld} }, $data if ($data && $data !~ /^(?:\s+|\$a\.|)$/);
+                               } keys %{$dbf_row};
+#print STDERR "## rec: ",Dumper($rec),"\n## /rec\n";
+                               my $row = to_hash($row_id+1, $rec);
+
+                               $row->{mfn} = $row_id+1;
+                               $row->{record} = $rec;
+
+#print STDERR "## row: ",Dumper($row),"\n## /row\n";
+                               progress($row->{mfn}, $max_rowid);
+
+                               my $swishpath = $path."#".int($row->{mfn});
+
+                               if (my $xml = data2xml($type_base,$row,$add_xml,$cfg,$database)) {
+                                       $xml = $cp2utf->convert($xml);
+                                       use bytes;      # as opposed to chars
+                                       print "Path-Name: $swishpath\n";
+                                       print "Content-Length: ".(length($xml)+1)."\n";
+                                       print "Document-Type: XML\n\n$xml\n";
+                               }
+                       }
+               }
+               print STDERR "\n";
         }
  }
  
diff --git a/doc/dbf_import.pod b/doc/dbf_import.pod

new file mode 100644 (file)

index 0000000..2cac05d
--- /dev/null
+++ b/doc/dbf_import.pod
@@ -0,0 +1,69 @@
+=head1 Import DBF files into WebPAC
+
+The dBase file format C<.dbf> is supported using the C<XBase> Perl module.
+Configuration in C<all2xml.conf> for this type looks like this:
+
+  [hda]
+       dbf_file=/data/drustvene/hda/ISO.DBF
+       type=dbf
+       import_xml_file=isis
+       import_xml_tag=isis
+       dbf_codepage=cp852
+       dbf_mapping=<<_END_OF_MAP_
+  ID_BROJ              mfn
+  ISBN_BROJ    010
+  SKUPINA1     200
+  SKUPINA2     205
+  SKUPINA4     210
+  SKUPINA5     215
+  SKUPINA6     225
+  SKUPINA7     300
+  ANOTACIJA    330
+  PREDMET1     610
+  PREDMET2     610
+  PREDMET3     510
+  UDK          675
+  REDALICA     700
+  SIGNATURA    990
+  _END_OF_MAP_
+
+The options are as follows:
+
+=over 4
+
+=item dbf_file
+
+Full path to C<.dbf> file that you want to import.
+
+=item type
+
+It should be C<dbf> for dBase files.
+
+=item import_xml_file
+
+If you want to re-use an existing import_xml file, you can use this option
+to specify the C<import_xml/B<import_xml_file>.xml> file.
+
+If not used, you will have to create an C<import_xml/dbf.xml> file.
+
+=item import_xml_tag
+
+If using C<import_xml_file>, you will probably want to specify which tag to
+use for C<dbf> data with this option.
+
+=item dbf_codepage
+
+Override codepage in C<import_xml_file> for this C<.dbf> database.
+
+=item dbf_mapping
+
+Specify the mapping from C<.dbf> fields to ISO fields. Each field is assumed to
+have MARC/ISO subfields in the form C<^a> or C<$a> and so on.
+
+The mapping must be specified using here-document notation in the config file,
+and each line must have two fields: the original column name from the C<.dbf>
+file and the new field name which is used in C<import_xml>.
+
+=back
+
+=cut
diff --git a/hash_sf.pm b/hash_sf.pm

new file mode 100644 (file)

index 0000000..8af05d3
--- /dev/null
+++ b/hash_sf.pm
@@ -0,0 +1,28 @@
+#
+# hash_sf($row_data,'field'[,'subfield'])
+#
+# e.g. hash_sf($row,'700','a')
+#
+sub hash_sf {
+       my $row = shift @_;
+       my $field = shift @_;
+       my $subfield = shift @_;
+
+       my $i = shift @_ || 0;
+
+       my $out;
+
+       if ($row->{$field}->[$i]) {
+               if (! $subfield) {
+                       # subfield list undef, empty or no defined subfields for this record
+                       my $all_sf = $row->{record}->{$field}->[$i] || confess "can't find field $field:$i",Dumper($row);
+                       $all_sf =~ s/[\^\$]./ /g;   # nuke definitions
+                       return $all_sf; 
+               }
+               my $sf = $row->{$field}->[$i]->{$subfield};
+               return $sf if ($sf);
+       }
+}
+
+1;
+
diff --git a/isis_sf.pm b/isis_sf.pm

deleted file mode 100644 (file)

index f118318..0000000
--- a/isis_sf.pm
+++ /dev/null
@@ -1,28 +0,0 @@
-#
-# isis_sf($isis_row,'isis_field'[,'subfield'])
-#
-# e.g. isis_sf($row,'700','a')
-#
-sub isis_sf {
-       my $row = shift @_;
-       my $isis_id = shift @_;
-       my $subfield = shift @_;
-
-       my $i = shift @_ || 0;
-
-       my $out;
-
-       if ($row->{$isis_id}->[$i]) {
-               if (! $subfield) {
-                       # subfield list undef, empty or no defined subfields for this record
-                       my $all_sf = $row->{record}->{$isis_id}->[$i];
-                       $all_sf =~ s/\^./ /g;   # nuke definitions
-                       return $all_sf; 
-               }
-               my $sf = $row->{$isis_id}->[$i]->{$subfield};
-               return $sf if ($sf);
-       }
-}
-
-1;
-
diff --git a/parse_format.pm b/parse_format.pm

index e348dde..12f7d96 100644 (file)
--- a/parse_format.pm
+++ b/parse_format.pm
@@ -10,13 +10,17 @@ sub parse_format {
         my $i = shift || 0;     # isis repeatable number
         my $codepage = shift || die "parse_format must be called with codepage!";
         if ($type eq "isis") {
-               return parse_iso_format($format,$row,$i,$codepage,'isis_sf');
+               return parse_iso_format($format,$row,$i,$codepage,'hash_sf');
         } elsif ($type eq "excel") {
                 return parse_excel_format($format,$row,$i,$codepage);
         } elsif ($type eq "marc") {
                 return parse_iso_format($format,$row,$i,$codepage,'marc_sf');
         } elsif ($type eq "feed") {
                 return parse_feed_format($format,$row,$i,$codepage);
+       } elsif ($type eq "dbf") {
+               return parse_iso_format($format,$row,$i,$codepage,'hash_sf');
+       } else {
+               confess "FATAL: unknown type '$type'";
         }
  }
  
diff --git a/template_html/results.html b/template_html/results.html

index fea805e..9647462 100644 (file)
--- a/template_html/results.html
+++ b/template_html/results.html
@@ -2,6 +2,7 @@
  
  <form method=post>
      <input type=submit value="Natrag na pretra¾ivanje" class="navigacijaDISABLED">
+    <TMPL_VAR NAME="SEARCH_HIDDEN">
  
  <!--
      <input type=submit value="Prija¹nja pretra¾ivanja"  class="navigacija">
diff --git a/template_html/search.html b/template_html/search.html

index 7b298cd..2a22664 100644 (file)
--- a/template_html/search.html
+++ b/template_html/search.html
@@ -136,7 +136,7 @@ na adesu <a href="mailto:knjiznice@ffzg.hr">knjiznice@ffzg.hr</a>
            <input type="submit" value="Pretra¾i !" class="submit" name="submit">
            </td>
          <td valign="bottom">
-               <br><input type="reset" value="Vrati poèetne vrijednosti !" name="reset" class="reset"></td>
+               <br><input type="submit" value="Vrati poèetne vrijednosti !" name="reset" class="reset"></td>
        </tr>
  </table>
  
@@ -287,7 +287,7 @@ na adesu <a href="mailto:knjiznice@ffzg.hr">knjiznice@ffzg.hr</a>
  <br>&nbsp;
            <div align="center">
                 <input type="submit" value="Pretra¾i !" class="submit">
-               <input type="reset" value="Vrati poèetne vrijednosti !" name="reset" class="reset">
+               <input type="submit" value="Vrati poèetne vrijednosti !" name="reset" class="reset">
         </div>
  
         
diff --git a/to_hash.pm b/to_hash.pm

new file mode 100644 (file)

index 0000000..a2813b1
--- /dev/null
+++ b/to_hash.pm
@@ -0,0 +1,39 @@
+# This is a slight modification of to_hash as included in Biblio::Isis
+#
+# This is incredibly bad (duplicate code), but it will have to wait for
+# WebPac v2 to be fixed by extracting all file imports in a sane OO way.
+
+
+sub to_hash {
+       my $mfn = shift || confess "need mfn!";
+       my $row = shift || confess "need data";
+
+       # init record to include MFN as field 000
+       my $rec = { '000' => [ $mfn ] };
+
+       foreach my $k (keys %{$row}) {
+               foreach my $l (@{$row->{$k}}) {
+
+                       my $val;
+
+                       # has identifiers?
+                       #($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
+
+                       # has subfields?
+                       if ($l =~ m/[\^\$]/) {
+                               foreach my $t (split(/[\^\$]/,$l)) {
+                                       next if (! $t);
+                                       $val->{substr($t,0,1)} = substr($t,1);
+                               }
+                       } else {
+                               $val = $l;
+                       }
+
+                       push @{$rec->{$k}}, $val;
+               }
+       }
+
+       return $rec;
+}
+
+1;
author	Dobrica Pavlinusic <dpavlin@rot13.org>
	Mon, 28 Feb 2005 10:43:38 +0000 (10:43 +0000)
committer	Dobrica Pavlinusic <dpavlin@rot13.org>
	Mon, 28 Feb 2005 10:43:38 +0000 (10:43 +0000)
WebPac.pm		patch \| blob \| history
all2xml.pl		patch \| blob \| history
doc/dbf_import.pod	[new file with mode: 0644]	patch \| blob
hash_sf.pm	[new file with mode: 0644]	patch \| blob
isis_sf.pm	[deleted file]	patch \| blob \| history
parse_format.pm		patch \| blob \| history
template_html/results.html		patch \| blob \| history
template_html/search.html		patch \| blob \| history
to_hash.pm	[new file with mode: 0644]	patch \| blob