#use GDBM_File;
use Fcntl; # for O_RDWR
use TDB_File;
+use Carp;
$|=1;
'isis' => 'isis',
'excel' => 'column',
'marc' => 'marc',
- 'feed' => 'feed'
+ 'feed' => 'feed',
+ 'dbf' => 'isis', # special case, re-use isis import_xml
);
my $cache; # for cacheing
$cache->{tags_by_order} = \@sorted_tags;
}
+ if (! @sorted_tags) {
+ print STDERR "WARNING: no tags for this type found in import_xml file!\n";
+ }
+
# lookup key
my $lookup_key;
print STDERR "opening lookup file '$lookup_file'\n";
}
-print STDERR "reading ./import_xml/$type.xml\n";
+ my $import_xml_file = "./import_xml/$type.xml";
+
+ if (! -r $import_xml_file) {
+ print STDERR "ERROR: file $import_xml_file not readable skipping!\n";
+ next;
+ }
+
+ print STDERR "reading $import_xml_file\n";
# extract just type basic
my $type_base = $type;
$type_base =~ s/_.+$//g;
- $config=XMLin("./import_xml/$type.xml", ForceArray => [ $type2tag{$type_base}, 'config', 'format' ], ForceContent => 1 );
+ $config=XMLin($import_xml_file, ForceArray => [ $type2tag{$type_base}, 'config', 'format' ], ForceContent => 1 );
# helper for progress bar
sub fmt_time {
}
# close lookup
untie %lhash if (%lhash);
+
+ } elsif ($type_base eq "dbf") {
+
+ my $dbf_file = $cfg -> val($database, 'dbf_file') || die "$database doesn't have 'dbf_file' defined!";
+ my $dbf_codepage = $cfg -> val($database, 'dbf_codepage') || die "$database doesn't have 'dbf_codepage' defined!";
+ my $dbf_mapping = $cfg -> val($database, 'dbf_mapping') || die "$database doesn't have 'dbf_mapping' defined!";
+
+ $import2cp = Text::Iconv->new($dbf_codepage,$codepage);
+ require XBase;
+ my $db = new XBase $dbf_file;
+
+ if (! $db) {
+ print STDERR "ERROR: can't read DBF database: $dbf_file, skipping...\n";
+ next;
+ }
+
+ my $max_rowid = $db->last_record;
+
+ print STDERR "Reading database: $dbf_file [$max_rowid rows]\n";
+
+ my %dbf2iso;
+ foreach my $m (split(/[\n\r]+/,$dbf_mapping)) {
+ my ($col,$fld) = split(/\s+/,$m,2);
+ $dbf2iso{$col} = $fld;
+ }
+
+#print STDERR "## dbf2iso: ",Dumper(\%dbf2iso),"\n## /dbf2iso\n";
+
+ # bad, bad...
+ require "to_hash.pm";
+
+ foreach my $row_id (0 .. $max_rowid) {
+ my $dbf_row = $db->get_record_as_hash($row_id);
+ if ($dbf_row) {
+
+#print STDERR "## dbf_row: ",Dumper($dbf_row),"\n## /dbf_row\n";
+ # apply mapping from config file
+ # all unspecified records will get _ in
+ # front of them - _DELETE will be __DELETE
+ my $rec;
+ map {
+ my $new_fld = $dbf2iso{$_} || '_'.$_;
+ my $data = $dbf_row->{$_};
+ push @{ $rec->{$new_fld} }, $data if ($data && $data !~ /^(?:\s+|\$a\.|)$/);
+ } keys %{$dbf_row};
+#print STDERR "## rec: ",Dumper($rec),"\n## /rec\n";
+ my $row = to_hash($row_id+1, $rec);
+
+ $row->{mfn} = $row_id+1;
+ $row->{record} = $rec;
+
+#print STDERR "## row: ",Dumper($row),"\n## /row\n";
+ progress($row->{mfn}, $max_rowid);
+
+ my $swishpath = $path."#".int($row->{mfn});
+
+ if (my $xml = data2xml($type_base,$row,$add_xml,$cfg,$database)) {
+ $xml = $cp2utf->convert($xml);
+ use bytes; # as opposed to chars
+ print "Path-Name: $swishpath\n";
+ print "Content-Length: ".(length($xml)+1)."\n";
+ print "Document-Type: XML\n\n$xml\n";
+ }
+ }
+ }
+ print STDERR "\n";
}
}
my $i = shift || 0; # isis repeatable number
my $codepage = shift || die "parse_format must be called with codepage!";
if ($type eq "isis") {
- return parse_iso_format($format,$row,$i,$codepage,'isis_sf');
+ return parse_iso_format($format,$row,$i,$codepage,'hash_sf');
} elsif ($type eq "excel") {
return parse_excel_format($format,$row,$i,$codepage);
} elsif ($type eq "marc") {
return parse_iso_format($format,$row,$i,$codepage,'marc_sf');
} elsif ($type eq "feed") {
return parse_feed_format($format,$row,$i,$codepage);
+ } elsif ($type eq "dbf") {
+ return parse_iso_format($format,$row,$i,$codepage,'hash_sf');
+ } else {
+ confess "FATAL: unknown type '$type'";
}
}
--- /dev/null
+# This is slight modification of to_hash as included in Biblio::Isis
+#
+# This is incredibly bad (duplicate code), but it would have to wait for
+# WebPac v2 to be fixed by extracting all file imported in sane OO way.
+
+
+sub to_hash {
+ my $mfn = shift || confess "need mfn!";
+ my $row = shift || confess "need data";
+
+ # init record to include MFN as field 000
+ my $rec = { '000' => [ $mfn ] };
+
+ foreach my $k (keys %{$row}) {
+ foreach my $l (@{$row->{$k}}) {
+
+ my $val;
+
+ # has identifiers?
+ #($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
+
+ # has subfields?
+ if ($l =~ m/[\^\$]/) {
+ foreach my $t (split(/[\^\$]/,$l)) {
+ next if (! $t);
+ $val->{substr($t,0,1)} = substr($t,1);
+ }
+ } else {
+ $val = $l;
+ }
+
+ push @{$rec->{$k}}, $val;
+ }
+ }
+
+ return $rec;
+}
+
+1;