#!/usr/bin/perl -w
-# this utility will convert some (or all, defined by regex)
-# fields into marc file from one or more CDS/ISIS files
+# This utility will convert some (or all, depending on the definitions in
+# the XML configuration file) fields and subfields, with remapping, into
+# a MARC file from one or more CDS/ISIS files
#
# 2004-02-23 Dobrica Pavlinusic <dpavlin@rot13.org>
#
-# if ISIS databases are named same as directories in which they
+#
+# Run without parameters for usage instructions, or run without parameters
+# and redirect STDOUT to a file to create an example configuration file like
+# this:
+#
+# ./isis2marc.pl > config.xml
+#
+# If you want to create unique records, you need to define one or more
+# fields as key (which will be used to produce just one record for one
+# key)
+#
+# Keys are global for one run of script (that means for all ISIS databases
+# used in one run), but you can write arbitrary values (as opposed to field
+# names) inside key tag to produce unique key. For example,
+#
+# <key>author</key>
+# <key>700$a</key>
+#
+# WARNING: When using the <key> tag you can enter a field with a subfield
+# (in the format 700$a), just a field name (for fields which don't have
+# subfields, like 005) or a literal value. Fields which don't exist in that
+# record will be skipped, and if the key is empty no output record will be
+# produced.
+#
+# So, the best way to produce just a few records in the output is to specify
+# as key a field which doesn't exist at all in the ISIS database, or just one
+# literal value!!
+#
+#
+# If ISIS databases are named same as directories in which they
# reside, you can specify just directories (so that shell globing work)
# like this:
#
-# ./isis2marc.pl all.marc /mnt2/*/LIBRI
+# ./isis2marc.pl config.xml all.marc /mnt2/*/LIBRI
+#
use strict;
use OpenIsis;
use MARC;
+use XML::Simple;
use Data::Dumper;
-# to select all fields use something like /./
-my $field_filter = '^700$';
+# Fewer than three arguments (configuration file, MARC output file and at
+# least one ISIS database): print usage instructions to STDERR, dump a
+# template configuration file to STDOUT (so that it can be redirected into
+# a file) and exit with status 1.
+if ($#ARGV < 2) {
+ print STDERR "Usage: $0 config.xml marc_file.iso isis_db [isis_db ...|isis_dir]\n";
+ print STDERR <<'_END_OF_USAGE_';
+
+ isis_db can be path to directory (if ISIS database is called
+ same as database) which will make shell globing work
+ or full path to ISIS database (without any extension)
+
+ Example configuration file will be dumped to standard output
+ after this, so you can just re-direct output of this script
+ to produce config file like this:
+
+ $ ./isis2marc.pl > config.xml
+
+_END_OF_USAGE_
+
+ # The XML declaration has to be the very first thing in the document, so
+ # the template must not start with a blank line; otherwise the redirected
+ # output is not well-formed XML and cannot be read back as configuration.
+ print <<'_END_OF_CONFIG_';
+<?xml version="1.0" encoding="ISO-8859-2"?>
+<!-- template configuration file -->
+<mapping>
+ <record>
+ <key>700$a</key>
+ <key>700$b</key>
+ <field tag="700">
+ <indicator1>0</indicator1>
+ <indicator2>#</indicator2>
+ <subfield id="a">700$a</subfield>
+ <subfield id="b">700$b</subfield>
+ </field>
+ <field tag="009">
+ <nosubfield>900</nosubfield>
+ </field>
+ </record>
+
+</mapping>
+
+_END_OF_CONFIG_
+
+ exit 1;
+}
+
+# Parser object used to read the XML mapping configuration.
+my $xml = XML::Simple->new();
-my $marc_file = shift @ARGV || die "Usage: $0 [MARC file] [ISIS db]...";
+my $config_file = shift @ARGV || die "no config file?";
+
+# Every element that the conversion loop iterates over is forced into an
+# array, including 'key': without that a <record> with a single <key> is
+# parsed into a plain string and dereferencing it as an array dies under
+# "use strict". Subfields are folded into a hash keyed by their "id".
+my $config = $xml->XMLin($config_file,
+ KeyAttr => { subfield => 'id' },
+ ForceArray => [ 'record', 'key', 'field', 'subfield', 'nosubfield' ],
+ ContentKey => '-content',
+ ) || die "can't open configuration file '$config_file': $!";
+
+my $marc_file = shift @ARGV || die "no marc file?";
my $marc=MARC->new;
my %stored;
my $total = 0;
+
foreach my $db_file (@ARGV) {
print "reading '$db_file'";
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
print "." if ($mfn % $step == 0);
my $row = OpenIsis::read( $db, $mfn );
- foreach my $fld (keys %{$row}) {
- next if ($fld !~ m/$field_filter/);
- my @values;
- my $num;
+ # unroll this field to in-memory structure data
+ my %data;
- foreach my $sf (@{$row->{$fld}}) {
+ # delete mfn from $row because it's literal value and
+ # not array, so rest of code would croak
+ delete($row->{mfn});
- $stored{$sf}++;
+ foreach my $fld (keys %{$row}) {
- next if ($stored{$sf} > 1);
+ foreach my $rec_data (@{$row->{$fld}}) {
- my %v;
- while ($sf =~ s/\^(\w)([^\^]+)//) {
- $v{$1} = $2;
+ while ($rec_data =~ s/\^(\w)([^\^]+)//) {
+ $data{$fld.'$'.$1} = $2;
# delete last subfield delimiter
- $sf = "" if ($sf =~ /\^\w*$/);
+ $rec_data = "" if ($rec_data =~ /(\^\w*$|\^\w\s*$)/);
}
- if (%v) {
- push @values, %v;
- } elsif ($sf && $sf !~ /^(\^\w)*\s*$/) {
- # regex above remove empty subfields
- push @values, $sf;
+
+ # record data still exist? it's field without
+ # subfields, then...
+ if ($rec_data) {
+ $data{$fld} = $rec_data;
}
+ }
+ }
+ # now, create output MARC record(s)
+
+ # Create the output MARC record(s) for the current ISIS record: one
+ # candidate record per <record> mapping found in the configuration.
+ foreach my $cfg_rec (@{$config->{record}}) {
+
+ # <key> may come back from the parser as a single string or as an
+ # array of strings; normalise it to a list either way.
+ my $cfg_keys = $cfg_rec->{key};
+ my @key_parts = ref($cfg_keys) eq 'ARRAY' ? @{$cfg_keys}
+ : defined($cfg_keys) ? ($cfg_keys)
+ : ();
+
+ # Build the unique key: values found in %data are appended, entries
+ # that do not look like a field reference (three or four digits with
+ # optional $x subfields) are appended as literals, and references to
+ # fields missing from this record contribute nothing.
+ my $key = '';
+ foreach my $part (@key_parts) {
+ if ($data{$part}) {
+ $key .= $data{$part};
+ } elsif ($part !~ m/^\d{3,4}(\$\w)*$/) {
+ $key .= $part;
+ }
+ }
+
+ # No record is produced for an empty key or for a key which was
+ # already seen in this run. The string comparison (instead of a
+ # truth test) means that a key of "0" is deduplicated as well.
+ next if ($key eq '' || $stored{$key}++);
+
+ # this will be new record (if needed)
+ my $num;
+
+ # with one or more fields
+ foreach my $cfg_fld (@{$cfg_rec->{field}}) {
+
+ my $new_fld = $cfg_fld->{tag};
+
+ # Indicators fall back to a blank only when they are not configured;
+ # a plain "|| ' '" would also replace a configured indicator of "0".
+ my $i1 = defined($cfg_fld->{indicator1}) ? $cfg_fld->{indicator1} : ' ';
+ my $i2 = defined($cfg_fld->{indicator2}) ? $cfg_fld->{indicator2} : ' ';
+
+ #
+ # first create fields without subfields
+ #
+
+ foreach my $f (@{$cfg_fld->{nosubfield}}) {
+ next if (! $data{$f});
+
+ # the record is created lazily, on the first field which has data
+ if (! $num) {
+ $num=$marc->createrecord();
+ $new++;
+ }
+ $marc->addfield({record=>$num,
+ field=>$new_fld,
+ i1=>$i1,
+ i2=>$i2,
+ value=>$data{$f}
+ });
+ }
+
+ #
+ # then create fields with subfields
+ #
+
+ # this will hold subfield id / value pairs
+ my @values;
+
+ # sorted, so that subfield order in the output does not depend on
+ # hash ordering
+ foreach my $new_sf (sort keys %{$cfg_fld->{subfield}}) {
+ # field$subfield
+ my $f = $cfg_fld->{subfield}->{$new_sf};
+ if ($data{$f}) {
+ push @values, $new_sf;
+ push @values, $data{$f};
+ }
+ }
next if (! @values);
if (! $num) {
$num=$marc->createrecord();
$new++;
}
$marc->addfield({record=>$num,
- field=>$fld,
- i1=>" ", i2=>" ",
- value=>\@values});
-
+ field=>$new_fld,
+ i1=>$i1,
+ i2=>$i2,
+ value=>\@values}
+ );
}
}