script which will select just some fields from CDS/ISIS database and insert
author Dobrica Pavlinusic <dpavlin@rot13.org>
Mon, 23 Feb 2004 23:17:17 +0000 (23:17 +0000)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Mon, 23 Feb 2004 23:17:17 +0000 (23:17 +0000)
only unique values in MARC file (useful for creating ad-hoc authority file)

git-svn-id: file:///home/dpavlin/private/svn/webpac/trunk@228 13eb9ef6-21d5-0310-b721-a9d68796d827

tools/isis2marc.pl [new file with mode: 0755]

diff --git a/tools/isis2marc.pl b/tools/isis2marc.pl
new file mode 100755 (executable)
index 0000000..216720f
--- /dev/null
@@ -0,0 +1,128 @@
+#!/usr/bin/perl -w
+
+# this utility will convert some (or all, defined by regex)
+# fields into marc file from one or more CDS/ISIS files
+#
+# 2004-02-23 Dobrica Pavlinusic <dpavlin@rot13.org>
+#
+# if ISIS databases are named same as directories in which they
+# reside, you can specify just directories (so that shell globbing works)
+# like this:
+# 
+# ./isis2marc.pl all.marc /mnt2/*/LIBRI
+
+use strict;
+use OpenIsis;
+use MARC;
+use Data::Dumper;
+
+# regex matched against ISIS field tags; only matching fields are exported
+# (to select all fields use something like /./)
+my $field_filter = '^700$';
+
+my $marc_file = shift @ARGV || die "Usage: $0 [MARC file] [ISIS db]...";
+
+my $marc=MARC->new;
+
+# it seems that I can't specify invalid template for 005 and prevent
+# output from creating field 005
+#$num->add_005s({record=>1});
+
+select(STDOUT); $|=1;
+
+# raw ISIS values already exported; shared by all databases so that
+# every distinct value ends up in the MARC file only once
+my %stored;
+# number of MARC records created so far, over all databases
+my $total = 0;
+
+# Split one raw ISIS field occurrence into the value list handed to
+# MARC addfield: (code, text, code, text, ...) in the order found in
+# the occurrence, or the bare string when it has no subfields.
+# Returns an empty list when there is nothing worth storing.
+sub subfield_values {
+       my ($occurrence) = @_;
+
+       my @values;
+       # peel off one "^<code><text>" subfield at a time; pushing pairs
+       # (instead of going through a hash) keeps order and repeated codes
+       while ($occurrence =~ s/\^(\w)([^\^]+)//) {
+               push @values, $1, $2;
+       }
+       return @values if (@values);
+
+       # no subfields: keep the whole value, unless it is empty or
+       # made only of empty subfield delimiters
+       if ($occurrence && $occurrence !~ /^(\^\w)*\s*$/) {
+               push @values, $occurrence;
+       }
+       return @values;
+}
+
+foreach my $db_file (@ARGV) {
+
+       print "reading '$db_file'";
+
+       # a directory stands for the database named like it:
+       # /mnt2/foo/LIBRI becomes /mnt2/foo/LIBRI/LIBRI
+       if (-d $db_file) {
+               $db_file =~ s,([^/]+)/*$,$1/$1,;
+       }
+
+       my $db = OpenIsis::open( $db_file );
+       # "|| 1" keeps the percentage below from dividing by zero
+       my $maxmfn = OpenIsis::maxRowid( $db ) || 1;
+
+       print " [rows: $maxmfn]\n";
+
+       # print roughly $progress_len dots per database
+       my $progress_len = 50;
+
+       my $step = int($maxmfn/$progress_len);
+       $step = 1 if ($step == 0);
+
+       # number of MARC records created from this database
+       my $new = 0;
+
+       for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
+               print "." if ($mfn % $step == 0);
+               my $row = OpenIsis::read( $db, $mfn );
+               foreach my $fld (keys %{$row}) {
+                       next if ($fld !~ m/$field_filter/);
+                       # only lists of occurrences can be exported
+                       next if (ref($row->{$fld}) ne 'ARRAY');
+
+                       # MARC record for this field of this row, created
+                       # on the first value which wasn't seen before
+                       my $num;
+
+                       foreach my $sf (@{$row->{$fld}}) {
+
+                               # skip values which are already exported
+                               next if ($stored{$sf}++);
+
+                               # new list for each occurrence, so repeated
+                               # fields don't inherit earlier subfields
+                               my @values = subfield_values($sf);
+                               next if (! @values);
+
+                               if (! $num) {
+                                       $num=$marc->createrecord();
+                                       $new++;
+                               }
+                               $marc->addfield({record=>$num,
+                                       field=>$fld,
+                                       i1=>" ", i2=>" ",
+                                       value=>\@values});
+
+                       }
+
+               }
+       }
+       $total += $new;
+       printf "\t%d (%0.2f%%) t: %d\n",$new,($new*100/$maxmfn),$total;
+       # the whole file is rewritten (">") after each database with
+       # everything collected so far
+       $marc->output({file=>"> $marc_file",'format'=>"usmarc"})
+}
+