brown-bag-bug-fix: writing MARC file just once (at end) will save a lot of time
[webpac] / tools / isis2marc.pl
#!/usr/bin/perl -w

# This utility converts some (or all, selected by a regex) fields from
# one or more CDS/ISIS databases into a single MARC file.
#
# 2004-02-23 Dobrica Pavlinusic <dpavlin@rot13.org>
#
# If ISIS databases are named the same as the directories in which they
# reside, you can specify just the directories (so that shell globbing
# works), like this:
#
# ./isis2marc.pl all.marc /mnt2/*/LIBRI

use strict;
use OpenIsis;
use MARC;
use Data::Dumper;

# to select all fields use something like /./
my $field_filter = '^700$';

my $marc_file = shift @ARGV || die "Usage: $0 [MARC file] [ISIS db]...";

# compile the filter once instead of on every field of every record
my $field_re = qr/$field_filter/;

# every record is collected in this one in-memory MARC object and written
# to disk only once, after all databases have been read
my $marc = MARC->new;

# it seems that I can't specify invalid template for 005 and prevent
# output from creating field 005
#$num->add_005s({record=>1});

# unbuffered STDOUT, so that progress dots show up as they are printed
select(STDOUT); $|=1;

# raw field occurrences seen so far (over all records and all databases);
# an occurrence which was already seen is not exported again
my %stored;
# number of MARC records created so far, over all databases
my $total = 0;

foreach my $db_file (@ARGV) {

        print "reading '$db_file'";

        # a directory "foo/" is expanded to the database path "foo/foo"
        if (-d $db_file) {
                $db_file =~ s,([^/]+)/*$,$1/$1,;
        }

        my $db = OpenIsis::open( $db_file );
        # NOTE(review): assumes OpenIsis::open reports failure as undef or as
        # a negative handle -- confirm against the OpenIsis documentation
        if (! defined $db || $db < 0) {
                print "\n";
                warn "can't open ISIS database '$db_file', skipping\n";
                next;
        }

        # "|| 1" keeps the percentage below from dividing by zero
        my $maxmfn = OpenIsis::maxRowid( $db ) || 1;

        print " [rows: $maxmfn]\n";

        # print roughly $progress_len dots per database
        my $progress_len = 50;

        my $step = int($maxmfn/$progress_len);
        $step = 1 if ($step < 1);

        # number of MARC records created from this database
        my $new = 0;

        for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
                print "." if ($mfn % $step == 0);
                my $row = OpenIsis::read( $db, $mfn );
                # presumably nothing is returned for a missing record; skip it
                next unless ($row);

                foreach my $fld (keys %{$row}) {
                        next if ($fld !~ $field_re);
                        # a broad filter can also match row entries which are
                        # not lists of occurrences; those can't be exported
                        next unless (ref($row->{$fld}) eq 'ARRAY');

                        # MARC record for this field of this row, created
                        # lazily when the first new occurrence is found
                        my $num;

                        foreach my $sf (@{$row->{$fld}}) {

                                # skip occurrences which were already exported
                                next if ($stored{$sf}++);

                                # values are collected per occurrence, so that
                                # one occurrence never leaks into the next one
                                my @values = parse_occurrence($sf);
                                next if (! @values);

                                if (! $num) {
                                        $num = $marc->createrecord();
                                        $new++;
                                }
                                $marc->addfield({record=>$num,
                                        field=>$fld,
                                        i1=>" ", i2=>" ",
                                        value=>\@values});
                        }
                }
        }
        $total += $new;
        printf "\t%d (%0.2f%%) t: %d\n",$new,($new*100/$maxmfn),$total;
}

$marc->output({file=>"> $marc_file",'format'=>"usmarc"});

# Split one raw ISIS field occurrence into the value list passed to addfield.
#
# Subfields are written as "^" followed by a one character code and the
# text of the subfield.
#
# Returns a flat list (code, text, code, text, ...) in the original order,
# with repeated codes kept. An occurrence without any subfield is returned
# as a single element holding its whole text. An empty list is returned when
# there is nothing to export.
sub parse_occurrence {
        my ($occurrence) = @_;

        my @values;
        # subfields without any text (like "^a" followed by "^b...") can't
        # match here, so they are left out
        while ($occurrence =~ /\^(\w)([^\^]+)/g) {
                push @values, $1, $2;
        }
        return @values if (@values);

        # no subfields: use the raw text, unless it is empty or made only of
        # empty subfield delimiters and whitespace
        if ($occurrence && $occurrence !~ /^(\^\w)*\s*$/) {
                return ($occurrence);
        }

        return;
}