3 # This utility will convert some (or all, depending on the definition in
4 # the XML configuration file) fields and subfields, with remapping, into a
5 # MARC file from one or more CDS/ISIS files
7 # 2004-02-23 Dobrica Pavlinusic <dpavlin@rot13.org>
10 # Run without parameters for usage instructions, or run without parameters
11 # and redirect STDOUT to a file to create an example configuration file like
14 # ./isis2marc.pl > config.xml
16 # If you want to create unique records, you need to define one or more
17 # fields as key (which will be used to produce just one record for one
20 # Keys are global for one run of script (that means for all ISIS databases
21 # used in one run), but you can write arbitrary values (as opposed to field
22 # names) inside key tag to produce unique key. For example,
27 # WARNING: When using the <key> tag you can enter a field with a subfield
28 # (in format 700$a), just a field name (for fields which don't have subfields,
29 # like 005) or a literal value. Fields which don't exist in that record
30 # will be skipped, and if the key is empty no output record will be produced.
32 # So, the best way to produce just a few records in the output is to specify
33 # a field which doesn't exist at all in the ISIS database as the key, or just one literal value!!
36 # If ISIS databases are named the same as the directories in which they
37 # reside, you can specify just the directories (so that shell globbing works)
40 # ./isis2marc.pl config.xml all.marc /mnt2/*/LIBRI
# Usage/help output: the usage line and the explanatory text go to STDERR,
# while the example XML configuration is printed with a plain `print` (the
# currently selected handle, normally STDOUT) so that
# `./isis2marc.pl > config.xml` captures only the template.
# NOTE(review): the condition that triggers this output, the heredoc
# terminators and any exit that follows are not visible in this excerpt.
50 print STDERR "Usage: $0 config.xml marc_file.iso isis_db [isis_db ...|isis_dir]\n";
51 print STDERR <<'_END_OF_USAGE_';
53 isis_db can be path to directory (if ISIS database is called
54 same as database) which will make shell globing work
55 or full path to ISIS database (without any extension)
57 Example configuration file will be dumped to standard output
58 after this, so you can just re-direct output of this script
59 to produce config file like this:
61 $ ./isis2marc.pl > config.xml
65 print <<'_END_OF_CONFIG_';
67 <?xml version="1.0" encoding="ISO-8859-2"?>
68 <!-- template configuration file -->
74 <indicator1>0</indicator1>
75 <indicator2>#</indicator2>
76 <subfield id="a">700$a</subfield>
77 <subfield id="b">700$b</subfield>
80 <nosubfield>900</nosubfield>
# Create the XML::Simple parser used to read the conversion configuration.
# (Direct method call instead of the indirect-object form `new XML::Simple()`.)
91 my $xml = XML::Simple->new();
# First command-line argument: path to the XML configuration file.
93 my $config_file = shift @ARGV || die "no config file?";
# Parse the configuration:
#  - KeyAttr folds <subfield id="x"> elements into a hash keyed by their id,
#  - ForceArray makes the listed elements array references even when they
#    occur only once. 'key' is included because the conversion loop always
#    dereferences $cfg_rec->{key} as an array; without it a record with
#    exactly one <key> would be parsed as a plain scalar,
#  - ContentKey '-content' lets an element that has only text content collapse
#    to that text (see XML::Simple documentation).
95 my $config = $xml->XMLin($config_file,
96 KeyAttr => { subfield => 'id' },
97 ForceArray => [ 'record', 'field', 'subfield', 'nosubfield', 'key' ],
98 ContentKey => '-content',
99 ) || die "can't open configuration file '$config_file': $!";
# Second command-line argument: name of the MARC file to write.
101 my $marc_file = shift @ARGV || die "no marc file?";
105 # it seems that I can't specify invalid template for 005 and prevent
106 # output from creating field 005
107 #$num->add_005s({record=>1});
# Select STDOUT and enable autoflush on it (presumably so that the progress
# dots printed while reading a database show up immediately).
109 select(STDOUT); $|=1;
# Main conversion: every remaining command-line argument is an ISIS database
# (or a directory named like the database). Each record is unrolled into a
# flat field/subfield lookup and then mapped to MARC fields as described by
# the configuration.
# NOTE(review): several lines of this loop (declarations, guards, closing
# braces) are not visible in this excerpt; comments below describe only what
# the visible statements do.
115 foreach my $db_file (@ARGV) {
117 print "reading '$db_file'";
# Repeat the last path component, dropping trailing slashes
# (e.g. /mnt2/x/LIBRI/ becomes /mnt2/x/LIBRI/LIBRI).
# NOTE(review): the condition guarding this rewrite is not visible here.
120 $db_file =~ s,([^/]+)/*$,$1/$1,;
# Open the database and fetch its highest row id; a false result (0/undef)
# falls back to 1, which also keeps the divisions below away from zero.
123 my $db = OpenIsis::open( $db_file );
124 my $maxmfn = OpenIsis::maxRowid( $db ) || 1;
126 print " [rows: $maxmfn]\n";
# Progress display: roughly $progress_len dots per database, one dot every
# $step records.
128 my $progress_len = 50;
130 my $step = int($maxmfn/$progress_len);
# Databases with fewer rows than $progress_len would give a step of 0 and a
# modulo by zero below.
131 $step = 1 if ($step == 0);
# Visit every row id from 1 to $maxmfn.
135 for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
136 print "." if ($mfn % $step == 0);
137 my $row = OpenIsis::read( $db, $mfn );
139 # unroll this field to in-memory structure data
142 # delete mfn from $row because it's literal value and
143 # not array, so rest of code would croak
# Each remaining key of $row is treated as a field tag whose value is an
# array reference of field occurrences.
146 foreach my $fld (keys %{$row}) {
# $rec_data aliases the array element, so the substitutions below consume the
# row data in place.
148 foreach my $rec_data (@{$row->{$fld}}) {
# Strip "^x value" subfields one at a time and store each under the key
# "<field>$<x>"; a repeated field/subfield overwrites the earlier value.
150 while ($rec_data =~ s/\^(\w)([^\^]+)//) {
151 $data{$fld.'$'.$1} = $2;
153 # delete last subfield delimiter
# NOTE(review): when the remainder ends in a dangling "^x", this clears the
# whole remaining string, not just the delimiter.
154 $rec_data = "" if ($rec_data =~ /(\^\w*$|\^\w\s*$)/);
157 # record data still exist? it's field without
160 $data{$fld} = $rec_data;
165 # now, create output MARC record(s)
167 foreach my $cfg_rec (@{$config->{record}}) {
169 # do we have unique key?
# Walk the <key> entries of this record definition; $_ is the entry text.
171 foreach (@{$cfg_rec->{key}}) {
# Entries that do not look like a field spec (3-4 digits followed by optional
# "$x" subfield suffixes) take this branch -- presumably used as literal key
# text; the branch bodies are not visible here.
174 } elsif (! m/^\d{3,4}(\$\w)*$/) {
# Skip this record definition when its key was already emitted, or when the
# key is empty.
181 next if ($key && $stored{$key} || $key eq "");
# Remember the key so later records with the same key are skipped.
183 $stored{$key}++ if ($key);
186 # this will be new record (if needed)
189 # with one or more fields
190 foreach my $cfg_fld (@{$cfg_rec->{field}}) {
# Tag of the MARC field to create.
192 my $new_fld = $cfg_fld->{tag};
195 # first create fields without subfields
198 # with one or more subfields
199 foreach my $f (@{$cfg_fld->{nosubfield}}) {
# Skip source fields that are missing or hold a false value ("" or "0").
200 next if (! $data{$f});
203 $num=$marc->createrecord();
# Indicators default to a blank only when not configured (undef or empty).
# A plain `|| ' '` would also replace a configured indicator of 0, which the
# example configuration uses, because "0" is false in Perl.
206 my $i1 = (defined $cfg_fld->{indicator1} && length $cfg_fld->{indicator1}) ? $cfg_fld->{indicator1} : ' ';
207 my $i2 = (defined $cfg_fld->{indicator2} && length $cfg_fld->{indicator2}) ? $cfg_fld->{indicator2} : ' ';
208 $marc->addfield({record=>$num,
217 # then create fields with subfields
220 # this will hold subfield values
223 # with one or more subfields
# $new_sf is the output subfield id; $f is the source ISIS field/subfield spec
# configured for it.
224 foreach my $new_sf (keys %{$cfg_fld->{subfield}}) {
226 my $f = $cfg_fld->{subfield}->{$new_sf};
# Collect (subfield id, value) pairs for the new MARC field.
228 push @values, $new_sf;
229 push @values, $data{$f};
235 $num=$marc->createrecord();
# Same indicator defaulting as above: keep a configured 0, fall back to a
# blank only for undef/empty values.
238 my $i1 = (defined $cfg_fld->{indicator1} && length $cfg_fld->{indicator1}) ? $cfg_fld->{indicator1} : ' ';
239 my $i2 = (defined $cfg_fld->{indicator2} && length $cfg_fld->{indicator2}) ? $cfg_fld->{indicator2} : ' ';
240 $marc->addfield({record=>$num,
# Per-database summary: value of $new, its percentage of $maxmfn, and $total.
251 printf "\t%d (%0.2f%%) t: %d\n",$new,($new*100/$maxmfn),$total;
# Write all collected records to $marc_file in USMARC format.
254 $marc->output({file=>"> $marc_file",'format'=>"usmarc"})