5 mods2marc.pl - convert MODS XML back to MARC (ISO2709)
9 mods2marc.pl export.marc mods.xml [mods2.xml ... ]
13 This script will convert MODS format
14 L<http://www.loc.gov/standards/mods/>
15 back to MARC (ISO2709) format.
17 Since conversion back to MARC is not simple, lot of things are hard-coded
20 This script B<is somewhat specific> to MODS export from
21 Faculty of Electrical Engineering and Computing
22 so you might want to edit it (among other thing, it includes a lot
23 of fields which are in Croatian).
25 Feel free to hack this script and convert it to your own needs.
29 This script will parse imput XML twice: once with C<XML::Twig> and
30 then each entry with C<XML::Simple> to produce in-memory structure.
31 That's because I wanted to keep node selection logical (and perl-like).
33 If you don't like it, you can rewrite this script to use XPATH. I tried
34 and failed (it seems that MODS is too complicated for my limited knowledge
47 my $marc_file = shift @ARGV || die "$0: need MARC export file";
48 die "$0: need at least one MODS XML file" if (! @ARGV);
55 my $ENCODING = 'ISO-8859-2';
56 $ENCODING = 'windows-1250';
58 my $twig=XML::Twig->new(
59 twig_roots => { 'mods' => \&mods },
60 output_encoding => 'UTF8',
63 my $utf2iso = Text::Iconv->new("UTF8", $ENCODING);
65 foreach my $xml_file (@ARGV) {
67 $twig->parsefile($xml_file);
72 print "Saving MARC file...\n";
74 $marc->output({file=>"> $marc_file",'format'=>"usmarc"});
79 my $xml=$elt->xml_string;
80 my $ref = XMLin('<xml>'.$xml.'</xml>',
90 'identifier' => 'type',
95 'place' => 'placeTerm',
96 'physicalDescription' => 'extent',
97 'roleTerm' => 'content',
99 ContentKey => '-content',
105 my $m_cache = \shift || die "need m_cache";
106 my $fld = shift || die "need field!";
108 my $data = shift || return;
110 #print "add: $fld",($sf ? "^".$sf : ''),": $data\n";
113 push @{$$m_cache->{tmp}->{$fld}}, $sf;
115 push @{$$m_cache->{tmp}->{$fld}}, $utf2iso->convert($data);
119 my $m_cache = \shift || die "need m_cache";
120 foreach my $fld (@_) {
121 #print "marc_rep: $fld\n";
122 push @{$$m_cache->{array}->{$fld}}, [ @{$$m_cache->{tmp}->{$fld}} ] if ($$m_cache->{tmp}->{$fld});
123 delete $$m_cache->{tmp}->{$fld};
128 my $m_cache = \shift || die "need m_cache";
129 foreach my $fld (@_) {
130 #print "marc_single: $fld\n";
132 die "$fld already defined! not single?" if ($$m_cache->{single}->{$fld});
134 $$m_cache->{single}->{$fld} = \@{$$m_cache->{tmp}->{$fld}} if ($$m_cache->{tmp}->{$fld});
135 delete $$m_cache->{tmp}->{$fld};
140 my $m_cache = \shift || die "need m_cache";
141 my $fld = shift || die "need field!";
143 my $data = shift || return;
145 marc_add($$m_cache,$fld,$sf,$data);
146 marc_rep($$m_cache,$fld);
149 sub marc_add_single {
150 my $m_cache = \shift || die "need m_cache";
151 my $fld = shift || die "need field!";
153 my $data = shift || return;
155 marc_add($$m_cache,$fld,$sf,$data);
156 marc_single($$m_cache,$fld);
160 # Journals start with c- in our MODS
161 $journal = 1 if ($ref->{recordInfo}->{recordIdentifier} =~ m/^c-/);
163 foreach my $t (@{$ref->{subject}->{topic}}) {
164 marc_add($m_cache,'610','a', $t);
165 marc_rep($m_cache,'610');
171 foreach my $name (@{$ref->{name}}) {
172 my $role = $name->{role}->{roleTerm}->{content};
174 if ($role eq "author") {
175 marc_add($m_cache,$fld_700,'a',$name->{namePart}->{family});
176 marc_add($m_cache,$fld_700,'b',$name->{namePart}->{given});
177 marc_add($m_cache,$fld_700,'4',$role);
179 marc_rep($m_cache,$fld_700);
181 # first author goes in 700, others in 701
183 } elsif ($role eq "editor" or $role eq "illustrator") {
184 marc_add($m_cache,'702','a',$name->{namePart}->{family});
185 marc_add($m_cache,'702','b',$name->{namePart}->{given});
186 marc_add($m_cache,'702','4',$role);
187 marc_rep($m_cache,'702');
188 } elsif ($role eq "corporate") {
189 marc_add_single($m_cache,"$fld_710\t0 ",'a',$name->{namePart});
191 } elsif ($role eq "conference") {
192 marc_add_single($m_cache,"$fld_710\t1 ",'a',$name->{namePart});
195 die "FATAL: don't know how to map role '$role'" if ($role);
199 my $note = $ref->{note};
202 foreach my $n (split(/\s*;\s+/, $note)) {
203 if ($n =~ s/bibliogr:\s+//i) {
204 marc_add_rep($m_cache,'320','a',"Bibliografija: $n");
205 } elsif ($n =~ s/ilustr:\s+//i) {
206 marc_add($m_cache,'215','c', $n);
208 marc_add_rep($m_cache,'320','a',$n);
214 my $type = $ref->{identifier}->{type};
217 if ($type eq "isbn") {
218 marc_add_rep($m_cache,'010','a',$ref->{identifier}->{content});
219 } elsif ($type eq "issn") {
220 marc_add_rep($m_cache,'011','a',$ref->{identifier}->{content});
221 } elsif ($type eq "uri") {
222 marc_add_rep($m_cache,'856','u',$ref->{identifier}->{content});
224 die "unknown identifier type $type";
228 my $phy_desc = $ref->{physicalDescription};
231 foreach my $t (split(/\s*;\s+/, $phy_desc)) {
232 if ($t =~ m/([^:]+):\s+(.+)$/) {
235 print STDERR "can't parse '$t' in ",Dumper($phy_desc);
238 my $data = $tmp->{pagin};
239 $data .= ", " if ($data);
241 $data .= $tmp->{str}." str";
243 marc_add($m_cache,'215','a', $data) if ($data);
244 marc_add($m_cache,'215','d', $tmp->{visina});
246 marc_rep($m_cache,'215');
248 my $mfn = $ref->{recordInfo}->{recordIdentifier};
250 marc_add_single($m_cache,'001',undef,$mfn);
252 marc_add($m_cache,'200','a',$ref->{titleInfo}->{title});
253 marc_add($m_cache,'200','e',$ref->{titleInfo}->{subTitle});
254 marc_single($m_cache,'200');
256 foreach my $c (@{$ref->{classification}}) {
257 if ($c->{'authority'} eq "udc") {
258 marc_add_rep($m_cache,'675','a', $c->{'content'});
262 foreach my $ri (@{$ref->{relatedItem}}) {
263 my $related = $ri->{type};
265 if ($related eq "series") {
266 marc_add_rep($m_cache,'225','a',$ri->{titleInfo}->{title});
267 foreach my $pn (@{$ri->{titleInfo}->{partNumber}}) {
269 marc_add_rep($m_cache,'999','a',$pn);
271 marc_add_rep($m_cache,'225','v',$pn);
274 } elsif ($related eq "preceding") {
275 marc_add($m_cache,'520','a',$ri->{titleInfo}->{title});
276 if ($ri->{identifier}) {
277 if ($ri->{identifier}->{type} eq "issn") {
278 marc_add($m_cache,'520','x',$ri->{identifier}->{content});
280 die "can't store identifier type $type";
283 marc_rep($m_cache,'520');
285 die "can't parse related item type $related" if ($related);
290 marc_add_single($m_cache,'205','a',$ref->{originInfo}->{edition});
292 marc_add($m_cache,'210','a',$ref->{originInfo}->{place});
294 my $publisher = $ref->{originInfo}->{publisher};
295 if ($publisher =~ m,^(.+?)\s*/\s*(.+)$,) {
296 marc_add($m_cache,'210','a', $2);
297 marc_add($m_cache,'210','c', $1);
299 marc_add($m_cache,'210','c', $publisher);
302 marc_add($m_cache,'210','d',$ref->{originInfo}->{dateIssued});
304 marc_single($m_cache,'210');
306 marc_add_single($m_cache,'326','a',$ref->{originInfo}->{frequency}) if ($journal);
309 print "$nr " if ($nr % 100 == 0);
313 $bib_level = "s" if ($journal);
314 my $m=$marc->createrecord({leader=>"00000na".$bib_level." 2200000 a 4500"});
316 foreach my $fld (keys %{$m_cache->{array}}) {
317 foreach my $arr (@{$m_cache->{array}->{$fld}}) {
318 #print "array = ",Dumper($arr);
320 # do we have indicators?
321 if ($fld =~ m/^(.+)\t(.)(.)$/) {
325 $marc->addfield({record=>$m,
334 foreach my $fld (keys %{$m_cache->{single}}) {
335 #print "single = ",Dumper($m_cache->{single}->{$fld});
337 # do we have indicators?
338 if ($fld =~ m/^(.+)\t(.)(.)$/) {
342 $marc->addfield({record=>$m,
346 value=>$m_cache->{single}->{$fld}
352 $t->purge; # frees the memory