added --marcxml flag
[webpac2] / lib / WebPAC / Output / MARC.pm
1 package WebPAC::Output::MARC;
2
3 use warnings;
4 use strict;
5
6 use base qw/WebPAC::Common/;
7
8 use MARC::Record;
9 use MARC::File::XML;
10 use MARC::Lint;
11 use Data::Dump qw/dump/;
12
13 =head1 NAME
14
15 WebPAC::Output::MARC - Create MARC records from C<marc_*> normalisation rules
16
17 =head1 VERSION
18
19 Version 0.04
20
21 =cut
22
23 our $VERSION = '0.04';
24
25 =head1 SYNOPSIS
26
27 Create MARC records from C<marc_*> normalisation rules described in
28 L<WebPAC::Normalize>.
29
30
31 =head1 FUNCTIONS
32
33 =head2 new
34
35   my $marc = new WebPAC::Output::MARC(
36         path => '/path/to/output.marc',
37         marc_encoding => 'utf-8',
38         lint => 1,
39         dump => 0,
40         marcxml => 0,
41   )
42
43 =cut
44
# Construct the output object and open its output file(s).
#
# Arguments are a key => value list stored verbatim in the object hash:
#   path          - required base name; "$path.marc" is always created
#   marc_encoding - stored for later use, defaults to 'utf-8'
#   lint          - true to create a MARC::Lint checker in the same slot
#   marcxml       - true (or $ENV{MARCXML} set) to also create
#                   "$path.marcxml" and write the <collection> header
#
# Returns the blessed object. A failed open is reported through the
# logger's logdie, a missing path through logconfess.
sub new {
        my $class = shift;
        my $self = {@_};
        bless($self, $class);

        my $log = $self->_get_logger;

        if ($self->{lint}) {
                # the boolean flag is replaced by the checker object; if the
                # constructor returns false the slot stays false (lint off)
                $self->{lint} = MARC::Lint->new or
                        $log->warn("Can't create MARC::Lint object, linting is disabled");
        }

        $self->{marc_encoding} ||= 'utf-8';

        if (my $path = $self->{path}) {
                # name the file that is really opened in the error message
                open($self->{fh}, '>', "$path.marc") ||
                        $log->logdie("can't open MARC output $path.marc: $!");
                binmode($self->{fh}, ':utf8');

                $log->info("Creating MARC export file $path.marc", $self->{lint} ? ' (with lint)' : '', " encoding ", $self->{marc_encoding}, "\n");
                if ( $self->{marcxml} || $ENV{MARCXML} ) {
                        open($self->{fh_marcxml}, '>:utf8', "$path.marcxml") ||
                                $log->logdie("can't open MARCXML output $path.marcxml: $!");
                        $log->info("Creating MARCXML export file $path.marcxml");
                        # one XML declaration and the opening wrapper element
                        # for the whole file
                        print {$self->{fh_marcxml}} qq{<?xml version="1.0" encoding="UTF-8"?>\n<collection>\n};
                }
        } else {
                $log->logconfess("new called without path");
        }

        return $self;
}
77
78 =head2 add
79
80   $marc->add(
81         id => $mfn,
82         fields => WebPAC::Normalize::_get_marc_fields(),
83         leader => WebPAC::Normalize::_get_marc_leader(),
84         row => $row,
85   );
86
87 C<row> is an optional parameter which is used when dumping the original
88 row to the error log.
89
90 =cut
91
# Build one MARC record from the supplied fields and write it out.
#
# Arguments (key => value list):
#   id     - required (must be defined); used in log messages
#   fields - required array ref; every element is indexed as an array
#            whose first element is used as the numeric sort key
#   leader - optional hash ref of offset => replacement string
#   row    - optional; only dumped into log messages
#
# The caller's fields array is sorted in place. The record is printed to
# the .marc handle and, when a MARCXML handle is open, to that as well.
sub add {
        my $self = shift;

        my $arg = {@_};

        my $log = $self->_get_logger;

        $log->logconfess("add needs fields and id arguments")
                unless ($arg->{fields} && defined $arg->{id});

        $log->logconfess("fields isn't array") unless (ref($arg->{fields}) eq 'ARRAY');

        my $id = $arg->{id};
        my $fields = $arg->{fields};

        my $marc = MARC::Record->new;
        $marc->encoding( $self->{marc_encoding} );

        $log->debug("original fields = ", sub { dump( $fields ) });

        # Values are handed on exactly as supplied. The former "recode" loop
        # copied each value back onto itself and, for fields shorter than
        # five elements, padded them with undef entries, so it was removed.

        # sort fields
        @$fields = sort { $a->[0] <=> $b->[0] } @$fields;

        $log->debug("recode fields = ", sub { dump( $fields ) });

        $marc->add_fields( @$fields );

        # tweak leader
        if (my $new_l = $arg->{leader}) {

                my $leader = $marc->leader;

                # apply replacements in ascending offset order; each one
                # overwrites length($insert) characters starting at its offset
                foreach my $o ( sort { $a <=> $b } keys %$new_l ) {
                        my $insert = $new_l->{$o};
                        $leader = substr($leader, 0, $o) .
                                $insert . substr($leader, $o+length($insert));
                }
                $marc->leader( $leader );
        }

        if ($self->{lint}) {
                $self->{lint}->check_record( $marc );
                my @w = $self->{lint}->warnings;
                if (@w) {
                        $log->error("MARC lint detected warning on record $id\n",
                                "<<<<< Original input row:\n",dump($arg->{row}), "\n",
                                ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump( $fields ), "\n",
                                "!!!!! MARC lint warnings:\n",join("\n",@w),"\n"
                        );
                        # count every distinct warning text for the summary
                        $self->{_marc_lint_warnings}->{$_}++ for @w;
                }
        }

        if ($self->{dump}) {
                $log->info("MARC record on record $id\n",
                        "<<<<< Original imput row:\n",dump($arg->{row}), "\n",
                        ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump( $fields ), "\n",
                );
        }

        print {$self->{fh}} $marc->as_usmarc;

        if ( $self->{fh_marcxml} ) {
                my $xml = $marc->as_xml_record;
                # drop the XML declaration from the record text, if present,
                # so it is not repeated for every record in the file
                $xml =~ s/\Q<?xml version="1.0" encoding="UTF-8"?>\E//;
                print {$self->{fh_marcxml}} $xml;
        }
}
169
170 =head2 finish
171
172 Close MARC output file
173
174   $marc->finish;
175
176 It will also dump MARC lint warnings summary if called with C<lint>.
177
178 =cut
179
# Close the output file(s) and log a summary.
#
# Closes the .marc handle (a failed close is reported through logdie).
# When a MARCXML handle is open, writes the closing </collection> tag,
# closes that handle and logs the resulting file size. When lint warnings
# were counted, logs them sorted by descending count.
sub finish {
        my $self = shift;

        # same logger accessor as the other methods in this package
        my $log = $self->_get_logger;

        close( $self->{fh} ) or $log->logdie("can't close ", $self->{path}, ".marc: $!");

        if ( $self->{fh_marcxml} ) {
                my $xml_path = $self->{path} . '.marcxml';

                print {$self->{fh_marcxml}} qq{</collection>\n};

                # close before measuring: buffered output is only written out
                # at this point, and write errors surface here as well
                close( $self->{fh_marcxml} ) or
                        $log->logdie("can't close ", $self->{path}, ".marcxml: $!");

                my $size = -s $xml_path;
                $log->info("MARCXML file ", $self->{path}, ".marcxml ", $size, " bytes");
        }
        if (my $w = $self->{_marc_lint_warnings}) {
                $log->error("MARC lint warnings summary:\n",
                        join ("\n",
                                map { $w->{$_} . "\t" . $_ }
                                sort { $w->{$b} <=> $w->{$a} } keys %$w
                        )
                );
        }
}
201
202 =head1 AUTHOR
203
204 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
205
206 =head1 COPYRIGHT & LICENSE
207
208 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
209
210 This program is free software; you can redistribute it and/or modify it
211 under the same terms as Perl itself.
212
213 =cut
214
215 1; # End of WebPAC::Output::MARC