added --marcxml flag
[webpac2] / lib / WebPAC / Output / MARC.pm
index a05d804..97500e1 100644 (file)
@@ -5,10 +5,10 @@ use strict;
 
 use base qw/WebPAC::Common/;
 
-use MARC::Record 2.0;  # need 2.0 for utf-8 encoding see marcpm.sf.net
+use MARC::Record;
+use MARC::File::XML;
 use MARC::Lint;
 use Data::Dump qw/dump/;
-use Encode qw/from_to decode/;
 
 =head1 NAME
 
@@ -16,11 +16,11 @@ WebPAC::Output::MARC - Create MARC records from C<marc_*> normalisation rules
 
 =head1 VERSION
 
-Version 0.03
+Version 0.04
 
 =cut
 
-our $VERSION = '0.03';
+our $VERSION = '0.04';
 
 =head1 SYNOPSIS
 
@@ -34,10 +34,10 @@ L<WebPAC::Normalize>.
 
   my $marc = new WebPAC::Output::MARC(
        path => '/path/to/output.marc',
-       native_encoding => 'iso-8859-2',
        marc_encoding => 'utf-8',
        lint => 1,
        dump => 0,
+       marcxml => 0,
   )
 
 =cut
@@ -54,19 +54,24 @@ sub new {
                        $log->warn("Can't create MARC::Lint object, linting is disabled");
        }
 
+       $self->{marc_encoding} ||= 'utf-8';
+
        if (my $path = $self->{path}) {
-               open($self->{fh}, '>', $path) ||
+               open($self->{fh}, '>', $path . '.marc') ||
                        $log->logdie("can't open MARC output $path: $!");
                binmode($self->{fh}, ':utf8');
 
-               $log->info("Creating MARC export file $path", $self->{lint} ? ' (with lint)' : '', "\n");
+               $log->info("Creating MARC export file $path.marc", $self->{lint} ? ' (with lint)' : '', " encoding ", $self->{marc_encoding}, "\n");
+               if ( $self->{marcxml} || $ENV{MARCXML} ) {
+                       open($self->{fh_marcxml}, '>:utf8', "$path.marcxml") ||
+                               $log->logdie("can't open MARCXML output $path.marcxml: $!");
+                       $log->info("Creating MARCXML export file $path.marcxml");
+                       print {$self->{fh_marcxml}} qq{<?xml version="1.0" encoding="UTF-8"?>\n<collection>\n};
+               }
        } else {
                $log->logconfess("new called without path");
        }
 
-       $self->{native_encoding} ||= 'iso-8859-2';
-       $self->{marc_encoding} ||= 'utf-8';
-
        $self ? return $self : return undef;
 }
 
@@ -75,7 +80,7 @@ sub new {
   $marc->add(
        id => $mfn,
        fields => WebPAC::Normalize::_get_marc_fields(),
-       leader => WebPAC::Normalize::marc_leader(),
+       leader => WebPAC::Normalize::_get_marc_leader(),
        row => $row,
   );
 
@@ -109,11 +114,13 @@ sub add {
        foreach my $j ( 0 .. $#$fields ) {
                foreach my $i ( 0 .. ( ( $#{$fields->[$j]} - 3 ) / 2 ) ) {
                        my $f = $fields->[$j]->[ ($i * 2) + 4 ];
-                       $f = decode( $self->{native_encoding}, $f );
                        $fields->[$j]->[ ($i * 2) + 4 ] = $f;
                }
        }
 
+       # sort fields
+       @$fields = sort { $a->[0] <=> $b->[0] } @$fields;
+
        $log->debug("recode fields = ", sub { dump( $fields ) });
 
        $marc->add_fields( @$fields );
@@ -123,7 +130,7 @@ sub add {
 
                my $leader = $marc->leader;
 
-               foreach my $o ( keys %$new_l ) {
+               foreach my $o ( sort { $a <=> $b } keys %$new_l ) {
                        my $insert = $new_l->{$o};
                        $leader = substr($leader, 0, $o) .
                                $insert . substr($leader, $o+length($insert));
@@ -137,7 +144,7 @@ sub add {
                if (@w) {
                        $log->error("MARC lint detected warning on record $id\n",
                                "<<<<< Original input row:\n",dump($arg->{row}), "\n",
-                               ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump( $arg->{fields} ), "\n",
+                               ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump( $fields ), "\n",
                                "!!!!! MARC lint warnings:\n",join("\n",@w),"\n"
                        );
                        map { $self->{_marc_lint_warnings}->{$_}++ } @w;
@@ -147,12 +154,17 @@ sub add {
        if ($self->{dump}) {
                $log->info("MARC record on record $id\n",
                        "<<<<< Original imput row:\n",dump($arg->{row}), "\n",
-                       ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump( $arg->{fields} ), "\n",
+                       ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump( $fields ), "\n",
                );
        }
 
        print {$self->{fh}} $marc->as_usmarc;
 
+       if ( $self->{fh_marcxml} ) {
+               my $xml = $marc->as_xml_record;
+               $xml =~ s/\Q<?xml version="1.0" encoding="UTF-8"?>\E//;
+               print {$self->{fh_marcxml}} $xml;
+       }
 }
 
 =head2 finish
@@ -170,8 +182,13 @@ sub finish {
 
        my $log = $self->get_logger;
 
-       close( $self->{fh} ) or $log->logdie("can't close ", $self->{path}, ": $!");
+       close( $self->{fh} ) or $log->logdie("can't close ", $self->{path}, ".marc: $!");
 
+       if ( $self->{fh_marcxml} ) {
+               print {$self->{fh_marcxml}} qq{</collection>\n};
+               $log->info("MARCXML file ", $self->{path}, ".marcxml ", -s $self->{fh_marcxml}, " bytes");
+               close( $self->{fh_marcxml} );
+       }
        if (my $w = $self->{_marc_lint_warnings}) {
                $log->error("MARC lint warnings summary:\n",
                        join ("\n",