r796@llin: dpavlin | 2006-07-04 12:34:13 +0200
authorDobrica Pavlinusic <dpavlin@rot13.org>
Tue, 4 Jul 2006 10:34:15 +0000 (10:34 +0000)
committerDobrica Pavlinusic <dpavlin@rot13.org>
Tue, 4 Jul 2006 10:34:15 +0000 (10:34 +0000)
 created WebPAC::Output::MARC and cleanup run.pl

git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@578 07558da8-63fa-0310-ba24-9fe276d99e06

TODO
lib/WebPAC/Output/MARC.pm [new file with mode: 0644]
run.pl
t/0-load.t

diff --git a/TODO b/TODO
index 30391c2..4c54a73 100644 (file)
--- a/TODO
+++ b/TODO
@@ -20,6 +20,7 @@
 + support arrays for normalize/path [2.21]
 + add marc to normalize and create export MARC file [2.22]
 + implement indicators and repetable subfield in marc export [2.23]
+- add WebPAC::Output::MARC [2.24]
 - support arrays for lookup
 - add dBase input format
 - remove delimiters characters from index and query entered
diff --git a/lib/WebPAC/Output/MARC.pm b/lib/WebPAC/Output/MARC.pm
new file mode 100644 (file)
index 0000000..7d0ea31
--- /dev/null
@@ -0,0 +1,159 @@
+package WebPAC::Output::MARC;
+
+use warnings;
+use strict;
+
+use base qw/WebPAC::Common/;
+
+use MARC::Record 2.0;  # need 2.0 for utf-8 encoding see marcpm.sf.net
+use MARC::Lint;
+use Data::Dump qw/dump/;
+
+=head1 NAME
+
+WebPAC::Output::MARC - Create MARC records from C<marc_*> normalisation rules
+
+=head1 VERSION
+
+Version 0.01
+
+=cut
+
+our $VERSION = '0.01';
+
+=head1 SYNOPSIS
+
+Create MARC records from C<marc_*> normalisation rules described in
+L<WebPAC::Normalize>.
+
+
+=head1 FUNCTIONS
+
+=head2 new
+
+  my $marc = new WebPAC::Output::MARC(
+       path => '/path/to/output.marc',
+       encoding => 'utf-8',
+       lint => 1,
+       dump => 0,
+  )
+
+=cut
+
+sub new {
+       my $class = shift;
+       my $self = {@_};
+       bless($self, $class);
+
+       my $log = $self->_get_logger;
+
+       if ($self->{lint}) {
+               $self->{lint}= new MARC::Lint or
+                       $log->warn("Can't create MARC::Lint object, linting is disabled");
+       }
+
+       if (my $path = $self->{path}) {
+               open($self->{fh}, '>', $path) ||
+                       $log->logdie("can't open MARC output $path: $!");
+
+               $log->info("Creating MARC export file $path", $self->{lint} ? ' (with lint)' : '', "\n");
+       } else {
+               $log->logconfess("new called without path");
+       }
+
+       $self->{encoding} ||= 'utf-8';
+
+       $self ? return $self : return undef;
+}
+
+=head2 add
+
+  $marc->add(
+       id => $mfn,
+       fields => WebPAC::Normalize::_get_marc_fields(),
+       leader => WebPAC::Normalize::marc_leader(),
+  );
+
+=cut
+
+sub add {
+       my $self = shift;
+
+       my $arg = {@_};
+
+       my $log = $self->_get_logger;
+
+       $log->logconfess("add needs fields and id arguments")
+               unless ($arg->{fields} && defined $arg->{id});
+
+       my $marc = new MARC::Record;
+       $marc->encoding( $self->{encoding} );
+
+       my $id = $self->{id};
+
+       $log->logconfess("fields isn't array") unless (ref($arg->{fields}) eq 'ARRAY');
+
+       $marc->add_fields( @{ $arg->{fields} } );
+
+       # tweak leader
+       if (my $new_l = $arg->{leader}) {
+
+               my $leader = $marc->leader;
+
+               foreach my $o ( keys %$new_l ) {
+                       my $insert = $new_l->{$o};
+                       $leader = substr($leader, 0, $o) .
+                               $insert . substr($leader, $o+length($insert));
+               }
+               $marc->leader( $leader );
+       }
+
+       if ($self->{lint}) {
+               $self->{lint}->check_record( $marc );
+               my $err = join( "\n", $self->{lint}->warnings );
+               $log->error("MARC lint detected warning on record $id\n",
+                       "<<<<< Original imput row:\n",dump($arg->{row}), "\n",
+                       ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump( $arg->{fields} ), "\n",
+                       "!!!!! MARC lint warnings:\n",$err,"\n"
+               ) if ($err);
+       }
+
+       if ($self->{dump}) {
+               $log->info("MARC record on record $id\n",
+                       "<<<<< Original imput row:\n",dump($self->{row}), "\n",
+                       ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump( $arg->{fields} ), "\n",
+               );
+       }
+
+       print {$self->{fh}} $marc->as_usmarc;
+
+}
+
+=head2 finish
+
+Close MARC output file
+
+  $marc->finish;
+
+=cut
+
+sub finish {
+       my $self = shift;
+
+       close( $self->{fh} ) or $self->_get_logger->logdie("can't close ", $self->{path}, ": $!");
+}
+
+=head1 AUTHOR
+
+Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
+
+1; # End of WebPAC::Output::MARC
diff --git a/run.pl b/run.pl
index 537e289..6fca96e 100755 (executable)
--- a/run.pl
+++ b/run.pl
@@ -13,13 +13,12 @@ use WebPAC::Store 0.03;
 use WebPAC::Normalize;
 use WebPAC::Output::TT;
 use WebPAC::Validate;
+use WebPAC::Output::MARC;
 use YAML qw/LoadFile/;
 use Getopt::Long;
 use File::Path;
 use Time::HiRes qw/time/;
 use File::Slurp;
-use MARC::Record 2.0;  # need 2.0 for utf-8 encoding see marcpm.sf.net
-use MARC::Lint;
 use Data::Dump qw/dump/;
 
 =head1 NAME
@@ -145,8 +144,6 @@ my $start_t = time();
 my @links;
 my $indexer;
 
-my $lint = new MARC::Lint if ($marc_lint);
-
 while (my ($database, $db_config) = each %{ $config->{databases} }) {
 
        my ($only_database,$only_input) = split(m#/#, $only_filter) if ($only_filter);
@@ -271,13 +268,11 @@ while (my ($database, $db_config) = each %{ $config->{databases} }) {
 
                        $log->info("Using $normalize_path for normalization...");
 
-                       my $marc_fh;
-                       if (my $path = $normalize->{output}) {
-                               open($marc_fh, '>', $path) ||
-                                       $log->logdie("can't open MARC output $path: $!");
-
-                               $log->info("Creating MARC export file $path", $marc_lint ? ' (with lint)' : '', "\n");
-                       }
+                       my $marc = new WebPAC::Output::MARC(
+                               path => $normalize->{output},
+                               lint => $marc_lint,
+                               dump => $marc_dump,
+                       ) if ($normalize->{output});
 
                        # reset position in database
                        $input_db->seek(1);
@@ -319,48 +314,12 @@ while (my ($database, $db_config) = each %{ $config->{databases} }) {
                                        type => $config->{$use_indexer}->{type},
                                ) if ($indexer && $ds);
 
-                               if ($marc_fh) {
-                                       my $marc = new MARC::Record;
-                                       $marc->encoding( 'utf-8' );
-                                       my @marc_fields = WebPAC::Normalize::_get_marc_fields();
-                                       if (! @marc_fields) {
-                                               $log->warn("MARC record $mfn is empty, skipping");
-                                       } else {
-                                               $marc->add_fields( @marc_fields );
-
-                                               # tweak leader
-                                               if (my $new_l = WebPAC::Normalize::marc_leader()) {
-
-                                                       my $leader = $marc->leader;
-
-                                                       foreach my $o ( keys %$new_l ) {
-                                                               my $insert = $new_l->{$o};
-                                                               $leader = substr($leader, 0, $o) .
-                                                                       $insert . substr($leader, $o+length($insert));
-                                                       }
-                                                       $marc->leader( $leader );
-                                               }
-
-                                               if ($marc_lint) {
-                                                       $lint->check_record( $marc );
-                                                       my $err = join( "\n", $lint->warnings );
-                                                       $log->error("MARC lint detected warning on MFN $mfn\n",
-                                                               "<<<<< Original imput row:\n",dump($row), "\n",
-                                                               ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump(@marc_fields), "\n",
-                                                               "!!!!! MARC lint warnings:\n",$err,"\n"
-                                                       ) if ($err);
-                                               }
-
-                                               if ($marc_dump) {
-                                                       $log->info("MARC record on MFN $mfn\n",
-                                                               "<<<<< Original imput row:\n",dump($row), "\n",
-                                                               ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump(@marc_fields), "\n",
-                                                       );
-                                               }
-
-                                               print $marc_fh $marc->as_usmarc;
-                                       }
-                               }
+                               $marc->add(
+                                       id => $mfn,
+                                       fields => [ WebPAC::Normalize::_get_marc_fields() ],
+                                       leader => WebPAC::Normalize::marc_leader(),
+                                       row => $row,
+                               ) if ($marc);
 
                                $total_rows++;
                        }
@@ -368,7 +327,7 @@ while (my ($database, $db_config) = each %{ $config->{databases} }) {
                        $log->info("statistics of fields usage:\n", $input_db->stats) if ($stats);
 
                        # close MARC file
-                       close($marc_fh) if ($marc_fh);
+                       $marc->finish if ($marc);
 
                }
 
index 633eab2..1f690f2 100644 (file)
@@ -1,6 +1,6 @@
 #!/usr/bin/perl -w
 
-use Test::More tests => 13;
+use Test::More tests => 14;
 use blib;
 use strict;
 
@@ -17,6 +17,7 @@ use_ok( 'WebPAC::Output::Estraier' );
 use_ok( 'WebPAC::Output::TT' );
 use_ok( 'WebPAC::Output::OAI' );
 use_ok( 'WebPAC::Output::CDBI' );
+use_ok( 'WebPAC::Output::MARC' );
 use_ok( 'WebPAC::Search::Estraier' );
 }