From 813654dada701348c2c02ae3051fe160d4a8d974 Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Tue, 4 Jul 2006 10:34:15 +0000 Subject: [PATCH] r796@llin: dpavlin | 2006-07-04 12:34:13 +0200 created WebPAC::Output::MARC and cleanup run.pl git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@578 07558da8-63fa-0310-ba24-9fe276d99e06 --- TODO | 1 + lib/WebPAC/Output/MARC.pm | 159 ++++++++++++++++++++++++++++++++++++++ run.pl | 67 ++++------------ t/0-load.t | 3 +- 4 files changed, 175 insertions(+), 55 deletions(-) create mode 100644 lib/WebPAC/Output/MARC.pm diff --git a/TODO b/TODO index 30391c2..4c54a73 100644 --- a/TODO +++ b/TODO @@ -20,6 +20,7 @@ + support arrays for normalize/path [2.21] + add marc to normalize and create export MARC file [2.22] + implement indicators and repetable subfield in marc export [2.23] +- add WebPAC::Output::MARC [2.24] - support arrays for lookup - add dBase input format - remove delimiters characters from index and query entered diff --git a/lib/WebPAC/Output/MARC.pm b/lib/WebPAC/Output/MARC.pm new file mode 100644 index 0000000..7d0ea31 --- /dev/null +++ b/lib/WebPAC/Output/MARC.pm @@ -0,0 +1,159 @@ +package WebPAC::Output::MARC; + +use warnings; +use strict; + +use base qw/WebPAC::Common/; + +use MARC::Record 2.0; # need 2.0 for utf-8 encoding see marcpm.sf.net +use MARC::Lint; +use Data::Dump qw/dump/; + +=head1 NAME + +WebPAC::Output::MARC - Create MARC records from C normalisation rules + +=head1 VERSION + +Version 0.01 + +=cut + +our $VERSION = '0.01'; + +=head1 SYNOPSIS + +Create MARC records from C normalisation rules described in +L. + + +=head1 FUNCTIONS + +=head2 new + + my $marc = new WebPAC::Output::MARC( + path => '/path/to/output.marc', + encoding => 'utf-8', + lint => 1, + dump => 0, + ) + +=cut + +sub new { + my $class = shift; + my $self = {@_}; + bless($self, $class); + + my $log = $self->_get_logger; + + if ($self->{lint}) { + $self->{lint}= new MARC::Lint or + $log->warn("Can't create MARC::Lint object, linting is disabled"); + } + + if (my $path = $self->{path}) { + open($self->{fh}, '>', $path) || + $log->logdie("can't open MARC output $path: $!"); + + $log->info("Creating MARC export file $path", $self->{lint} ? ' (with lint)' : '', "\n"); + } else { + $log->logconfess("new called without path"); + } + + $self->{encoding} ||= 'utf-8'; + + $self ? return $self : return undef; +} + +=head2 add + + $marc->add( + id => $mfn, + fields => WebPAC::Normalize::_get_marc_fields(), + leader => WebPAC::Normalize::marc_leader(), + ); + +=cut + +sub add { + my $self = shift; + + my $arg = {@_}; + + my $log = $self->_get_logger; + + $log->logconfess("add needs fields and id arguments") + unless ($arg->{fields} && defined $arg->{id}); + + my $marc = new MARC::Record; + $marc->encoding( $self->{encoding} ); + + my $id = $self->{id}; + + $log->logconfess("fields isn't array") unless (ref($arg->{fields}) eq 'ARRAY'); + + $marc->add_fields( @{ $arg->{fields} } ); + + # tweak leader + if (my $new_l = $arg->{leader}) { + + my $leader = $marc->leader; + + foreach my $o ( keys %$new_l ) { + my $insert = $new_l->{$o}; + $leader = substr($leader, 0, $o) . + $insert . substr($leader, $o+length($insert)); + } + $marc->leader( $leader ); + } + + if ($self->{lint}) { + $self->{lint}->check_record( $marc ); + my $err = join( "\n", $self->{lint}->warnings ); + $log->error("MARC lint detected warning on record $id\n", + "<<<<< Original imput row:\n",dump($arg->{row}), "\n", + ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump( $arg->{fields} ), "\n", + "!!!!! MARC lint warnings:\n",$err,"\n" + ) if ($err); + } + + if ($self->{dump}) { + $log->info("MARC record on record $id\n", + "<<<<< Original imput row:\n",dump($self->{row}), "\n", + ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump( $arg->{fields} ), "\n", + ); + } + + print {$self->{fh}} $marc->as_usmarc; + +} + +=head2 finish + +Close MARC output file + + $marc->finish; + +=cut + +sub finish { + my $self = shift; + + close( $self->{fh} ) or $self->_get_logger->logdie("can't close ", $self->{path}, ": $!"); +} + +=head1 AUTHOR + +Dobrica Pavlinusic, C<< >> + +=head1 COPYRIGHT & LICENSE + +Copyright 2006 Dobrica Pavlinusic, All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + +=cut + +1; # End of WebPAC::Output::MARC diff --git a/run.pl b/run.pl index 537e289..6fca96e 100755 --- a/run.pl +++ b/run.pl @@ -13,13 +13,12 @@ use WebPAC::Store 0.03; use WebPAC::Normalize; use WebPAC::Output::TT; use WebPAC::Validate; +use WebPAC::Output::MARC; use YAML qw/LoadFile/; use Getopt::Long; use File::Path; use Time::HiRes qw/time/; use File::Slurp; -use MARC::Record 2.0; # need 2.0 for utf-8 encoding see marcpm.sf.net -use MARC::Lint; use Data::Dump qw/dump/; =head1 NAME @@ -145,8 +144,6 @@ my $start_t = time(); my @links; my $indexer; -my $lint = new MARC::Lint if ($marc_lint); - while (my ($database, $db_config) = each %{ $config->{databases} }) { my ($only_database,$only_input) = split(m#/#, $only_filter) if ($only_filter); @@ -271,13 +268,11 @@ while (my ($database, $db_config) = each %{ $config->{databases} }) { $log->info("Using $normalize_path for normalization..."); - my $marc_fh; - if (my $path = $normalize->{output}) { - open($marc_fh, '>', $path) || - $log->logdie("can't open MARC output $path: $!"); - - $log->info("Creating MARC export file $path", $marc_lint ? ' (with lint)' : '', "\n"); - } + my $marc = new WebPAC::Output::MARC( + path => $normalize->{output}, + lint => $marc_lint, + dump => $marc_dump, + ) if ($normalize->{output}); # reset position in database $input_db->seek(1); @@ -319,48 +314,12 @@ while (my ($database, $db_config) = each %{ $config->{databases} }) { type => $config->{$use_indexer}->{type}, ) if ($indexer && $ds); - if ($marc_fh) { - my $marc = new MARC::Record; - $marc->encoding( 'utf-8' ); - my @marc_fields = WebPAC::Normalize::_get_marc_fields(); - if (! @marc_fields) { - $log->warn("MARC record $mfn is empty, skipping"); - } else { - $marc->add_fields( @marc_fields ); - - # tweak leader - if (my $new_l = WebPAC::Normalize::marc_leader()) { - - my $leader = $marc->leader; - - foreach my $o ( keys %$new_l ) { - my $insert = $new_l->{$o}; - $leader = substr($leader, 0, $o) . - $insert . substr($leader, $o+length($insert)); - } - $marc->leader( $leader ); - } - - if ($marc_lint) { - $lint->check_record( $marc ); - my $err = join( "\n", $lint->warnings ); - $log->error("MARC lint detected warning on MFN $mfn\n", - "<<<<< Original imput row:\n",dump($row), "\n", - ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump(@marc_fields), "\n", - "!!!!! MARC lint warnings:\n",$err,"\n" - ) if ($err); - } - - if ($marc_dump) { - $log->info("MARC record on MFN $mfn\n", - "<<<<< Original imput row:\n",dump($row), "\n", - ">>>>> Normalized MARC row: leader: [", $marc->leader(), "]\n", dump(@marc_fields), "\n", - ); - } - - print $marc_fh $marc->as_usmarc; - } - } + $marc->add( + id => $mfn, + fields => [ WebPAC::Normalize::_get_marc_fields() ], + leader => WebPAC::Normalize::marc_leader(), + row => $row, + ) if ($marc); $total_rows++; } @@ -368,7 +327,7 @@ while (my ($database, $db_config) = each %{ $config->{databases} }) { $log->info("statistics of fields usage:\n", $input_db->stats) if ($stats); # close MARC file - close($marc_fh) if ($marc_fh); + $marc->finish if ($marc); } diff --git a/t/0-load.t b/t/0-load.t index 633eab2..1f690f2 100644 --- a/t/0-load.t +++ b/t/0-load.t @@ -1,6 +1,6 @@ #!/usr/bin/perl -w -use Test::More tests => 13; +use Test::More tests => 14; use blib; use strict; @@ -17,6 +17,7 @@ use_ok( 'WebPAC::Output::Estraier' ); use_ok( 'WebPAC::Output::TT' ); use_ok( 'WebPAC::Output::OAI' ); use_ok( 'WebPAC::Output::CDBI' ); +use_ok( 'WebPAC::Output::MARC' ); use_ok( 'WebPAC::Search::Estraier' ); } -- 2.20.1