From 6b22c85acfc694cf51642e9d41dd900cd160a88e Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Thu, 29 Jun 2006 21:18:50 +0000 Subject: [PATCH] r730@llin: dpavlin | 2006-06-29 21:33:48 +0200 use MARC::Record 2.0 to support utf-8 encoding in MARC http://marcpm.sourceforge.net/ git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@541 07558da8-63fa-0310-ba24-9fe276d99e06 --- Makefile.PL | 2 +- lib/WebPAC/Normalize.pm | 17 ++++++++++++----- run.pl | 4 +++- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/Makefile.PL b/Makefile.PL index f812691..ed806d5 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -29,7 +29,7 @@ WriteMakefile( 'Search::Estraier' => 0.06, 'List::Util' => 0, 'Spreadsheet::ParseExcel', => 0, - 'MARC::Record' => 0, + 'MARC::Record' => 2.0, }, dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, clean => { FILES => 'WebPAC-* pod2html Makefile tags' }, diff --git a/lib/WebPAC/Normalize.pm b/lib/WebPAC/Normalize.pm index ea31b37..459bb7a 100644 --- a/lib/WebPAC/Normalize.pm +++ b/lib/WebPAC/Normalize.pm @@ -17,6 +17,7 @@ use strict; #use base qw/WebPAC::Common/; use Data::Dumper; +use Encode qw/from_to/; =head1 NAME @@ -24,11 +25,11 @@ WebPAC::Normalize - describe normalisaton rules using sets =head1 VERSION -Version 0.05 +Version 0.06 =cut -our $VERSION = '0.05'; +our $VERSION = '0.06'; =head1 SYNOPSIS @@ -57,6 +58,7 @@ Return data structure lookup => $lookup->lookup_hash, row => $row, rules => $normalize_pl_config, + marc_encoding => 'utf-8', ); Options C, C, C and C are mandatory while all @@ -78,8 +80,7 @@ sub data_structure { no strict 'subs'; _set_lookup( $arg->{lookup} ); _set_rec( $arg->{row} ); - _clean_ds(); - + _clean_ds( %{ $arg } ); eval "$arg->{rules}"; die "error evaling $arg->{rules}: $@\n" if ($@); @@ -110,6 +111,7 @@ Return hash formatted as data structure my $out; my $marc21; +my $marc_encoding; sub _get_ds { return $out; @@ -124,8 +126,10 @@ Clean data structure hash for next record =cut sub _clean_ds { + my $a = {@_}; $out = undef; $marc21 = undef; + $marc_encoding = $a->{marc_encoding}; } =head2 _set_lookup @@ -222,7 +226,10 @@ sub marc21 { my $sf = shift or die "marc21 needs subfield"; - foreach my $v (@_) { + foreach (@_) { + my $v = $_; # make var read-write for Encode + next unless (defined($v) && $v !~ /^\s+$/); + from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding); push @{ $marc21 }, [ $f, ' ', ' ', $sf => $v ]; } } diff --git a/run.pl b/run.pl index 64e8095..6bb258f 100755 --- a/run.pl +++ b/run.pl @@ -19,7 +19,7 @@ use Getopt::Long; use File::Path; use Time::HiRes qw/time/; use File::Slurp; -use MARC::Record; +use MARC::Record 2.0; # need 2.0 for utf-8 encoding see marcpm.sf.net =head1 NAME @@ -269,6 +269,7 @@ while (my ($database, $db_config) = each %{ $config->{databases} }) { row => $row, rules => $rules, lookup => $lookup ? $lookup->lookup_hash : undef, + marc_encoding => 'utf-8', ); $db->save_ds( @@ -285,6 +286,7 @@ while (my ($database, $db_config) = each %{ $config->{databases} }) { if ($marc_fh) { my $marc = new MARC::Record; + $marc->encoding( 'utf-8' ); $marc->add_fields( WebPAC::Normalize::_get_marc21_fields() ); print $marc_fh $marc->as_usmarc; } -- 2.20.1