From aabde718216307a645afd587a4ad7c2f84c745c8 Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Sun, 26 Feb 2006 23:21:50 +0000 Subject: [PATCH] r494@llin: dpavlin | 2006-02-27 00:22:59 +0100 implemented recode option to input (for now, just for MARC) git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@416 07558da8-63fa-0310-ba24-9fe276d99e06 --- lib/WebPAC/Input.pm | 41 +++++++++++++++++++++++++++++++++++----- lib/WebPAC/Input/MARC.pm | 14 +++++++++----- run.pl | 1 + 3 files changed, 46 insertions(+), 10 deletions(-) diff --git a/lib/WebPAC/Input.pm b/lib/WebPAC/Input.pm index 1255c22..0d21260 100644 --- a/lib/WebPAC/Input.pm +++ b/lib/WebPAC/Input.pm @@ -14,11 +14,11 @@ WebPAC::Input - read different file formats into WebPAC =head1 VERSION -Version 0.03 +Version 0.04 =cut -our $VERSION = '0.03'; +our $VERSION = '0.04'; =head1 SYNOPSIS @@ -47,9 +47,10 @@ Perhaps a little code snippet. ); $db->open('/path/to/database'); - print "database size: ",$db->size,"\n"; - while (my $rec = $db->fetch) { - } + print "database size: ",$db->size,"\n"; + while (my $rec = $db->fetch) { + # do something with $rec + } @@ -63,6 +64,7 @@ Create new input database object. module => 'WebPAC::Input::MARC', code_page => 'ISO-8859-2', low_mem => 1, + recode => 'char pairs', ); C<module> is low-level file format module. 
See L and @@ -183,8 +185,37 @@ sub open { # create Text::Iconv object $self->{iconv} = Text::Iconv->new($code_page,$self->{'code_page'}); + my $filter_ref; + + if ($self->{recode}) { + my @r = split(/\s/, $self->{recode}); + if ($#r % 2 != 1) { + $log->logwarn("recode needs even number of elements (some number of valid pairs)"); + } else { + my $recode; + while (@r) { + my $from = shift @r; + my $to = shift @r; + $recode->{$from} = $to; + } + + my $regex = join '|' => keys %{ $recode }; + + $log->debug("using recode regex: $regex"); + + $filter_ref = sub { + my $t = shift; + $t =~ s/($regex)/$recode->{$1}/g; + return $t; + }; + + } + + } + my ($db, $size) = $self->{open_db}->( $self, path => $arg->{path}, + filter => $filter_ref, ); unless ($db) { diff --git a/lib/WebPAC/Input/MARC.pm b/lib/WebPAC/Input/MARC.pm index 9409a48..a4f3cbf 100644 --- a/lib/WebPAC/Input/MARC.pm +++ b/lib/WebPAC/Input/MARC.pm @@ -3,7 +3,7 @@ package WebPAC::Input::MARC; use warnings; use strict; -use MARC::Fast; +use MARC::Fast 0.03; =head1 NAME @@ -11,11 +11,11 @@ WebPAC::Input::MARC - support for MARC database files =head1 VERSION -Version 0.04 +Version 0.05 =cut -our $VERSION = '0.04'; +our $VERSION = '0.05'; =head1 SYNOPSIS @@ -33,7 +33,8 @@ structure using C. 
Returns handle to database my $db = $open_db( - path => '/path/to/marc.iso' + path => '/path/to/marc.iso', + filter => \&code_ref, } =cut @@ -47,7 +48,10 @@ sub open_db { $log->info("opening MARC database '$arg->{path}'"); - my $db = new MARC::Fast( marcdb => $arg->{path}); + my $db = new MARC::Fast( + marcdb => $arg->{path}, + hash_filter => $arg->{filter}, + ); my $db_size = $db->count - 1; # FIXME $self->{_marc_size} = $db_size; diff --git a/run.pl b/run.pl index b4e7ae5..7a833f5 100755 --- a/run.pl +++ b/run.pl @@ -153,6 +153,7 @@ while (my ($database, $db_config) = each %{ $config->{databases} }) { limit => $limit || $input->{limit}, offset => $offset, lookup => $lookup, + recode => $input->{recode}, ); $log->logdie("can't create input using $input_module") unless ($input); -- 2.20.1