From c408b0af82886f884c9a17698695bd0f9e738a58 Mon Sep 17 00:00:00 2001 From: dpavlin Date: Sun, 21 Jun 2009 08:16:41 +0000 Subject: [PATCH] produce unimarc (without conversion) of usmarc (with conversion) git-svn-id: svn+ssh://llin.lib/home/dpavlin/private/svn/Z3950-HTML-Scraper@9 ae73d1a6-5fa4-44a9-8f13-f281fb455051 --- COBISS.pm | 59 ++++++++++++++++++++++++++++++++++++++++++++----------- server.pl | 6 +++++- 2 files changed, 53 insertions(+), 12 deletions(-) diff --git a/COBISS.pm b/COBISS.pm index f3c698d..e55eeaf 100644 --- a/COBISS.pm +++ b/COBISS.pm @@ -5,6 +5,7 @@ use strict; use WWW::Mechanize; use MARC::Record; +use Data::Dump qw/dump/; binmode STDOUT, ':utf8'; @@ -17,8 +18,8 @@ my $cobiss_marc21 = { 205 => { a => [ 250 , 'a' ] }, 210 => { a => [ 250 , 'a' ], - c => [ 260 , 'b' ], - d => [ 260 , 'c' ], + c => [ 250 , 'b' ], + d => [ 250 , 'c' ], }, 215 => { a => [ 300 , 'a' ], @@ -113,8 +114,12 @@ diag "in COMARC format"; } -sub fetch_marc { - my ($self) = @_; +sub fetch_rec { + my ($self,$format) = @_; + + $format ||= 'unimarc'; + + die "unknown format: $format" unless $format =~ m{(uni|us)marc}; my $comarc; @@ -124,7 +129,7 @@ sub fetch_marc { my $nr = $2; my $id = $3; -diag "fetch_marc $nr [$id]"; +diag "fetch_marc $nr [$id] $format"; $comarc =~ s{}{}gs; $comarc =~ s{]*>}{}gs; @@ -143,20 +148,52 @@ diag "fetch_marc $nr [$id]"; if ( $line !~ s{^(\d\d\d)([01 ])([01 ])}{} ) { diag "SKIP: $line"; } else { + our @f = ( $1, $2, $3 ); $line .= ""; - our @f = ( $1, $2, $3 ); - sub sf { push @f, @_; } - $line =~ s{(\w)([^<]+)\s*}{sf($1, $2)}ges; - diag "f:", join('|', @f), " left: |$line|"; - $marc->add_fields( @f ); + if ( $format eq 'unimarc' ) { + + diag dump(@f), "line: $line"; + sub sf_uni { + warn "sf ",dump(@_); + push @f, @_; + } + $line =~ s{(\w)([^<]+)\s*}{sf_uni($1, $2)}ges; + diag "f:", dump(@f), " left: |$line|"; + $marc->add_fields( @f ); + + } elsif ( $format eq 'usmarc' ) { + + my ( $f, $i1, $i2 ) = @f; + + our $out = {}; + + sub sf_us { + my ($f,$sf,$v) = @_; + if ( my $m = $cobiss_marc21->{$f}->{$sf} ) { + push @{ $out->{ $m->[0] } }, ( $m->[1], $v ); + } + return; + } + $line =~ s{(\w)([^<]+)\s*}{sf_us($f,$1, $2)}ges; + + diag "converted marc21 ",dump( $out ); + + foreach my $f ( keys %$out ) { + $marc->add_fields( $f, $i1, $i2, @{ $out->{$f} } ); + } + } } } - open(my $out, '>:utf8', "marc/$id"); + my $path = "marc/$id.$format"; + + open($out, '>:utf8', $path); print $out $marc->as_usmarc; close($out); + diag "created $path ", -s $path, " bytes"; + diag $marc->as_formatted; $nr++; diff --git a/server.pl b/server.pl index 6d26b06..1e17f2c 100755 --- a/server.pl +++ b/server.pl @@ -103,7 +103,11 @@ diag Dumper( $this ); } elsif ( $req_form eq &Net::Z3950::OID::unimarc ) { # FIXME convert to usmarc $this->{REP_FORM} = &Net::Z3950::OID::unimarc; - $this->{RECORD} = COBISS->fetch_marc; + $this->{RECORD} = COBISS->fetch_rec('unimarc'); + } + elsif ( $req_form eq &Net::Z3950::OID::usmarc ) { # FIXME convert to usmarc + $this->{REP_FORM} = &Net::Z3950::OID::usmarc; + $this->{RECORD} = COBISS->fetch_rec('usmarc'); } else { ## Unsupported record format $this->{ERR_CODE} = 239; -- 2.20.1