From 1e7d14d016aadb14fea11906d2f41ec2472716c8 Mon Sep 17 00:00:00 2001 From: dpavlin Date: Fri, 22 Oct 2010 20:49:16 +0000 Subject: [PATCH] generate marc record git-svn-id: svn+ssh://llin.lib/home/dpavlin/private/svn/Z3950-HTML-Scraper@12 ae73d1a6-5fa4-44a9-8f13-f281fb455051 --- Aleph.pm | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/Aleph.pm b/Aleph.pm index 953fda7..f89aaf7 100644 --- a/Aleph.pm +++ b/Aleph.pm @@ -125,18 +125,31 @@ print $mech->content; diag "parse $nr"; + my $marc = MARC::Record->new; + my $html = $mech->content; my $hash; - $html =~ s|\s*]*>(.+?)\s*(.+?)|$hash->{$1} = "$2";|ges; + + sub field { + my ( $f, $v ) = @_; + $v =~ s/\Q \E/ /gs; +warn "# $f\t$v\n"; + $hash->{$f} = $v; + my ($i1,$i2) = (' ',' '); + ($i1,$i2) = ($2,$3) if $f =~ s/^(...)(.)?(.)?/$1/; + my @sf = split(/\|/, $v); + shift @sf; + @sf = map { s/^(\w)\s+//; { $1 => $_ } } @sf; +diag "sf = ", dump(@sf); + $marc->add_fields( $f, $i1, $i2, @sf ) if $f =~ m/^\d+$/; + } + + $html =~ s|\s*]*>(.+?)\s*(.+?)|field($1,$2)|ges; diag dump($hash); my $id = $hash->{SYS} || die "no SYS"; -die; - - my $marc = MARC::Record->new; -# $marc->add_fields( $f, $i1, $i2, @{ $out->{$f} } ); my $path = "marc/$id.$format"; -- 2.20.1