generate marc record
author dpavlin <dpavlin@ae73d1a6-5fa4-44a9-8f13-f281fb455051>
Fri, 22 Oct 2010 20:49:16 +0000 (20:49 +0000)
committer dpavlin <dpavlin@ae73d1a6-5fa4-44a9-8f13-f281fb455051>
Fri, 22 Oct 2010 20:49:16 +0000 (20:49 +0000)
git-svn-id: svn+ssh://llin.lib/home/dpavlin/private/svn/Z3950-HTML-Scraper@12 ae73d1a6-5fa4-44a9-8f13-f281fb455051

Aleph.pm

index 953fda7..f89aaf7 100644 (file)
--- a/Aleph.pm
+++ b/Aleph.pm
@@ -125,18 +125,31 @@ print $mech->content;
 
 diag "parse $nr";
 
+               my $marc = MARC::Record->new;
+
                my $html = $mech->content;
                my $hash;
-               $html =~ s|<tr>\s*<td class=td1 id=bold[^>]*>(.+?)</td>\s*<td class=td1>(.+?)</td>|$hash->{$1} = "$2";|ges;
+
+               sub field {
+                       my ( $f, $v ) = @_;
+                       $v =~ s/\Q&nbsp;\E/ /gs;
+warn "# $f\t$v\n";
+                       $hash->{$f} = $v;
+                       my ($i1,$i2) = (' ',' ');
+                       ($i1,$i2) = ($2,$3) if $f =~ s/^(...)(.)?(.)?/$1/;
+                       my @sf = split(/\|/, $v);
+                       shift @sf;
+                       @sf = map { s/^(\w)\s+//; { $1 => $_ } } @sf;
+diag "sf = ", dump(@sf);
+                       $marc->add_fields( $f, $i1, $i2, @sf ) if $f =~ m/^\d+$/;
+               }
+
+               $html =~ s|<tr>\s*<td class=td1 id=bold[^>]*>(.+?)</td>\s*<td class=td1>(.+?)</td>|field($1,$2)|ges;
                diag dump($hash);
 
                my $id = $hash->{SYS} || die "no SYS";
 
-die;
-
-               my $marc = MARC::Record->new;
 
-#              $marc->add_fields( $f, $i1, $i2, @{ $out->{$f} } );
 
                my $path = "marc/$id.$format";