use newer MARC::File::USMARC instead of MARC
[webpac] / all2xml.pl
index 7cb8f52..b7107e7 100755 (executable)
@@ -139,12 +139,14 @@ sub data2xml {
                } else {
                        print STDERR "WARNING: field '$field' doesn't have 'name' attribute!";
                }
+
                if ($field_name) {
+                       $field_name = x($field_name);
                        if (! $last_field_name) {
-                               $last_field_name = x($field_name);
+                               $last_field_name = $field_name;
                                return $last_field_name;
                        } elsif ($field_name ne $last_field_name) {
-                               $last_field_name = x($field_name);
+                               $last_field_name = $field_name;
                                return $last_field_name;
                        }
                }
@@ -639,6 +641,10 @@ foreach my $database ($cfg->Sections) {
        my $lookup_file = $cfg -> val($database, 'lookup_newfile'); # optional
        if ($lookup_file) {
                #tie %lhash, 'GDBM_File', $lookup_file, &GDBM_NEWDB, 0644;
+               if (! -e $lookup_file) {
+                       open(LOOKUP, "> $lookup_file") || die "can't create $lookup_file': $!";
+                       close(LOOKUP);
+               }
                tie %lhash, 'TDB_File', $lookup_file, TDB_CLEAR_IF_FIRST, O_RDWR, 0644;
                print STDERR "creating lookup file '$lookup_file'\n";
                # delete memory cache for lookup file
@@ -661,15 +667,30 @@ print STDERR "reading ./import_xml/$type.xml\n";
 
        $config=XMLin("./import_xml/$type.xml", ForceArray => [ $type2tag{$type_base}, 'config', 'format' ], ForceContent => 1 );
 
+       # helper for progress bar
+       sub fmt_time {  # format a duration given in seconds as [<H>h]MM:SS text
+               my $t = shift || 0;  # seconds; a missing or false argument is treated as 0
+               my $out = "";
+
+               my ($ss,$mm,$hh) = gmtime($t);  # first three gmtime fields: sec, min, hour (hour is 0-23, whole days are dropped)
+               $out .= "${hh}h" if ($hh);  # hour prefix only when hours are non-zero
+               $out .= sprintf("%02d:%02d", $mm,$ss);  # zero-padded minutes:seconds
+               $out .= "  " if ($hh == 0);  # two trailing spaces when there is no hour prefix -- presumably to blank out a previously printed longer value
+               return $out;  # the formatted string
+       }
+
        # output current progress indicator
        my $last_p = 0;
+       my $start_t = time();
        sub progress {
                return if (! $show_progress);
                my $current = shift;
                my $total = shift || 1;
                my $p = int($current * 100 / $total);
                if ($p != $last_p) {
-                       printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$current,$total,"=" x ($p/2).">", $p );
+                       my $rate = ($current / (time() - $start_t || 1));  # items per second since $start_t; an elapsed time of 0 is replaced by 1 to avoid dividing by zero
+                       my $eta = ($total-$current) / ($rate || 1);  # estimated seconds remaining; a rate of 0 is replaced by 1
+                       printf STDERR ("%5d [%-38s] %-5d %0.1f/s %s\r",$current,"=" x ($p/3)."$p%>", $total, $rate, fmt_time($eta));  # current, bar with embedded percentage, total, rate, ETA; ends in a carriage return, not a newline
                        $last_p = $p;
                }
        }
@@ -809,7 +830,11 @@ print STDERR "using: $type...\n";
                        for(my $iC = $oWorksheet->{MinCol} ; defined $oWorksheet->{MaxCol} && $iC <= $oWorksheet->{MaxCol} ; $iC++) {
                                my $cell = $oWorksheet->{Cells}[$iR][$iC];
                                if ($cell) {
-                                       $row->{int2col($iC)} = $cell->Value;
+                                       # This conversion is a kludge:
+                                       # files from Excel can contain
+                                       # characters which don't fit into
+                                       # the destination encoding.
+                                       $row->{int2col($iC)} = $utf2cp->convert($cell->Value) || $cell->Value;
                                }
                        }
 
@@ -835,36 +860,40 @@ print STDERR "using: $type...\n";
                }
        } elsif ($type_base eq "marc") {
 
-               require MARC;
+               require MARC::File::USMARC;
                
                $import2cp = Text::Iconv->new($config->{marc_codepage},$codepage);
                my $marc_file = $cfg -> val($database, 'marc_file') || die "$database doesn't have 'marc_file' defined!";
 
                # optional argument is format
-               my $format = x($config->{marc_format}) || 'usmarc';
-
+               warn "marc_format is no longer used!" if ($config->{marc_format});
                print STDERR "Reading MARC file '$marc_file'\n";
 
-               my $marc = new MARC;
-               my $nr = $marc->openmarc({
-                               file=>$marc_file, format=>$format
-                       }) || die "Can't open MARC file '$marc_file' with format '$format'";
+               my $marc = MARC::File::USMARC->in( $marc_file )
+                       || die "Can't open MARC file '$marc_file': ".$MARC::File::ERROR;
 
-               # read MARC file in memory
-               $marc->nextmarc(-1);
+               # count records in MARC file
+               sub marc_count {  # count records: opens its own reader on the given file name and counts successful skip() calls
+                       my $filename = shift || die;  # dies when no (or a false) file name is passed
+                       my $file = MARC::File::USMARC->in($filename) || die $MARC::File::ERROR;  # dies with the module's error text if the reader cannot be created
+                       my $count = 0;
+                       while ($file->skip()) {  # NOTE(review): skip() presumably steps over one record without decoding it -- confirm against the MARC::File documentation
+                               $count++;
+                       }
+                       return $count;  # number of times skip() returned a true value
+               }
 
-               my $max_rec = $marc->marc_count();
+               my $count = marc_count($marc_file) || warn "no records in '$marc_file'?";
 
-               for(my $i=1; $i<=$max_rec; $i++) {
+               my $i = 0;
 
-                       progress($i,$max_rec);
+               while( my $rec = $marc->next() ) {
 
-                       # store value for marc_sf.pm
-                       $main::cache->{marc_record} = $i;
+                       progress($i++,$count);
 
                        my $swishpath = $database."#".$i;
 
-                       if (my $xml = data2xml($type_base,$marc,$add_xml,$cfg,$database)) {
+                       if (my $xml = data2xml($type_base,$rec,$add_xml,$cfg,$database)) {
                                $xml = $cp2utf->convert($xml);
                                use bytes;      # as opposed to chars
                                print "Path-Name: $swishpath\n";