projects
/
webpac
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
use newer MARC::File::USMARC instead of MARC
[webpac]
/
all2xml.pl
diff --git
a/all2xml.pl
b/all2xml.pl
index
7cb8f52
..
b7107e7
100755
(executable)
--- a/
all2xml.pl
+++ b/
all2xml.pl
@@
-139,12
+139,14
@@
sub data2xml {
} else {
print STDERR "WARNING: field '$field' doesn't have 'name' attribute!";
}
} else {
print STDERR "WARNING: field '$field' doesn't have 'name' attribute!";
}
+
if ($field_name) {
if ($field_name) {
+ $field_name = x($field_name);
if (! $last_field_name) {
if (! $last_field_name) {
- $last_field_name =
x($field_name)
;
+ $last_field_name =
$field_name
;
return $last_field_name;
} elsif ($field_name ne $last_field_name) {
return $last_field_name;
} elsif ($field_name ne $last_field_name) {
- $last_field_name =
x($field_name)
;
+ $last_field_name =
$field_name
;
return $last_field_name;
}
}
return $last_field_name;
}
}
@@
-639,6
+641,10
@@
foreach my $database ($cfg->Sections) {
my $lookup_file = $cfg -> val($database, 'lookup_newfile'); # optional
if ($lookup_file) {
#tie %lhash, 'GDBM_File', $lookup_file, &GDBM_NEWDB, 0644;
my $lookup_file = $cfg -> val($database, 'lookup_newfile'); # optional
if ($lookup_file) {
#tie %lhash, 'GDBM_File', $lookup_file, &GDBM_NEWDB, 0644;
+ if (! -e $lookup_file) {
+ open(LOOKUP, "> $lookup_file") || die "can't create $lookup_file': $!";
+ close(LOOKUP);
+ }
tie %lhash, 'TDB_File', $lookup_file, TDB_CLEAR_IF_FIRST, O_RDWR, 0644;
print STDERR "creating lookup file '$lookup_file'\n";
# delete memory cache for lookup file
tie %lhash, 'TDB_File', $lookup_file, TDB_CLEAR_IF_FIRST, O_RDWR, 0644;
print STDERR "creating lookup file '$lookup_file'\n";
# delete memory cache for lookup file
@@
-661,15
+667,30
@@
print STDERR "reading ./import_xml/$type.xml\n";
$config=XMLin("./import_xml/$type.xml", ForceArray => [ $type2tag{$type_base}, 'config', 'format' ], ForceContent => 1 );
$config=XMLin("./import_xml/$type.xml", ForceArray => [ $type2tag{$type_base}, 'config', 'format' ], ForceContent => 1 );
+ # helper for progress bar
+ sub fmt_time {
+ my $t = shift || 0;
+ my $out = "";
+
+ my ($ss,$mm,$hh) = gmtime($t);
+ $out .= "${hh}h" if ($hh);
+ $out .= sprintf("%02d:%02d", $mm,$ss);
+ $out .= " " if ($hh == 0);
+ return $out;
+ }
+
# output current progress indicator
my $last_p = 0;
# output current progress indicator
my $last_p = 0;
+ my $start_t = time();
sub progress {
return if (! $show_progress);
my $current = shift;
my $total = shift || 1;
my $p = int($current * 100 / $total);
if ($p != $last_p) {
sub progress {
return if (! $show_progress);
my $current = shift;
my $total = shift || 1;
my $p = int($current * 100 / $total);
if ($p != $last_p) {
- printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$current,$total,"=" x ($p/2).">", $p );
+ my $rate = ($current / (time() - $start_t || 1));
+ my $eta = ($total-$current) / ($rate || 1);
+ printf STDERR ("%5d [%-38s] %-5d %0.1f/s %s\r",$current,"=" x ($p/3)."$p%>", $total, $rate, fmt_time($eta));
$last_p = $p;
}
}
$last_p = $p;
}
}
@@
-809,7
+830,11
@@
print STDERR "using: $type...\n";
for(my $iC = $oWorksheet->{MinCol} ; defined $oWorksheet->{MaxCol} && $iC <= $oWorksheet->{MaxCol} ; $iC++) {
my $cell = $oWorksheet->{Cells}[$iR][$iC];
if ($cell) {
for(my $iC = $oWorksheet->{MinCol} ; defined $oWorksheet->{MaxCol} && $iC <= $oWorksheet->{MaxCol} ; $iC++) {
my $cell = $oWorksheet->{Cells}[$iR][$iC];
if ($cell) {
- $row->{int2col($iC)} = $cell->Value;
+ # this conversion is a cludge.
+ # Files from Excell could have
+ # characters which don't fit into
+ # destination encoding.
+ $row->{int2col($iC)} = $utf2cp->convert($cell->Value) || $cell->Value;
}
}
}
}
@@
-835,36
+860,40
@@
print STDERR "using: $type...\n";
}
} elsif ($type_base eq "marc") {
}
} elsif ($type_base eq "marc") {
- require MARC;
+ require MARC
::File::USMARC
;
$import2cp = Text::Iconv->new($config->{marc_codepage},$codepage);
my $marc_file = $cfg -> val($database, 'marc_file') || die "$database doesn't have 'marc_file' defined!";
# optional argument is format
$import2cp = Text::Iconv->new($config->{marc_codepage},$codepage);
my $marc_file = $cfg -> val($database, 'marc_file') || die "$database doesn't have 'marc_file' defined!";
# optional argument is format
- my $format = x($config->{marc_format}) || 'usmarc';
-
+ warn "marc_format is no longer used!" if ($config->{marc_format});
print STDERR "Reading MARC file '$marc_file'\n";
print STDERR "Reading MARC file '$marc_file'\n";
- my $marc = new MARC;
- my $nr = $marc->openmarc({
- file=>$marc_file, format=>$format
- }) || die "Can't open MARC file '$marc_file' with format '$format'";
+ my $marc = MARC::File::USMARC->in( $marc_file )
+ || die "Can't open MARC file '$marc_file': ".$MARC::File::ERROR;
- # read MARC file in memory
- $marc->nextmarc(-1);
+ # count records in MARC file
+ sub marc_count {
+ my $filename = shift || die;
+ my $file = MARC::File::USMARC->in($filename) || die $MARC::File::ERROR;
+ my $count = 0;
+ while ($file->skip()) {
+ $count++;
+ }
+ return $count;
+ }
- my $
max_rec = $marc->marc_count()
;
+ my $
count = marc_count($marc_file) || warn "no records in '$marc_file'?"
;
- for(my $i=1; $i<=$max_rec; $i++) {
+ my $i = 0;
- progress($i,$max_rec);
+ while( my $rec = $marc->next() ) {
- # store value for marc_sf.pm
- $main::cache->{marc_record} = $i;
+ progress($i++,$count);
my $swishpath = $database."#".$i;
my $swishpath = $database."#".$i;
- if (my $xml = data2xml($type_base,$
mar
c,$add_xml,$cfg,$database)) {
+ if (my $xml = data2xml($type_base,$
re
c,$add_xml,$cfg,$database)) {
$xml = $cp2utf->convert($xml);
use bytes; # as opposed to chars
print "Path-Name: $swishpath\n";
$xml = $cp2utf->convert($xml);
use bytes; # as opposed to chars
print "Path-Name: $swishpath\n";