use Data::Dump qw/dump/;
use Storable qw/dclone/;
use Pod::Usage qw/pod2usage/;
-use LWP::Simple;
+use LWP::Simple qw//;
use POSIX ":sys_wait_h"; # imports WNOHANG
By default turned on if normalisation file has C<marc*> directives. You can disable lint
messages with C<--no-marc-lint>.
+=item --marcxml
+
+Create MARCXML file (this can be quite large)
+
=item --marc-dump
Force dump of input and MARC record for debugging.
my $validate_path;
my $validate_delimiters_path;
my $marc_generate = 1;
-my $marc_lint = 1;
+my $marc_lint = 0;
my $marc_dump = 0;
+my $marc_xml = 0;
my $parallel = 0;
my $only_links = 0;
my $merge = 0;
"marc-generate!" => \$marc_generate,
"marc-lint!" => \$marc_lint,
"marc-dump!" => \$marc_dump,
+ "marcxml!" => \$marc_xml,
"parallel=i" => \$parallel,
"only-links!" => \$only_links,
"merge" => \$merge,
next;
}
- next if ($only_input && ($input_name !~ m#$only_input#i && $input->{type} !~ m#$only_input#i));
+ next if defined $only_input && $input_name !~ m#$only_input#i;
my $type = lc($input->{type});
$base =~ s{/[^/]+$}{};
mkpath $base unless -e $base;
- my $rc = mirror( "$mirror/$path", $path );
- if (is_error( $rc )) {
+ my $rc = LWP::Simple::mirror( "$mirror/$path", $path );
+ if (LWP::Simple::is_error( $rc )) {
die "can't mirror $mirror/$path -> $path [$rc]";
} else {
$log->info( "mirror ", $path, " [$rc] ", -s $path, " bytes" );
my $input_db = new WebPAC::Input(
module => $input_module,
limit => $limit || $input->{limit},
- offset => $offset,
+ offset => $offset || $input->{offset},
recode => $input->{recode},
stats => $stats,
modify_records => $input->{modify_records},
mkpath $out_marc unless -e $out_marc;
$marc = new WebPAC::Output::MARC(
- path => "$out_marc/${database}-${input_name}.marc",
+ path => "$out_marc/${database}-${input_name}",
lint => $marc_lint,
dump => $marc_dump,
+ marcxml => $marc_xml,
);
}
my $rules = $parser->normalize_rules($database,$input_name);
- $log->logwarn("no normalize rules for $database/$input_name") unless $rules;
+ if ( ! $rules ) {
+ $log->logwarn("no normalize rules for $database/$input_name", $input_db->input_module->can('normalize') ? " using normalize from input module" : '');
+ next;
+ }
$log->debug("parsed normalize rules:\n$rules");
push @{ $row->{'000'} }, $pos;
}
+ foreach my $out ( @output_modules ) {
+ $out->add_row( $mfn, $row ) if $out->can('add_row');
+ }
if ($validate) {
if ( my $errors = $validate->validate_rec( $row, $input_db->dump_ascii ) ) {
next; # validation doesn't create any output
}
+ my $ds;
+
if ($rules) {
- my $ds = WebPAC::Normalize::data_structure(
+ $ds = WebPAC::Normalize::data_structure(
row => $row,
rules => $rules,
lookup => $lookup_hash,
},
);
- $log->debug("ds = ", sub { dump($ds) });
-
- if ( $ds ) {
-
- $store->save_ds(
- database => $database,
- input => $input_name,
- id => $mfn,
- ds => $ds,
- ) if !$stats;
-
- $indexer->add(
- id => "${input_name}/${mfn}",
- ds => $ds,
- type => $config->get($indexer_config)->{type},
- ) if $indexer;
+ } elsif ( $input_db->input_module->can('normalize') ) {
+ $ds = $input_db->input_module->normalize( $mfn );
+ }
- foreach my $out ( @output_modules ) {
- $out->add( $mfn, $ds ) if $out->can('add');
- }
+ if ( $ds ) {
+ $log->debug("ds = ", sub { dump($ds) });
- } else {
- $log->warn("record $pos didn't produce any output after normalization rules!") unless $marc;
+ $store->save_ds(
+ database => $database,
+ input => $input_name,
+ id => $mfn,
+ ds => $ds,
+ ) if !$stats;
+
+ $indexer->add(
+ id => "${input_name}/${mfn}",
+ ds => $ds,
+ type => $config->get($indexer_config)->{type},
+ ) if $indexer;
+
+ foreach my $out ( @output_modules ) {
+ $out->add( $mfn, $ds ) if $out->can('add');
}
- }
+ } else {
+ $log->warn("record $pos didn't produce any output after normalization rules!") unless $marc;
+ }
if ($marc) {
my $i = 0;