#eval $self->{module} .'->import';
# check if required subclasses are implemented
- foreach my $subclass (qw/open_db fetch_rec init/) {
+ foreach my $subclass (qw/open_db fetch_rec init dump_rec/) {
my $n = $self->{module} . '::' . $subclass;
if (! defined &{ $n }) {
my $missing = "missing $subclass in $self->{module}";
$log->debug("position: $pos\n");
- my $rec = $self->{fetch_rec}->($self, $db, $pos, sub {
+ my $rec = $self->{fetch_rec}->($self, $pos, sub {
my ($l,$f_nr) = @_;
# return unless defined($l);
# return $l unless ($rec_regex && $f_nr);
if ($self->{stats}) {
# fetch clean record with regexpes applied for statistics
- my $rec = $self->{fetch_rec}->($self, $db, $pos);
+ my $rec = $self->{fetch_rec}->($self, $pos);
foreach my $fld (keys %{ $rec }) {
$self->{_stats}->{fld}->{ $fld }++;
return $out;
}
+=head2 dump
+
+Display humanly readable dump of record
+
+=cut
+
+sub dump {
+ my $self = shift;
+
+ return $self->{dump_rec}->($self, $self->{pos});
+
+}
+
=head2 modify_record_regexps
Generate hash with regexpes to be applied using l<filter>.
Return record with ID C<$mfn> from database
- my $rec = $self->fetch_rec( $db, $mfn );
+ my $rec = $self->fetch_rec( $mfn );
}
sub fetch_rec {
my $self = shift;
- my (undef, $mfn) = @_;
+ my $mfn = shift;
my $log = $self->_get_logger();
=head1 VERSION
-Version 0.06
+Version 0.07
=cut
-our $VERSION = '0.06';
+our $VERSION = '0.07';
=head1 SYNOPSIS
my $size = $isis_db->count;
+ $self->{_isis_db} = $isis_db;
+
return ($isis_db, $size);
}
Return record with ID C<$mfn> from database
- my $rec = $self->fetch_rec( $db, $mfn, $filter_coderef);
+ my $rec = $self->fetch_rec( $mfn, $filter_coderef);
=cut
sub fetch_rec {
my $self = shift;
- my ($isis_db, $mfn, $filter_coderef) = @_;
+ my ($mfn, $filter_coderef) = @_;
- my $rec = $isis_db->to_hash({
+ my $rec = $self->{_isis_db}->to_hash({
mfn => $mfn,
include_subfields => 1,
hash_filter => $filter_coderef,
return $rec;
}
+=head2 dump_rec
+
+Return dump of record ID C<$mfn> from database
+
+ my $rec = $self->dump_rec( $db, $mfn );
+
+=cut
+
+sub dump_rec {
+ my $self = shift;
+
+ my $mfn = shift;
+
+ return $self->{_isis_db}->to_ascii( $mfn );
+}
+
=head1 AUTHOR
Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
my $db_size = $db->count - 1; # FIXME
$self->{_marc_size} = $db_size;
+ $self->{_marc_db} = $db;
return ($db, $db_size);
}
Return record with ID C<$mfn> from database
- my $rec = $self->fetch_rec( $db, $mfn );
+ my $rec = $self->fetch_rec( $mfn );
}
sub fetch_rec {
my $self = shift;
- my ($db, $mfn) = @_;
+ my $mfn = shift;
if ($mfn > $self->{_marc_size}) {
$self->_get_logger()->warn("seek beyond database size $self->{_marc_size} to $mfn");
} else {
- my $row = $db->to_hash($mfn);
+ my $row = $self->{_marc_db}->to_hash($mfn);
push @{$row->{'000'}}, $mfn;
return $row;
}
use base 'WebPAC::Common';
use File::Slurp;
use List::Util qw/first/;
-use Data::Dumper;
+use Data::Dump qw/dump/;
use WebPAC::Normalize qw/_pack_subfields_hash/;
use Storable qw/dclone/;
=head1 VERSION
-Version 0.04
+Version 0.06
=cut
-our $VERSION = '0.04';
+our $VERSION = '0.06';
=head1 SYNOPSIS
}
- $log->debug("current validation rules: ", Dumper($v));
+ $log->debug("current validation rules: ", dump($v));
$self->{rules} = $v;
$log->logdie("rec isn't HASH") unless (ref($rec) eq 'HASH');
$log->logdie("can't find validation rules") unless (my $r = $self->{rules});
- my @errors;
+ my $errors;
- $log->debug("rec = ", sub { Dumper($rec) }, "keys = ", keys %{ $rec });
+ $log->debug("rec = ", sub { dump($rec) }, "keys = ", keys %{ $rec });
my $fields;
$fields->{$f}++;
if ( ! defined($r->{$f}) ) {
- push @errors, "field '$f' shouldn't exists";
+ $errors->{field}->{ $f }->{extra} = "not expected";
next;
}
if (ref($rec->{$f}) ne 'ARRAY') {
- push @errors, "field '$f' isn't repetable, probably bug in parsing input data";
+ $errors->{field}->{ $f }->{not_repeatable} = "probably bug in parsing input data";
next;
}
# can we have subfields?
if (ref($r->{$f}) eq 'ARRAY') {
# are values hashes? (has subfields)
- if (ref($v) ne 'HASH') {
- push @errors, "$f has value without subfields: $v";
+ if (! defined($v)) {
+ $errors->{field}->{$f}->{empty} = undef;
+ $errors->{dump}->{record}++;
+ } elsif (ref($v) ne 'HASH') {
+ $errors->{field}->{$f}->{missing_subfield} = "value without subfields: $v";
next;
} else {
$sf_repeatable->{$sf}++;
};
if (! first { $_ eq $sf } @{ $r->{$f} }) {
- push @errors, "$f has unknown subfield: $sf";
+ $errors->{field}->{ $f }->{subfield}->{$sf} = "unknown";
}
}
}
if (my @r_sf = sort keys( %$sf_repeatable )) {
- my $plural = $#r_sf > 0 ? 1 : 0;
-
- push @errors, "$f subfield" .
- ( $plural ? 's ' : ' ' ) .
- join(', ', @r_sf) .
- ( $plural ? ' are ' : ' is ' ) .
- 'repeatable in: ' .
- join('', _pack_subfields_hash( $h, 1) );
+
+ foreach my $sf (@r_sf) {
+ $errors->{field}->{$f}->{subfield}->{$sf} = "repeatable";
+ $errors->{dump}->{field}->{$f} =
+ join('', _pack_subfields_hash( $h, 1 ) );
+ }
+
}
if ( defined( $self->{must_exist_sf}->{$f} ) ) {
foreach my $sf (sort keys %{ $self->{must_exist_sf}->{$f} }) {
#warn "====> $f $sf must exist\n";
- push @errors, "$f missing required subfield $sf"
- unless (
- defined( $subfields->{$sf} )
- )
+ $errors->{field}->{$f}->{subfield}->{$sf} = "missing"
+ unless defined( $subfields->{$sf} );
}
}
}
} elsif (ref($v) eq 'HASH') {
- push @errors, "$f has subfields which is not valid";
+ $errors->{field}->{$f}->{unexpected_subfields}++;
+ $errors->{dump}->{field}->{$f} =
+ join('', _pack_subfields_hash( $v, 1 ) );
}
}
}
foreach my $must (sort keys %{ $self->{must_exist} }) {
next if ($fields->{$must});
- push @errors,
- "field $must should exist, but it doesn't";
+ $errors->{field}->{$must}->{missing}++;
+ $errors->{dump}->{record}++;
}
- #$log->logcluck("return from this function is ARRAY") unless wantarray;
+ if ($errors) {
+ $log->debug("errors: ", sub { dump( $errors ) } );
- $log->debug("errors: ", join(", ", @errors)) if (@errors);
+ my $mfn = $rec->{'000'}->[0];
+ $self->{errors}->{$mfn} = $errors;
+ }
+
+ #$log->logcluck("return from this function is ARRAY") unless wantarray;
- return @errors;
+ return $errors;
}
=head1 AUTHOR
use WebPAC::Store 0.03;
use WebPAC::Normalize 0.11;
use WebPAC::Output::TT;
-use WebPAC::Validate;
+use WebPAC::Validate 0.06;
use WebPAC::Output::MARC;
use YAML qw/LoadFile/;
use Getopt::Long;
if ($validate) {
- my @errors = $validate->validate_errors( $row );
- $log->error( "MFN $mfn validation errors:\n", join("\n", @errors) ) if (@errors);
+ if ( my $errors = $validate->validate_errors( $row ) ) {
+ my $dump_record = defined($errors->{dump}->{record});
+ $log->error( "MFN $mfn\n",
+ $dump_record ? $input_db->dump : '',
+ "validation errors:\n", dump( $errors )
+ );
+ }
}
my $ds_config = dclone($db_config);
$total_rows++;
}
+ $log->info("validation errors:\n", dump( $validate->{errors} ) ) if ($validate && defined($validate->{errors}));
+
$log->info("statistics of fields usage:\n", $input_db->stats) if ($stats);
# close MARC file