1 package WebPAC::Validate;
8 use base 'WebPAC::Common';
10 use List::Util qw/first/;
11 use Data::Dump qw/dump/;
12 use WebPAC::Normalize qw/_pack_subfields_hash/;
13 use Storable qw/dclone/;
17 WebPAC::Validate - provide simple validation for records
25 our $VERSION = '0.09';
29 This module provides a simple way to validate your file against a simple
30 configuration file in the following format:
32 # field 10 doesn't have any subfields
36 # field 200 has valid subfields a-g
37 # and subfield e is repeatable
39 # field 205 can have only subfield a
42 # while 210 can have a c or d
44 # field which is ignored in validation
51 Create new validation object
53 my $validate = WebPAC::Validate->new(
54 path => 'conf/validate/file',
64 my $log = $self->_get_logger();
66 foreach my $p (qw/path/) {
67 $log->logconfess("need $p") unless ($self->{$p});
70 my $v_file = read_file( $self->{path} ) ||
71 $log->logdie("can't open validate path $self->{path}: $!");
76 foreach my $l (split(/[\n\r]+/, $v_file)) {
79 # skip comments and whitespaces
80 next if ($l =~ /^#/ || $l =~ /^\s*$/);
85 my @d = split(/\s+/, $l);
90 $self->{must_exist}->{$fld}++;
91 } elsif ($fld =~ s/-$//) {
92 $self->{dont_validate}->{$fld}++;
95 $log->logdie("need field name in line $curr_line: $l") unless (defined($fld));
100 if ( $sf =~ s/!(\*)?$/$1/ ) {
101 $self->{must_exist_sf}->{ $fld }->{ $sf }++;
111 $log->debug("current validation rules: ", dump($v));
115 $log->info("validation uses rules from $self->{path}");
117 $self ? return $self : return undef;
120 =head2 validate_errors
122 Validate record and return errors
124 my @errors = $validate->validate_errors( $rec, $rec_dump );
128 sub validate_errors {
131 my $log = $self->_get_logger();
133 my $rec = shift || $log->logdie("validate_errors need record");
134 my $rec_dump = shift;
136 $log->logdie("rec isn't HASH") unless (ref($rec) eq 'HASH');
137 $log->logdie("can't find validation rules") unless (my $r = $self->{rules});
141 $log->debug("rec = ", sub { dump($rec) }, "keys = ", keys %{ $rec });
145 foreach my $f (keys %{ $rec }) {
147 next if (!defined($f) || $f eq '' || $f eq '000');
149 next if (defined( $self->{dont_validate}->{$f} ));
154 if ( ! defined($r->{$f}) ) {
155 $errors->{ $f }->{unexpected} = "this field is not expected";
160 if (ref($rec->{$f}) ne 'ARRAY') {
161 $errors->{ $f }->{not_repeatable} = "probably bug in parsing input data";
165 foreach my $v (@{ $rec->{$f} }) {
166 # can we have subfields?
167 if (ref($r->{$f}) eq 'ARRAY') {
168 # are values hashes? (has subfields)
170 # $errors->{$f}->{empty} = undef;
171 # $errors->{dump} = $rec_dump if ($rec_dump);
172 } elsif (ref($v) ne 'HASH') {
173 $errors->{$f}->{missing_subfield} = join(",", @{ $r->{$f} }) . " required";
177 my $h = dclone( $v );
181 delete($v->{subfields}) if (defined($v->{subfields}));
185 foreach my $sf (keys %{ $v }) {
187 $subfields->{ $sf }++;
189 # is non-repeatable but with multiple values?
190 if ( ! first { $_ eq $sf.'*' } @{$r->{$f}} ) {
191 if ( ref($v->{$sf}) eq 'ARRAY' ) {
192 $sf_repeatable->{$sf}++;
194 if (! first { $_ eq $sf } @{ $r->{$f} }) {
195 $errors->{ $f }->{subfield}->{extra}->{$sf}++;
200 if (my @r_sf = sort keys( %$sf_repeatable )) {
202 foreach my $sf (@r_sf) {
203 $errors->{$f}->{subfield}->{extra_repeatable}->{$sf}++;
204 $errors->{$f}->{dump} = _pack_subfields_hash( $h, 1 );
209 if ( defined( $self->{must_exist_sf}->{$f} ) ) {
210 foreach my $sf (sort keys %{ $self->{must_exist_sf}->{$f} }) {
211 #warn "====> $f $sf must exist\n";
212 $errors->{$f}->{subfield}->{missing}->{$sf}++
213 unless defined( $subfields->{$sf} );
218 } elsif (ref($v) eq 'HASH') {
219 $errors->{$f}->{unexpected_subfields}++;
220 $errors->{$f}->{dump} = _pack_subfields_hash( $v, 1 );
225 foreach my $must (sort keys %{ $self->{must_exist} }) {
226 next if ($fields->{$must});
227 $errors->{$must}->{missing}++;
228 $errors->{dump} = $rec_dump if ($rec_dump);
232 $log->debug("errors: ", $self->report_error( $errors ) );
234 my $mfn = $rec->{'000'}->[0] || $log->logconfess("record ", dump( $rec ), " doesn't have MFN");
235 $self->{errors}->{$mfn} = $errors;
238 #$log->logcluck("return from this function is ARRAY") unless wantarray;
245 Clear all accumulated errors for this input
247 $validate->reset_errors;
253 delete ($self->{errors});
258 Return hash with all errors
260 print dump( $validate->all_errors );
266 return $self->{errors};
271 Produce a nice human-readable report of a single error
273 print $validate->report_error( $error_hash );
280 my $h = shift || die "no hash?";
283 my ($self, $tree, $accumulated) = @_;
285 my $log = $self->_get_logger();
288 ( $tree ? "tree: $tree " : '' ),
289 ( $accumulated ? "accumulated: $accumulated " : '' ),
294 if (ref($tree) ne 'HASH') {
295 return ("$accumulated\t($tree)", undef);
300 foreach my $k (sort keys %{ $tree }) {
303 $dump = $tree->{dump};
304 # warn "## dump: ",dump($dump),"\n";
308 $log->debug("current: $k");
310 my ($new_results, $new_dump) = $self->_unroll($tree->{$k},
311 $accumulated ? "$accumulated\t$k" : $k
315 ( $new_results ? "new_results: " . dump($new_results) ." " : '' ),
318 push @$results, $new_results if ($new_results);
319 $dump = $new_dump if ($new_dump);
324 ( $results ? "results: " . dump($results) ." " : '' ),
327 if ($#$results == 0) {
328 return ($results->[0], $dump);
330 return ($results, $dump);
344 for my $f (sort keys %{ $h }) {
347 my ($r, $d) = $self->_unroll( $h->{$f} );
349 if (ref($r) eq 'ARRAY') {
350 $e .= join(", ", map { _reformat( $_ ) } @$r);
352 $e .= _reformat( $r );
354 $e .= "\n\t$d" if ($d);
364 Produce a nice human-readable report of all accumulated errors
366 print $validate->report;
372 my $e = $self->{errors} || return;
375 foreach my $mfn (sort { $a <=> $b } keys %$e) {
376 $out .= "MFN $mfn\n" . $self->report_error( $e->{$mfn} ) . "\n";
385 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
387 =head1 COPYRIGHT & LICENSE
389 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
391 This program is free software; you can redistribute it and/or modify it
392 under the same terms as Perl itself.
396 1; # End of WebPAC::Validate