fix must_exist name
[webpac2] / lib / WebPAC / Validate.pm
index c29c81d..1856b1f 100644 (file)
@@ -18,11 +18,11 @@ WebPAC::Validate - provide simple validation for records
 
 =head1 VERSION
 
-Version 0.11
+Version 0.12
 
 =cut
 
-our $VERSION = '0.11';
+our $VERSION = '0.12';
 
 =head1 SYNOPSIS
 
@@ -60,6 +60,9 @@ Optional parametar C<delimiters> will turn on validating of delimiters. Be
 careful here, those delimiters are just stuck into regex, so they can
 contain L<perlre> regexpes.
 
+C<path> and C<delimiters_path> can be specified by L<read_validate_file> and
+L<read_validate_delimiters> calls.
+
 =cut
 
 sub new {
@@ -67,86 +70,124 @@ sub new {
        my $self = {@_};
        bless($self, $class);
 
-warn dump( @_ );
-
        my $log = $self->_get_logger();
 
-       if ( $self->{path} ) {
+       $self->read_validate_file( $self->{path} ) if ( $self->{path} );
 
-               my $v_file = read_file( $self->{path} ) ||
-                       $log->logdie("can't open validate path $self->{path}: $!");
+       if ( $self->{delimiters} ) {
+               $self->{delimiters_regex} = '(\^[a-z0-9]|' . join('|', @{ $self->{delimiters} }) . ')';
+               $log->info("validation check delimiters with regex $self->{delimiters_regex}");
+       }
 
-               my $v;
-               my $curr_line = 1;
+       $self->read_validate_delimiters_file( $self->{delimiters_path} ) if ( $self->{delimiters_path} );
 
-               foreach my $l (split(/[\n\r]+/, $v_file)) {
-                       $curr_line++;
+       return $self;
+}
 
-                       # skip comments and whitespaces
-                       next if ($l =~ /^#/ || $l =~ /^\s*$/);
 
-                       $l =~ s/^\s+//;
-                       $l =~ s/\s+$//;
+=head2 read_validate_file
 
-                       my @d = split(/\s+/, $l);
+Specify validate rules file
 
-                       my $fld = shift @d;
+  $validate->read_validate_file( 'conf/validate/file' );
 
-                       if ($fld =~ s/!$//) {
-                               $self->{must_exist}->{$fld}++;
-                       } elsif ($fld =~ s/-$//) {
-                               $self->{dont_validate}->{$fld}++;
-                       }
+Returns number of lines in file
 
-                       $log->logdie("need field name in line $curr_line: $l") unless (defined($fld));
+=cut
 
-                       if (@d) {
-                               $v->{$fld} = [ map {
-                                       my $sf = $_;
-                                       if ( $sf =~ s/!(\*)?$/$1/ ) {
-                                               $self->{must_exist_sf}->{ $fld }->{ $sf }++;
-                                       };
-                                       $sf;
-                               } @d ];
-                       } else {
-                               $v->{$fld} = 1;
-                       }
+sub read_validate_file {
+       my $self = shift;
+
+       my $path = shift || die "no path?";
+
+       my $log = $self->_get_logger();
+
+       my $v_file = read_file( $path ) ||
+               $log->logdie("can't open validate path $path: $!");
+
+       my $v;
+       delete( $self->{must_exist} );
+       delete( $self->{must_exist_sf} );
+       delete( $self->{dont_validate} );
+       my $curr_line = 1;
 
+       foreach my $l (split(/[\n\r]+/, $v_file)) {
+               $curr_line++;
+
+               # skip comments and whitespaces
+               next if ($l =~ /^#/ || $l =~ /^\s*$/);
+
+               $l =~ s/^\s+//;
+               $l =~ s/\s+$//;
+
+               my @d = split(/\s+/, $l);
+
+               my $fld = shift @d;
+
+               if ($fld =~ s/!$//) {
+                       $self->{must_exist}->{$fld}++;
+               } elsif ($fld =~ s/-$//) {
+                       $self->{dont_validate}->{$fld}++;
                }
 
-               $log->debug("current validation rules: ", dump($v));
+               $log->logdie("need field name in line $curr_line: $l") unless (defined($fld));
 
-               $self->{rules} = $v;
+               if (@d) {
+                       $v->{$fld} = [ map {
+                               my $sf = $_;
+                               if ( $sf =~ s/!(\*)?$/$1/ ) {
+                                       $self->{must_exist_sf}->{ $fld }->{ $sf }++;
+                               };
+                               $sf;
+                       } @d ];
+               } else {
+                       $v->{$fld} = 1;
+               }
 
-               $log->info("validation uses rules from $self->{path}");
        }
 
-       if ( $self->{delimiters} ) {
-               $self->{delimiters_regex} = '(\^[a-z0-9]|' . join('|', @{ $self->{delimiters} }) . ')';
-               $log->info("validation check delimiters with regex $self->{delimiters_regex}");
-       }
+       $log->debug("current validation rules: ", dump($v));
 
-       if ( my $path = $self->{delimiters_path} ) {
-               if ( -e $path ) {
-                       $log->info("using delimiter validation rules from $path");
-                       open(my $d, $path) || $log->fatal("can't open $path: $!");
-                       while(<$d>) {
-                               chomp($d);
-                               if (/^\s*(#*)\s*(\d+)\t+(\d+)\t+(.*)$/) {
-                                       my ($comment,$field,$count,$template) = ($1,$2,$3,$4);
-                                       $self->{_validate_delimiters_templates}->{$field}->{$template} = $count unless ($comment);
-                               } else {
-                                       warn "## ignored $d\n";
-                               }
+       $self->{rules} = $v;
+
+       $log->info("validation uses rules from $path");
+
+       return $curr_line;
+}
+
+=head2 read_validate_delimiters_file
+
+  $validate->read_validate_delimiters_file( 'conf/validate/delimiters/file' );
+
+=cut
+
+sub read_validate_delimiters_file {
+       my $self = shift;
+
+       my $path = shift || die "no path?";
+
+       my $log = $self->_get_logger();
+
+       delete( $self->{_validate_delimiters_templates} );
+       delete( $self->{_delimiters_templates} );
+
+       if ( -e $path ) {
+               $log->info("using delimiter validation rules from $path");
+               open(my $d, $path) || $log->fatal("can't open $path: $!");
+               while(<$d>) {
+                       chomp($d);
+                       if (/^\s*(#*)\s*(\d+)\t+(\d+)\t+(.*)$/) {
+                               my ($comment,$field,$count,$template) = ($1,$2,$3,$4);
+                               $self->{_validate_delimiters_templates}->{$field}->{$template} = $count unless ($comment);
+                       } else {
+                               warn "## ignored $d\n";
                        }
-                       close($d);
-                       warn "_validate_delimiters_templates = ",dump( $self->{_validate_delimiters_templates} );
-               } else {
-                       $log->warn("delimiters path $path doesn't exist, it will be created after this run");
                }
+               close($d);
+               #warn "_validate_delimiters_templates = ",dump( $self->{_validate_delimiters_templates} );
+       } else {
+               $log->warn("delimiters path $path doesn't exist, it will be created after this run");
        }
-
-       $self ? return $self : return undef;
 }
 
 =head2 validate_rec
@@ -194,7 +235,7 @@ sub validate_rec {
 
                                        if ( my $v = $self->{_validate_delimiters_templates} ) {
                                                if ( ! defined( $v->{$f}->{$template} ) ) {
-                                                       $errors->{$f}->{invalid_delimiters_combination} = $template;
+                                                       $errors->{$f}->{potentially_invalid_combination} = $template;
                                                        $errors->{$f}->{dump} = $subfield_dump;
                                                #} else {
                                                #       warn "## $f $template ok\n";
@@ -282,7 +323,7 @@ sub validate_rec {
                }
        }
 
-       $log->debug("_delimiters_templates = ", dump( $self->{_delimiters_templates} ) );
+       $log->debug("_delimiters_templates = ", sub { dump( $self->{_delimiters_templates} ) } );
 
        foreach my $must (sort keys %{ $self->{must_exist} }) {
                next if ($fields->{$must});
@@ -293,7 +334,7 @@ sub validate_rec {
        if ($errors) {
                $log->debug("errors: ", $self->report_error( $errors ) );
 
-               my $mfn = $rec->{'000'}->[0] || $log->logconfess("record ", dump( $rec ), " doesn't have MFN");
+               my $mfn = $rec->{'000'}->[0] || $log->logconfess("record ", sub { dump( $rec ) }, " doesn't have MFN");
                $self->{errors}->{$mfn} = $errors;
        }
 
@@ -302,17 +343,37 @@ sub validate_rec {
        return $errors;
 }
 
-=head2 reset_errors
+=head2 reset
+
+Clean all accumulated errors for this input and remember delimiter templates
+for L<save_delimiters_templates>
 
-Clean all accumulated errors for this input
+  $validate->reset;
 
-  $validate->reset_errors;
+This function B<must> be called after each input to provide accurate statistics.
 
 =cut
 
-sub reset_errors {
+sub reset {
        my $self = shift;
+
+       my $log = $self->_get_logger;
+
        delete ($self->{errors});
+
+       if ( ! $self->{_delimiters_templates} ) {
+               $log->debug("called without _delimiters_templates?");
+               return;
+       }
+
+       foreach my $f ( keys %{ $self->{_delimiters_templates} } ) {
+               foreach my $t ( keys %{ $self->{_delimiters_templates}->{$f} } ) {
+                       $self->{_accumulated_delimiters_templates}->{$f}->{$t} +=
+                               $self->{_delimiters_templates}->{$f}->{$t};
+               }
+       }
+       $log->debug("_accumulated_delimiters_templates = ", sub { dump( $self->{_accumulated_delimiter_templates} ) } );
+       delete ($self->{_delimiters_templates});
 }
 
 =head2 all_errors
@@ -373,18 +434,14 @@ sub report_error {
                                $accumulated ? "$accumulated\t$k" : $k
                        );
 
-                       $log->debug(
-                               ( $new_results          ? "new_results: " . dump($new_results) ." "     : '' ),
-                       );
+                       $log->debug( "new_results: ", sub { dump($new_results) } ) if ( $new_results );
 
                        push @$results, $new_results if ($new_results);
                        $dump = $new_dump if ($new_dump);
 
                }
 
-               $log->debug(
-                       ( $results              ? "results: " . dump($results) ." "     : '' ),
-               );
+               $log->debug( "results: ", sub { dump($results) } ) if ( $results );
 
                if ($#$results == 0) {
                        return ($results->[0], $dump);
@@ -448,6 +505,7 @@ Generate report of delimiter tamplates
 
   my $report = $validate->delimiter_teplates(
        report => 1,
+       current_input => 1,
   );
 
 Options:
@@ -458,6 +516,10 @@ Options:
 
 Generate humanly readable report with single fields
 
+=item current_input
+
+Report just current_input and not accumulated data
+
 =back
 
 =cut
@@ -467,7 +529,8 @@ sub delimiters_templates {
 
        my $args = {@_};
 
-       my $t = $self->{_delimiters_templates};
+       my $t = $self->{_accumulated_delimiters_templates};
+       $t = $self->{_delimiters_templates} if ( $args->{current_input} );
 
        my $log = $self->_get_logger;
 
@@ -494,6 +557,10 @@ sub delimiters_templates {
 
 =head2 save_delimiters_templates
 
+Save accumulated delimiter templates
+
+  $validator->save_delimiters_template( '/path/to/validate/delimiters' );
+
 =cut
 
 sub save_delimiters_templates {
@@ -505,6 +572,18 @@ sub save_delimiters_templates {
 
        my $log = $self->_get_logger;
 
+       if ( ! $self->{_accumulated_delimiters_templates} ) {
+               $log->error('no _accumulated_delimiters_templates found, reset');
+               $self->reset;
+       }
+
+       if ( ! $self->{_delimiters_templates} ) {
+               $log->error('found _delimiters_templates, calling reset');
+               $self->reset;
+       }
+
+       $path .= '.new' if ( -e $path );
+
        open(my $d, '>', $path) || $log->fatal("can't open $path: $!");
        print $d $self->delimiters_templates;
        close($d);