r1650@llin: dpavlin | 2007-11-20 11:07:57 +0100
[webpac2] / lib / WebPAC / Validate.pm
index e816db0..d7ed6fb 100644 (file)
@@ -18,11 +18,11 @@ WebPAC::Validate - provide simple validation for records
 
 =head1 VERSION
 
-Version 0.11
+Version 0.12
 
 =cut
 
-our $VERSION = '0.11';
+our $VERSION = '0.12';
 
 =head1 SYNOPSIS
 
@@ -53,12 +53,16 @@ Create new validation object
   my $validate = new WebPAC::Validate(
        path => 'conf/validate/file',
        delimiters => [ ' : ', ' / ', ' ; ', ' , ' ],
+       delimiters_path => 'conf/validate/delimiters/file',
   );
 
 Optional parametar C<delimiters> will turn on validating of delimiters. Be
 careful here, those delimiters are just stuck into regex, so they can
 contain L<perlre> regexpes.
 
+C<path> and C<delimiters_path> can be specified by L<read_validate_file> and
+L<read_validate_delimiters> calls.
+
 =cut
 
 sub new {
@@ -68,14 +72,43 @@ sub new {
 
        my $log = $self->_get_logger();
 
-       foreach my $p (qw/path/) {
-               $log->logconfess("need $p") unless ($self->{$p});
+       $self->read_validate_file( $self->{path} ) if ( $self->{path} );
+
+       if ( $self->{delimiters} ) {
+               $self->{delimiters_regex} = '(\^[a-z0-9]|' . join('|', @{ $self->{delimiters} }) . ')';
+               $log->info("validation check delimiters with regex $self->{delimiters_regex}");
        }
 
-       my $v_file = read_file( $self->{path} ) ||
-               $log->logdie("can't open validate path $self->{path}: $!");
+       $self->read_validate_delimiters_file( $self->{delimiters_path} ) if ( $self->{delimiters_path} );
+
+       return $self;
+}
+
+
+=head2 read_validate_file
+
+Specify validate rules file
+
+  $validate->read_validate_file( 'conf/validate/file' );
+
+Returns number of lines in file
+
+=cut
+
+sub read_validate_file {
+       my $self = shift;
+
+       my $path = shift || die "no path?";
+
+       my $log = $self->_get_logger();
+
+       my $v_file = read_file( $path ) ||
+               $log->logdie("can't open validate path $path: $!");
 
        my $v;
+       delete( $self->{must_exist} );
+       delete( $self->{must_exist_sf} );
+       delete( $self->{dont_validate} );
        my $curr_line = 1;
 
        foreach my $l (split(/[\n\r]+/, $v_file)) {
@@ -102,7 +135,7 @@ sub new {
                if (@d) {
                        $v->{$fld} = [ map {
                                my $sf = $_;
-                               if ( $sf =~ s/!(\*)?$/$1/ ) {
+                               if ( $sf =~ s/!// ) {
                                        $self->{must_exist_sf}->{ $fld }->{ $sf }++;
                                };
                                $sf;
@@ -117,14 +150,45 @@ sub new {
 
        $self->{rules} = $v;
 
-       $log->info("validation uses rules from $self->{path}");
+       $log->info("validation uses rules from $path");
 
-       if ( $self->{delimiters} ) {
-               $self->{delimiters_regex} = '(\^[a-z0-9]|' . join('|', @{ $self->{delimiters} }) . ')';
-               $log->info("validation check delimiters with regex $self->{delimiters_regex}");
-       }
+       return $curr_line;
+}
+
+=head2 read_validate_delimiters_file
+
+  $validate->read_validate_delimiters_file( 'conf/validate/delimiters/file' );
+
+=cut
+
+sub read_validate_delimiters_file {
+       my $self = shift;
 
-       $self ? return $self : return undef;
+       my $path = shift || die "no path?";
+
+       my $log = $self->_get_logger();
+
+       delete( $self->{_validate_delimiters_templates} );
+       delete( $self->{_delimiters_templates} );
+
+       if ( -e $path ) {
+               $log->info("using delimiter validation rules from $path");
+               open(my $d, $path) || $log->fatal("can't open $path: $!");
+               while(<$d>) {
+                       chomp($d);
+                       if (/^\s*(#*)\s*(\d+)\t+(\d+)\t+(.*)$/) {
+                               my ($comment,$field,$count,$template) = ($1,$2,$3,$4);
+                               $self->{_validate_delimiters_templates}->{$field}->{$template} = $count unless ($comment);
+                       } else {
+                               warn "## ignored $d\n";
+                       }
+               }
+               close($d);
+               #warn "_validate_delimiters_templates = ",dump( $self->{_validate_delimiters_templates} );
+       } else {
+               $log->warn("delimiters path $path doesn't exist, it will be created after this run");
+       }
+       $self->{delimiters_path} = $path;
 }
 
 =head2 validate_rec
@@ -144,7 +208,8 @@ sub validate_rec {
        my $rec_dump = shift;
 
        $log->logdie("rec isn't HASH") unless (ref($rec) eq 'HASH');
-       $log->logdie("can't find validation rules") unless (my $r = $self->{rules});
+#      $log->logdie("can't find validation rules") unless (my $r = $self->{rules});
+       my $r = $self->{rules};
 
        my $errors;
 
@@ -170,17 +235,19 @@ sub validate_rec {
                                        $self->{_delimiters_templates}->{$f}->{$template}++;
 
                                        if ( my $v = $self->{_validate_delimiters_templates} ) {
-                                               if ( ! defined( $v->{$template} ) ) {
-                                                       $errors->{$f}->{invalid_delimiters_combination} = $template;
+                                               if ( ! defined( $v->{$f}->{$template} ) ) {
+                                                       $errors->{$f}->{potentially_invalid_combination} = $template;
                                                        $errors->{$f}->{dump} = $subfield_dump;
-                                               } else {
-                                                       warn "## $f $template ok\n";
+                                               #} else {
+                                               #       warn "## $f $template ok\n";
                                                }
                                        }
                                }
                        }
                }
 
+               next unless ( $r );     # skip validation of no rules are specified
+
                next if (defined( $self->{dont_validate}->{$f} ));
 
                # track field usage
@@ -206,6 +273,7 @@ sub validate_rec {
 #                                      $errors->{dump} = $rec_dump if ($rec_dump);
                                } elsif (ref($v) ne 'HASH') {
                                        $errors->{$f}->{missing_subfield} = join(",", @{ $r->{$f} }) . " required";
+                                       $errors->{$f}->{dump} = $v;
                                        next;
                                } else {
 
@@ -226,7 +294,7 @@ sub validate_rec {
                                                        if ( ref($v->{$sf}) eq 'ARRAY' ) {
                                                                $sf_repeatable->{$sf}++;
                                                        };
-                                                       if (! first { $_ eq $sf } @{ $r->{$f} }) {
+                                                       if (! defined first { $_ eq $sf } @{ $r->{$f} }) {
                                                                $errors->{ $f }->{subfield}->{extra}->{$sf}++;
                                                        }
                                                }
@@ -257,7 +325,7 @@ sub validate_rec {
                }
        }
 
-       $log->debug("_delimiters_templates = ", dump( $self->{_delimiters_templates} ) );
+       $log->debug("_delimiters_templates = ", sub { dump( $self->{_delimiters_templates} ) } );
 
        foreach my $must (sort keys %{ $self->{must_exist} }) {
                next if ($fields->{$must});
@@ -268,7 +336,7 @@ sub validate_rec {
        if ($errors) {
                $log->debug("errors: ", $self->report_error( $errors ) );
 
-               my $mfn = $rec->{'000'}->[0] || $log->logconfess("record ", dump( $rec ), " doesn't have MFN");
+               my $mfn = $rec->{'000'}->[0] || $log->logconfess("record ", sub { dump( $rec ) }, " doesn't have MFN");
                $self->{errors}->{$mfn} = $errors;
        }
 
@@ -277,17 +345,37 @@ sub validate_rec {
        return $errors;
 }
 
-=head2 reset_errors
+=head2 reset
+
+Clean all accumulated errors for this input and remember delimiter templates
+for L<save_delimiters_templates>
 
-Clean all accumulated errors for this input
+  $validate->reset;
 
-  $validate->reset_errors;
+This function B<must> be called after each input to provide accurate statistics.
 
 =cut
 
-sub reset_errors {
+sub reset {
        my $self = shift;
+
+       my $log = $self->_get_logger;
+
        delete ($self->{errors});
+
+       if ( ! $self->{_delimiters_templates} ) {
+               $log->debug("called without _delimiters_templates?");
+               return;
+       }
+
+       foreach my $f ( keys %{ $self->{_delimiters_templates} } ) {
+               foreach my $t ( keys %{ $self->{_delimiters_templates}->{$f} } ) {
+                       $self->{_accumulated_delimiters_templates}->{$f}->{$t} +=
+                               $self->{_delimiters_templates}->{$f}->{$t};
+               }
+       }
+       $log->debug("_accumulated_delimiters_templates = ", sub { dump( $self->{_accumulated_delimiters_templates} ) } );
+       delete ($self->{_delimiters_templates});
 }
 
 =head2 all_errors
@@ -348,18 +436,14 @@ sub report_error {
                                $accumulated ? "$accumulated\t$k" : $k
                        );
 
-                       $log->debug(
-                               ( $new_results          ? "new_results: " . dump($new_results) ." "     : '' ),
-                       );
+                       $log->debug( "new_results: ", sub { dump($new_results) } ) if ( $new_results );
 
                        push @$results, $new_results if ($new_results);
                        $dump = $new_dump if ($new_dump);
 
                }
 
-               $log->debug(
-                       ( $results              ? "results: " . dump($results) ." "     : '' ),
-               );
+               $log->debug( "results: ", sub { dump($results) } ) if ( $results );
 
                if ($#$results == 0) {
                        return ($results->[0], $dump);
@@ -423,6 +507,7 @@ Generate report of delimiter tamplates
 
   my $report = $validate->delimiter_teplates(
        report => 1,
+       current_input => 1,
   );
 
 Options:
@@ -433,6 +518,10 @@ Options:
 
 Generate humanly readable report with single fields
 
+=item current_input
+
+Report just current_input and not accumulated data
+
 =back
 
 =cut
@@ -442,7 +531,8 @@ sub delimiters_templates {
 
        my $args = {@_};
 
-       my $t = $self->{_delimiters_templates};
+       my $t = $self->{_accumulated_delimiters_templates};
+       $t = $self->{_delimiters_templates} if ( $args->{current_input} );
 
        my $log = $self->_get_logger;
 
@@ -467,6 +557,46 @@ sub delimiters_templates {
        return $out;
 }
 
+=head2 save_delimiters_templates
+
+Save accumulated delimiter templates
+
+  $validator->save_delimiters_template( '/path/to/validate/delimiters' );
+
+=cut
+
+sub save_delimiters_templates {
+       my $self = shift;
+
+       my $path = shift;
+       $path ||= $self->{delimiters_path};
+
+       my $log = $self->_get_logger;
+
+       $log->logdie("need path") unless ( $path );
+
+
+       if ( ! $self->{_accumulated_delimiters_templates} ) {
+               $log->error('no _accumulated_delimiters_templates found, reset');
+               $self->reset;
+       }
+
+       if ( $self->{_delimiters_templates} ) {
+               $log->error('found _delimiters_templates, calling reset');
+               $self->reset;
+       }
+
+       $path .= '.new' if ( -e $path );
+
+       open(my $d, '>', $path) || $log->fatal("can't open $path: $!");
+       print $d $self->delimiters_templates;
+       close($d);
+
+       $log->info("new delimiters templates saved to $path");
+
+       return 1;
+}
+
 =head1 AUTHOR
 
 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>