my $validate = new WebPAC::Validate(
path => 'conf/validate/file',
delimiters => [ ' : ', ' / ', ' ; ', ' , ' ],
+ delimiters_path => 'conf/validate/delimiters/file',
);
Optional parameter C<delimiters> will turn on validation of delimiters. Be
my $self = {@_};
bless($self, $class);
+warn dump( @_ );
+
my $log = $self->_get_logger();
- foreach my $p (qw/path/) {
- $log->logconfess("need $p") unless ($self->{$p});
- }
+ if ( $self->{path} ) {
- my $v_file = read_file( $self->{path} ) ||
- $log->logdie("can't open validate path $self->{path}: $!");
+ my $v_file = read_file( $self->{path} ) ||
+ $log->logdie("can't open validate path $self->{path}: $!");
- my $v;
- my $curr_line = 1;
+ my $v;
+ my $curr_line = 1;
- foreach my $l (split(/[\n\r]+/, $v_file)) {
- $curr_line++;
+ foreach my $l (split(/[\n\r]+/, $v_file)) {
+ $curr_line++;
- # skip comments and whitespaces
- next if ($l =~ /^#/ || $l =~ /^\s*$/);
+ # skip comments and whitespaces
+ next if ($l =~ /^#/ || $l =~ /^\s*$/);
- $l =~ s/^\s+//;
- $l =~ s/\s+$//;
+ $l =~ s/^\s+//;
+ $l =~ s/\s+$//;
- my @d = split(/\s+/, $l);
+ my @d = split(/\s+/, $l);
- my $fld = shift @d;
+ my $fld = shift @d;
- if ($fld =~ s/!$//) {
- $self->{must_exist}->{$fld}++;
- } elsif ($fld =~ s/-$//) {
- $self->{dont_validate}->{$fld}++;
- }
+ if ($fld =~ s/!$//) {
+ $self->{must_exist}->{$fld}++;
+ } elsif ($fld =~ s/-$//) {
+ $self->{dont_validate}->{$fld}++;
+ }
- $log->logdie("need field name in line $curr_line: $l") unless (defined($fld));
+ $log->logdie("need field name in line $curr_line: $l") unless (defined($fld));
+
+ if (@d) {
+ $v->{$fld} = [ map {
+ my $sf = $_;
+ if ( $sf =~ s/!(\*)?$/$1/ ) {
+ $self->{must_exist_sf}->{ $fld }->{ $sf }++;
+ };
+ $sf;
+ } @d ];
+ } else {
+ $v->{$fld} = 1;
+ }
- if (@d) {
- $v->{$fld} = [ map {
- my $sf = $_;
- if ( $sf =~ s/!(\*)?$/$1/ ) {
- $self->{must_exist_sf}->{ $fld }->{ $sf }++;
- };
- $sf;
- } @d ];
- } else {
- $v->{$fld} = 1;
}
- }
-
- $log->debug("current validation rules: ", dump($v));
+ $log->debug("current validation rules: ", dump($v));
- $self->{rules} = $v;
+ $self->{rules} = $v;
- $log->info("validation uses rules from $self->{path}");
+ $log->info("validation uses rules from $self->{path}");
+ }
if ( $self->{delimiters} ) {
$self->{delimiters_regex} = '(\^[a-z0-9]|' . join('|', @{ $self->{delimiters} }) . ')';
$log->info("validation check delimiters with regex $self->{delimiters_regex}");
}
+	# Load delimiter templates saved by a previous run, if the file exists.
+	# Each usable line has the form: field <TAB> count <TAB> template
+	# Lines whose first non-blank character is '#' are parsed but not stored;
+	# anything that doesn't match that shape is reported and skipped.
+	if ( my $path = $self->{delimiters_path} ) {
+		if ( -e $path ) {
+			$log->info("using delimiter validation rules from $path");
+			# logdie (as used for the rules file above) instead of fatal, so
+			# that we never try to read from a handle which failed to open
+			open(my $d, '<', $path) || $log->logdie("can't open $path: $!");
+			# lexical line variable: don't clobber caller's $_
+			while ( my $line = <$d> ) {
+				# chomp the line itself, not the filehandle
+				chomp($line);
+				if ( $line =~ /^\s*(#*)\s*(\d+)\t+(\d+)\t+(.*)$/ ) {
+					my ($comment,$field,$count,$template) = ($1,$2,$3,$4);
+					$self->{_validate_delimiters_templates}->{$field}->{$template} = $count unless ($comment);
+				} else {
+					# show the offending line (not the stringified handle)
+					warn "## ignored $line\n";
+				}
+			}
+			close($d);
+			warn "_validate_delimiters_templates = ",dump( $self->{_validate_delimiters_templates} );
+		} else {
+			$log->warn("delimiters path $path doesn't exist, it will be created after this run");
+		}
+	}
+
$self ? return $self : return undef;
}
my $rec_dump = shift;
$log->logdie("rec isn't HASH") unless (ref($rec) eq 'HASH');
- $log->logdie("can't find validation rules") unless (my $r = $self->{rules});
+# $log->logdie("can't find validation rules") unless (my $r = $self->{rules});
+ my $r = $self->{rules};
my $errors;
}
}
+ next unless ( $r ); # skip validation of no rules are specified
+
next if (defined( $self->{dont_validate}->{$f} ));
# track field usage
return $out;
}
+=head2 save_delimiters_templates
+
+Write whatever C<delimiters_templates> returns into the file named by the
+C<delimiters_path> constructor option, overwriting its previous content.
+
+  $validate->save_delimiters_templates;
+
+Returns nothing if C<delimiters_path> wasn't specified and true value after
+the file has been written. Dies (using C<logdie>) if the file can't be
+opened or closed.
+
+=cut
+
+sub save_delimiters_templates {
+	my $self = shift;
+
+	my $path = $self->{delimiters_path};
+
+	# nothing to do if object was created without delimiters_path
+	return unless ( $path );
+
+	my $log = $self->_get_logger;
+
+	# logdie, not fatal: we must not continue and print to unopened handle
+	open(my $d, '>', $path) || $log->logdie("can't open $path: $!");
+	print $d $self->delimiters_templates;
+	# buffered write errors (e.g. full disk) show up only at close
+	close($d) || $log->logdie("can't close $path: $!");
+
+	$log->info("new delimiters templates saved to $path");
+
+	# explicit true value, callers test it (e.g. ok() in test suite)
+	return 1;
+}
+
=head1 AUTHOR
Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
turn on extra validation of input records, see L<WebPAC::Validate>
+=item --validate-delimiters path/to/validate_delimiters_file
+
+this option is used with C<--validate> to turn on extra validation of
+delimiters. If the file doesn't exist, it will be created on first run.
+
=item --marc-generate
Generate MARC file. This will automatically be on if file contains C<marc*> directives.
my $only_filter;
my $stats = 0;
my $validate_path;
+my $validate_delimiters_path;
my $marc_generate = 1;
my $marc_lint = 1;
my $marc_dump = 0;
"debug+" => \$debug,
"stats" => \$stats,
"validate=s" => \$validate_path,
+ "validate-delimiters=s" => \$validate_delimiters_path,
"marc-generate!" => \$marc_generate,
"marc-lint!" => \$marc_lint,
"marc-dump!" => \$marc_dump,
$log->info("created merge batch file $estcmd_path");
}
-
my $validate;
$validate = new WebPAC::Validate(
path => $validate_path,
delimiters => $config->webpac('delimiters'),
-) if ($validate_path);
-
+ delimiters_path => $validate_delimiters_path,
+) if ($validate_path || $validate_delimiters_path);
my $use_indexer = $config->use_indexer;
$stats ||= $validate;
$log->info("all parallel processes finished");
}
+# save new delimiters if needed. $validate is created only when --validate
+# or --validate-delimiters is given, so guard against calling method on undef
+$validate->save_delimiters_templates if ( $validate );
+
#
# handle links or merge after indexing
#
#!/usr/bin/perl -w
use strict;
-use Test::More tests => 7;
+use Test::More tests => 10;
use Test::Exception;
use blib;
use Data::Dump qw/dump/;
use Cwd qw/abs_path/;
+use File::Temp qw/tempfile/;
BEGIN {
use_ok( 'WebPAC::Validate' );
ok(my $abs_path = abs_path($0), "abs_path");
$abs_path =~ s#/[^/]*$#/#;
+my ( $fh, $path ) = tempfile();
+
ok(my $v = new WebPAC::Validate(
path => "$abs_path/data/validate_test",
delimiters => [ ' : ', ' ; ', ' / ', ' \. ', ' = ' ],
+ delimiters_path => $path,
debug => $debug,
), "new");
diag $e;
diag dump( $v->{errors} );
+
+ok( $v->save_delimiters_templates, 'save_delimiters_templates' );
+
+ok(my $v2 = new WebPAC::Validate(
+ delimiters => [ ' : ', ' ; ', ' / ', ' \. ', ' = ' ],
+ delimiters_path => $path,
+ debug => $debug,
+), "new");
+
+is_deeply( $v->{_delimiters_templates}, $v2->{_validate_delimiters_templates}, 'save/load ok');