=head1 VERSION
-Version 0.03
+Version 0.04
=cut
-our $VERSION = '0.03';
+our $VERSION = '0.04';
=head1 SYNOPSIS
+This module will parse L<WebPAC::Normalize/lookup> directives and generate source
+to produce lookups and normalization.
+
+It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
+reading about LISP. It might be a bit over-the board, but at least it removed
+separate configuration files for lookups.
+
+This is experimental code, but it replaces all older formats which where,
+at one point in time, available in WebPAC.
+
FIXME
=head1 FUNCTIONS
$self->{config}->iterate_inputs( sub {
my ($input, $database) = @_;
- return unless $self->valid_database_input($database, $input->{name});
- $self->parse_lookups($database,$input->{name});
+ return unless $self->valid_database_input($database, _input_name($input));
+ $self->parse_lookups($database, _input_name($input));
} );
$self ? return $self : return undef;
=cut
+sub _input_name($);
+
sub read_sources {
my $self = shift;
$self->{config}->iterate_inputs( sub {
my ($input, $database) = @_;
- my $path = $input->{normalize}->{path} || return;
- my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
+ my @normalize;
+
+ if (ref($input->{normalize}) eq 'ARRAY') {
+ @normalize = @{ $input->{normalize} };
+ } else {
+ @normalize = ( $input->{normalize} );
+ }
+
+ foreach my $normalize (@normalize) {
- $log->logdie("normalization input $full doesn't exist") unless (-e $full);
+ my $path = $normalize->{path};
+ return unless($path);
+ my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
- my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
+ $log->logdie("normalization input $full doesn't exist") unless (-e $full);
- my $input_name = $input->{name} || $log->logdie("can't deduce name of input: ", dump( $input ));
+ my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
- $log->debug("$database/$input_name: adding $path");
+ my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
- $self->{valid_inputs}->{$database}->{$input_name} = {
- source => $s,
- path => $full,
- usage => 0,
- } unless defined($self->{valid_inputs}->{$database}->{$input_name});
+ $log->debug("$database/$input_name: adding $path");
- $self->{valid_inputs}->{$database}->{$input_name}->{usage}++;
+ $self->{valid_inputs}->{$database}->{$input_name} = {
+ source => $s,
+ path => $full,
+ usage => 0,
+ } unless defined($self->{valid_inputs}->{$database}->{$input_name});
- $nr++;
+ $self->{valid_inputs}->{$database}->{$input_name}->{usage}++;
+
+ $nr++;
+ }
} );
$log->debug("found $nr source files");
$parser->parse_lookups($database,$input);
+Called for each input by L</new>
+
+It will report invalid databases and inputs in error log after parsing.
+
=cut
sub parse_lookups {
my $self = shift;
my ($database, $input) = @_;
+ $input = _input_name($input);
+
my $log = $self->_get_logger();
$log->logdie("invalid database $database" ) unless $self->valid_database( $database );
return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
- $self->add_lookup_create( $e[3], $e[5], $create );
+ # save code to create this lookup
+ $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) } .= $create;
+
+
+ if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) } )) {
+ my $dep_key = $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) };
+ $log->warn("dependency of $database/$input on $e[3]/$e[5] allready recorded as $dep_key, now changed to $key") if ($dep_key ne $key);
+ }
+
+ # save this dependency
+ $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) } .= $key;
if ($#e < 10) {
$e[8]->insert_after( $e[8]->clone );
return 1;
}
-=head2 add_lookup_create
- $parse->add_lookup_create($database,$input,$source);
+=head2 lookup_create_rules
+
+ my $source = $parser->lookup_create_rules($database, $input);
=cut
-sub add_lookup_create {
+sub lookup_create_rules {
my $self = shift;
- my ($database,$input,$source) = @_;
- $self->{_lookup_create}->{$database}->{$input} .= $source;
+ my ($database,$input) = @_;
+ return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
}
-
=head2 valid_database
my $ok = $parse->valid_database('key');
my $self = shift;
my $database = shift || return;
- $database =~ s/['"]//g;
- return defined($self->{valid_inputs}->{$database});
+ return defined($self->{valid_inputs}->{ _q($database) });
}
=head2 valid_database_input
my $self = shift;
my ($database,$input) = @_;
- $database =~ s/['"]//g;
- $input =~ s/['"]//g;
+ return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
+}
+
+=head2 depends
+
+Return all databases and inputs on which specified one depends
+
+ $depends_on = $parser->depends('database','input');
+
+=cut
+
+sub depends {
+ my $self = shift;
+ my ($database,$input) = @_;
+ $self->_get_logger->debug("depends($database,$input)");
+ return unless defined( $self->{depends}->{ _q($database) }->{ _q($input) } );
+ return $self->{depends}->{ _q($database) }->{ _q($input) };
+}
- return defined($self->{valid_inputs}->{$database}->{$input});
+=head1 PRIVATE
+
+=head2 _q
+
+Strip single or double quotes around value
+
+ _q(qq/'foo'/) -> foo
+
+=cut
+
+sub _q {
+ my $v = shift || return;
+ $v =~ s/^['"]*//g;
+ $v =~ s/['"]*$//g;
+ return $v;
+}
+
+=head2 _input_name
+
+Return C<name> value if HASH or arg if scalar
+
+ _input_name($input)
+
+=cut
+
+sub _input_name {
+ my $input = shift || return;
+ if (ref($input) eq 'HASH') {
+ die "can't find 'name' value in ", dump($input) unless defined($input->{name});
+ return $input->{name};
+ } else {
+ return $input;
+ }
}
+
=head1 AUTHOR
Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
use lib './lib';
use WebPAC::Common 0.02;
+use WebPAC::Parser 0.04;
use WebPAC::Lookup 0.03;
use WebPAC::Input 0.11;
use WebPAC::Store 0.03;
# disable indexing when creating marc
$use_indexer = undef if ($marc_normalize);
+# parse normalize files and create source files for lookup and normalization
+
+my $parser = new WebPAC::Parser( config => $config );
+
my $total_rows = 0;
my $start_t = time();
die "I know only how to handle input types ", join(",", $config->webpac('inputs') ), " not '$type'!\n" unless (grep(/$type/, $config->webpac('inputs')));
- my $lookup;
- if ($input->{lookup}) {
- $lookup = new WebPAC::Lookup(
- lookup_file => $input->{lookup},
- );
- delete( $input->{lookup} );
- }
-
my $input_module = $config->webpac('inputs')->{$type};
$log->info("working on input '$input->{name}' in $input->{path} [type: $input->{type}] using $input_module",
delete($input->{modify_file});
}
+ warn "depends on: ", dump( $parser->depends($database, $input->{name}), $parser->{depends}, $parser->lookup_create_rules($database, $input->{name}), $parser->{_lookup_create} );
+
+ my $lookup;
+
my $input_db = new WebPAC::Input(
module => $input_module,
encoding => $config->webpac('webpac_encoding'),
#!/usr/bin/perl -w
use strict;
-use Test::More tests => 20;
+use Test::More tests => 26;
use Test::Exception;
use blib;
ok(my $n = $parser->{_normalize_source}, "_normalize_source");
diag "_normalize_source = ",dump($n);
+ok(my $d = $parser->{depends}, "depends");
+diag "depends = ",dump($d);
+
+my $expected_depend = {
+ foo => {
+ "foo-input1" => {
+ bar => { "bar-input" => "200-a-200-e" },
+ baz => { "baz-input" => "200-a" },
+ foo => { "foo-input1" => 11, "foo-input2" => 11 },
+ },
+ "foo-input2" => {
+ bar => { "bar-input" => "200-a-200-e" },
+ baz => { "baz-input" => "200-a" },
+ foo => { "foo-input1" => 11, "foo-input2" => 11 },
+ },
+ },
+ bar => { "bar-input" => { foo => { "foo-input1" => "245-a" } } },
+ baz => { "baz-input" => { baz => { "baz-input" => "900-x" } } },
+};
+
+
+is_deeply($d, $expected_depend, "depends correct");
+
+foreach my $db (keys %$d) {
+ foreach my $i (keys %{$d->{$db}}) {
+ is_deeply($d->{$db}->{$i}, $parser->depends($db,$i), "depend $db/$i");
+ }
+}