use Data::Dump qw/dump/;
use File::Slurp;
-use base qw/WebPAC::Common WebPAC::Normalize/;
+use base qw/WebPAC::Common/;
=head1 NAME
-WebPAC::Parser - parse perl normalization configuration files and mungle it
+WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
=head1 VERSION
-Version 0.03
+Version 0.08
=cut
-our $VERSION = '0.03';
+our $VERSION = '0.08';
=head1 SYNOPSIS
+This module will parse L<WebPAC::Normalize/lookup> directives and generate source
+to produce lookups and normalization. It will also parse other parts of
+source to produce some of DWIM (I<Do What I Mean>) magic
+(like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
+rules in normalisation).
+
+It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
+reading about LISP. It might be a bit over-the board, but at least it removed
+separate configuration files for lookups.
+
+This is experimental code, but it replaces all older formats which where,
+at one point in time, available in WebPAC.
+
FIXME
=head1 FUNCTIONS
$log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
- $self->read_sources;
-
- $self->{config}->iterate_inputs( sub {
- my ($input, $database) = @_;
- return unless $self->valid_database_input($database, $input->{name});
- $self->parse_lookups($database,$input->{name});
- } );
+ $self->_read_sources;
$self ? return $self : return undef;
}
-=head2 read_sources
+=head2 valid_database
+
+ my $ok = $parse->valid_database('key');
+
+=cut
+
+sub valid_database {
+ my $self = shift;
+
+ my $database = shift || return;
+
+ return defined($self->{valid_inputs}->{ _q($database) });
+}
+
+=head2 valid_database_input
+
+ my $ok = $parse->valid_database('database_key','input_name');
+
+=cut
+
+sub valid_database_input {
+ my $self = shift;
+ my ($database,$input) = @_;
+ $input = _input_name($input);
+ return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
+}
+
+=head2 depends
+
+Return all databases and inputs on which specified one depends
+
+ $depends_on = $parser->depends('database','input');
+
+=cut
+
+sub depends {
+ my $self = shift;
+ my ($database,$input) = @_;
+ $input = _input_name($input);
+ $self->_get_logger->debug("depends($database,$input)");
+ return unless (
+ defined( $self->{depends}->{ _q($database) } ) &&
+ defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
+ );
+ return $self->{depends}->{ _q($database) }->{ _q($input) };
+}
+
+=head2 have_lookup_create
+
+ my @keys = $parser->have_lookup_create($database, $input);
+
+=cut
+
+sub have_lookup_create {
+ my $self = shift;
+ my ($database,$input) = @_;
+ $input = _input_name($input);
+ return unless (
+ defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
+ defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
+ );
+ return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
+}
+
+
+=head2 lookup_create_rules
+
+ my $source = $parser->lookup_create_rules($database, $input);
+
+=cut
+
+sub lookup_create_rules {
+ my $self = shift;
+ my ($database,$input) = @_;
+ $input = _input_name($input);
+ return unless (
+ defined( $self->{_lookup_create}->{ _q($database) } ) &&
+ defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
+ );
+ return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
+}
+
+=head2 normalize_rules
+
+ my $source = $parser->normalize_rules($database, $input);
+
+=cut
+
+sub normalize_rules {
+ my $self = shift;
+ my ($database,$input) = @_;
+ $input = _input_name($input);
+ return unless (
+ defined( $self->{_normalize_source}->{ _q($database) } ) &&
+ defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
+ );
+ return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
+}
+
+
+=head2 have_rules
+
+ my $do_marc = $parser->have_rules('marc', $database, $input);
+ my $do_index = $parser->have_rules('search', $database);
+
+This function will return hash containing count of all found C<marc_*> or
+C<search> directives. Input name is optional.
+
+=cut
+
+sub have_rules {
+ my $self = shift;
+
+ my $log = $self->_get_logger();
+ my $type = shift @_ || $log->logconfess("need at least type");
+ my $database = shift @_ || $log->logconfess("database is required");
+ my $input = shift @_;
+
+ $input = _input_name($input);
+
+
+ return unless defined( $self->{_have_rules}->{ _q($database) } );
+
+ my $database_rules = $self->{_have_rules}->{ _q($database ) };
+
+ if (defined($input)) {
+
+ return unless (
+ defined( $database_rules->{ _q($input) } ) &&
+ defined( $database_rules->{ _q($input) }->{ $type } )
+ );
+
+ return $database_rules->{ _q($input) }->{ $type };
+ }
+
+ my $usage;
+
+ foreach my $i (keys %{ $database_rules }) {
+ next unless defined( $database_rules->{$i}->{$type} );
+
+ foreach my $t (keys %{ $database_rules->{ $i }->{$type} }) {
+ $usage->{ $t } += $database_rules->{ $i }->{ $t };
+ }
+ }
+
+ return $usage;
+
+}
+
+
+=head1 PRIVATE
- my $source_files = $parser->read_sources;
+=head2 _read_sources
+
+ my $source_files = $parser->_read_sources;
Called by L</new>.
=cut
-sub read_sources {
+sub _read_sources {
my $self = shift;
my $log = $self->_get_logger();
my $nr = 0;
+ my @sources;
+
+ my $lookup_src_cache;
+
$self->{config}->iterate_inputs( sub {
my ($input, $database) = @_;
- my $path = $input->{normalize}->{path} || return;
- my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
+ $log->debug("database: $database input = ", dump($input));
+
+ foreach my $normalize (@{ $input->{normalize} }) {
- $log->logdie("normalization input $full doesn't exist") unless (-e $full);
+ my $path = $normalize->{path};
+ return unless($path);
+ my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
- my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
+ $log->logdie("normalization input $full doesn't exist") unless (-e $full);
- my $input_name = $input->{name} || $log->logdie("can't deduce name of input: ", dump( $input ));
+ my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
- $log->debug("$database/$input_name: adding $path");
+ my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
- $self->{valid_inputs}->{$database}->{$input_name} = {
- source => $s,
- path => $full,
- usage => 0,
- } unless defined($self->{valid_inputs}->{$database}->{$input_name});
+ $log->debug("$database/$input_name: adding $path");
- $self->{valid_inputs}->{$database}->{$input_name}->{usage}++;
+ $self->{valid_inputs}->{$database}->{$input_name}++;
- $nr++;
+ push @sources, sub {
+ #warn "### $database $input_name, $full ###\n";
+ $self->_parse_source( $database, $input_name, $full, $s );
+ };
+
+ $nr++;
+ }
} );
$log->debug("found $nr source files");
+ # parse all sources
+ $_->() foreach (@sources);
+
return $nr;
}
-=head2 parse_lookups
+=head2 _parse_source
- $parser->parse_lookups($database,$input);
+ $parser->_parse_source($database,$input,$path,$source);
+
+Called for each normalize source (rules) in each input by L</_read_sources>
+
+It will report invalid databases and inputs in error log after parsing.
=cut
-sub parse_lookups {
+sub _parse_source {
my $self = shift;
- my ($database, $input) = @_;
+ my ($database, $input, $path, $source) = @_;
+
+ $input = _input_name($input);
my $log = $self->_get_logger();
$log->logdie("invalid database $database" ) unless $self->valid_database( $database );
$log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
- my $source = $self->{valid_inputs}->{$database}->{$input}->{source};
- my $path = $self->{valid_inputs}->{$database}->{$input}->{path};
-
$log->logdie("no source found for database $database input $input path $path") unless ($source);
- $log->info("parsing lookups for $database/$input from $path");
+ $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
$Document->prune('PPI::Token::Whitespace');
+ $Document->prune('PPI::Token::Comment');
#$Document->prune('PPI::Token::Operator');
# Find all the named subroutines
$log->debug("key = $key");
- my $create = '
- $coderef = ' . $e[7] . $e[8] . ';
- foreach my $v ($coderef->()) {
- next unless (defined($v) && $v ne \'\');
- push @{ $lookup->{\'' . $key . '\'}->{$v} }, $mfn;
- }
- ';
+ return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
+ return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
+
+ my $create = qq{
+ save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
+ };
$log->debug("create: $create");
- return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
- return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
+ # save code to create this lookup
+ $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
+ $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
+
+
+ if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
+ $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
+ }
- $self->add_lookup_create( $e[3], $e[5], $create );
+ # save this dependency
+ $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
if ($#e < 10) {
$e[8]->insert_after( $e[8]->clone );
$log->debug("create: ", dump($self->{_lookup_create}) );
$log->debug("normalize: $normalize_source");
- $self->{_normalize_source}->{$database}->{$input} = $normalize_source;
+ $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
if ($self->{debug}) {
my $Dumper = PPI::Dumper->new( $Document );
$log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
- return 1;
-}
-
-=head2 add_lookup_create
-
- $parse->add_lookup_create($database,$input,$source);
+ $Document->find( sub {
+ my ($Document,$Element) = @_;
-=cut
+ $Element->isa('PPI::Token::Word') or return '';
+ if ($Element->content =~ m/^(marc|search)/) {
+ my $what = $1;
+ $log->debug("found $what rules in $database/$input");
+ $self->{_have_rules}->{ $database }->{ $input }->{ $what }->{ $Element->content }++;
+ } else {
+ return '';
+ }
+ });
-sub add_lookup_create {
- my $self = shift;
- my ($database,$input,$source) = @_;
- $self->{_lookup_create}->{$database}->{$input} .= $source;
+ return 1;
}
-=head2 valid_database
+=head2 _q
- my $ok = $parse->valid_database('key');
-
-=cut
+Strip single or double quotes around value
-sub valid_database {
- my $self = shift;
+ _q(qq/'foo'/) -> foo
- my $database = shift || return;
- $database =~ s/['"]//g;
+=cut
- return defined($self->{valid_inputs}->{$database});
+sub _q {
+ my $v = shift || return;
+ $v =~ s/^['"]*//g;
+ $v =~ s/['"]*$//g;
+ return $v;
}
-=head2 valid_database_input
+=head2 _input_name
- my $ok = $parse->valid_database('database_key','input_name');
+Return C<name> value if HASH or arg if scalar
-=cut
-
-sub valid_database_input {
- my $self = shift;
+ _input_name($input)
- my ($database,$input) = @_;
- $database =~ s/['"]//g;
- $input =~ s/['"]//g;
+=cut
- return defined($self->{valid_inputs}->{$database}->{$input});
+sub _input_name {
+ my $input = shift || return;
+ if (ref($input) eq 'HASH') {
+ die "can't find 'name' value in ", dump($input) unless defined($input->{name});
+ return $input->{name};
+ } else {
+ return $input;
+ }
}
+
=head1 AUTHOR
Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>