1 package WebPAC::Parser;
9 use Data::Dump qw/dump/;
12 use base qw/WebPAC::Common/;
16 WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
24 our $VERSION = '0.08';
28 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 to produce lookups and normalization. It will also parse other parts of
30 source to produce some of DWIM (I<Do What I Mean>) magic
31 (like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
32 rules in normalisation).
34 It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
35 reading about LISP. It might be a bit over-the board, but at least it removed
36 separate configuration files for lookups.
38 This is experimental code, but it replaces all older formats which where,
39 at one point in time, available in WebPAC.
47 Create new parser object.
49 my $parser = new WebPAC::Parser(
50 config => new WebPAC::Config(),
51 base_path => '/optional/path/to/conf',
52 only_database => $only
62 my $log = $self->_get_logger();
64 $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
66 $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
70 $self ? return $self : return undef;
75 my $ok = $parse->valid_database('key');
82 my $database = shift || return;
84 return defined($self->{valid_inputs}->{ _q($database) });
87 =head2 valid_database_input
89 my $ok = $parse->valid_database('database_key','input_name');
93 sub valid_database_input {
95 my ($database,$input) = @_;
96 $input = _input_name($input);
97 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
102 Return all databases and inputs on which specified one depends
104 $depends_on = $parser->depends('database','input');
110 my ($database,$input) = @_;
111 $input = _input_name($input);
112 $self->_get_logger->debug("depends($database,$input)");
114 defined( $self->{depends}->{ _q($database) } ) &&
115 defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
117 return $self->{depends}->{ _q($database) }->{ _q($input) };
120 =head2 have_lookup_create
122 my @keys = $parser->have_lookup_create($database, $input);
126 sub have_lookup_create {
128 my ($database,$input) = @_;
129 $input = _input_name($input);
131 defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
132 defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
134 return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
138 =head2 lookup_create_rules
140 my $source = $parser->lookup_create_rules($database, $input);
144 sub lookup_create_rules {
146 my ($database,$input) = @_;
147 $input = _input_name($input);
149 defined( $self->{_lookup_create}->{ _q($database) } ) &&
150 defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
152 return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
155 =head2 normalize_rules
157 my $source = $parser->normalize_rules($database, $input);
161 sub normalize_rules {
163 my ($database,$input) = @_;
164 $input = _input_name($input);
166 defined( $self->{_normalize_source}->{ _q($database) } ) &&
167 defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
169 return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
175 my $do_marc = $parser->have_rules('marc', $database, $input);
176 my $do_index = $parser->have_rules('search', $database);
178 This function will return hash containing count of all found C<marc_*> or
179 C<search> directives. Input name is optional.
186 my $log = $self->_get_logger();
187 my $type = shift @_ || $log->logconfess("need at least type");
188 my $database = shift @_ || $log->logconfess("database is required");
189 my $input = shift @_;
191 $input = _input_name($input);
194 return unless defined( $self->{_have_rules}->{ _q($database) } );
196 my $database_rules = $self->{_have_rules}->{ _q($database ) };
198 if (defined($input)) {
201 defined( $database_rules->{ _q($input) } ) &&
202 defined( $database_rules->{ _q($input) }->{ $type } )
205 return $database_rules->{ _q($input) }->{ $type };
210 foreach my $i (keys %{ $database_rules }) {
211 next unless defined( $database_rules->{$i}->{$type} );
213 foreach my $t (keys %{ $database_rules->{ $i }->{$type} }) {
214 $usage->{ $t } += $database_rules->{ $i }->{ $t };
227 my $source_files = $parser->_read_sources;
236 my $log = $self->_get_logger();
242 my $lookup_src_cache;
244 my $only_database = $self->{only_database};
245 my $only_input = $self->{only_input};
247 $self->{config}->iterate_inputs( sub {
248 my ($input, $database) = @_;
250 return if ( $only_database && $database !~ m/$only_database/i );
251 return if ( $only_input && $input->{name} !~ m/$only_input/i );
253 $log->debug("database: $database input = ", dump($input));
255 foreach my $normalize (@{ $input->{normalize} }) {
257 my $path = $normalize->{path};
258 return unless($path);
259 my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
261 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
263 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
265 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
267 $log->debug("$database/$input_name: adding $path");
269 $self->{valid_inputs}->{$database}->{$input_name}++;
272 #warn "### $database $input_name, $full ###\n";
273 $self->_parse_source( $database, $input_name, $full, $s );
280 $log->debug("found $nr source files");
283 $_->() foreach (@sources);
290 $parser->_parse_source($database,$input,$path,$source);
292 Called for each normalize source (rules) in each input by L</_read_sources>
294 It will report invalid databases and inputs in error log after parsing.
300 my ($database, $input, $path, $source) = @_;
302 $input = _input_name($input);
304 my $log = $self->_get_logger();
306 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
307 $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
309 $log->logdie("no source found for database $database input $input path $path") unless ($source);
311 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
313 my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
315 $Document->prune('PPI::Token::Whitespace');
316 $Document->prune('PPI::Token::Comment');
317 #$Document->prune('PPI::Token::Operator');
319 # Find all the named subroutines
321 $self->{_lookup_errors} = ();
326 $self->_get_logger->logconfess("error without message?") unless ($msg);
327 push @{ $self->{_lookup_errors} }, $msg;
331 $Document->find( sub {
332 my ($Document,$Element) = @_;
334 $Element->isa('PPI::Token::Word') or return '';
336 if ( $Element->content =~ m{^(sub|if)$} ) {
337 # repair demage done by prune of whitespace
338 $Element->insert_after( PPI::Token::Whitespace->new(' ') );
340 } elsif ( $Element->content eq 'my' ) {
341 $Element->insert_before( PPI::Token::Whitespace->new(' ') );
344 $Element->content eq 'lookup' or return '';
346 $log->debug("expansion: ", $Element->snext_sibling);
348 my $args = $Element->snext_sibling;
350 my @e = $args->child(0)->elements;
351 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
353 if ($log->is_debug) {
354 my $report = "found " . scalar @e . " elements:\n";
356 foreach my $i ( 0 .. $#e ) {
357 $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
360 $log->debug($report);
363 my $key_element = $e[8]->clone;
365 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
367 $log->debug("key part: ", $key_element);
371 $key_element->find( sub {
372 my $e = $_[1] || die "no element?";
373 $e->isa('PPI::Token::Word') or return '';
374 $e->content eq 'rec' or return '';
376 my $kf = $e->snext_sibling;
378 $log->debug("key fragment = $kf");
381 $log->logdie("can't eval { $kf }: $@") if ($@);
386 my $key = join('-', @key ) || $log->logdie("no key found!");
388 $log->debug("key = $key");
390 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
391 return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
394 save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
397 $log->debug("create: $create");
399 # save code to create this lookup
400 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
401 $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
404 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
405 $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
408 # save this dependency
409 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
412 $e[8]->insert_after( $e[8]->clone );
413 $e[8]->insert_after( $e[7]->clone );
414 $e[8]->insert_after( $e[6]->clone );
418 $e[8]->insert_before( PPI::Token::Quote::Single->new( "'$key'" ) );
422 $log->debug(">>> ", $Element->snext_sibling);
425 my $normalize_source = $Document->serialize;
426 $log->debug("create: ", dump($self->{_lookup_create}) );
427 $log->debug("normalize: $normalize_source");
429 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
431 if ($self->{debug}) {
432 my $Dumper = PPI::Dumper->new( $Document );
436 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
438 $Document->find( sub {
439 my ($Document,$Element) = @_;
441 $Element->isa('PPI::Token::Word') or return '';
442 if ($Element->content =~ m/^(marc|search)/) {
444 $log->debug("found $what rules in $database/$input");
445 $self->{_have_rules}->{ $database }->{ $input }->{ $what }->{ $Element->content }++;
457 Strip single or double quotes around value
464 my $v = shift || return;
472 Return C<name> value if HASH or arg if scalar
479 my $input = shift || return;
480 if (ref($input) eq 'HASH') {
481 die "can't find 'name' value in ", dump($input) unless defined($input->{name});
482 return $input->{name};
491 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
493 =head1 COPYRIGHT & LICENSE
495 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
497 This program is free software; you can redistribute it and/or modify it
498 under the same terms as Perl itself.
502 1; # End of WebPAC::Parser