1 package WebPAC::Parser;
9 use Data::Dump qw/dump/;
12 use base qw/WebPAC::Common/;
16 WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
24 our $VERSION = '0.08';
28 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 to produce lookups and normalization. It will also parse other parts of
30 source to produce some of DWIM (I<Do What I Mean>) magic
31 (like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
32 rules in normalisation).
34 It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
35 reading about LISP. It might be a bit over-the board, but at least it removed
36 separate configuration files for lookups.
38 This is experimental code, but it replaces all older formats which where,
39 at one point in time, available in WebPAC.
47 Create new parser object.
49 my $parser = new WebPAC::Parser(
50 config => new WebPAC::Config(),
51 base_path => '/optional/path/to/conf',
61 my $log = $self->_get_logger();
63 $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
65 $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
69 $self ? return $self : return undef;
74 my $ok = $parse->valid_database('key');
81 my $database = shift || return;
83 return defined($self->{valid_inputs}->{ _q($database) });
86 =head2 valid_database_input
88 my $ok = $parse->valid_database('database_key','input_name');
92 sub valid_database_input {
94 my ($database,$input) = @_;
95 $input = _input_name($input);
96 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
101 Return all databases and inputs on which specified one depends
103 $depends_on = $parser->depends('database','input');
109 my ($database,$input) = @_;
110 $input = _input_name($input);
111 $self->_get_logger->debug("depends($database,$input)");
113 defined( $self->{depends}->{ _q($database) } ) &&
114 defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
116 return $self->{depends}->{ _q($database) }->{ _q($input) };
119 =head2 have_lookup_create
121 my @keys = $parser->have_lookup_create($database, $input);
125 sub have_lookup_create {
127 my ($database,$input) = @_;
128 $input = _input_name($input);
130 defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
131 defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
133 return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
137 =head2 lookup_create_rules
139 my $source = $parser->lookup_create_rules($database, $input);
143 sub lookup_create_rules {
145 my ($database,$input) = @_;
146 $input = _input_name($input);
148 defined( $self->{_lookup_create}->{ _q($database) } ) &&
149 defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
151 return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
154 =head2 normalize_rules
156 my $source = $parser->normalize_rules($database, $input);
160 sub normalize_rules {
162 my ($database,$input) = @_;
163 $input = _input_name($input);
165 defined( $self->{_normalize_source}->{ _q($database) } ) &&
166 defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
168 return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
174 my $do_marc = $parser->have_rules('marc', $database, $input);
175 my $do_index = $parser->have_rules('search', $database);
177 This function will return hash containing count of all found C<marc_*> or
178 C<search> directives. Input name is optional.
185 my $log = $self->_get_logger();
186 my $type = shift @_ || $log->logconfess("need at least type");
187 my $database = shift @_ || $log->logconfess("database is required");
188 my $input = shift @_;
190 $input = _input_name($input);
193 return unless defined( $self->{_have_rules}->{ _q($database) } );
195 my $database_rules = $self->{_have_rules}->{ _q($database ) };
197 if (defined($input)) {
200 defined( $database_rules->{ _q($input) } ) &&
201 defined( $database_rules->{ _q($input) }->{ $type } )
204 return $database_rules->{ _q($input) }->{ $type };
209 foreach my $i (keys %{ $database_rules }) {
210 next unless defined( $database_rules->{$i}->{$type} );
212 foreach my $t (keys %{ $database_rules->{ $i }->{$type} }) {
213 $usage->{ $t } += $database_rules->{ $i }->{ $t };
226 my $source_files = $parser->_read_sources;
235 my $log = $self->_get_logger();
241 $self->{config}->iterate_inputs( sub {
242 my ($input, $database) = @_;
244 $log->debug("database: $database input = ", dump($input));
246 foreach my $normalize (@{ $input->{normalize} }) {
248 my $path = $normalize->{path};
249 return unless($path);
250 my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
252 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
254 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
256 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
258 $log->debug("$database/$input_name: adding $path");
260 $self->{valid_inputs}->{$database}->{$input_name}++;
263 $self->_parse_source( $database, $input_name, $full, $s );
270 $log->debug("found $nr source files");
273 $_->() foreach (@sources);
280 $parser->_parse_source($database,$input,$path,$source);
282 Called for each normalize source (rules) in each input by L</_read_sources>
284 It will report invalid databases and inputs in error log after parsing.
290 my ($database, $input, $path, $source) = @_;
292 $input = _input_name($input);
294 my $log = $self->_get_logger();
296 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
297 $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
299 $log->logdie("no source found for database $database input $input path $path") unless ($source);
301 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
303 my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
305 $Document->prune('PPI::Token::Whitespace');
306 $Document->prune('PPI::Token::Comment');
307 #$Document->prune('PPI::Token::Operator');
309 # Find all the named subroutines
311 $self->{_lookup_errors} = ();
316 $self->_get_logger->logconfess("error without message?") unless ($msg);
317 push @{ $self->{_lookup_errors} }, $msg;
321 $Document->find( sub {
322 my ($Document,$Element) = @_;
324 $Element->isa('PPI::Token::Word') or return '';
325 $Element->content eq 'lookup' or return '';
327 $log->debug("expansion: ", $Element->snext_sibling);
329 my $args = $Element->snext_sibling;
331 my @e = $args->child(0)->elements;
332 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
334 if ($log->is_debug) {
335 my $report = "found " . scalar @e . " elements:\n";
337 foreach my $i ( 0 .. $#e ) {
338 $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
341 $log->debug($report);
344 my $key_element = $e[8]->clone;
346 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
348 $log->debug("key part: ", $key_element);
352 $key_element->find( sub {
353 my $e = $_[1] || die "no element?";
354 $e->isa('PPI::Token::Word') or return '';
355 $e->content eq 'rec' or return '';
357 my $kf = $e->snext_sibling;
359 $log->debug("key fragment = $kf");
362 $log->logdie("can't eval { $kf }: $@") if ($@);
367 my $key = join('-', @key ) || $log->logdie("no key found!");
369 $log->debug("key = $key");
371 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
372 return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
375 save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
378 $log->debug("create: $create");
380 # save code to create this lookup
381 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
382 $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
385 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
386 $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
389 # save this dependency
390 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
393 $e[8]->insert_after( $e[8]->clone );
394 $e[8]->insert_after( $e[7]->clone );
395 $e[8]->insert_after( $e[6]->clone );
399 $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
403 $log->debug(">>> ", $Element->snext_sibling);
406 my $normalize_source = $Document->serialize;
407 $log->debug("create: ", dump($self->{_lookup_create}) );
408 $log->debug("normalize: $normalize_source");
410 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
412 if ($self->{debug}) {
413 my $Dumper = PPI::Dumper->new( $Document );
417 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
419 $Document->find( sub {
420 my ($Document,$Element) = @_;
422 $Element->isa('PPI::Token::Word') or return '';
423 if ($Element->content =~ m/^(marc|search)/) {
425 $log->debug("found $what rules in $database/$input");
426 $self->{_have_rules}->{ $database }->{ $input }->{ $what }->{ $Element->content }++;
438 Strip single or double quotes around value
445 my $v = shift || return;
453 Return C<name> value if HASH or arg if scalar
460 my $input = shift || return;
461 if (ref($input) eq 'HASH') {
462 die "can't find 'name' value in ", dump($input) unless defined($input->{name});
463 return $input->{name};
472 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
474 =head1 COPYRIGHT & LICENSE
476 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
478 This program is free software; you can redistribute it and/or modify it
479 under the same terms as Perl itself.
483 1; # End of WebPAC::Parser