1 package WebPAC::Parser;
9 use Data::Dump qw/dump/;
12 use base qw/WebPAC::Common/;
16 WebPAC::Parser - parse perl normalization configuration files (rules) and mungle it
24 our $VERSION = '0.07';
28 This module will parse L<WebPAC::Normalize/lookup> directives and generate source
29 to produce lookups and normalization. It will also parse other parts of
30 source to produce some of DWIM (I<Do What I Mean>) magic
31 (like producing MARC oputput using L<WebPAC::Output::MARC> if there are C<marc_*>
32 rules in normalisation).
34 It's written using L<PPI>, pure-perl parser for perl and heavily influenced by
35 reading about LISP. It might be a bit over-the board, but at least it removed
36 separate configuration files for lookups.
38 This is experimental code, but it replaces all older formats which where,
39 at one point in time, available in WebPAC.
47 Create new parser object.
49 my $parser = new WebPAC::Parser(
50 config => new WebPAC::Config(),
51 base_path => '/optional/path/to/conf',
61 my $log = $self->_get_logger();
63 $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
65 $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
69 $self ? return $self : return undef;
74 my $ok = $parse->valid_database('key');
81 my $database = shift || return;
83 return defined($self->{valid_inputs}->{ _q($database) });
86 =head2 valid_database_input
88 my $ok = $parse->valid_database('database_key','input_name');
92 sub valid_database_input {
94 my ($database,$input) = @_;
95 $input = _input_name($input);
96 return defined($self->{valid_inputs}->{ _q($database) }->{ _q($input) });
101 Return all databases and inputs on which specified one depends
103 $depends_on = $parser->depends('database','input');
109 my ($database,$input) = @_;
110 $input = _input_name($input);
111 $self->_get_logger->debug("depends($database,$input)");
113 defined( $self->{depends}->{ _q($database) } ) &&
114 defined( $self->{depends}->{ _q($database) }->{ _q($input) } )
116 return $self->{depends}->{ _q($database) }->{ _q($input) };
119 =head2 have_lookup_create
121 my @keys = $parser->have_lookup_create($database, $input);
125 sub have_lookup_create {
127 my ($database,$input) = @_;
128 $input = _input_name($input);
130 defined( $self->{_lookup_create_key}->{ _q($database) } ) &&
131 defined( $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } )
133 return keys %{ $self->{_lookup_create_key}->{ _q($database) }->{ _q($input) } };
137 =head2 lookup_create_rules
139 my $source = $parser->lookup_create_rules($database, $input);
143 sub lookup_create_rules {
145 my ($database,$input) = @_;
146 $input = _input_name($input);
148 defined( $self->{_lookup_create}->{ _q($database) } ) &&
149 defined( $self->{_lookup_create}->{ _q($database) }->{ _q($input) } )
151 return $self->{_lookup_create}->{ _q($database) }->{ _q($input) };
154 =head2 normalize_rules
156 my $source = $parser->normalize_rules($database, $input);
160 sub normalize_rules {
162 my ($database,$input) = @_;
163 $input = _input_name($input);
165 defined( $self->{_normalize_source}->{ _q($database) } ) &&
166 defined( $self->{_normalize_source}->{ _q($database) }->{ _q($input) } )
168 return $self->{_normalize_source}->{ _q($database) }->{ _q($input) };
174 my $do_marc = $parser->generate_marc($database, $input);
176 This function will return hash containing count of all found C<marc_*> directives.
182 my ($database,$input) = @_;
183 $input = _input_name($input);
185 defined( $self->{_generate_marc}->{ _q($database) } ) &&
186 defined( $self->{_generate_marc}->{ _q($database) }->{ _q($input) } )
188 return $self->{_generate_marc}->{ _q($database) }->{ _q($input) };
196 my $source_files = $parser->_read_sources;
205 my $log = $self->_get_logger();
211 $self->{config}->iterate_inputs( sub {
212 my ($input, $database) = @_;
214 $log->debug("database: $database input = ", dump($input));
216 foreach my $normalize (@{ $input->{normalize} }) {
218 my $path = $normalize->{path};
219 return unless($path);
220 my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
222 $log->logdie("normalization input $full doesn't exist") unless (-e $full);
224 my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
226 my $input_name = _input_name($input) || $log->logdie("can't deduce name of input: ", dump( $input ));
228 $log->debug("$database/$input_name: adding $path");
230 $self->{valid_inputs}->{$database}->{$input_name}++;
233 $self->_parse_source( $database, $input_name, $full, $s );
240 $log->debug("found $nr source files");
243 $_->() foreach (@sources);
250 $parser->_parse_source($database,$input,$path,$source);
252 Called for each normalize source (rules) in each input by L</_read_sources>
254 It will report invalid databases and inputs in error log after parsing.
260 my ($database, $input, $path, $source) = @_;
262 $input = _input_name($input);
264 my $log = $self->_get_logger();
266 $log->logdie("invalid database $database" ) unless $self->valid_database( $database );
267 $log->logdie("invalid input $input of database $database", ) unless $self->valid_database_input( $database, $input );
269 $log->logdie("no source found for database $database input $input path $path") unless ($source);
271 $log->info("parsing lookups for $database/$input from $path (",length($source)," bytes)");
273 my $Document = PPI::Document->new( \$source ) || $log->logdie("can't parse source:\n", $self->{source});
275 $Document->prune('PPI::Token::Whitespace');
276 $Document->prune('PPI::Token::Comment');
277 #$Document->prune('PPI::Token::Operator');
279 # Find all the named subroutines
281 $self->{_lookup_errors} = ();
286 $self->_get_logger->logconfess("error without message?") unless ($msg);
287 push @{ $self->{_lookup_errors} }, $msg;
291 $Document->find( sub {
292 my ($Document,$Element) = @_;
294 $Element->isa('PPI::Token::Word') or return '';
295 $Element->content eq 'lookup' or return '';
297 $log->debug("expansion: ", $Element->snext_sibling);
299 my $args = $Element->snext_sibling;
301 my @e = $args->child(0)->elements;
302 $log->logdie("hum, expect at least 8 elements, got ", scalar @e, " in $args") if ($#e < 8);
304 if ($log->is_debug) {
305 my $report = "found " . scalar @e . " elements:\n";
307 foreach my $i ( 0 .. $#e ) {
308 $report .= sprintf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
311 $log->debug($report);
314 my $key_element = $e[8]->clone;
316 $log->logdie("key element must be PPI::Structure::Block") unless $key_element->isa('PPI::Structure::Block');
318 $log->debug("key part: ", $key_element);
322 $key_element->find( sub {
323 my $e = $_[1] || die "no element?";
324 $e->isa('PPI::Token::Word') or return '';
325 $e->content eq 'rec' or return '';
327 my $kf = $e->snext_sibling;
329 $log->debug("key fragment = $kf");
332 $log->logdie("can't eval { $kf }: $@") if ($@);
337 my $key = join('-', @key ) || $log->logdie("no key found!");
339 $log->debug("key = $key");
341 return $self->_lookup_error("invalid database $e[3] in $path" ) unless $self->valid_database( $e[3] );
342 return $self->_lookup_error("invalid input $e[5] of database $e[3] in $path", ) unless $self->valid_database_input( $e[3], $e[5] );
345 save_into_lookup($e[3],$e[5],'$key', $e[7] $e[8] );
348 $log->debug("create: $create");
350 # save code to create this lookup
351 $self->{_lookup_create}->{ _q($e[3]) }->{ _q($e[5]) } .= $create;
352 $self->{_lookup_create_key}->{ _q($e[3]) }->{ _q($e[5]) }->{ _q($key) }++;
355 if (defined( $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{ $key } )) {
356 $log->warn("dependency of $database/$input on $e[3]/$e[5] with $key allready recorded");
359 # save this dependency
360 $self->{depends}->{ $database }->{ $input }->{ _q($e[3]) }->{ _q($e[5]) }->{$key}++;
363 $e[8]->insert_after( $e[8]->clone );
364 $e[8]->insert_after( $e[7]->clone );
365 $e[8]->insert_after( $e[6]->clone );
369 $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
373 $log->debug(">>> ", $Element->snext_sibling);
376 my $normalize_source = $Document->serialize;
377 $log->debug("create: ", dump($self->{_lookup_create}) );
378 $log->debug("normalize: $normalize_source");
380 $self->{_normalize_source}->{$database}->{$input} .= $normalize_source;
382 if ($self->{debug}) {
383 my $Dumper = PPI::Dumper->new( $Document );
387 $log->error("Parser errors:\n", join("\n",@{ $self->{_lookup_errors} }) ) if ($self->{_lookup_errors});
389 $Document->find( sub {
390 my ($Document,$Element) = @_;
392 $Element->isa('PPI::Token::Word') or return '';
393 $Element->content =~ m/^marc/ or return '';
395 $log->debug("found marc output generation for $database/$input");
396 $self->{_generate_marc}->{ $database }->{ $input }->{ $Element->content }++;
405 Strip single or double quotes around value
412 my $v = shift || return;
420 Return C<name> value if HASH or arg if scalar
427 my $input = shift || return;
428 if (ref($input) eq 'HASH') {
429 die "can't find 'name' value in ", dump($input) unless defined($input->{name});
430 return $input->{name};
439 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
441 =head1 COPYRIGHT & LICENSE
443 Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
445 This program is free software; you can redistribute it and/or modify it
446 under the same terms as Perl itself.
450 1; # End of WebPAC::Parser