From b827a00efe5ce989a0dae18424049fef31d9de9e Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Sun, 24 Sep 2006 17:25:04 +0000 Subject: [PATCH] r966@llin: dpavlin | 2006-09-24 19:22:45 +0200 first cut at WebPAC::Parser which will allow lookups to be specified in normalization file git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@686 07558da8-63fa-0310-ba24-9fe276d99e06 --- conf/log.conf | 4 + conf/lookup/ffkk-arti.pl | 18 ++-- lib/WebPAC/Parser.pm | 188 +++++++++++++++++++++++++++++++++++++++ t/2-parse.t | 36 ++++++++ t/conf/normalize/bar.pl | 7 ++ t/conf/normalize/baz.pl | 7 ++ t/conf/normalize/foo.pl | 19 ++++ t/conf/test.yml | 25 ++++-- 8 files changed, 290 insertions(+), 14 deletions(-) create mode 100644 lib/WebPAC/Parser.pm create mode 100755 t/2-parse.t create mode 100644 t/conf/normalize/bar.pl create mode 100644 t/conf/normalize/baz.pl create mode 100644 t/conf/normalize/foo.pl diff --git a/conf/log.conf b/conf/log.conf index e4d9dff..f5e39ea 100644 --- a/conf/log.conf +++ b/conf/log.conf @@ -19,6 +19,10 @@ log4perl.rootLogger=INFO, LOG, SCREEN #log4perl.logger.WebPAC.Lookup=DEBUG #log4perl.logger.WebPAC.Lookup.lookup=DEBUG +log4perl.logger.WebPAC.Config=DEBUG + +log4perl.logger.WebPAC.Parser=DEBUG + #log4perl.logger.WebPAC.Validate=DEBUG #log4perl.logger.WebPAC.Validate.report=DEBUG diff --git a/conf/lookup/ffkk-arti.pl b/conf/lookup/ffkk-arti.pl index 72ac8db..fe219c3 100644 --- a/conf/lookup/ffkk-arti.pl +++ b/conf/lookup/ffkk-arti.pl @@ -3,17 +3,25 @@ marc('777','v', lookup( sub { 'ffkk/perl' . rec('000') }, 'ffkk','peri', - sub { rec('11') }, - sub { rec('11') }, + sub { rec('11') } ) || lookup( sub { 'ffkk/perl' . rec('000') }, 'ffkk','peri', - sub { rec('200','a') . rec('200','e') }, + sub { rec(200,'a') . rec('200','e') }, ) || lookup( - sub { 'ffkk/perl' . rec('000') }, + sub { "ffkk/perl" . rec('000') }, 'ffkk','peri', - sub { rec('200','a') }, + sub { rec(200,'a') }, + sub { rec(900,'x') }, + ) +); + +marc('900','x', + lookup( + sub { rec('200','a') . ' ' . rec('200','b') }, + 'ffps','libri', + sub { rec('245','a') }, ) ); diff --git a/lib/WebPAC/Parser.pm b/lib/WebPAC/Parser.pm new file mode 100644 index 0000000..0a37849 --- /dev/null +++ b/lib/WebPAC/Parser.pm @@ -0,0 +1,188 @@ +package WebPAC::Parser; + +use warnings; +use strict; + +use base qw/WebPAC::Common WebPAC::Normalize/; + +use PPI; +use PPI::Dumper; +use Data::Dump qw/dump/; +use File::Slurp; + + +=head1 NAME + +WebPAC::Parser - parse perl normalization configuration files and mungle it + +=head1 VERSION + +Version 0.01 + +=cut + +our $VERSION = '0.01'; + +=head1 SYNOPSIS + +FIXME + +=head1 FUNCTIONS + +=head2 new + +Create new parser object. + + my $parser = new WebPAC::Parser( + config => new WebPAC::Config(), + base_path => '/optional/path/to/conf', + ); + +=cut + +sub new { + my $class = shift; + my $self = {@_}; + bless($self, $class); + + my $log = $self->_get_logger(); + + $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config')); + + $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs')); + + my $source; + + $self->{config}->iterate_inputs( sub { + my $input = shift; + my $path = $input->{normalize}->{path} || return; + my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path; + $log->logdie("normalization input $full doesn't exist") unless (-e $full); + my $s = read_file( $full ) || $log->logdie("can't read $full: $!"); + $log->debug("adding $path to parser [",length($s)," bytes]"); + $source .= $s; + } ); + + $log->debug("collected ", length($source), " bytes of source"); + + $self->{source} = $source; + + $self ? return $self : return undef; +} + +=head2 parse + +=cut + +sub parse { + my $self = shift; + + my $log = $self->_get_logger(); + + $log->logdie('no source found in object') unless ($self->{source}); + + my $Document = PPI::Document->new( \$self->{source} ) || $log->logdie("can't parse source:\n", $self->{source}); + + $Document->prune('PPI::Token::Whitespace'); + #$Document->prune('PPI::Token::Operator'); + + # Find all the named subroutines + + my $eval_create; + + $Document->find( sub { + my ($Document,$Element) = @_; + + $Element->isa('PPI::Token::Word') or return ''; + $Element->content eq 'lookup' or return ''; + + print "#*** expansion: ", $Element->snext_sibling,$/; + + my $args = $Element->snext_sibling; + + my @e = $args->child(0)->elements; + print "hum, expect at least 8 elements, got ", scalar @e, " in $args\n" if ($#e < 8); + + print "# found ", scalar @e, " elements:\n"; + + foreach my $i ( 0 .. $#e ) { + printf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class ); + } + + my $key_element = $e[8]->clone; + + die "key element must be PPI::Structure::Block" unless $key_element->isa('PPI::Structure::Block'); + + print "## key part: ", $key_element, $/; + + my @key; + + $key_element->find( sub { + my $e = $_[1] || die "no element?"; + $e->isa('PPI::Token::Word') or return ''; + $e->content eq 'rec' or return ''; + + my $kf = $e->snext_sibling; + + print "## key fragment = $kf\n"; + + push @key, eval $kf; + print "ERROR: can't eval { $kf }: $@" if ($@); + + return 1; + }); + + my $key = join('-', @key ) || print "ERROR: no key found!"; + + print "key = $key\n"; + + my $create = ' + $coderef = ' . $e[7] . $e[8] . '; + foreach my $v ($coderef->()) { + next unless (defined($v) && $v ne \'\'); + push @{ $lookup->{\'' . $key . '\'}->{$v} }, $mfn; + } + '; + + print "create: $create\n"; + + $create =~ s/\s+/ /gs; + $eval_create->{ $e[3] }->{ $e[5] } .= $create; + + if ($#e < 10) { + $e[8]->insert_after( $e[8]->clone ); + $e[8]->insert_after( $e[7]->clone ); + $e[8]->insert_after( $e[6]->clone ); + } + + $e[7]->remove; + $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) ); + $e[8]->remove; + + + print "# >>> ", $Element->snext_sibling, "\n"; + }); + + print "-----\ncreate: ", dump($eval_create), "\n"; + print "-----\nlookup: ", $Document->serialize, "\n"; + print "-----\n"; + + my $Dumper = PPI::Dumper->new( $Document ); + $Dumper->print; + +} + +=head1 AUTHOR + +Dobrica Pavlinusic, C<< >> + +=head1 COPYRIGHT & LICENSE + +Copyright 2006 Dobrica Pavlinusic, All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + +=cut + +1; # End of WebPAC::Parser diff --git a/t/2-parse.t b/t/2-parse.t new file mode 100755 index 0000000..4a081c7 --- /dev/null +++ b/t/2-parse.t @@ -0,0 +1,36 @@ +#!/usr/bin/perl -w + +use strict; +use Test::More tests => 8; +use Test::Exception; +use blib; + +use Data::Dump qw/dump/; +use Cwd qw/abs_path/; +use YAML qw/LoadFile/; + +BEGIN { +use_ok( 'WebPAC::Parser' ); +use_ok( 'WebPAC::Config' ); +} + +my $debug = shift @ARGV; + +ok(my $abs_path = abs_path($0), "abs_path"); +$abs_path =~ s#/[^/]*$#/#; + +my $config_path = "$abs_path/conf/test.yml"; + +ok(-e $config_path, "$config_path exists"); + +throws_ok { new WebPAC::Parser( no_log => 1 ) } qr/WebPAC::Config/, "new without config"; + +ok( + my $parser = new WebPAC::Parser( + config => new WebPAC::Config( path => $config_path ), + base_path => $abs_path, +), "new"); + +ok($parser->{source}, "source exist"); + +ok($parser->parse, 'parse'); diff --git a/t/conf/normalize/bar.pl b/t/conf/normalize/bar.pl new file mode 100644 index 0000000..70b3476 --- /dev/null +++ b/t/conf/normalize/bar.pl @@ -0,0 +1,7 @@ +marc('900','x', + lookup( + sub { rec('200','a') . ' ' . rec('200','b') }, + 'foo','foo-input1', + sub { rec('245','a') }, + ) +); diff --git a/t/conf/normalize/baz.pl b/t/conf/normalize/baz.pl new file mode 100644 index 0000000..e468fd4 --- /dev/null +++ b/t/conf/normalize/baz.pl @@ -0,0 +1,7 @@ +marc('900','x', + lookup( + sub { rec('200','a') . ' ' . rec('200','b') }, + 'foo','foo-input2', + sub { rec('245','a') }, + ) +); diff --git a/t/conf/normalize/foo.pl b/t/conf/normalize/foo.pl new file mode 100644 index 0000000..0bec8d3 --- /dev/null +++ b/t/conf/normalize/foo.pl @@ -0,0 +1,19 @@ +marc('777','v', + lookup( + sub { '1st:' . rec('000') }, + 'foo','peri', + sub { rec('11') } + ) || + lookup( + sub { '2nd:' . rec('000') }, + 'bar','peri', + sub { rec(200,'a') . rec('200','e') }, + ) || + lookup( + sub { "3rd:" . rec('000') }, + 'baz','peri', + sub { rec(200,'a') }, + sub { rec(900,'x') }, + ) +); + diff --git a/t/conf/test.yml b/t/conf/test.yml index 86a8854..c27e9fd 100644 --- a/t/conf/test.yml +++ b/t/conf/test.yml @@ -10,18 +10,25 @@ databases: foo: name: 'foo name' input: - name: 'foo-input1' - type: isis - path: '/backup/isis_backup/A105-1/ISIS/latest/LIBRI/LIBRI' - encoding: 'cp852' - #limit: 10 - normalize: - path: 'conf/normalize/foo.pl' + - name: 'foo-input1' + type: isis + path: '/backup/isis_backup/A105-1/ISIS/latest/LIBRI/LIBRI' + encoding: 'cp852' + #limit: 10 + normalize: + path: 'conf/normalize/foo.pl' + - name: 'foo-input2' + type: isis + path: '/backup/isis_backup/A105-1/ISIS/latest/LIBRI/LIBRI' + encoding: 'cp852' + #limit: 10 + normalize: + path: 'conf/normalize/foo.pl' bar: name: 'bar name' input: - name: 'bar-input1' + name: 'bar-input' type: isis path: '/backup/isis_backup/A105-1/ISIS/latest/LIBRI/LIBRI' encoding: 'cp852' @@ -32,7 +39,7 @@ databases: baz: name: 'baz name' input: - name: 'baz-input1' + name: 'baz-input' type: isis path: '/backup/isis_backup/A105-1/ISIS/latest/LIBRI/LIBRI' encoding: 'cp852' -- 2.20.1