r966@llin: dpavlin | 2006-09-24 19:22:45 +0200
authorDobrica Pavlinusic <dpavlin@rot13.org>
Sun, 24 Sep 2006 17:25:04 +0000 (17:25 +0000)
committerDobrica Pavlinusic <dpavlin@rot13.org>
Sun, 24 Sep 2006 17:25:04 +0000 (17:25 +0000)
 first cut at WebPAC::Parser which will allow lookups to be specified in
 normalization file

git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@686 07558da8-63fa-0310-ba24-9fe276d99e06

conf/log.conf
conf/lookup/ffkk-arti.pl
lib/WebPAC/Parser.pm [new file with mode: 0644]
t/2-parse.t [new file with mode: 0755]
t/conf/normalize/bar.pl [new file with mode: 0644]
t/conf/normalize/baz.pl [new file with mode: 0644]
t/conf/normalize/foo.pl [new file with mode: 0644]
t/conf/test.yml

index e4d9dff..f5e39ea 100644 (file)
@@ -19,6 +19,10 @@ log4perl.rootLogger=INFO, LOG, SCREEN
 #log4perl.logger.WebPAC.Lookup=DEBUG
 #log4perl.logger.WebPAC.Lookup.lookup=DEBUG
 
+log4perl.logger.WebPAC.Config=DEBUG
+
+log4perl.logger.WebPAC.Parser=DEBUG
+
 #log4perl.logger.WebPAC.Validate=DEBUG
 #log4perl.logger.WebPAC.Validate.report=DEBUG
 
index 72ac8db..fe219c3 100644 (file)
@@ -3,17 +3,25 @@ marc('777','v',
        lookup(
                sub { 'ffkk/perl' . rec('000') },
                'ffkk','peri',
-               sub { rec('11') },
-               sub { rec('11') },
+               sub { rec('11') }
        ) ||
        lookup(
                sub { 'ffkk/perl' . rec('000') },
                'ffkk','peri',
-               sub { rec('200','a') . rec('200','e') },
+               sub { rec(200,'a') . rec('200','e') },
        ) ||
        lookup(
-               sub { 'ffkk/perl' . rec('000') },
+               sub { "ffkk/perl" . rec('000') },
                'ffkk','peri',
-               sub { rec('200','a') },
+               sub { rec(200,'a') },
+               sub { rec(900,'x') },
+       )
+);
+
+marc('900','x',
+       lookup(
+               sub { rec('200','a') . ' ' . rec('200','b') },
+               'ffps','libri',
+               sub { rec('245','a') },
        )
 );
diff --git a/lib/WebPAC/Parser.pm b/lib/WebPAC/Parser.pm
new file mode 100644 (file)
index 0000000..0a37849
--- /dev/null
@@ -0,0 +1,188 @@
+package WebPAC::Parser;
+
+use warnings;
+use strict;
+
+use base qw/WebPAC::Common WebPAC::Normalize/;
+
+use PPI;
+use PPI::Dumper;
+use Data::Dump qw/dump/;
+use File::Slurp;
+
+
+=head1 NAME
+
+WebPAC::Parser - parse perl normalization configuration files and mungle it
+
+=head1 VERSION
+
+Version 0.01
+
+=cut
+
+our $VERSION = '0.01';
+
+=head1 SYNOPSIS
+
+FIXME
+
+=head1 FUNCTIONS
+
+=head2 new
+
+Create new parser object.
+
+  my $parser = new WebPAC::Parser(
+       config => new WebPAC::Config(),
+       base_path => '/optional/path/to/conf',
+  );
+
+=cut
+
+sub new {
+       my $class = shift;
+       my $self = {@_};
+       bless($self, $class);
+
+       my $log = $self->_get_logger();
+
+       $log->logdie("config isn't WebPAC::Config") unless ($self->{config} && $self->{config}->isa('WebPAC::Config'));
+
+       $log->logdie("can't iterate_inputs over this config object") unless ($self->{config}->can('iterate_inputs'));
+
+       my $source;
+
+       $self->{config}->iterate_inputs( sub {
+               my $input = shift;
+               my $path = $input->{normalize}->{path} || return;
+               my $full = $self->{base_path} ? $self->{base_path} . '/' . $path : $path;
+               $log->logdie("normalization input $full doesn't exist") unless (-e $full);
+               my $s = read_file( $full ) || $log->logdie("can't read $full: $!");
+               $log->debug("adding $path to parser [",length($s)," bytes]");
+               $source .= $s;
+       } );
+
+       $log->debug("collected ", length($source), " bytes of source");
+
+       $self->{source} = $source;
+
+       $self ? return $self : return undef;
+}
+
+=head2 parse
+
+=cut
+
+sub parse {
+       my $self = shift;
+
+       my $log = $self->_get_logger();
+
+       $log->logdie('no source found in object') unless ($self->{source});
+
+       my $Document = PPI::Document->new( \$self->{source} ) || $log->logdie("can't parse source:\n", $self->{source});
+
+       $Document->prune('PPI::Token::Whitespace');
+       #$Document->prune('PPI::Token::Operator');
+
+       # Find all the named subroutines
+
+       my $eval_create;
+
+       $Document->find( sub {
+                       my ($Document,$Element) = @_;
+
+                       $Element->isa('PPI::Token::Word') or return '';
+                       $Element->content eq 'lookup' or return '';
+
+                       print "#*** expansion: ", $Element->snext_sibling,$/;
+
+                       my $args = $Element->snext_sibling;
+               
+                       my @e = $args->child(0)->elements;
+                       print "hum, expect at least 8 elements, got ", scalar @e, " in $args\n" if ($#e < 8);
+
+                       print "# found ", scalar @e, " elements:\n";
+
+                       foreach my $i ( 0 .. $#e ) {
+                               printf("# %-2d: %-30s %s\n", $i, ( $e[$i] || 'undef' ), $e[$i]->class );
+                       }
+
+                       my $key_element = $e[8]->clone;
+
+                       die "key element must be PPI::Structure::Block" unless $key_element->isa('PPI::Structure::Block');
+
+                       print "## key part: ", $key_element, $/;
+
+                       my @key;
+
+                       $key_element->find( sub {
+                               my $e = $_[1] || die "no element?";
+                               $e->isa('PPI::Token::Word') or return '';
+                               $e->content eq 'rec' or return '';
+
+                               my $kf = $e->snext_sibling;
+
+                               print "## key fragment = $kf\n";
+
+                               push @key, eval $kf;
+                               print "ERROR: can't eval { $kf }: $@" if ($@);
+
+                               return 1;
+                       });
+
+                       my $key = join('-', @key ) || print "ERROR: no key found!";
+
+                       print "key = $key\n";
+
+                       my $create = ' 
+                               $coderef = ' . $e[7] . $e[8] . ';
+                               foreach my $v ($coderef->()) {
+                                       next unless (defined($v) && $v ne \'\');
+                                       push @{ $lookup->{\'' . $key . '\'}->{$v} }, $mfn;
+                               }
+                       ';
+
+                       print "create: $create\n";
+
+                       $create =~ s/\s+/ /gs;
+                       $eval_create->{ $e[3] }->{ $e[5] } .= $create;
+
+                       if ($#e < 10) {
+                               $e[8]->insert_after( $e[8]->clone );
+                               $e[8]->insert_after( $e[7]->clone );
+                               $e[8]->insert_after( $e[6]->clone );
+                       }
+
+                       $e[7]->remove;
+                       $e[8]->insert_before( new PPI::Token::Quote::Single( "'$key'" ) );
+                       $e[8]->remove;
+
+
+                       print "# >>> ", $Element->snext_sibling, "\n";
+       });
+
+       print "-----\ncreate: ", dump($eval_create), "\n";
+       print "-----\nlookup: ", $Document->serialize, "\n";
+       print "-----\n";
+
+       my $Dumper = PPI::Dumper->new( $Document );
+       $Dumper->print;
+
+}
+
+=head1 AUTHOR
+
+Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2006 Dobrica Pavlinusic, All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
+
+1; # End of WebPAC::Parser
diff --git a/t/2-parse.t b/t/2-parse.t
new file mode 100755 (executable)
index 0000000..4a081c7
--- /dev/null
@@ -0,0 +1,36 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Test::More tests => 8;
+use Test::Exception;
+use blib;
+
+use Data::Dump qw/dump/;
+use Cwd qw/abs_path/;
+use YAML qw/LoadFile/;
+
+BEGIN {
+use_ok( 'WebPAC::Parser' );
+use_ok( 'WebPAC::Config' );
+}
+
+my $debug = shift @ARGV;
+
+ok(my $abs_path = abs_path($0), "abs_path");
+$abs_path =~ s#/[^/]*$#/#;
+
+my $config_path = "$abs_path/conf/test.yml";
+
+ok(-e $config_path, "$config_path exists");
+
+throws_ok { new WebPAC::Parser( no_log => 1 ) } qr/WebPAC::Config/, "new without config";
+
+ok(
+       my $parser = new WebPAC::Parser(
+               config => new WebPAC::Config( path => $config_path ),
+               base_path => $abs_path,
+), "new");
+
+ok($parser->{source}, "source exist");
+
+ok($parser->parse, 'parse');
diff --git a/t/conf/normalize/bar.pl b/t/conf/normalize/bar.pl
new file mode 100644 (file)
index 0000000..70b3476
--- /dev/null
@@ -0,0 +1,7 @@
+marc('900','x',
+       lookup(
+               sub { rec('200','a') . ' ' . rec('200','b') },
+               'foo','foo-input1',
+               sub { rec('245','a') },
+       )
+);
diff --git a/t/conf/normalize/baz.pl b/t/conf/normalize/baz.pl
new file mode 100644 (file)
index 0000000..e468fd4
--- /dev/null
@@ -0,0 +1,7 @@
+marc('900','x',
+       lookup(
+               sub { rec('200','a') . ' ' . rec('200','b') },
+               'foo','foo-input2',
+               sub { rec('245','a') },
+       )
+);
diff --git a/t/conf/normalize/foo.pl b/t/conf/normalize/foo.pl
new file mode 100644 (file)
index 0000000..0bec8d3
--- /dev/null
@@ -0,0 +1,19 @@
+marc('777','v',
+       lookup(
+               sub { '1st:' . rec('000') },
+               'foo','peri',
+               sub { rec('11') }
+       ) ||
+       lookup(
+               sub { '2nd:' . rec('000') },
+               'bar','peri',
+               sub { rec(200,'a') . rec('200','e') },
+       ) ||
+       lookup(
+               sub { "3rd:" . rec('000') },
+               'baz','peri',
+               sub { rec(200,'a') },
+               sub { rec(900,'x') },
+       )
+);
+
index 86a8854..c27e9fd 100644 (file)
@@ -10,18 +10,25 @@ databases:
   foo:
     name: 'foo name'
     input:
-      name: 'foo-input1'
-      type: isis
-      path: '/backup/isis_backup/A105-1/ISIS/latest/LIBRI/LIBRI'
-      encoding: 'cp852'
-      #limit: 10
-      normalize:
-        path: 'conf/normalize/foo.pl'
+      - name: 'foo-input1'
+        type: isis
+        path: '/backup/isis_backup/A105-1/ISIS/latest/LIBRI/LIBRI'
+        encoding: 'cp852'
+        #limit: 10
+        normalize:
+          path: 'conf/normalize/foo.pl'
+      - name: 'foo-input2'
+        type: isis
+        path: '/backup/isis_backup/A105-1/ISIS/latest/LIBRI/LIBRI'
+        encoding: 'cp852'
+        #limit: 10
+        normalize:
+          path: 'conf/normalize/foo.pl'
 
   bar:
     name: 'bar name'
     input:
-      name: 'bar-input1'
+      name: 'bar-input'
       type: isis
       path: '/backup/isis_backup/A105-1/ISIS/latest/LIBRI/LIBRI'
       encoding: 'cp852'
@@ -32,7 +39,7 @@ databases:
   baz:
     name: 'baz name'
     input:
-      name: 'baz-input1'
+      name: 'baz-input'
       type: isis
       path: '/backup/isis_backup/A105-1/ISIS/latest/LIBRI/LIBRI'
       encoding: 'cp852'