r1020@llin: dpavlin | 2006-09-26 14:40:34 +0200
[webpac2] / run.pl
diff --git a/run.pl b/run.pl
index ffcc3b2..3b01e3f 100755 (executable)
--- a/run.pl
+++ b/run.pl
@@ -8,9 +8,8 @@ use lib './lib';
 
 use WebPAC::Common 0.02;
 use WebPAC::Parser 0.04;
-use WebPAC::Lookup 0.03;
-use WebPAC::Input 0.11;
-use WebPAC::Store 0.03;
+use WebPAC::Input 0.13;
+use WebPAC::Store 0.11;
 use WebPAC::Normalize 0.11;
 use WebPAC::Output::TT;
 use WebPAC::Validate 0.06;
@@ -189,6 +188,15 @@ if ($parallel) {
        Proc::Queue::size($parallel);
 }
 
+sub create_ds_config {	# returns a deep copy of $db_config extended with _ (database), _mfn and input
+       my ($db_config, $database, $input, $mfn) = @_;
+       my $ds_config = dclone( $db_config );	# work on a copy so assignments below never touch $db_config
+       $ds_config->{_}     = $database ? $database : $log->logconfess("need database");
+       $ds_config->{_mfn}  = $mfn      ? $mfn      : $log->logconfess("need mfn");
+       $ds_config->{input} = $input    ? $input    : $log->logconfess("need input");
+       return $ds_config;
+}
+
 while (my ($database, $db_config) = each %{ $config->databases }) {
 
        my ($only_database,$only_input) = split(m#/#, $only_filter) if ($only_filter);
@@ -277,7 +285,7 @@ while (my ($database, $db_config) = each %{ $config->databases }) {
        my $abs_path = abs_path($0);
        $abs_path =~ s#/[^/]*$#/#;
 
-       my $db_path = $config->get('webpac')->{db_path} . '/' . $database;
+       my $db_path = $config->webpac('db_path');
 
        if ($clean) {
                $log->info("creating new database '$database' in $db_path");
@@ -286,9 +294,8 @@ while (my ($database, $db_config) = each %{ $config->databases }) {
                $log->info("working on database '$database' in $db_path");
        }
 
-       my $db = new WebPAC::Store(
+       my $store = new WebPAC::Store(
                path => $db_path,
-               database => $database,
                debug => $debug,
        );
 
@@ -308,7 +315,9 @@ while (my ($database, $db_config) = each %{ $config->databases }) {
 
        foreach my $input (@inputs) {
 
-               next if ($only_input && ($input->{name} !~ m#$only_input#i && $input->{type} !~ m#$only_input#i));
+               my $input_name = $input->{name} || $log->logdie("input without a name isn't valid: ",dump($input));
+
+               next if ($only_input && ($input_name !~ m#$only_input#i && $input->{type} !~ m#$only_input#i));
 
                my $type = lc($input->{type});
 
@@ -316,8 +325,10 @@ while (my ($database, $db_config) = each %{ $config->databases }) {
 
                my $input_module = $config->webpac('inputs')->{$type};
 
-               $log->info("working on input '$input->{name}' in $input->{path} [type: $input->{type}] using $input_module",
-                       $input->{lookup} ? "lookup '$input->{lookup}'" : ""
+               my @lookups = $parser->have_lookup_create($database, $input);
+
+               $log->info("working on input '$input_name' in $input->{path} [type: $input->{type}] using $input_module",
+                       @lookups ? " creating lookups: ".join(", ", @lookups) : ""
                );
 
                if ($stats) {
@@ -326,22 +337,11 @@ while (my ($database, $db_config) = each %{ $config->databases }) {
                        delete($input->{modify_file});
                }
 
-               warn "parser->depends = ", dump( $parser->{depends} );
-               warn "depends on: ", dump( $parser->depends($database, $input->{name}) );
-               warn "lookup_create_rules = ", dump( $parser->lookup_create_rules($database, $input->{name}) );
-               warn "parser->_lookup_create = ", dump( $parser->{_lookup_create} );
-
-               my $lookup;
-
                my $input_db = new WebPAC::Input(
                        module => $input_module,
                        encoding => $config->webpac('webpac_encoding'),
                        limit => $limit || $input->{limit},
                        offset => $offset,
-                       lookup_coderef => sub {
-                               my $rec = shift || return;
-                               $lookup->add( $rec );
-                       },
                        recode => $input->{recode},
                        stats => $stats,
                        modify_records => $input->{modify_records},
@@ -350,22 +350,65 @@ while (my ($database, $db_config) = each %{ $config->databases }) {
                $log->logdie("can't create input using $input_module") unless ($input);
 
                if (defined( $input->{lookup} )) {
-                       $log->warn("$database/", $input->{name}, " has depriciated lookup definition, removing it...");
+                       $log->warn("$database/$input_name has depriciated lookup definition, removing it...");
                        delete( $input->{lookup} );
                }
 
+               my $lookup;
+               my $lookup_coderef;
+
+               if (@lookups) {
+
+                       my $rules = $parser->lookup_create_rules($database, $input) || $log->logdie("no rules found for $database/$input");
+
+                       $lookup_coderef = sub {
+                               my $rec = shift || die "need rec!";
+                               my $mfn = $rec->{'000'}->[0] || die "need mfn in 000";
+
+                               WebPAC::Normalize::data_structure(
+                                       row => $rec,
+                                       rules => $rules,
+                                       lookup => $lookup,
+                                       config => create_ds_config( $db_config, $database, $input, $mfn ),
+                               );
+
+                               warn "current lookup = ", dump($lookup) if ($lookup);
+                       };
+
+                       WebPAC::Normalize::_set_lookup( undef );
+
+                       $log->debug("created lookup_coderef using:\n$rules");
+
+               };
+
                my $maxmfn = $input_db->open(
                        path => $input->{path},
                        code_page => $input->{encoding},        # database encoding
+                       lookup_coderef => $lookup_coderef,
                        %{ $input },
                );
 
+               my $lookup_data = WebPAC::Normalize::_get_lookup();
+
+               if (defined( $lookup_data->{$database}->{$input_name} )) {
+                       $log->debug("created following lookups: ", dump( $lookup_data ));
+
+                       foreach my $key (keys %{ $lookup_data->{$database}->{$input_name} }) {
+                               $store->save_lookup(
+                                       database => $database,
+                                       input => $input_name,
+                                       key => $key,
+                                       data => $lookup_data->{$database}->{$input_name}->{$key},
+                               );
+                       }
+               }
+
                my $report_fh;
                if ($stats || $validate) {
-                       my $path = "out/report/" . $database . '-' . $input->{name} . '.txt';
+                       my $path = "out/report/${database}-${input_name}.txt";
                        open($report_fh, '>', $path) || $log->logdie("can't open $path: $!");
 
-                       print $report_fh "Report for database '$database' input '$input->{name}' records ",
+                       print $report_fh "Report for database '$database' input '$input_name' records ",
                                $offset || 1, "-", $limit || $input->{limit} || $maxmfn, "\n\n";
                        $log->info("Generating report file $path");
                }
@@ -376,7 +419,7 @@ while (my ($database, $db_config) = each %{ $config->databases }) {
                if ($marc_normalize) {
                        @norm_array = ( {
                                path => $marc_normalize,
-                               output => $marc_output || 'out/marc/' . $database . '-' . $input->{name} . '.marc',
+                               output => $marc_output || "out/marc/${database}-${input_name}.marc",
                        } );
                }
 
@@ -424,33 +467,23 @@ while (my ($database, $db_config) = each %{ $config->databases }) {
                                        }
                                }
 
-                               my $ds_config = dclone($db_config);
-
-                               # default values -> database key
-                               $ds_config->{_} = $database;
-
-                               # current mfn
-                               $ds_config->{_mfn} = $mfn;
-
-                               # attach current input
-                               $ds_config->{input} = $input;
-
                                my $ds = WebPAC::Normalize::data_structure(
                                        row => $row,
                                        rules => $rules,
                                        lookup => $lookup ? $lookup->lookup_hash : undef,
-                                       config => $ds_config,
+                                       config => create_ds_config( $db_config, $database, $input, $mfn ),
                                        marc_encoding => 'utf-8',
                                );
 
-                               $db->save_ds(
+                               $store->save_ds(
+                                       database => $database,
+                                       input => $input_name,
                                        id => $mfn,
                                        ds => $ds,
-                                       prefix => $input->{name},
                                ) if ($ds && !$stats);
 
                                $indexer->add(
-                                       id => $input->{name} . "/" . $mfn,
+                                       id => "${input_name}/${mfn}",
                                        ds => $ds,
                                        type => $config->get($indexer_config)->{type},
                                ) if ($indexer && $ds);