r644@llin: dpavlin | 2006-05-14 15:29:07 +0200
[webpac2] / run.pl
diff --git a/run.pl b/run.pl
index 89b1130..58653e5 100755 (executable)
--- a/run.pl
+++ b/run.pl
@@ -12,11 +12,13 @@ use WebPAC::Lookup;
 use WebPAC::Input 0.03;
 use WebPAC::Store 0.03;
 use WebPAC::Normalize::XML;
+use WebPAC::Normalize::Set;
 use WebPAC::Output::TT;
 use YAML qw/LoadFile/;
 use Getopt::Long;
 use File::Path;
 use Time::HiRes qw/time/;
+use File::Slurp;
 
 =head1 NAME
 
@@ -48,6 +50,11 @@ reindex just single database (legacy name is --one)
 
 path to YAML configuration file
 
+=item --force-set
+
+force conversion of C<< normalize->path >> in C<config.yml> from
+C<.xml> to C<.pl>
+
 =back
 
 =cut
@@ -59,6 +66,7 @@ my $clean = 0;
 my $config = 'conf/config.yml';
 my $debug = 0;
 my $only_db_name;
+my $force_set = 0;
 
 GetOptions(
        "limit=i" => \$limit,
@@ -68,6 +76,7 @@ GetOptions(
        "only=s" => \$only_db_name,
        "config" => \$config,
        "debug" => \$debug,
+       "force-set" => \$force_set,
 );
 
 $config = LoadFile($config);
@@ -191,18 +200,34 @@ while (my ($database, $db_config) = each %{ $config->{databases} }) {
                        prefix => $input->{name},
                );
 
+               my $rules;
                my $normalize_path = $input->{normalize}->{path};
 
+               if ($force_set) {
+                       my $new_norm_path = $normalize_path;
+                       $new_norm_path =~ s/\.xml$/.pl/;
+                       if (-e $new_norm_path) {
+                               $log->info("--force-set replaced $normalize_path with $new_norm_path");
+                               $normalize_path = $new_norm_path;
+                       } else {
+                               $log->warn("--force-set failed on $new_norm_path, fallback to $normalize_path");
+                       }
+               }
+
                if ($normalize_path =~ m/\.xml$/i) {
                        $n->open(
                                tag => $input->{normalize}->{tag},
-                               xml_file => $input->{normalize}->{path},
+                               xml_file => $normalize_path,
                        );
                } elsif ($normalize_path =~ m/\.(?:yml|yaml)$/i) {
                        $n->open_yaml(
                                path => $normalize_path,
                                tag => $input->{normalize}->{tag},
                        );
+               } elsif ($normalize_path =~ m/\.(?:pl)$/i) {
+                       $n = undef;
+                       $log->info("using WebPAC::Normalize::Set to process $normalize_path");
+                       $rules = read_file( $normalize_path ) or die "can't open $normalize_path: $!";
                }
 
                foreach my $pos ( 0 ... $input_db->size ) {
@@ -217,7 +242,12 @@ while (my ($database, $db_config) = each %{ $config->{databases} }) {
                                push @{ $row->{'000'} }, $pos;
                        }
 
-                       my $ds = $n->data_structure($row);
+                       my $ds = $n ? $n->data_structure($row) :
+                               WebPAC::Normalize::Set::data_structure(
+                                       row => $row,
+                                       rules => $rules,
+                                       lookup => $lookup->lookup_hash,
+                               );
 
                        $indexer->add(
                                id => $input->{name} . "/" . $mfn,