my $n = $self->{module} . '::' . $subclass;
if (! defined &{ $n }) {
my $missing = "missing $subclass in $self->{module}";
- $log->logwarn($missing);
- $self->{$subclass} = sub { warn "$missing\n" };
+ $self->{$subclass} = sub { $log->logwarn($missing) };
} else {
$self->{$subclass} = \&{ $n };
}
}
if ($self->{limit}) {
- $log->info("limiting to ",$self->{limit}," records");
+ $log->debug("limiting to ",$self->{limit}," records");
$limit = $offset + $self->{limit} - 1;
$limit = $size if ($limit > $size);
}
my $url = $self->{masterurl} . '/node/' . $self->{database};
$self->{url} = $url;
- $log->info("opening Hyper Estraier index $self->{url}");
+ $log->debug("opening index $self->{url}");
my $nodes = $self->master( action => 'nodelist' );
$log->debug("nodes found: $nodes");
if ($nodes !~ m/^$self->{database}\t/sm) {
- $log->info("creating index $url");
+ $log->warn("creating index $url");
$self->master(
action => 'nodeadd',
name => $self->{database},
$self->{'db'}->set_auth($self->{'user'}, $self->{passwd});
my $encoding = $self->{'encoding'} || 'ISO-8859-2';
- $log->info("using encoding $encoding");
+ $log->info("using index $self->{url} with encoding $encoding");
$self->{'iconv'} = new Text::Iconv($encoding, 'UTF-8') or
$log->logdie("can't create conversion from $encoding to UTF-8");
use WebPAC::Output::TT;
use WebPAC::Output::Estraier 0.05;
use YAML qw/LoadFile/;
-use LWP::Simple;
+use Getopt::Long;
+use File::Path;
-my $limit = shift @ARGV;
+=head1 NAME
-my $config = LoadFile('conf/config.yml');
+run.pl - start WebPAC indexing
-print "config = ",Dumper($config);
+B<this command will probably go away. Don't get used to it!>
+
+Options:
+
+=over 4
+
+=item --offset 42
+
+start loading (all) databases at offset 42
+
+=item --limit 100
+
+limit loading to 100 records
+
+=item --clean
+
+remove database and Hyper Estraier index before indexing
+
+=item --config conf/config.yml
+
+path to YAML configuration file
+
+=item --debug
+
+dump the loaded configuration and pass the debug flag to WebPAC::Store
+
+=back
+
+=cut
+
+my $offset;
+my $limit;
+
+my $clean = 0;
+my $config = 'conf/config.yml';
+my $debug = 0;
+
+# Parse command-line switches into the variables declared above.
+# --config must be declared as "config=s" so that it consumes the path
+# that follows it; a bare "config" spec is a boolean flag, which would
+# set $config to 1 and leave the path unparsed in @ARGV.
+GetOptions(
+ "limit=i" => \$limit,
+ "offset=i" => \$offset,
+ "clean" => \$clean,
+ "config=s" => \$config,
+ "debug" => \$debug,
+);
+
+$config = LoadFile($config);
+
+print "config = ",Dumper($config) if ($debug);
die "no databases in config file!\n" unless ($config->{databases});
my $est_config = $config->{hyperestraier} || $log->logdie("can't find 'hyperestraier' part in confguration");
$est_config->{database} = $database;
- $log->info("using HyperEstraier URL $est_config->{masterurl}");
-
my $est = new WebPAC::Output::Estraier(
%{ $est_config },
);
+ if ($clean) {
+ $log->warn("creating new empty index $database");
+ $est->master( action => 'nodedel', name => $database );
+ $est->master( action => 'nodeadd', name => $database, label => $database );
+ }
+
#
# now WebPAC::Store
#
my $db_path = $config->{webpac}->{db_path} . '/' . $database;
- $log->info("working on $database in $db_path");
+ if ($clean) {
+ $log->info("creating new database $database in $db_path");
+ rmtree( $db_path ) || $log->warn("can't remove $db_path: $!");
+ } else {
+ $log->info("working on $database in $db_path");
+ }
my $db = new WebPAC::Store(
path => $db_path,
database => $database,
- debug => 1,
+ debug => $debug,
);
my $input_db = new WebPAC::Input(
module => $input_module,
code_page => $config->{webpac}->{webpac_encoding},
- limit => $input->{limit},
+ limit => $limit || $input->{limit},
+ offset => $offset,
lookup => $lookup,
);
$log->logdie("can't create input using $input_module") unless ($input);