6 use File::Temp qw/tempdir/;
11 use WebPAC::Input::ISIS;
12 use WebPAC::Store 0.03;
13 use WebPAC::Normalize::XML;
14 use WebPAC::Output::TT;
15 use WebPAC::Output::Estraier 0.02;
16 use YAML qw/LoadFile/;
# Main driver: for every database listed in conf/config.yml, walk its
# configured inputs, open each ISIS input, and build a normalized data
# structure for every fetched row.
# NOTE(review): this excerpt has gaps (several calls below open a
# parenthesis whose remaining arguments and closing are not shown), so the
# comments describe only what the visible lines establish.

# First command-line argument; where it is consumed is not visible here.
19 my $limit = shift @ARGV;
# Load the whole configuration from a path relative to the current directory.
21 my $config = LoadFile('conf/config.yml');
# Debug dump of the configuration to STDOUT.
# NOTE(review): Dumper is presumably Data::Dumper's, imported on a line not
# shown in this excerpt -- confirm.
23 print "config = ",Dumper($config);
# Abort when the 'databases' key is missing or false.
25 die "no databases in config file!\n" unless ($config->{databases});
# One pass per (database name, database config) pair.
29 while (my ($database, $db_config) = each %{ $config->{databases} }) {
# Path of this script, then strip the last path component so that only the
# containing directory (with a trailing slash) remains.
# NOTE(review): abs_path is presumably Cwd's -- its import is not visible.
31 my $abs_path = abs_path($0);
32 $abs_path =~ s#/[^/]*$#/#;
# Per-database path: configured webpac db_path plus the database name.
34 my $db_path = $config->{webpac}->{db_path} . '/' . $database;
# Store object for this database (remaining arguments not shown).
36 my $db = new WebPAC::Store(
38 database => $database,
# The logger is borrowed from the store object and reused for all messages
# below.
42 my $log = $db->_get_logger;
43 $log->info("working on $database in $db_path");
# The 'hyperestraier' section is mandatory; logdie is called when it is
# missing or false.
45 my $est_config = $config->{hyperestraier} || $log->logdie("can't find 'hyperestraier' part in confguration");
# $est_config is a reference into $config, so this assignment also changes
# $config->{hyperestraier}{database} on every pass of the loop.
46 $est_config->{database} = $database;
48 $log->info("using HyperEstraier URL $est_config->{masterurl}");
# Estraier output object (constructor arguments not shown).
50 my $est = new WebPAC::Output::Estraier(
55 # now, iterate through input formats
# 'input' may be an array reference of inputs or a single input; both forms
# end up in @inputs.
# NOTE(review): the declaration of @inputs and the else keyword separating
# the two assignments fall on lines not shown here.
59 if (ref($db_config->{input}) eq 'ARRAY') {
60 @inputs = @{ $db_config->{input} };
62 push @inputs, $db_config->{input};
65 foreach my $input (@inputs) {
# The type is compared case-insensitively; anything other than 'isis' is fatal.
67 my $type = lc($input->{type});
69 die "I know only how to handle input type isis, not '$type'!\n" unless ($type eq 'isis');
# Lookup object built from this input's lookup file (remaining arguments not
# shown).
71 my $lookup = new WebPAC::Lookup(
72 lookup_file => $input->{lookup},
75 $log->info("working on input $input->{path} [$input->{type}]");
# ISIS reader: code_page here comes from the global webpac section and
# limit_mfn from this input's own 'limit' key.
77 my $isis = new WebPAC::Input::ISIS(
78 code_page => $config->{webpac}->{webpac_encoding},
79 limit_mfn => $input->{limit},
# Open the input file; the return value is kept in $maxmfn.
83 my $maxmfn = $isis->open(
84 filename => $input->{path},
85 code_page => $input->{encoding}, # database encoding
# Debug dump of the lookup object's internal _lookup_data at info level.
88 $log->info( Dumper($lookup->{_lookup_data}) );
# Normalizer configured from the lookup regex and this input's name and
# 'normalize' section (tag and path).
90 my $n = new WebPAC::Normalize::XML(
91 # filter => { 'foo' => sub { shift } },
93 lookup_regex => $lookup->regex,
95 prefix => $input->{name},
99 tag => $input->{normalize}->{tag},
100 xml_file => $input->{normalize}->{path},
# In list context the three-dot operator yields the same list as the two-dot
# range, so this body runs size + 1 times (0 through size inclusive).
103 for ( 0 ... $isis->size ) {
# Skip the iteration whenever fetch returns a false value.
105 my $row = $isis->fetch || next;
# The MFN is read from the first element of field '000'; because of ||, a
# value of 0 or an empty string is fatal as well as a missing one.
107 my $mfn = $row->{'000'}->[0] || die "can't find MFN";
109 my $ds = $n->data_structure($row);
# Arguments of a call whose opening line is not shown: the id is the input
# name and the MFN joined by '#', the type comes from the hyperestraier
# section.
112 id => $input->{name} . "#" . $mfn,
114 type => $config->{hyperestraier}->{type},
# NOTE(review): $total_rows is declared and updated on lines not shown here.
122 $log->info("$total_rows records indexed");