From: Dobrica Pavlinusic Date: Mon, 25 Sep 2006 18:58:43 +0000 (+0000) Subject: r1014@llin: dpavlin | 2006-09-25 20:56:33 +0200 X-Git-Url: http://git.rot13.org/?a=commitdiff_plain;h=f6bee04bfefd37b50103be90a2903aa1b2da39d8;p=webpac2 r1014@llin: dpavlin | 2006-09-25 20:56:33 +0200 save lookups using WebPAC::Store git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@710 07558da8-63fa-0310-ba24-9fe276d99e06 --- diff --git a/TODO b/TODO index 126785c..8f2dfd6 100644 --- a/TODO +++ b/TODO @@ -30,8 +30,8 @@ + modify_records regexpes now match just first occurence (repeat to get second...) + fix WebPAC::Output::MARC encoding troubles + generate reports (validation and stats) for each input -- support splitting of config yml to multiple files - rewrite lookup support to use WebPAC::Normalize +- support splitting of config yml to multiple files - add dBase input format - remove delimiters characters from index and query entered - delete unused files in database directories diff --git a/lib/WebPAC/Store.pm b/lib/WebPAC/Store.pm index b1837c4..46fc419 100644 --- a/lib/WebPAC/Store.pm +++ b/lib/WebPAC/Store.pm @@ -10,23 +10,26 @@ use Data::Dumper; =head1 NAME -WebPAC::Store - Store normalized data on disk +WebPAC::Store - Store WebPAC data on disk =head1 VERSION -Version 0.09 +Version 0.10 =cut -our $VERSION = '0.09'; +our $VERSION = '0.10'; =head1 SYNOPSIS -This module provides disk storage for normalised data. +This module provides disk storage for normalised data and lookups. -It is newest component of WebPAC, so it will change quite often or be in -flux. However, I will try to keep backward compatiblity by providing -multiple back-ends. +It is one of newer components of WebPAC, so it will change from time to +time. + +I will try to keep backward compatiblity by providing multiple back-ends, +but this can't be garanteed. In other words, don't delete your input +databases just yet :-) This has additional advantage. I can create single place to plugin other file formats which provide better performance for particular type of data. @@ -62,8 +65,8 @@ Mandatory parametar C is used as subdirectory in database directory. sub new { my $class = shift; - my $self = {@_}; - bless($self, $class); + my $self = {@_}; + bless($self, $class); my $log = $self->_get_logger(); @@ -172,10 +175,6 @@ sub load_ds { my $cache_file = "$cache_path/$database/$prefix/$id"; $cache_file =~ s#//#/#go; -open(my $fh, '>>', '/tmp/foo'); -print $fh "LOAD $cache_path / $database / $prefix / $id ==> $cache_file\n"; -close($fh); - $log->debug("using cache_file $cache_file"); if (-r $cache_file) { @@ -249,6 +248,34 @@ sub save_ds { } +=head2 save_lookup + + $db->save_lookup( $database, $input, $key, $lookup ); + +=cut + +sub save_lookup { + my $self = shift; + my ($database, $input, $key, $lookup) = @_; + + my $log = $self->_get_logger; + + my $path = $self->{'path'} . "/lookup/$input"; + + mkpath($path) unless (-d $path); + + $path .= "/$key"; + + if (store $lookup, $path) { + $log->info("saved lookup $path"); + } else { + $log->logwarn("can't store lookup $database/$input/$key in $path: $!"); + } + + +} + + =head1 AUTHOR Dobrica Pavlinusic, C<< >> diff --git a/run.pl b/run.pl index cd214ba..93363eb 100755 --- a/run.pl +++ b/run.pl @@ -9,7 +9,7 @@ use lib './lib'; use WebPAC::Common 0.02; use WebPAC::Parser 0.04; use WebPAC::Input 0.13; -use WebPAC::Store 0.03; +use WebPAC::Store 0.10; use WebPAC::Normalize 0.11; use WebPAC::Output::TT; use WebPAC::Validate 0.06; @@ -294,7 +294,7 @@ while (my ($database, $db_config) = each %{ $config->databases }) { $log->info("working on database '$database' in $db_path"); } - my $db = new WebPAC::Store( + my $store = new WebPAC::Store( path => $db_path, database => $database, debug => $debug, @@ -316,7 +316,9 @@ while (my ($database, $db_config) = each %{ $config->databases }) { foreach my $input (@inputs) { - next if ($only_input && ($input->{name} !~ m#$only_input#i && $input->{type} !~ m#$only_input#i)); + my $input_name = $input->{name} || $log->logdie("input without a name isn't valid: ",dump($input)); + + next if ($only_input && ($input_name !~ m#$only_input#i && $input->{type} !~ m#$only_input#i)); my $type = lc($input->{type}); @@ -326,7 +328,7 @@ while (my ($database, $db_config) = each %{ $config->databases }) { my @lookups = $parser->have_lookup_create($database, $input); - $log->info("working on input '$input->{name}' in $input->{path} [type: $input->{type}] using $input_module", + $log->info("working on input '$input_name' in $input->{path} [type: $input->{type}] using $input_module", @lookups ? " creating lookups: ".join(", ", @lookups) : "" ); @@ -349,7 +351,7 @@ while (my ($database, $db_config) = each %{ $config->databases }) { $log->logdie("can't create input using $input_module") unless ($input); if (defined( $input->{lookup} )) { - $log->warn("$database/", $input->{name}, " has depriciated lookup definition, removing it..."); + $log->warn("$database/$input_name has depriciated lookup definition, removing it..."); delete( $input->{lookup} ); } @@ -387,14 +389,20 @@ while (my ($database, $db_config) = each %{ $config->databases }) { %{ $input }, ); - $log->debug("created following lookups: ", dump( WebPAC::Normalize::_get_lookup() ) ); + my $lookup_data = WebPAC::Normalize::_get_lookup(); + + $log->debug("created following lookups: ", dump( $lookup_data )); + + foreach my $key (keys %$lookup_data) { + $store->save_lookup( $database, $input_name, $key, $lookup_data->{$key} ); + } my $report_fh; if ($stats || $validate) { - my $path = "out/report/" . $database . '-' . $input->{name} . '.txt'; + my $path = "out/report/${database}-${input_name}.txt"; open($report_fh, '>', $path) || $log->logdie("can't open $path: $!"); - print $report_fh "Report for database '$database' input '$input->{name}' records ", + print $report_fh "Report for database '$database' input '$input_name' records ", $offset || 1, "-", $limit || $input->{limit} || $maxmfn, "\n\n"; $log->info("Generating report file $path"); } @@ -405,7 +413,7 @@ while (my ($database, $db_config) = each %{ $config->databases }) { if ($marc_normalize) { @norm_array = ( { path => $marc_normalize, - output => $marc_output || 'out/marc/' . $database . '-' . $input->{name} . '.marc', + output => $marc_output || "out/marc/${database}-${input_name}.marc", } ); } @@ -461,14 +469,14 @@ while (my ($database, $db_config) = each %{ $config->databases }) { marc_encoding => 'utf-8', ); - $db->save_ds( + $store->save_ds( id => $mfn, ds => $ds, - prefix => $input->{name}, + prefix => $input_name, ) if ($ds && !$stats); $indexer->add( - id => $input->{name} . "/" . $mfn, + id => "${input_name}/${mfn}", ds => $ds, type => $config->get($indexer_config)->{type}, ) if ($indexer && $ds);