From 866332b9578d684da0dd0af5431e0e7777df8f23 Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Sat, 16 Jul 2005 11:07:38 +0000 Subject: [PATCH] moved implementation of lookups from older code-base git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@3 07558da8-63fa-0310-ba24-9fe276d99e06 --- Makefile.PL | 3 + lib/WebPAC.pm | 4 + lib/WebPAC/Common.pm | 143 +++++++++++++++++++++++++++++++++ lib/WebPAC/Input.pm | 83 ++++++++++++++----- lib/WebPAC/Normalize/Lookup.pm | 130 ++++++++++++++++++++++++++++++ t/00-load.t | 7 +- t/01-lookup.t | 14 ++++ t/pod-coverage.t | 2 +- 8 files changed, 365 insertions(+), 21 deletions(-) create mode 100644 lib/WebPAC/Common.pm create mode 100644 lib/WebPAC/Normalize/Lookup.pm create mode 100755 t/01-lookup.t diff --git a/Makefile.PL b/Makefile.PL index 27c3dde..9c8cd78 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -10,6 +10,9 @@ WriteMakefile( PL_FILES => {}, PREREQ_PM => { 'Test::More' => 0, + 'Config::Tiny' => 0, + 'File::Slurp' => 0, + 'Log::Log4perl' => 0, }, dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, clean => { FILES => 'WebPAC-* pod2html Makefile' }, diff --git a/lib/WebPAC.pm b/lib/WebPAC.pm index 1726a43..585dd75 100644 --- a/lib/WebPAC.pm +++ b/lib/WebPAC.pm @@ -35,6 +35,8 @@ if you don't export anything, such as for a purely object-oriented module. =head2 function1 +foo + =cut sub function1 { @@ -42,6 +44,8 @@ sub function1 { =head2 function2 +bar + =cut sub function2 { diff --git a/lib/WebPAC/Common.pm b/lib/WebPAC/Common.pm new file mode 100644 index 0000000..4f04d78 --- /dev/null +++ b/lib/WebPAC/Common.pm @@ -0,0 +1,143 @@ +package WebPAC::Common; + +use warnings; +use strict; + +use Log::Log4perl qw(get_logger :levels); + +=head1 NAME + +WebPAC::Common - internal methods called from other WebPAC modules + +=head1 VERSION + +Version 0.01 + +=cut + +our $VERSION = '0.01'; + +=head1 INTERNAL METHODS + +Here is a quick list of internal methods, mostly useful to turn debugging +on them (see L below for explanation). + +=cut + +=head2 _eval + +Internal function to eval code without C. + +=cut + +sub _eval { + my $self = shift; + + my $code = shift || return; + + my $log = $self->_get_logger(); + + no strict 'subs'; + my $ret = eval $code; + if ($@) { + $log->error("problem with eval code [$code]: $@"); + } + + $log->debug("eval: ",$code," [",$ret,"]"); + + return $ret || undef; +} + +=head2 _sort_by_order + +Sort xml tags data structure accoding to C attribute. + +=cut + +sub _sort_by_order { + my $self = shift; + + my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} || + $self->{'import_xml'}->{'indexer'}->{$a}; + my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} || + $self->{'import_xml'}->{'indexer'}->{$b}; + + return $va <=> $vb; +} + +=head2 _x + +Convert string from UTF-8 to code page defined in C. + + my $text = $webpac->_x('utf8 text'); + +Default application code page is C. You will probably want to +change that when creating new instance of object based on this one. + +=cut + +sub _x { + my $self = shift; + my $utf8 = shift || return; + + # create UTF-8 convertor for import_xml files + $self->{'utf2cp'} ||= Text::Iconv->new('UTF-8' ,$self->{'code_page'} || 'ISO-8859-2'); + + return $self->{'utf2cp'}->convert($utf8) || + $self->_get_logger()->logwarn("can't convert '$utf8'"); +} + +=head2 _init_logger + +This function will init C using provided configuration file. + + $webpac->_init_logger('/path/to/log.conf'); + +=cut + +sub _init_logger { + my $self = shift; + my $file = shift; + if ($file) { + Log::Log4perl->init($file); + } else { + my $conf = q( ); + Log::Log4perl->init( \$conf ); + } +} + + +=head2 _get_logger + +Get C object with a twist: domains are defined for each +method + + my $log = $webpac->_get_logger(); + +=cut + +sub _get_logger { + my $self = shift; + + $self->{'_logger_ok'} ||= $self->_init_logger; + + my $name = (caller(1))[3] || caller; + return get_logger($name); +} + + +=head1 LOGGING + +Logging in WebPAC is performed by L with config file +C. + +Methods defined above have different levels of logging, so +it's descriptions will be useful to turn (mostry B logging) on +or off to see why WabPAC isn't perforing as you expect it (it might even +be a bug!). + +B. To repeat, you can +also use method names, and not only classes (which are just few) +to filter logging. + + diff --git a/lib/WebPAC/Input.pm b/lib/WebPAC/Input.pm index 07b9546..79c859b 100644 --- a/lib/WebPAC/Input.pm +++ b/lib/WebPAC/Input.pm @@ -5,7 +5,7 @@ use strict; =head1 NAME -WebPAC::Input - The great new WebPAC::Input! +WebPAC::Input - core module for input file format =head1 VERSION @@ -17,27 +17,49 @@ our $VERSION = '0.01'; =head1 SYNOPSIS -Quick summary of what the module does. +This module will load particular loader module and execute it's functions. Perhaps a little code snippet. use WebPAC::Input; - my $foo = WebPAC::Input->new(); - ... + my $db = WebPAC::Input->new( + format => 'NULL', + config => $config, + ); -=head1 EXPORT - -A list of functions that can be exported. You can delete this section -if you don't export anything, such as for a purely object-oriented module. + $db->open('/path/to/database'); + print "database size: ",$db->size,"\n"; + while (my $row = $db->fetch) { + ... + } + $db->close; =head1 FUNCTIONS -=head2 function1 +=head2 new + +Create new input database object. + + my $db = new WebPAC::Input( format => 'NULL' ); + +This function will load needed wrapper module and + +=cut + +sub new { + my $class = shift; + my $self = {@_}; + bless($self, $class); + + $self ? return $self : return undef; +} + +=head2 open =cut -sub function1 { +sub open { } =head2 function2 @@ -47,19 +69,42 @@ sub function1 { sub function2 { } -=head1 AUTHOR -Dobrica Pavlinusic, C<< >> -=head1 BUGS +=head1 MEMORY USAGE + +C options is double-edged sword. If enabled, WebPAC +will run on memory constraint machines (which doesn't have enough +physical RAM to create memory structure for whole source database). + +If your machine has 512Mb or more of RAM and database is around 10000 records, +memory shouldn't be an issue. If you don't have enough physical RAM, you +might consider using virtual memory (if your operating system is handling it +well, like on FreeBSD or Linux) instead of dropping to L to handle +parsed structure of ISIS database (this is what C option does). + +Hitting swap at end of reading source database is probably o.k. However, +hitting swap before 90% will dramatically decrease performance and you will +be better off with C and using rest of availble memory for +operating system disk cache (Linux is particuallary good about this). +However, every access to database record will require disk access, so +generation phase will be slower 10-100 times. -Please report any bugs or feature requests to -C, or through the web interface at -L. -I will be notified, and then you'll automatically be notified of progress on -your bug as I make changes. +Parsed structures are essential - you just have option to trade RAM memory +(which is fast) for disk space (which is slow). Be sure to have planty of +disk space if you are using C and thus L. -=head1 ACKNOWLEDGEMENTS +However, when WebPAC is running on desktop machines (or laptops :-), it's +highly undesireable for system to start swapping. Using C option can +reduce WecPAC memory usage to around 64Mb for same database with lookup +fields and sorted indexes which stay in RAM. Performance will suffer, but +memory usage will really be minimal. It might be also more confortable to +run WebPAC reniced on those machines. + + +=head1 AUTHOR + +Dobrica Pavlinusic, C<< >> =head1 COPYRIGHT & LICENSE diff --git a/lib/WebPAC/Normalize/Lookup.pm b/lib/WebPAC/Normalize/Lookup.pm new file mode 100644 index 0000000..6b5aa07 --- /dev/null +++ b/lib/WebPAC/Normalize/Lookup.pm @@ -0,0 +1,130 @@ +package WebPAC::Normalize::Lookup; + +use warnings; +use strict; + +use WebPAC::Common; + +use base qw/WebPAC::Common/; +use File::Slurp; + +=head1 NAME + +WebPAC::Normalize::Lookup - simple normalisation plugin to produce lookup + +=head1 VERSION + +Version 0.01 + +=cut + +our $VERSION = '0.01'; + +=head1 SYNOPSIS + +This module will produce in-memory lookups for easy resolution of lookups +to different records in source files. It can also be use with +C to produce tree hierarchies. + +Lookups are defined in C. + +C argument is an array of lookups to create. Each lookup must have C and +C. Optional parametar C is perl code to evaluate before storing +value in index. + + my $lookup => [ + { 'key' => 'd:v900', 'val' => 'v250^a' }, + { 'eval' => '"v901^a" eq "Podruèje"', + 'key' => 'pa:v561^4:v562^4:v461^1', + 'val' => 'v900' }, + ]; + + +=head1 FUNCTIONS + +=head2 new + +Create new lookup object. + + my $lookup = new WebPAC::Normalize::Lookup( + config => '/path/to/conf/lookup/lookup.pm', + ); + +=cut + +sub new { + my $class = shift; + my $self = {@_}; + bless($self, $class); + + my $log = $self->_get_logger(); + + my $config = $self->{'config'} || $log->logconfess("need path to lookup file in config parametar"); + + my $lookup_code = read_file($config) || $log->logconfess("can't read lookup file $config: $!"); + + { + no strict 'vars'; + do $config or $log->logdie("Failed to read configuration parameters '$config' $! $@"); + $self->{'lookup_def'} = \@lookup || $log->logdie("lookup config doesn't produce \@lookup array"); + } + + $log->logconfess("lookup config file isn't ARRAY") if ($self->{'lookup_def'} !~ /ARRAY/o); + + $self ? return $self : return undef; +} + +=head2 create_lookup + +Create lookup from record using lookup definition. + + $self->create_lookup($rec, @lookups); + +Called internally by C methods. + +=cut + +sub create_lookup { + my $self = shift; + + my $log = $self->_get_logger(); + + my $rec = shift || $log->logconfess("need record to create lookup"); + $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); + + foreach my $i ($self->{'loookup_def'}) { + $log->logconfess("need key") unless defined($i->{'key'}); + $log->logconfess("need val") unless defined($i->{'val'}); + + if (defined($i->{'eval'})) { + # eval first, so we can skip fill_in for key and val + my $eval = $self->fill_in($rec,$i->{'eval'}) || next; + if ($self->_eval($eval)) { + my $key = $self->fill_in($rec,$i->{'key'}) || next; + my @val = $self->fill_in($rec,$i->{'val'}) || next; + $log->debug("stored $key = ",sub { join(" | ",@val) }); + push @{$self->{'lookup'}->{$key}}, @val; + } + } else { + my $key = $self->fill_in($rec,$i->{'key'}) || next; + my @val = $self->fill_in($rec,$i->{'val'}) || next; + $log->debug("stored $key = ",sub { join(" | ",@val) }); + push @{$self->{'lookup'}->{$key}}, @val; + } + } +} + +=head1 AUTHOR + +Dobrica Pavlinusic, C<< >> + +=head1 COPYRIGHT & LICENSE + +Copyright 2005 Dobrica Pavlinusic, All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + +=cut + +1; # End of WebPAC::Normalize::Lookup diff --git a/t/00-load.t b/t/00-load.t index b520589..92c200f 100644 --- a/t/00-load.t +++ b/t/00-load.t @@ -1,4 +1,8 @@ -use Test::More tests => 8; +#!/usr/bin/perl -w + +use Test::More tests => 9; +use blib; +use strict; BEGIN { use_ok( 'WebPAC' ); @@ -9,6 +13,7 @@ use_ok( 'WebPAC::Output::Estraier' ); use_ok( 'WebPAC::Output::html' ); use_ok( 'WebPAC::Output::OAI' ); use_ok( 'WebPAC::Output::ClassDBI' ); +use_ok( 'WebPAC::Normalize::Lookup' ); } diag( "Testing WebPAC $WebPAC::VERSION, Perl 5.008007, /usr/bin/perl" ); diff --git a/t/01-lookup.t b/t/01-lookup.t new file mode 100755 index 0000000..1249677 --- /dev/null +++ b/t/01-lookup.t @@ -0,0 +1,14 @@ +#!/usr/bin/perl -w + +use Test::More tests => 3; +use Test::Exception; +use blib; +use strict; + +BEGIN { +use_ok( 'WebPAC::Normalize::Lookup' ); +} + +throws_ok { new WebPAC::Normalize::Lookup() } qr/config/, "new without config"; + +ok(my $nos = new WebPAC::Normalize::Lookup( config => 'conf/lookup/isis.pm' ), "new"); diff --git a/t/pod-coverage.t b/t/pod-coverage.t index 703f91d..2a1ea37 100644 --- a/t/pod-coverage.t +++ b/t/pod-coverage.t @@ -1,4 +1,4 @@ -#!perl -T +#!/usr/bin/perl -T use Test::More; eval "use Test::Pod::Coverage 1.04"; -- 2.20.1