first cut into WebPAC::DB
authorDobrica Pavlinusic <dpavlin@rot13.org>
Sun, 17 Jul 2005 14:53:37 +0000 (14:53 +0000)
committerDobrica Pavlinusic <dpavlin@rot13.org>
Sun, 17 Jul 2005 14:53:37 +0000 (14:53 +0000)
git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@18 07558da8-63fa-0310-ba24-9fe276d99e06

Makefile.PL
lib/WebPAC/DB.pm
lib/WebPAC/Normalize.pm
t/4-db.tt [new file with mode: 0755]
t/4-output-tt.t [deleted file]
t/5-output-tt.t [new file with mode: 0755]

index 5b198aa..35a358f 100644 (file)
@@ -21,6 +21,7 @@ WriteMakefile(
        'XML::Simple' => 0,
        'Template' => 0,
        'Time::HiRes' => 0,
+       'File::Temp' => 0,
     },
     dist                => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },
     clean               => { FILES => 'WebPAC-* pod2html Makefile tags' },
index 101834d..a11ccb9 100644 (file)
@@ -3,9 +3,12 @@ package WebPAC::DB;
 use warnings;
 use strict;
 
+use base 'WebPAC::Common';
+use Storable;
+
 =head1 NAME
 
-WebPAC::DB - The great new WebPAC::DB!
+WebPAC::DB - Store normalized data on disk
 
 =head1 VERSION
 
@@ -17,9 +20,16 @@ our $VERSION = '0.01';
 
 =head1 SYNOPSIS
 
-Quick summary of what the module does.
+This module provides disk storage for normalised data.
+
+It is newest component of WebPAC, so it will change quite often or be in
+flux. However, I will try to keep backward compatiblity by providing
+multiple back-ends.
+
+This has additional advantage. I can create single place to plugin other
+file formats which provide better performance for particular type of data.
 
-Perhaps a little code snippet.
+For now, this is a prototype version.
 
     use WebPAC::DB;
 
@@ -28,11 +38,168 @@ Perhaps a little code snippet.
 
 =head1 FUNCTIONS
 
-=head2 function1
+=head2 new
+
+Create new normalised database object
+
+  my $db = new WebPAC::DB(
+       path = '/path/to/cache/ds/',
+  );
+
+Optional parameter C<path> defines path to directory
+in which cache file for C<data_structure> call will be created.
+
+=cut
+
+sub new {
+       my $class = shift;
+        my $self = {@_};
+        bless($self, $class);
+
+       $self->path( $self->{'path'} );
+
+       $self ? return $self : return undef;
+}
+
+=head2 path
+
+Check if specified cache directory exist, and if not, disable caching.
+
+ $db->path('./cache/ds/');
+
+If you pass false or zero value to this function, it will disable
+cacheing.
+
+=cut
+
+sub path {
+       my $self = shift;
+
+       my $dir = shift;
+
+       my $log = $self->_get_logger();
+
+       if ($dir) {
+               my $msg;
+               if (! -e $dir) {
+                       $msg = "doesn't exist";
+               } elsif (! -d $dir) {
+                       $msg = "is not directory";
+               } elsif (! -w $dir) {
+                       $msg = "not writable";
+               }
+
+               if ($msg) {
+                       undef $self->{'cache_data_structure'};
+                       $log->warn("cache_data_structure $dir $msg, disabling...");
+               } else {
+                       $log->debug("using cache dir $dir");
+               }
+       } else {
+               $log->debug("disabling cache");
+               undef $self->{'cache_data_structure'};
+       }
+}
+
+=head2 load_gs
+
+Retrive from disk one data_structure records using field 000 as key
+
+  my @ds = $db->load_gs($rec);
+
+This function will also perform basic sanity checking on returned
+data and disable caching if data is corrupted (or changed since last
+update).
+
+Returns array or undef if cacheing is disabled or unavailable.
 
 =cut
 
-sub function1 {
+sub load_gs {
+       my $self = shift;
+
+       return unless $self->{'path'};
+
+       my $rec = shift || return;
+
+       my $log = $self->_get_logger;
+
+       my $cache_path = $self->{'path'};
+
+       my $id = $rec->{'000'};
+       $id = $rec->{'000'}->[0] if ($id =~ m/^ARRAY/o);
+
+       unless (defined($id)) {
+               $log->warn("Can't use cacheing on records without unique identifier in field 000");
+               undef $self->{'path'};
+       } else {
+               my $cache_file = "$cache_path/$id";
+               $self->{'cache_file'} = $cache_file;
+               if (-r $cache_file) {
+                       my $ds_ref = retrieve($cache_file);
+                       if ($ds_ref) {
+                               $log->debug("cache hit: $cache_file");
+                               my $ok = 1;
+                               foreach my $f (qw(current_filename headline)) {
+                                       if ($ds_ref->{$f}) {
+                                               $self->{$f} = $ds_ref->{$f};
+                                       } else {
+                                               $ok = 0;
+                                       }
+                               };
+                               if ($ok && $ds_ref->{'ds'}) {
+                                       return @{ $ds_ref->{'ds'} };
+                               } else {
+                                       $log->warn("cache entry $cache_path corrupt. Use rm $cache_path/* to re-create it on next run!");
+                                       undef $self->{'path'};
+                               }
+                       }
+               }
+       }
+
+       return undef;
+}
+
+=head2 save_gs
+
+Store data_structure on disk.
+
+  $db->save_gs(
+       ds => \@ds,
+       current_filename => $self->{'current_filename'},
+       headline => $self->{'headline'},
+  );
+
+B<Totally broken, but fast.>
+
+Depends on filename generated by C<load_gs>.
+
+=cut
+
+sub save_gs {
+       my $self = shift;
+
+       return unless($self->{'path'});
+       return unless (@_);
+
+       my $arg = {@_};
+
+       my $log = $self->_get_logger;
+
+       $log->logdie("save_gs without cache_file -- did you called gs_get before?") unless ($self->{'cache_file'});
+
+       foreach my $e (qw/ds current_filename headline/) {
+               $log->logdie("missing $e") unless $arg->{$e};
+       }
+
+       $log->debug("creating storable cache file ",$self->{'cache_file'});
+
+       store {
+               ds => $arg->{'ds'},
+               current_filename => $arg->{'current_filename'},
+               headline => $arg->{'headline'},
+       }, $self->{'cache_file'};
+
 }
 
 =head1 AUTHOR
index 91a0945..44d2144 100644 (file)
@@ -3,7 +3,6 @@ package WebPAC::Normalize;
 use warnings;
 use strict;
 use Data::Dumper;
-use Storable;
 
 =head1 NAME
 
@@ -79,16 +78,13 @@ Create new normalisation object
                        return length($_);
                }, ...
        },
-       cache_data_structure => './cache/ds/',
+       db => $webpac_db_obj,
        lookup_regex => $lookup->regex,
   );
 
 Parametar C<filter> defines user supplied snippets of perl code which can
 be use with C<filter{...}> notation.
 
-Optional parameter C<cache_data_structure> defines path to directory
-in which cache file for C<data_structure> call will be created.
-
 Recommended parametar C<lookup_regex> is used to enable parsing of lookups
 in structures.
 
@@ -99,51 +95,9 @@ sub new {
         my $self = {@_};
         bless($self, $class);
 
-       $self->setup_cache_dir( $self->{'cache_data_structure'} );
-
        $self ? return $self : return undef;
 }
 
-=head2 setup_cache_dir
-
-Check if specified cache directory exist, and if not, disable caching.
-
- $setup_cache_dir('./cache/ds/');
-
-If you pass false or zero value to this function, it will disable
-cacheing.
-
-=cut
-
-sub setup_cache_dir {
-       my $self = shift;
-
-       my $dir = shift;
-
-       my $log = $self->_get_logger();
-
-       if ($dir) {
-               my $msg;
-               if (! -e $dir) {
-                       $msg = "doesn't exist";
-               } elsif (! -d $dir) {
-                       $msg = "is not directory";
-               } elsif (! -w $dir) {
-                       $msg = "not writable";
-               }
-
-               if ($msg) {
-                       undef $self->{'cache_data_structure'};
-                       $log->warn("cache_data_structure $dir $msg, disabling...");
-               } else {
-                       $log->debug("using cache dir $dir");
-               }
-       } else {
-               $log->debug("disabling cache");
-               undef $self->{'cache_data_structure'};
-       }
-}
-
 
 =head2 data_structure
 
@@ -172,35 +126,9 @@ sub data_structure {
 
        my $cache_file;
 
-       if (my $cache_path = $self->{'cache_data_structure'}) {
-               my $id = $rec->{'000'};
-               $id = $rec->{'000'}->[0] if ($id =~ m/^ARRAY/o);
-               unless (defined($id)) {
-                       $log->warn("Can't use cache_data_structure on records without unique identifier in field 000");
-                       undef $self->{'cache_data_structure'};
-               } else {
-                       $cache_file = "$cache_path/$id";
-                       if (-r $cache_file) {
-                               my $ds_ref = retrieve($cache_file);
-                               if ($ds_ref) {
-                                       $log->debug("cache hit: $cache_file");
-                                       my $ok = 1;
-                                       foreach my $f (qw(current_filename headline)) {
-                                               if ($ds_ref->{$f}) {
-                                                       $self->{$f} = $ds_ref->{$f};
-                                               } else {
-                                                       $ok = 0;
-                                               }
-                                       };
-                                       if ($ok && $ds_ref->{'ds'}) {
-                                               return @{ $ds_ref->{'ds'} };
-                                       } else {
-                                               $log->warn("cache_data_structure $cache_path corrupt. Use rm $cache_path/* to re-create it on next run!");
-                                               undef $self->{'cache_data_structure'};
-                                       }
-                               }
-                       }
-               }
+       if ($self->{'db'}) {
+               my @ds = $self->{'db'}->get_ds($rec);
+               return @ds if (@ds);
        }
 
        undef $self->{'currnet_filename'};
@@ -308,14 +236,11 @@ sub data_structure {
 
        }
 
-       if ($cache_file) {
-               store {
-                       ds => \@ds,
-                       current_filename => $self->{'current_filename'},
-                       headline => $self->{'headline'},
-               }, $cache_file;
-               $log->debug("created storable cache file $cache_file");
-       }
+       $self->{'db'}->put_gs(
+               ds => \@ds,
+               current_filename => $self->{'current_filename'},
+               headline => $self->{'headline'},
+       ) if ($self->{'db'});
 
        return @ds;
 
diff --git a/t/4-db.tt b/t/4-db.tt
new file mode 100755 (executable)
index 0000000..34c376b
--- /dev/null
+++ b/t/4-db.tt
@@ -0,0 +1,77 @@
+#!/usr/bin/perl -w
+
+use Test::More tests => 22;
+use Test::Exception;
+use Cwd qw/abs_path/;
+use blib;
+use File::Temp qw/tempdir/;
+use strict;
+
+BEGIN {
+use_ok( 'WebPAC::DB' );
+}
+
+ok(my $abs_path = abs_path($0), "abs_path");
+$abs_path =~ s#/[^/]*$#/#;
+diag "abs_path: $abs_path";
+
+my $db;
+
+diag "NULL DB";
+
+ok($db = new WebPAC::DB( debug => 1), "new");
+
+ok(! $db->path, "path");
+
+ok(! $db->load_gs(), 'load_gs');
+ok(! $db->load_gs({ '000' => '000' }), 'load_gs');
+
+ok(! $db->save_gs(), "save_gs");
+ok(! $db->save_gs({ '000' => '000' }), 'save_gs');
+
+undef $db;
+
+ok(my $path = tempdir(), "path");
+
+diag "DB path: $path";
+
+ok($db = new WebPAC::DB( path => $path, debug => 1), "new");
+
+cmp_ok($db->{'path'}, 'eq', $path, "path");
+
+ok(! $db->path, "path - disable caching");
+
+cmp_ok($db->{'path'}, '==', undef, "no path");
+
+ok($db->path( $path ), "path($path)");
+
+ok(! $db->load_gs(), 'load_gs');
+ok(! $db->load_gs({ '000' => '000' }), 'load_gs');
+
+ok(! $db->save_gs(), "save_gs");
+
+my @ds = [ {
+       'name' => 'Izvor: ',
+       'tag' => 'Source',
+       'display' => [ 'foo' ]
+       }, {
+       'name' => 'ID',
+       'tag' => 'IDths',
+       'swish' => [ 'bar' ],
+       'lookup_key' => [ 'bar' ]
+       }, {
+       'filename' => [ 'out/thes/001.html' ],
+       'name' => 'filename',
+       'tag' => 'filename'
+       },
+];
+
+ok(! $db->save_gs(), "empty save_gs");
+throws_ok { $db->save_gs( foo => 1 ) } qr/ds/, "save_gs - ds";
+throws_ok { $db->save_gs( ds => \@ds ) } qr/current_filename/, "save_gs - current_filename";
+throws_ok { $db->save_gs( ds => \@ds, 'current_filename' => 'foo' ) } qr/headline/, "save_gs - headline";
+
+ok($db->save_gs( ds => \@ds, 'current_filename' => 'foo', 'headline' => 'bar' ), "save_gs");
+
+undef $db;
+
diff --git a/t/4-output-tt.t b/t/4-output-tt.t
deleted file mode 100755 (executable)
index 318c6b5..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/perl -w
-
-use Test::More tests => 5;
-use Test::Exception;
-use Cwd qw/abs_path/;
-use blib;
-use strict;
-
-BEGIN {
-use_ok( 'WebPAC::Output::TT' );
-}
-
-ok(my $abs_path = abs_path($0), "abs_path");
-$abs_path =~ s#/[^/]*$#/#;
-diag "abs_path: $abs_path";
-
-ok(my $tt = new WebPAC::Output::TT(
-       include_path => "$abs_path../conf/output/tt",
-       debug => 1 ),
-"new");
-
-my @ds = [ {
-       'name' => 'Izvor: ',
-       'tag' => 'Source',
-       'display' => [ 'foo' ]
-       }, {
-       'name' => 'ID',
-       'tag' => 'IDths',
-       'swish' => [ 'bar' ],
-       'lookup_key' => [ 'bar' ]
-       }, {
-       'filename' => [ 'out/thes/001.html' ],
-       'name' => 'filename',
-       'tag' => 'filename'
-       },
-];
-
-throws_ok { $tt->apply( template => 'foo', data => [] ) } qr/error.*foo/, "apply without template";
-
-cmp_ok(my $text = $tt->apply( template => 'text.tt', data => @ds ), '=~', qr/Source.*foo/, "apply");
-
-diag $text;
-
diff --git a/t/5-output-tt.t b/t/5-output-tt.t
new file mode 100755 (executable)
index 0000000..318c6b5
--- /dev/null
@@ -0,0 +1,43 @@
+#!/usr/bin/perl -w
+
+use Test::More tests => 5;
+use Test::Exception;
+use Cwd qw/abs_path/;
+use blib;
+use strict;
+
+BEGIN {
+use_ok( 'WebPAC::Output::TT' );
+}
+
+ok(my $abs_path = abs_path($0), "abs_path");
+$abs_path =~ s#/[^/]*$#/#;
+diag "abs_path: $abs_path";
+
+ok(my $tt = new WebPAC::Output::TT(
+       include_path => "$abs_path../conf/output/tt",
+       debug => 1 ),
+"new");
+
+my @ds = [ {
+       'name' => 'Izvor: ',
+       'tag' => 'Source',
+       'display' => [ 'foo' ]
+       }, {
+       'name' => 'ID',
+       'tag' => 'IDths',
+       'swish' => [ 'bar' ],
+       'lookup_key' => [ 'bar' ]
+       }, {
+       'filename' => [ 'out/thes/001.html' ],
+       'name' => 'filename',
+       'tag' => 'filename'
+       },
+];
+
+throws_ok { $tt->apply( template => 'foo', data => [] ) } qr/error.*foo/, "apply without template";
+
+cmp_ok(my $text = $tt->apply( template => 'text.tt', data => @ds ), '=~', qr/Source.*foo/, "apply");
+
+diag $text;
+