r1290@llin: dpavlin | 2007-06-21 23:26:25 +0200
authorDobrica Pavlinusic <dpavlin@rot13.org>
Thu, 21 Jun 2007 21:26:19 +0000 (21:26 +0000)
committerDobrica Pavlinusic <dpavlin@rot13.org>
Thu, 21 Jun 2007 21:26:19 +0000 (21:26 +0000)
 experimental (still unfinished) dbf input

git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@869 07558da8-63fa-0310-ba24-9fe276d99e06

lib/WebPAC/Input/DBF.pm [new file with mode: 0644]
t/2-input-dbf.t [new file with mode: 0755]

diff --git a/lib/WebPAC/Input/DBF.pm b/lib/WebPAC/Input/DBF.pm
new file mode 100644 (file)
index 0000000..6563370
--- /dev/null
@@ -0,0 +1,244 @@
+package WebPAC::Input::DBF;
+
+use warnings;
+use strict;
+
+use WebPAC::Input;
+use base qw/WebPAC::Common/;
+use XBase;
+use Data::Dump qw/dump/;
+use Encode qw/encode_utf8/;
+use YAML;
+use File::Slurp;
+
+=head1 NAME
+
+WebPAC::Input::DBF - support for reading DBF tables
+
+=head1 VERSION
+
+Version 0.01
+
+=cut
+
+our $VERSION = '0.01';
+
+=head1 SYNOPSIS
+
+Read data from DBF tables (do you remember Clipper applications?) and create
+pseudo-MARC records from them.
+
+ my $ll_db = new WebPAC::Input::DBF(
+       path => '/path/to/database.dbf',
+ );
+
+=head1 FUNCTIONS
+
+=head2 new
+
+Returns new low-level input API object
+
+  my $ll_db = new WebPAC::Input::DBF(
+       path => '/path/to/database.dbf'
+       filter => sub {
+               my ($l,$field_nr) = @_;
+               # do something with $l which is line of input file
+               return $l;
+       },
+  }
+
+Options:
+
+=over 4
+
+=item path
+
+path to DBF file
+
+=back
+
+=cut
+
+sub new {
+       my $class = shift;
+       my $self = {@_};
+       bless($self, $class);
+
+       my $arg = {@_};
+
+       my $log = $self->_get_logger();
+
+       my $db = XBase->new( $arg->{path} ) || $log->logdie("can't open ", $arg->{path}, ": $!");
+
+       my $size = $db->last_record;
+
+       $log->info("opening DBF database '$arg->{path}' with $size records");
+
+       my $mapping = $arg->{input_config}->{mapping};
+
+       if ( ! $mapping ) {
+               $log->debug("didn't found any mapping file in configuration", sub { dump( $arg->{input_config} ) });
+
+               foreach my $field ( $db->field_names ) {
+                       push @$mapping, { $field => { '900' => 'x' } };
+               }
+
+               my $mapping_path = $arg->{path};
+               $mapping_path =~ s!^.+/([^/]+)\.dbf!$1.yml!;
+
+               $log->logdie("mapping file $mapping_path allready exists, aborting.") if ( -e $mapping_path );
+
+               write_file( $mapping_path, Dump( { mapping => $mapping } ) ) ||
+                       $log->logdie("can't write template file for mapping_path $mapping_path: $!");
+
+               $log->logdie("template file for mapping_path created as $mapping_path");
+
+       } else {
+               $log->debug("using mapping ", sub { dump($mapping) });
+       }
+
+       foreach my $mfn ( 1 .. $size ) {
+
+               my $row = $db->get_record_as_hash( $mfn );
+
+               $log->debug("dbf row = ", sub { dump( $row ) });
+
+               my $record = {
+                       '001' => [ $mfn ],
+               };
+
+               $self->{_rows}->{ $mfn } = $record;
+               $log->debug("created row $mfn ", dump( $record ));
+       }
+
+       $self->{size} = $size;
+
+       $self ? return $self : return undef;
+}
+
+=head2 fetch_rec
+
+Return record with ID C<$mfn> from database
+
+  my $rec = $ll_db->fetch_rec( $mfn, $filter_coderef );
+
+=cut
+
+sub fetch_rec {
+       my $self = shift;
+
+       my ($mfn, $filter_coderef) = @_;
+
+       my $rec = $self->_to_hash(
+               mfn => $mfn,
+               row => $self->{_rows}->{$mfn},
+               hash_filter => $filter_coderef,
+       );
+
+       my $log = $self->_get_logger();
+       $log->debug("fetch_rec($mfn) = ", dump($rec));
+
+       return $rec;
+}
+
+=head2 size
+
+Return number of records in database
+
+  my $size = $ll_db->size;
+
+=cut
+
+sub size {
+       my $self = shift;
+       return $self->{size};
+}
+
+=head2 _to_hash
+
+Return hash from row. Taken from L<Biblio::Isis>
+
+  my $rec = $ll_db->_to_hash(
+       mfn => $mfn;
+       $row
+  );
+
+=cut
+
+sub _to_hash {
+       my $self = shift;
+
+       my $arg = {@_};
+
+       my $log = $self->_get_logger();
+
+       my $hash_filter = $arg->{hash_filter};
+       my $mfn = $arg->{mfn} || $log->logconfess("need mfn in arguments");
+       my $row = $arg->{row} || $log->logconfess("need row in arguments");
+
+       # init record to include MFN as field 000
+       my $rec = { '000' => [ $mfn ] };
+
+       foreach my $f_nr (keys %{$row}) {
+               foreach my $l (@{$row->{$f_nr}}) {
+
+                       # filter output
+                       $l = $hash_filter->($l, $f_nr) if ($hash_filter);
+                       next unless defined($l);
+
+                       my $val;
+                       my $r_sf;       # repeatable subfields in this record
+
+                       # has subfields?
+                       if ($l =~ m/\^/) {
+                               foreach my $t (split(/\^/,$l)) {
+                                       next if (! $t);
+                                       my ($sf,$v) = (substr($t,0,1), substr($t,1));
+                                       next unless (defined($v) && $v ne '');
+
+                                       if (ref( $val->{$sf} ) eq 'ARRAY') {
+
+                                               push @{ $val->{$sf} }, $v;
+
+                                               # record repeatable subfield it it's offset
+                                               push @{ $val->{subfields} }, ( $sf, $#{ $val->{$sf} } );
+                                               $r_sf->{$sf}++;
+
+                                       } elsif (defined( $val->{$sf} )) {
+
+                                               # convert scalar field to array
+                                               $val->{$sf} = [ $val->{$sf}, $v ];
+
+                                               push @{ $val->{subfields} }, ( $sf, 1 );
+                                               $r_sf->{$sf}++;
+
+                                       } else {
+                                               $val->{$sf} = $v;
+                                               push @{ $val->{subfields} }, ( $sf, 0 );
+                                       }
+                               }
+                       } else {
+                               $val = $l;
+                       }
+
+                       push @{$rec->{$f_nr}}, $val;
+               }
+       }
+
+       return $rec;
+}
+
+=head1 AUTHOR
+
+Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
+
+1; # End of WebPAC::Input::DBF
diff --git a/t/2-input-dbf.t b/t/2-input-dbf.t
new file mode 100755 (executable)
index 0000000..25728de
--- /dev/null
@@ -0,0 +1,58 @@
+#!/usr/bin/perl -w
+
+use Test::More tests => 1179;
+use Test::Exception;
+use Cwd qw/abs_path/;
+use blib;
+use strict;
+
+use Data::Dump qw/dump/;
+
+my $debug = shift @ARGV;
+
+BEGIN {
+use_ok( 'WebPAC::Input' );
+}
+
+ok(my $abs_path = abs_path($0), "abs_path");
+$abs_path =~ s#/[^/]*$#/#;
+
+my $module = 'WebPAC::Input::DBF';
+diag "testing with $module", $debug ? ' with debug' : '';
+
+ok(my $input = new WebPAC::Input(
+       module => $module,
+       no_log => $debug ? 0 : 1,
+       no_progress_bar => 1,
+       debug => $debug,
+), "new");
+
+ok(my $db = $input->open(
+       path => "$abs_path/data/cas2000.dbf",
+       input_config => {
+               xmapping => [
+                       { 'foobar' => [ '200', 'a' ] },
+               ],
+       },
+), "open");
+ok(my $size = $input->size, "size");
+
+diag "size: $size" if ($debug);
+
+ok(defined($input->{ll_db}->{_rows}), 'have ll_db->rows');
+
+foreach my $mfn ( 1 ... $size ) {
+
+       ok(defined($input->{ll_db}->{_rows}->{$mfn}), "have ll_db->_rows->$mfn");
+
+       diag "row: ", dump( $input->{ll_db}->{_rows}->{$mfn} ) if ($debug);
+
+       my $rec = $input->fetch;
+
+       ok($rec, "fetch $mfn");
+
+       cmp_ok($input->pos, '==', $mfn, "pos $mfn");
+
+       diag "rec: ", dump($rec), "\n" if ($debug);
+}
+