r1479@llin: dpavlin | 2007-11-02 14:59:05 +0100
authorDobrica Pavlinusic <dpavlin@rot13.org>
Fri, 2 Nov 2007 13:59:10 +0000 (13:59 +0000)
committerDobrica Pavlinusic <dpavlin@rot13.org>
Fri, 2 Nov 2007 13:59:10 +0000 (13:59 +0000)
 begin work on WebPAC::Input::XML

git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@968 07558da8-63fa-0310-ba24-9fe276d99e06

Makefile.PL
conf/log.conf
lib/WebPAC/Input/XML.pm [new file with mode: 0644]
t/2-input-xml.t [new file with mode: 0755]
t/data/xml/1.xml [new file with mode: 0644]
t/data/xml/2.xml [new file with mode: 0644]
t/data/xml/3.xml [new file with mode: 0644]

index a3af66d..42e591b 100644 (file)
@@ -42,6 +42,9 @@ WriteMakefile(
        'JSON' => 0,
        'File::Spec' => 0,
        'Sort::External' => 0,
+       # WebPAC::Input::XML
+       'XML::Simple' => 0,
+       'File::Find' => 0,
     },
     dist                => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },
     clean               => { FILES => 'WebPAC-* pod2html Makefile tags' },
index 65da489..6f98dba 100644 (file)
@@ -40,6 +40,7 @@ log4perl.rootLogger=INFO, LOG, SCREEN
 #log4perl.logger.WebPAC.Input.Gutenberg=DEBUG
 #log4perl.logger.WebPAC.Input.DBF=DEBUG
 #log4perl.logger.WebPAC.Input.ISI=DEBUG
+log4perl.logger.WebPAC.Input.XML=DEBUG
 
 #log4perl.logger.WebPAC.Normalize.=DEBUG
 #log4perl.logger.WebPAC.Normalize.new=DEBUG
diff --git a/lib/WebPAC/Input/XML.pm b/lib/WebPAC/Input/XML.pm
new file mode 100644 (file)
index 0000000..454636f
--- /dev/null
@@ -0,0 +1,126 @@
+package WebPAC::Input::XML;
+
+use warnings;
+use strict;
+
+use WebPAC::Input;
+use base qw/WebPAC::Common/;
+
+use XML::Simple;
+use File::Find;
+
+use Data::Dump qw/dump/;
+
+=head1 NAME
+
+WebPAC::Input::XML - support for reading XML files
+
+=cut
+
+our $VERSION = '0.01';
+
+=head1 FUNCTIONS
+
+=head2 new
+
+Returns new low-level input API object
+
+  my $input = new WebPAC::Input::XML(
+       path => '/path/to/XML/records.txt'
+       filter => sub {
+               my ($l,$field_nr) = @_;
+               # do something with $l which is line of input file
+               return $l;
+       },
+  }
+
+Options:
+
+=over 4
+
+=item path
+
+path to directory with xml files ending in C<.xml>
+
+=back
+
+=cut
+
+sub new {
+       my $class = shift;
+       my $self = {@_};
+       bless($self, $class);
+
+       my $arg = {@_};
+
+       my $log = $self->_get_logger();
+
+       $log->logdie("can't find path ", $arg->{path}, ": $!\n") unless -d $arg->{path};
+
+       $log->info("collecting xml files from ", $arg->{path});
+
+       my @files;
+
+       find({
+               wanted => sub {
+                       my $path = $File::Find::name;
+                       return unless -f $path && $path =~ m/\.xml$/i;
+                       push @files, $path;
+               },
+               follow => 1,
+       }, $arg->{path} );
+
+       $log->info("found ", $#files + 1, " XML files in ", $arg->{path});
+
+       $self->{_files} = \@files;
+
+       $self ? return $self : return undef;
+}
+
+=head2 fetch_rec
+
+Return record with ID C<$mfn> from database
+
+  my $rec = $input->fetch_rec( $mfn, $filter_coderef );
+
+=cut
+
+sub fetch_rec {
+       my $self = shift;
+
+       my ( $mfn, $filter_coderef ) = @_;
+
+       my $path = $self->{_files}->[ $mfn - 1 ] || return;
+
+       warn "### fetch_rec( $mfn ) = $path";
+
+}
+
+
+=head2 size
+
+Return number of records in database
+
+  my $size = $input->size;
+
+=cut
+
+sub size {
+       my $self = shift;
+       return $#{$self->{_files}} + 1;
+}
+
+=head1 AUTHOR
+
+Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
+
+1;
diff --git a/t/2-input-xml.t b/t/2-input-xml.t
new file mode 100755 (executable)
index 0000000..ff2f190
--- /dev/null
@@ -0,0 +1,41 @@
+#!/usr/bin/perl -w
+
+use strict;
+use blib;
+
+use Test::More tests => 12;
+
+BEGIN {
+use_ok( 'WebPAC::Test' );
+use_ok( 'WebPAC::Input' );
+}
+
+my $module = 'WebPAC::Input::XML';
+diag "testing with $module";
+
+ok(my $input = new WebPAC::Input(
+       module => $module,
+       no_progress_bar => 1,
+       %LOG
+), "new");
+
+throws_ok { $input->open( path => '/tmp/does_not_exit', %LOG ) } qr/can't find path/, 'open with non-existant path';
+
+ok(my $db = $input->open(
+       path => "$abs_path/data/xml/"
+), "open");
+ok(my $size = $input->size, "size");
+
+foreach my $mfn ( 1 ... $size ) {
+       my $rec = $input->fetch;
+       if ($mfn <= 10 || $mfn == 20) {
+               ok($rec, "fetch $mfn");
+       } else {
+               ok(! $rec, "empty $mfn");
+       }
+
+       cmp_ok($input->pos, '==', $mfn, "pos $mfn");
+
+       diag "rec: ", dump($rec), "\n" if $debug;
+}
+
diff --git a/t/data/xml/1.xml b/t/data/xml/1.xml
new file mode 100644 (file)
index 0000000..1a0cc28
--- /dev/null
@@ -0,0 +1,5 @@
+<xml>
+       <foo attr="attr1">foo1</foo>
+       <foo attr="attr2">foo2</foo>
+       <bar>bar</bar>
+</xml>
diff --git a/t/data/xml/2.xml b/t/data/xml/2.xml
new file mode 100644 (file)
index 0000000..a29c861
--- /dev/null
@@ -0,0 +1,3 @@
+<xml>
+       <bar>bar but alone</bar>
+</xml>
diff --git a/t/data/xml/3.xml b/t/data/xml/3.xml
new file mode 100644 (file)
index 0000000..73b0de0
--- /dev/null
@@ -0,0 +1,2 @@
+<empty>
+</empty>