added WebPAC::Input::CSV
author Dobrica Pavlinusic <dpavlin@rot13.org>
Tue, 19 May 2009 14:46:12 +0000 (14:46 +0000)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Tue, 19 May 2009 14:46:12 +0000 (14:46 +0000)
git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1186 07558da8-63fa-0310-ba24-9fe276d99e06

Makefile.PL
lib/WebPAC/Input/CSV.pm [new file with mode: 0644]
t/2-input-csv.t [new file with mode: 0755]
t/data/records-utf8.csv [new file with mode: 0644]

index 4831746..3aae43c 100644 (file)
@@ -50,6 +50,10 @@ features(
                -default => 1,
                'Spreadsheet::ParseExcel',
        ],
+       'WebPAC::Input::CSV' => [
+               -default => 1,
+               'Text::CSV',
+       ],
        'WebPAC::Input::DBF' => [
                -default => 0,
                'XBase',
diff --git a/lib/WebPAC/Input/CSV.pm b/lib/WebPAC/Input/CSV.pm
new file mode 100644 (file)
index 0000000..112b9f0
--- /dev/null
@@ -0,0 +1,123 @@
+package WebPAC::Input::CSV;
+
+use warnings;
+use strict;
+
+use WebPAC::Input;
+use base qw/WebPAC::Common/;
+
+use Text::CSV;
+use Data::Dump qw/dump/;
+
+=head1 NAME
+
+WebPAC::Input::CSV - support for CSV Export Format
+
+=cut
+
+our $VERSION = '0.01';
+
+=head1 FUNCTIONS
+
+=head2 new
+
+Returns new low-level input API object
+
+  my $input = new WebPAC::Input::CSV(
+       path => '/path/to/records.csv',
+  );
+
+Options:
+
+=over 4
+
+=item path
+
+path to CSV file
+
+=back
+
+Input file is always decoded as C<utf-8>; there is no option to select another encoding.
+
+=cut
+
+sub new {
+       my $class = shift;
+       my $self = {@_};
+       bless($self, $class);
+
+       my $log = $self->_get_logger();
+       my $path = $self->{path};
+       open( my $fh, '<:encoding(utf-8)', $path ) || $log->logconfess("can't open $path: $!");
+
+       my $csv = Text::CSV->new({ binary => 1 });
+
+       $self->{size} = 0;
+
+       # loop on the row itself, not on eof: a last row without trailing
+       # newline is returned together with eof set and must not be dropped
+       while ( my $line = $csv->getline( $fh ) ) {
+               # record number (MFN), counted from 1, is kept in field 000
+               my $rec = { '000' => [ ++$self->{size} ] };
+
+               # columns are stored under keys A, B, C, ... (string increment)
+               my $col = 'A';
+               $rec->{ $col++ } = $_ foreach @$line;
+
+               push @{ $self->{_rec} }, $rec;
+       }
+
+       # 2012 is Text::CSV's code for plain end of input; eof alone is not
+       # enough because it is also set when the last line fails to parse
+       my ( $code, $msg ) = $csv->error_diag;
+       $log->logdie("can't parse CSV file $path: $code $msg") if ! $csv->eof || ( $code && $code != 2012 );
+       close( $fh );
+
+       $log->debug("loaded ", $self->size, " records");
+       return $self;
+}
+
+=head2 fetch_rec
+
+Return record with ID C<$mfn> from database
+
+  my $rec = $input->fetch_rec( $mfn, $filter_coderef );
+
+=cut
+
+sub fetch_rec {
+       # $filter_coderef is accepted but never applied by this module
+       my ( $self, $mfn, $filter_coderef ) = @_;
+       # MFN counts from 1; 0, negative or undef would index from the end
+       return undef unless defined $mfn && $mfn >= 1;
+       return $self->{_rec}->[ $mfn - 1 ];
+}
+
+
+=head2 size
+
+Return number of records in database
+
+  my $size = $input->size;
+
+=cut
+
+sub size {
+       # counter that new() increments once for every parsed row
+       return $_[0]->{size};
+}
+
+=head1 AUTHOR
+
+Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2009 Dobrica Pavlinusic, All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
+
+1; # End of WebPAC::Input::CSV
diff --git a/t/2-input-csv.t b/t/2-input-csv.t
new file mode 100755 (executable)
index 0000000..096a207
--- /dev/null
@@ -0,0 +1,35 @@
+#!/usr/bin/perl -w
+
+use strict;
+use blib;
+# plan: 2 use_ok + new + open + size + size ok, then 3 checks for each of 7 records
+use Test::More tests => 27;
+
+BEGIN {
+use_ok( 'WebPAC::Test' );
+use_ok( 'WebPAC::Input' );
+}
+
+my $module = 'WebPAC::Input::CSV';
+diag "testing with $module";
+# NOTE(review): %LOG, $abs_path, $debug, dump presumably exported by WebPAC::Test - confirm
+ok(my $input = WebPAC::Input->new(
+       module => $module,
+       no_progress_bar => 1,
+       %LOG
+), "new");
+
+ok(my $db = $input->open(
+       path => "$abs_path/data/records-utf8.csv"
+), "open");
+ok(my $size = $input->size, "size");
+cmp_ok( $size, '==', 7, 'size ok' );
+
+foreach my $mfn ( 1 .. $size ) {
+       my $rec = $input->fetch;
+       ok($rec, "fetch $mfn");
+       cmp_ok($rec->{'000'}->[0], '==', $mfn, 'has mfn');
+       cmp_ok($input->pos, '==', $mfn, "pos $mfn");
+       diag "rec: ", dump($rec), "\n" if $debug;
+}
+
diff --git a/t/data/records-utf8.csv b/t/data/records-utf8.csv
new file mode 100644 (file)
index 0000000..a8728c3
--- /dev/null
@@ -0,0 +1,7 @@
+FFSF,1963,50109,,"Slovo a smysl = Word & Sense : Časopis pro mezioborová bohemistická studia = A Journal of Interdisciplinary Theory and Criticism in Czech Studies",1214-7915,"Roč.3.(2006)- čislo 5/9191"
+FFSF,2049,50110,,"Opera slavica : Slavistické rozhledy",1211-7676,"Roč. 14(2004)- 1,2,3,4,  15(2005)-1,2,3,4,  16(2006)-1,2,3,4,"
+FFSF,2075,50111,,"Časopis musea Království českého",,"Roč.1,2 ; 4 - 43 ; 45 - 50 ; 52"
+FFSF,2205,50112,,"Akzente : [Zeitschrift für literatur]",0002-3957,"Jhr.54(2007) - Heft 4/9421"
+FFSF,2221,50113,RARITETI,"Luna : Belletristisches Beiblatt der Agramer Zeitung",1333-5820,"Knj. 1838 - 1857"
+,,50116,,"Godišnjak Ogranka Matice hrvatske Beli Manastir",1845-044X,
+FFAN,,50304,,Scrutiny,,