added header_first and ability to address cells by names from header

author Dobrica Pavlinusic <dpavlin@rot13.org>

Wed, 24 Jul 2019 11:01:30 +0000 (13:01 +0200)

committer Dobrica Pavlinusic <dpavlin@rot13.org>

Wed, 24 Jul 2019 11:01:30 +0000 (13:01 +0200)
author Dobrica Pavlinusic <dpavlin@rot13.org>
Wed, 24 Jul 2019 11:01:30 +0000 (13:01 +0200)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Wed, 24 Jul 2019 11:01:30 +0000 (13:01 +0200)
diff --git a/lib/WebPAC/Input/TSV.pm b/lib/WebPAC/Input/TSV.pm

index c4475a2..f1d911f 100644 (file)
--- a/lib/WebPAC/Input/TSV.pm
+++ b/lib/WebPAC/Input/TSV.pm
@@ -21,8 +21,11 @@ WebPAC::Input::TSV - tab separated values
  
    my $input = new WebPAC::Input::TSV(
         path => '/path/to/records.tsv',
+       header_first => 1,
    );
  
+C<header_first> will use first line as header names.
+
  =back
  
  Default encoding of input file is C<utf-8>
@@ -42,6 +45,13 @@ sub new {
  
         $self->{size} = 0;
  
+       if ( $self->{header_first} ) {
+               my $header = <$fh>;
+               chomp $header;
+               $self->{header_names} = [ split(/\t/,$header) ];
+               $self->debug( "header_names = ",dump( $self->{header_names} ) );
+       }
+
         while ( my $line = <$fh> ) {
                 chomp $line;
  
@@ -49,9 +59,16 @@ sub new {
                 $rec->{'000'} = [ ++$self->{size} ];
  
                 my $col = 'A';
+               my $header_pos = 0;
                 foreach my $v ( split(/\t/,$line) ) {
                         $rec->{ $col } = Encode::decode_utf8( $v ) if $v ne '\N';
                         $col++;
+
+                       if ( $self->{header_names} ) {
+                               $rec->{ $self->{header_names}->[$header_pos] } =
+                                       Encode::decode_utf8( $v ) if $v ne '\N';
+                               $header_pos++;
+                       }
                 }
  
                 push @{ $self->{_rec} }, $rec;
diff --git a/t/2-input-tsv.t b/t/2-input-tsv.t

index 71c684a..112172a 100755 (executable)
--- a/t/2-input-tsv.t
+++ b/t/2-input-tsv.t
@@ -3,7 +3,7 @@
  use strict;
  use lib 'lib';
  
-use Test::More tests => 63;
+use Test::More tests => 84;
  
  BEGIN {
  use_ok( 'WebPAC::Test' );
@@ -39,3 +39,17 @@ foreach my $mfn ( 1 ... $size ) {
         diag "rec: ", dump($rec), "\n" if $debug;
  }
  
+ok(my $db = $input->open(
+       path => "$abs_path/data/header.tsv",
+       header_first => 1,
+), "open header_first");
+ok(my $size = $input->size, "size");
+cmp_ok( $size, '==', 9, 'size one leas because of header_first' );
+
+foreach my $mfn ( 1 ... $size ) {
+       my $rec = $input->fetch;
+       ok($rec, "fetch $mfn");
+
+       ok( $rec->{'publication_title'}, 'has publication_title' );
+       diag "rec: ", dump($rec), "\n" if $debug;
+}
author	Dobrica Pavlinusic <dpavlin@rot13.org>
	Wed, 24 Jul 2019 11:01:30 +0000 (13:01 +0200)
committer	Dobrica Pavlinusic <dpavlin@rot13.org>
	Wed, 24 Jul 2019 11:01:30 +0000 (13:01 +0200)
lib/WebPAC/Input/TSV.pm		patch \| blob \| history
t/2-input-tsv.t		patch \| blob \| history