added header_first to WebPAC::Input::CSV
[webpac2] / lib / WebPAC / Input / CSV.pm
index 112b9f0..9f10579 100644 (file)
@@ -7,6 +7,7 @@ use WebPAC::Input;
 use base qw/WebPAC::Common/;
 
 use Text::CSV;
+use Encode;
 use Data::Dump qw/dump/;
 
 =head1 NAME
@@ -15,7 +16,7 @@ WebPAC::Input::CSV - support for CSV Export Format
 
 =cut
 
-our $VERSION = '0.01';
+our $VERSION = '0.02';
 
 =head1 FUNCTIONS
 
@@ -25,6 +26,7 @@ Returns new low-level input API object
 
   my $input = new WebPAC::Input::CSV(
        path => '/path/to/records.csv',
+       header_first => 1,
   );
 
 Options:
@@ -39,6 +41,8 @@ path to CSV file
 
 Default encoding of input file is C<utf-8>
 
+C<header_first> will use first line as header names.
+
 =cut
 
 sub new {
@@ -50,12 +54,18 @@ sub new {
 
        my $log = $self->_get_logger();
 
-       open( my $fh, '<:encoding(utf-8)', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!");
+       open( my $fh, '<:raw', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!");
 
        my $csv = Text::CSV->new({ binary => 1 });
 
        $self->{size} = 0;
 
+       if ( $self->{header_first} ) {
+               my $line = $csv->getline( $fh );
+               $self->{header_names} = $line;
+               $self->debug( "header_names = ",dump( $self->{header_names} ) );
+       }
+
        while ( 1 ) {
                my $line = $csv->getline( $fh );
                last if $csv->eof;
@@ -66,7 +76,28 @@ sub new {
                $rec->{'000'} = [ ++$self->{size} ];
 
                my $col = 'A';
-               $rec->{ $col++ } = $_ foreach @$line;
+               my $header_pos = 0;
+
+               foreach my $cell ( @$line ) {
+                       my $str = eval { Encode::decode_utf8( $cell ) };
+                       if ( $@ ) {
+                               if ( $@ =~ m/Cannot decode string with wide characters/ ) {
+                                       $str = $cell;
+                               } else {
+                                       die "ERROR: $@ in line ",dump( $line );
+                               }
+                       } else {
+                               utf8::upgrade( $cell );
+                               $str = $cell;
+                       }
+                               
+                       $rec->{ $col++ } = $str;
+
+                       if ( $self->{header_names} ) {
+                               $rec->{ $self->{header_names}->[$header_pos] } = $str;
+                               $header_pos++;
+                       }
+               }
 
                push @{ $self->{_rec} }, $rec;
 
@@ -86,9 +117,7 @@ Return record with ID C<$mfn> from database
 =cut
 
 sub fetch_rec {
-       my $self = shift;
-
-       my ( $mfn, $filter_coderef ) = @_;
+       my ( $self, $mfn, $filter_coderef ) = @_;
 
        return $self->{_rec}->[$mfn-1];
 }