use base qw/WebPAC::Common/;
use Text::CSV;
+use Encode;
use Data::Dump qw/dump/;
=head1 NAME
=cut
-our $VERSION = '0.01';
+our $VERSION = '0.02';
=head1 FUNCTIONS
my $input = new WebPAC::Input::CSV(
path => '/path/to/records.csv',
+ header_first => 1,
);
Options:
Default encoding of input file is C<utf-8>
+C<header_first> will use first line as header names.
+
=cut
sub new {
my $log = $self->_get_logger();
- open( my $fh, '<:encoding(utf-8)', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!");
+ open( my $fh, '<:raw', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!");
my $csv = Text::CSV->new({ binary => 1 });
$self->{size} = 0;
+ if ( $self->{header_first} ) {
+ my $line = $csv->getline( $fh );
+ $self->{header_names} = $line;
+ $self->debug( "header_names = ",dump( $self->{header_names} ) );
+ }
+
while ( 1 ) {
my $line = $csv->getline( $fh );
last if $csv->eof;
$rec->{'000'} = [ ++$self->{size} ];
my $col = 'A';
- $rec->{ $col++ } = $_ foreach @$line;
+ my $header_pos = 0;
+
+ foreach my $cell ( @$line ) {
+ my $str = eval { Encode::decode_utf8( $cell ) };
+ if ( $@ ) {
+ if ( $@ =~ m/Cannot decode string with wide characters/ ) {
+ $str = $cell;
+ } else {
+ die "ERROR: $@ in line ",dump( $line );
+ }
+ } else {
+ utf8::upgrade( $cell );
+ $str = $cell;
+ }
+
+ $rec->{ $col++ } = $str;
+
+ if ( $self->{header_names} ) {
+ $rec->{ $self->{header_names}->[$header_pos] } = $str;
+ $header_pos++;
+ }
+ }
push @{ $self->{_rec} }, $rec;
=cut
sub fetch_rec {
- my $self = shift;
-
- my ( $mfn, $filter_coderef ) = @_;
+ my ( $self, $mfn, $filter_coderef ) = @_;
return $self->{_rec}->[$mfn-1];
}