my $null = ''; # FIXME undef?

# Split one CSV line into a list of field values.
#
# Arguments:
#   $delimiter - literal field separator (e.g. ',' or ';'); it is quoted
#                with quotemeta, so it is never interpreted as a regex
#   $line      - one line of text, without its line terminator
#
# Returns the list of fields, in order:
#   * a field wrapped in double quotes may contain the delimiter, may be
#     empty, and may contain doubled quotes ("") which are unescaped to a
#     single quote
#   * an empty unquoted field is returned as $null
#   * an empty field after the final delimiter is not emitted
#     ("a,b," gives two fields)
#   * an undefined $line gives an empty list
#
# Dies with "can't parse [...]" if the remaining text matches none of the
# recognised field forms.
sub _split_line {
    my ( $delimiter, $line ) = @_;

    my @v;
    return @v unless defined $line;

    # quote the delimiter once, outside the loop, so it always matches literally
    my $sep = quotemeta $delimiter;

    # test length, not truth: a remaining "0" is still a field to extract
    while ( length $line ) {
        if ( $line =~ s/^"([^"]*(?:""[^"]*)*)"(?:$sep)?// ) {
            # quoted field; copy the capture before the inner substitution
            # resets it, then turn doubled quotes back into single quotes
            ( my $field = $1 ) =~ s/""/"/g;
            push @v, $field;
        } elsif ( $line =~ s/^([^$sep]+)(?:$sep)?// ) {
            # plain field running up to the next delimiter or end of line
            push @v, $1;
        } elsif ( $line =~ s/^$sep// ) {
            # delimiter with nothing in front of it: empty field
            push @v, $null;
        } else {
            die "can't parse [$line]\n";
        }
    }

    return @v;
}
$#v ) { $item->{ $header[$i] || "f_$i" } = [ $v[$i] ];