# CSV importer package. Only part of the file is visible in this excerpt
# (the embedded line numbers skip), so these notes cover the shown lines only.
1 package MojoFacets::Import::CSV;
# dump() is used for debug output in the warn/die messages of this package.
9 use Data::Dump qw(dump);
# Registers `full_path` through the class's attr() method; the loader reads it
# back as $self->full_path. (Presumably a Mojo::Base-style accessor -- the
# base class is not visible here, confirm.)
12 __PACKAGE__->attr('full_path');
# Value substituted for a field whose whole content is the `_qq_` placeholder,
# i.e. a field that consisted only of a doubled quote ("").
14 my $null = ''; # FIXME undef?
# Fragment of the line splitter (invoked elsewhere in this file as
# _split_line( $delimiter, $line )). The sub header, the surrounding loop and
# the declarations of @v / $v are outside this excerpt.
# Arguments: the field delimiter and one raw line; fields are consumed from
# the front of $line by the substitutions below.
17 my ( $delimiter, $line ) = @_;
# Quoted field: the test itself strips the opening quote.
21 if ( $line =~ s/^"// ) {
# Replace every doubled quote in the rest of the line with the _qq_
# placeholder so that [^"]* on the next line can run up to the closing quote.
# NOTE(review): /c is documented for m//g only; on s/// it appears to have no
# effect -- confirm and drop.
# NOTE(review): no visible line turns _qq_ back into a quote inside longer
# values; only a value that is exactly '_qq_' is mapped to $null -- confirm.
22 $line =~ s/""/_qq_/gc;
# Consume the field text, the closing quote and an optional delimiter;
# die with the remaining line and the fields collected so far otherwise.
23 $line =~ s/^\s*([^"]*)\s*"\Q$delimiter\E?// || die "can't parse [$line] ",dump(@v);
# Unquoted, non-empty field followed by an optional delimiter.
25 } elsif ( $line =~ s/^\s*([^\Q$delimiter\E]+)\s*\Q$delimiter\E?// ) {
# Empty field: nothing but (optional whitespace and) a delimiter.
27 } elsif ( $line =~ s/^\s*\Q$delimiter\E// ) {
# None of the patterns matched (presumably the final else branch -- the
# branch keyword is not visible here).
30 die "can't parse [$line]\n";
# Trim leading and trailing whitespace from the extracted value.
33 $v =~ s/^\s*(.+?)\s*$/$1/;
34 $v = $null if $v eq '_qq_'; # "" field which is not first one
# Fragment of the method that reads the CSV file at $self->full_path and
# builds { items => [...], header => [...] }. The sub header and the
# declarations of $self, $delimiter and $item are outside this excerpt.
46 my $path = $self->full_path;
# Slurp the whole file as raw bytes; decoding is done explicitly further down.
# (read_file is presumably File::Slurp's -- the import is not visible here.)
48 my $data = read_file $path, { binmode => ':raw' }; # FIXME configurable!
# Encoding used unless the branch below changes it.
49 my $encoding = 'utf-8';
# Paths of the form <name>.<word>.csv enter this branch; its body is not
# visible (presumably it takes $encoding from the captured word -- confirm).
# NOTE(review): the "." before "csv" is unescaped and the pattern is not
# anchored at the end of the path.
50 if ( $path =~ m/\.(\w+).csv/i ) {
53 warn "decoding ", length($data), " bytes using $encoding\n";
# Turn the raw bytes into characters.
# (decode() is presumably Encode's -- the import is not visible here.)
54 $data = decode($encoding, $data);
# Join quoted fields that contain line breaks: inside a field opened by ,"
# the break is replaced with the _LF_ marker; repeat until nothing changes.
# NOTE(review): the pattern requires a preceding comma, so a multi-line
# quoted field in the first column or with another delimiter is not joined.
57 while ( $data =~ s/(,"[^"]*)[\n\r]+([^"]*)/$1_LF_$2/sg ) {
58 warn "multi-line quoted CSV data found";
# One element per line, accepting LF and CRLF line endings.
61 my @lines = split(/\r?\n/, $data);
# $data is reused from here on as the result structure.
62 $data = { items => [] };
# Delimiter detection, entered only when the first line contains no comma.
66 if ( $lines[0] !~ m/,/ ) {
# First line contains a semicolon (branch body not visible in this excerpt).
67 if ( $lines[0] =~ m/;/ ) {
# Otherwise, if the second line contains a semicolon, drop the first line.
# NOTE(review): the `!~ /;/` test is always true in this elsif, and
# $lines[1] may be undef for a one-line file -- confirm.
69 } elsif ( $lines[0] !~ /;/ && $lines[1] =~ /;/ ) {
70 shift @lines; # FIXME skip non-header line
# Debug summary: path, number of remaining lines, encoding and delimiter.
75 warn "$path ", $#lines + 1, " lines encoding: $encoding delimiter:",dump($delimiter);
# The first remaining line is taken as the header row.
77 my $header_line = shift @lines;
79 my @header = _split_line( $delimiter, $header_line );
80 warn "# header ",dump( @header );
# Process the remaining lines as data rows.
# NOTE(review): the condition is a plain truth test, so the loop ends at the
# first line that is empty or "0" and later rows are not imported -- confirm
# whether that is intended.
82 while ( my $line = shift @lines ) {
84 my @v = _split_line($delimiter, $line);
# Store each value as a one-element array ref, keyed by the header name of its
# column, or by f_<index> when that header is missing or false.
86 foreach my $i ( 0 .. $#v ) {
87 $item->{ $header[$i] || "f_$i" } = [ $v[$i] ];
89 push @{ $data->{items} }, $item;
# Keep a copy of the header names alongside the items.
92 $data->{header} = [ @header ];