support multi-line "foo\nbar" CSV fields
author Dobrica Pavlinusic <dpavlin@rot13.org>
Wed, 12 Jan 2011 15:01:29 +0000 (16:01 +0100)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Wed, 12 Jan 2011 15:01:29 +0000 (16:01 +0100)
lib/MojoFacets/Import/CSV.pm

index d879169..6ab3395 100644 (file)
@@ -20,7 +20,7 @@ sub _split_line {
                my $v;
                if ( $line =~ s/^"// ) {
                        $line =~ s/""/_qq_/gc;
-                       $line =~ s/^\s*([^"]+)\s*"\Q$delimiter\E?// || die "can't parse $line";
+                       $line =~ s/^\s*([^"]*)\s*"\Q$delimiter\E?// || die "can't parse [$line] ",dump(@v);
                        $v = $1;
                } elsif ( $line =~ s/^\s*([^\Q$delimiter\E]+)\s*\Q$delimiter\E?// ) {
                        $v = $1;
@@ -31,7 +31,9 @@ sub _split_line {
                }
 
                $v =~ s/^\s*(.+?)\s*$/$1/;
+               $v = $null if $v eq '_qq_'; # "" field which is not first one
                $v =~ s/_qq_/"/g;
+               $v =~ s/_LF_/\n/g;
                push @v, $v;
        }
 
@@ -51,6 +53,11 @@ sub data {
        warn "decoding ", length($data), " bytes using $encoding\n";
        $data = decode($encoding, $data);
 
+       # multi-line strings
+       while ( $data =~ s/(,"[^"]*)[\n\r]+([^"]*)/$1_LF_$2/sg ) {
+                warn "multi-line quoted CSV data found";
+       }
+
        my @lines = split(/\r?\n/, $data);
        $data = { items => [] };