projects
/
MojoFacets.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
65ae9cf
)
support multi-line "foo\nbar" CSV fields
author
Dobrica Pavlinusic
<dpavlin@rot13.org>
Wed, 12 Jan 2011 15:01:29 +0000
(16:01 +0100)
committer
Dobrica Pavlinusic
<dpavlin@rot13.org>
Wed, 12 Jan 2011 15:01:29 +0000
(16:01 +0100)
lib/MojoFacets/Import/CSV.pm
patch
|
blob
|
history
diff --git a/lib/MojoFacets/Import/CSV.pm b/lib/MojoFacets/Import/CSV.pm
index d879169..6ab3395 100644 (file)
--- a/lib/MojoFacets/Import/CSV.pm
+++ b/lib/MojoFacets/Import/CSV.pm
@@ -20,7 +20,7 @@ sub _split_line {
my $v;
if ( $line =~ s/^"// ) {
$line =~ s/""/_qq_/gc;
my $v;
if ( $line =~ s/^"// ) {
$line =~ s/""/_qq_/gc;
- $line =~ s/^\s*([^"]+)\s*"\Q$delimiter\E?// || die "can't parse $line";
+ $line =~ s/^\s*([^"]*)\s*"\Q$delimiter\E?// || die "can't parse [$line] ",dump(@v);
$v = $1;
} elsif ( $line =~ s/^\s*([^\Q$delimiter\E]+)\s*\Q$delimiter\E?// ) {
$v = $1;
$v = $1;
} elsif ( $line =~ s/^\s*([^\Q$delimiter\E]+)\s*\Q$delimiter\E?// ) {
$v = $1;
@@ -31,7 +31,9 @@ sub _split_line {
}
$v =~ s/^\s*(.+?)\s*$/$1/;
}
$v =~ s/^\s*(.+?)\s*$/$1/;
+ $v = $null if $v eq '_qq_'; # "" field which is not first one
$v =~ s/_qq_/"/g;
$v =~ s/_qq_/"/g;
+ $v =~ s/_LF_/\n/g;
push @v, $v;
}
push @v, $v;
}
@@ -51,6 +53,11 @@ sub data {
warn "decoding ", length($data), " bytes using $encoding\n";
$data = decode($encoding, $data);
warn "decoding ", length($data), " bytes using $encoding\n";
$data = decode($encoding, $data);
+ # multi-line strings
+ while ( $data =~ s/(,"[^"]*)[\n\r]+([^"]*)/$1_LF_$2/sg ) {
+ warn "multi-line quoted CSV data found";
+ }
+
my @lines = split(/\r?\n/, $data);
$data = { items => [] };
my @lines = split(/\r?\n/, $data);
$data = { items => [] };