use base 'Mojo::Base';
-use File::Slurp;
+use Text::CSV;
use Data::Dump qw(dump);
-use Encode;
-__PACKAGE__->attr('path');
__PACKAGE__->attr('full_path');
+sub ext { '\.[ct]sv$' }; # returns a regex source string matching names that end in .csv or .tsv; presumably used by callers to pick files for this module -- TODO confirm
+
sub data {
my $self = shift;
- my $path = $self->path;
+ my $path = $self->full_path;
- my $data = read_file $self->full_path, { binmode => ':raw' }; # FIXME configurable!
- $data = decode('cp1250', $data);
+ my $encoding = 'utf-8';
+ if ( $path =~ m/\.([\w\-]+).[ct]sv/i ) {
+ $encoding = $1;
+ }
- my @lines = split(/\r?\n/, $data);
- $data = { items => [] };
+ my $data = { items => [] };
+ my @header;
- my $delimiter = qr/;/;
+ open my $fh, "<:encoding($encoding)", $path or die "$path: $!";
+ my $first = <$fh>;
+ my $possible_delimiters;
+ while ( $first =~ s/(\W)// ) {
+ $possible_delimiters->{$1}++;
+ }
+ warn "# possible_delimiters = ",dump($possible_delimiters);
+ seek $fh,0,0; # rewind for Text::CSV
- shift @lines; # FIXME ship non-header line
- my $header_line = shift @lines;
+ my @sep_by_usage = sort { $possible_delimiters->{$b} <=> $possible_delimiters->{$a} } keys %$possible_delimiters;
+ my $sep_char = shift @sep_by_usage;
+ while ( $sep_char =~ m/^\s$/ ) {
+ last if $sep_char eq "\t" && $path =~ m/\.tsv$/i;
+ warn "## skip whitespace separator ",dump($sep_char);
+ $sep_char = shift @sep_by_usage;
+ }
- my @header = split( $delimiter, $header_line );
- warn "# header ",dump( @header );
+ while ( $sep_char =~ m/^\"$/ ) {
+ warn "## skip quote separator ",dump($sep_char);
+ $sep_char = shift @sep_by_usage;
+ }
- while ( my $line = shift @lines ) {
- chomp $line;
- my @v = split($delimiter, $line);
+ if ( $sep_char !~ m/,/ && $possible_delimiters->{','} && $path =~ m/\.csv/i ) {
+ $sep_char = ',';
+ warn "## csv file detected so prefer , as separator";
+ }
+
+ warn "sep_char = [$sep_char] for $path\n";
+
+ my $csv = Text::CSV->new ( { binary => 1, eol => $/, sep_char => $sep_char } )
+ or die "Cannot use CSV: ".Text::CSV->error_diag ();
+
+ while ( my $row = $csv->getline( $fh ) ) {
+ if ( ! @header ) {
+ @header = @$row;
+ $header[0] =~ s/^#// if $path =~ m/\.tsv/i; # remove hash from 1st column
+ next;
+ }
my $item;
- foreach my $i ( 0 .. $#v ) {
- $item->{ $header[$i] || "f_$i" } = [ $v[$i] ];
+ foreach my $i ( 0 .. $#{$row} ) {
+ $item->{ $header[$i] || "f_$i" } = [ $row->[$i] ];
}
push @{ $data->{items} }, $item;
}
+ $csv->eof or $csv->error_diag();
+ close $fh;
+
$data->{header} = [ @header ];
return $data;