1 package MojoFacets::Import::File;
8 use HTML::TableExtract;
10 use Data::Dump qw(dump);
# Object attributes declared through attr() (presumably the Mojo::Base
# accessor generator -- the base class is not visible in this excerpt).
#   path      - read first when working out which file name to inspect
#   full_path - location the file contents are actually read from
13 __PACKAGE__->attr('path');
14 __PACKAGE__->attr('full_path');
# Returns the source text of the regular expression that recognises the
# file extensions handled by this importer: .js, .json and .txt.
# The value is a plain string, not a compiled qr// object.
sub ext {
    my $extension_pattern = '\.(js(on)?|txt)$';
    return $extension_pattern;
}
# Body of the routine that loads the file and converts it into a data
# structure. NOTE(review): the enclosing `sub` line, several declarations
# (@header, $item) and the closing braces are not part of this excerpt, so
# comments below describe only the statements that are visible.
#
# Two input formats are recognised by file name:
#   *.js / *.json - decoded with from_json
#   *.txt         - pipe-separated text whose first line names the columns
21 my $path = $self->path;
# Fall back to full_path; `||` applies to the full_path value, so this dies
# when full_path is false. Presumably guarded by a condition on an omitted
# line -- TODO confirm.
23 $path = $self->full_path || die "no path or full_path";
# Reduce $path to its last component (everything after the final '/').
24 $path =~ s{^.+/([^/]+)$}{$1};
27 # we could use Mojo::JSON here, but it's too slow
28 # $data = from_json read_file $path;
# Contents always come from full_path, even though $path (now only a file
# name) is what the format checks below look at.
29 my $data = read_file $self->full_path;
# Debug output: at most the first 200 characters of the raw contents.
30 warn "# data snippet: ", substr($data,0,200);
# NOTE(review): this pattern is not anchored, so ".js" anywhere in the file
# name selects the JSON branch (e.g. "a.json.txt").
32 if ( $path =~ m/\.js(on)?/ ) {
# Flags the raw string as UTF-8 before decoding; Encode documents
# _utf8_on as an internal function that performs no validation.
33 Encode::_utf8_on($data);
34 $data = from_json $data;
# NOTE(review): also unanchored -- ".txt" may appear anywhere in the name.
35 } elsif ( $path =~ m/\.txt/ ) {
# Split into lines on LF or CRLF; $data is then replaced by the result
# structure that rows get appended to.
36 my @lines = split(/\r?\n/, $data);
37 $data = { items => [] };
# First line is the header. Every '^' is removed from it; $multiline
# receives the substitution result (number of '^' removed, false if none).
# Its use is not visible in this excerpt.
39 my $header_line = shift @lines;
40 my $multiline = $header_line =~ s/\^//g;
# @header is not declared in the visible lines -- presumably `my @header`
# sits on an omitted line outside this branch, since it is read again at
# the end.
41 @header = split(/\|/, $header_line );
42 warn "# header ", dump( @header );
# NOTE(review): the loop condition tests the truth of $line, so an empty
# line (or a line containing only "0") ends processing early.
43 while ( my $line = shift @lines ) {
46 my @v = split(/\|/, $line);
# Row has fewer fields than the header and more input remains: treat what
# follows as a continuation of this row. NOTE(review): nothing in the
# visible lines assigns a new value to $line before the next split --
# presumably omitted lines 48-50 do; confirm.
47 while ( @lines && $#v < $#header ) {
51 my @more_v = split(/\|/, $line);
# Warn when joining the continuation would give more fields than the
# header has.
52 if ( $#v + $#more_v > $#header ) {
53 warn "short line: ",dump( @v );
# The first continuation field is appended to the current last field;
# any remaining continuation fields become new fields.
57 $v[ $#v ] .= shift @more_v if @more_v;
58 push @v, @more_v if @more_v;
# More fields than header columns.
# NOTE(review): as shown, the die precedes the splice, which would make the
# splice unreachable; the splice length $#header would also keep one
# element fewer than the header has. Confirm intent.
60 if ( $#v > $#header ) {
61 die "# splice $#header ", dump( @v );
62 @v = splice @v, 0, $#header;
# Build one item: each value is wrapped in a single-element array ref and
# keyed by its header name, or by "f_<index>" when that header is false.
# $item is not declared in the visible lines.
66 foreach my $i ( 0 .. $#v ) {
67 $item->{ $header[$i] || "f_$i" } = [ $v[$i] ];
69 push @{ $data->{items} }, $item;
# Presumably the final `else` branch: neither pattern matched.
72 warn "file format unknown $path";
# Store a copy of the header list alongside the items.
75 $data->{header} = [ @header ];