926937f1eae488e0c7fb5903de98ca8b38ac16bf
[MojoFacets.git] / lib / MojoFacets / Import / File.pm
1 package MojoFacets::Import::File;
2
3 use warnings;
4 use strict;
5
6 use base 'Mojo::Base';
7
8 use HTML::TableExtract;
9 use File::Slurp;
10 use Data::Dump qw(dump);
11 use JSON;
12
# Declare the accessors used by data(): "path" and "full_path".
for my $attr_name ( qw( path full_path ) ) {
	__PACKAGE__->attr( $attr_name );
}
15
# Returns, as a plain string, a regex pattern that matches names ending
# in ".js", ".json" or ".txt".
sub ext {
	return '\.(js(on)?|txt)$';
}
17
# Read the file at full_path and return its contents as a hash reference.
#
# The format is picked from the file name (the "path" attribute, or the
# last component of "full_path" when "path" is not set), using the same
# anchored suffixes that ext() matches:
#
#   .js / .json  the contents are decoded with from_json
#   .txt         pipe-separated text; the first line holds the column
#                names and every following record is appended to
#                $data->{items} as { column => [ value ] }.  "^"
#                characters are stripped.  A record with fewer fields
#                than the header is continued on the following
#                physical line(s).
#
# $data->{header} is always set to the column names found, which is an
# empty list for JSON input.
# NOTE(review): for JSON this replaces any "header" key already present
# in the decoded document, and dies if the document is not a JSON object
# -- confirm callers expect that.
#
# Dies when neither path nor full_path is set, and when the file name
# has no supported extension.
sub data {
	my $self = shift;

	my $path = $self->path;
	if ( ! $path ) {
		$path = $self->full_path || die "no path or full_path";
		$path =~ s{^.+/([^/]+)$}{$1};	# keep only the file name
	}

	# we could use Mojo::JSON here, but it's too slow
	my $data = read_file( $self->full_path );
	warn "# data snippet: ", substr($data,0,200);

	my @header;
	if ( $path =~ m/\.js(on)?$/ ) {
		# read_file returned raw bytes; flag them as UTF-8 so that
		# from_json sees characters.  Encode is loaded here because
		# nothing else in this file loads it explicitly.
		require Encode;
		Encode::_utf8_on($data);
		$data = from_json( $data );
	} elsif ( $path =~ m/\.txt$/ ) {
		my @lines = split(/\r?\n/, $data);
		$data = { items => [] };

		my $header_line = shift @lines;
		$header_line = '' unless defined $header_line;	# empty file
		$header_line =~ s/\^//g;
		@header = split(/\|/, $header_line );
		warn "# header ", dump( @header );

		# Loop on the number of remaining lines, not on the truth of
		# the line itself: a blank line or a line holding just "0"
		# must not end the import.
		while ( @lines ) {
			my $line = shift @lines;
			$line =~ s/\^//g;
			my @v = split(/\|/, $line);

			# Nothing to import from a blank line; it would also
			# leave no last field for a continuation to extend.
			next unless @v;

			# Record is shorter than the header: treat the next
			# physical line(s) as its continuation.
			while ( @lines && $#v < $#header ) {
				my $next_line = $lines[0];
				$next_line =~ s/\^//g;
				my @more_v = split(/\|/, $next_line);
				if ( $#v + $#more_v > $#header ) {
					# merging would overflow the header, so
					# the next line starts a new record
					warn "short line: ",dump( @v );
					last;
				}
				shift @lines;
				# first continuation field extends the last
				# field read so far, the rest are new fields
				$v[ $#v ] .= shift @more_v if @more_v;
				push @v, @more_v;

				# sanity check; the guard above should make
				# this impossible
				if ( $#v > $#header ) {
					die "# splice $#header ", dump( @v );
				}
			}

			my $item;
			foreach my $i ( 0 .. $#v ) {
				# columns beyond the header get a generated name
				$item->{ $header[$i] || "f_$i" } = [ $v[$i] ];
			}
			push @{ $data->{items} }, $item;
		}
	} else {
		# $data still holds the raw file contents here, so going on
		# would only fail later with a misleading strict-refs error.
		die "file format unknown $path";
	}

	$data->{header} = [ @header ];

	return $data;

}
80
81 1