1 package MojoFacets::Import::HTMLTable;
8 use HTML::TableExtract;
10 use Data::Dump qw(dump);
# Declares the full_path accessor on this package. The import loop reads it
# as $self->full_path and appends '/*.html', so it is expected to hold a
# directory path.
# NOTE(review): attr() is presumably inherited from the Mojo base class -
# confirm against the package's base declaration.
12 __PACKAGE__->attr('full_path');
# Returns the two-element list ( '\.html$', 'directory' ): a pattern string
# that matches names ending in ".html", paired with the string 'directory'.
# NOTE(review): presumably consumed by the import framework to decide which
# paths this importer handles - confirm against the caller.
14 sub ext { '\.html$' => 'directory' }
# Plain function (not called as a method): callers pass the cells of one
# table row as a list and use the returned list as column names, both as
# the @header array and, joined with '|', to detect repeated or changed
# headers.
# NOTE(review): the exact normalization rules should be confirmed against
# the body.
16 sub __normalize_header {
# Visit every *.html file directly inside the directory named by full_path.
# NOTE(review): glob splits its pattern on whitespace, so a full_path that
# contains spaces would not match as intended - confirm paths never do.
32 foreach my $file ( glob $self->full_path . '/*.html' ) {
33 warn "# file $file\n";
# A fresh HTML::TableExtract parser is created for each file.
34 my $te = HTML::TableExtract->new(
# read_file is called in scalar context and its result handed to the parser.
# NOTE(review): presumably File::Slurp's read_file, returning the whole file
# as one string - confirm the import.
38 $te->parse( scalar read_file $file );
# Walk every table the extractor reports; the warn calls are debug traces.
40 foreach my $ts ($te->tables) {
41 warn "# table coords ", join(',', $ts->coords), "\n";
42 warn "# hrow ", dump( $ts->hrow() ), "\n";
43 my @column_map = $ts->column_map;
44 warn "# column_map ", dump( @column_map );
# Only tables whose column map has exactly nine entries (last index 8) are
# processed; any other table is skipped.
45 next unless $#column_map == 8;
46 foreach my $row ($ts->rows) {
47 warn "# row ", dump( $row ),"\n";
# $stats->{$file} is still false for this file: the branch below takes the
# row's normalized cells as the header.
# NOTE(review): $stats->{$file} is presumably a per-file row counter - confirm.
48 if ( ! $stats->{$file} ) {
50 @header = __normalize_header( @$row );
51 warn "# new header ",dump(@header);
# '|'-joined forms of the current header ($o) and of this row's normalized
# cells ($n), used by the two messages below.
# NOTE(review): judging by the messages, a row equal to the header is skipped
# and a differing one is reported as a header change - confirm the conditions.
54 my $o = join('|', @header);
55 my $n = join('|', __normalize_header(@$row));
57 warn "# same header again in $file skipping\n";
60 warn "# header $n changed from $o in $file";
# Store each cell under the header name at the same column index; every value
# is wrapped in a one-element array reference.
67 foreach my $i ( 0 .. $#$row ) {
68 $item->{ $header[$i] } = [ $row->[$i] ];
71 warn "## item ",$stats->{$file}, ' ', dump($item);
# A shallow copy of @header is stored under the 'header' key.
# NOTE(review): presumably part of the $data structure dumped below - confirm.
81 header => [ @header ],
86 warn "# data ",dump( $data );