1 package MojoFacets::Import::HTMLTable;
8 use HTML::TableExtract;
10 use Data::Dump qw(dump);
12 __PACKAGE__->attr('dir');
14 sub __normalize_header {
30 foreach my $file ( glob $self->dir . '/*.html' ) {
31 warn "# file $file\n";
32 my $te = HTML::TableExtract->new(
36 $te->parse( scalar read_file $file );
38 foreach my $ts ($te->tables) {
39 warn "# table coords ", join(',', $ts->coords), "\n";
40 warn "# hrow ", dump( $ts->hrow() ), "\n";
41 my @column_map = $ts->column_map;
42 warn "# column_map ", dump( @column_map );
43 next unless $#column_map == 8;
44 foreach my $row ($ts->rows) {
45 warn "# row ", dump( $row ),"\n";
46 if ( ! $stats->{$file} ) {
48 @header = __normalize_header( @$row );
49 warn "# new header ",dump(@header);
52 my $o = join('|', @header);
53 my $n = join('|', __normalize_header(@$row));
55 warn "# same header again in $file skipping\n";
58 warn "# header $n changed from $o in $file";
65 foreach my $i ( 0 .. $#$row ) {
66 $item->{ $header[$i] } = [ $row->[$i] ];
69 warn "## item ",$stats->{$file}, ' ', dump($item);
79 header => [ @header ],
84 warn "# data ",dump( $data );