extract path from full_path if missing
[MojoFacets.git] / lib / MojoFacets / Import / HTMLTable.pm
index a575635..a662707 100644 (file)
@@ -9,7 +9,18 @@ use HTML::TableExtract;
 use File::Slurp;
 use Data::Dump qw(dump);
 
-__PACKAGE__->attr('dir');
+__PACKAGE__->attr('full_path');
+
+sub ext { '\.html$' => 'directory' } # returns the pair (filename pattern string, 'directory'); presumably tells the import dispatcher this module handles a directory of .html files - TODO confirm against caller
+
+sub __normalize_header {       # returns a NEW list: each cell trimmed, inner whitespace runs collapsed to one space
+       map {
+               my $cell = $_;  # copy first: $_ aliases the caller's row (via @_), so s/// on it would rewrite their data
+               $cell =~ s/^\s+|\s+$//gs;       # strip leading and trailing whitespace
+               $cell =~ s/\s\s+/ /gs;          # squeeze runs of two or more whitespace characters
+               $cell;
+       } @_
+}
 
 sub data {
        my $self = shift;
@@ -18,7 +29,7 @@ sub data {
        my $stats;
        my @header;
 
-       foreach my $file ( glob $self->dir . '/*.html' ) {
+       foreach my $file ( glob $self->full_path . '/*.html' ) {
                warn "# file $file\n";
                my $te = HTML::TableExtract->new(
                        keep_headers => 1,
@@ -36,33 +47,44 @@ sub data {
                                warn "# row ", dump( $row ),"\n";
                                if ( ! $stats->{$file} ) {
                                        if ( ! @header ) {
-                                               @header = @$row;
+                                               @header = __normalize_header( @$row );
                                                warn "# new header ",dump(@header);
+                                               $row = undef;
                                        } else {
                                                my $o = join('|', @header);
-                                               my $n = join('|', @$row);
+                                               my $n = join('|', __normalize_header(@$row));
                                                if ( $o eq $n ) {
                                                        warn "# same header again in $file skipping\n";
+                                                       $row = undef;
                                                } else {
                                                        warn "# header $n changed from $o in $file";
-                                                       push @$items, $row;
-                                                       $stats->{$file}++;
                                                }
                                        }
-                               } else {
-                                       push @$items, $row;
+                               };
+
+                               if ( $row ) {
+                                       my $item;
+                                       foreach my $i ( 0 .. $#$row ) {
+                                               $item->{ $header[$i] } = [ $row->[$i] ];
+                                       }
                                        $stats->{$file}++;
+                                       warn "## item ",$stats->{$file}, ' ', dump($item);
+                                       push @$items, $item;
+
                                }
                        }
                }
 
        }
 
-       return {
+       my $data = {
                header => [ @header ],
+               file_stats => $stats,
                items => $items,
-               stats => $stats,
-       }
+       };
+
+       warn "# data ",dump( $data );
+       return $data;
 }
 
 1