mark all columns used in code
[MojoFacets.git] / lib / MojoFacets / Data.pm
index 86a5286..32f532e 100644 (file)
@@ -7,12 +7,15 @@ use base 'Mojolicious::Controller';
 
 use Data::Dump qw(dump);
 use File::Slurp;
-use JSON;
 use Encode;
 use locale;
 use File::Find;
 use Storable;
 use Time::HiRes qw(time);
+use File::Path qw(mkpath);
+
+use MojoFacets::Import::File;
+use MojoFacets::Import::HTMLTable;
 
 our $loaded;
 our $filters;
@@ -20,31 +23,29 @@ our $filters;
 sub index {
        my $self = shift;
 
-       my $path = $self->app->home->rel_dir('data');
-       die "no data dir $path" unless -d $path;
+       my $data_dir = $self->app->home->rel_dir('data');
+       die "no data dir $data_dir" unless -d $data_dir;
 
        my @files;
-       my $edits;
+       my $changes;
        find( sub {
                my $file = $File::Find::name;
                if ( -f $file && $file =~ m/\.(js(on)?|txt)$/ ) {
-                       $file =~ s/$path\/*//;
+                       $file =~ s/$data_dir\/*//;
+                       push @files, $file;
+               } elsif ( -f $file && $file =~ m/([^\/]+)\.changes\/(\d+\.\d+.+)/ ) {
+                       push @{ $changes->{$1} }, $2
+               } elsif ( -d $file && $file =~ m/\.html$/ ) {
+                       $file =~ s/$data_dir\/*//;
                        push @files, $file;
-               } elsif ( -f $file && $file =~ m/([^\/]+)\.edits\/(\d+\.\d+.+)/ ) {
-                       push @{ $edits->{$1} }, $2
                } else {
                        warn "IGNORE: $file\n";
                }
-       }, $path);
+       }, $data_dir);
 
        @files = sort { lc $a cmp lc $b } @files;
        my $size;
-       $size->{$_} = -s "$path/$_" foreach @files;
-
-       if ( my $save_path = $self->session('save_path') ) {
-               $self->session( 'save_path' => 0 )
-               if ! defined $loaded->{$save_path};
-       }
+       $size->{$_} = -s "$data_dir/$_" foreach @files;
 
        $self->render(
                files => [ @files ],
@@ -52,7 +53,7 @@ sub index {
                loaded => $loaded,
                filters => $filters,
                dump_path => { map { $_ => $self->_dump_path($_) } @files },
-               edits => $edits,
+               changes => $changes,
        );
 }
 
@@ -129,7 +130,7 @@ sub __stats {
                                }
                                if ( $unique ) {
                                        $stats->{$n}->{unique} = 1;
-                                       warn "# $n unique ",dump( $unique );
+                                       #warn "# $n unique ",dump( $unique );
                                }
                        }
                }
@@ -172,66 +173,27 @@ sub _load_path {
                return;
        }
 
-       # we could use Mojo::JSON here, but it's too slow
-#      $data = from_json read_file $path;
-       my $data = read_file $full_path;
-       warn "# data snippet: ", substr($data,0,200);
-       my @header;
-       if ( $path =~ m/\.js/ ) {
-               Encode::_utf8_on($data);
-               $data = from_json $data;
-       } elsif ( $path =~ m/\.txt/ ) {
-               my @lines = split(/\r?\n/, $data);
-               $data = { items => [] };
-
-               my $header_line = shift @lines;
-               my $multiline = $header_line =~ s/\^//g;
-               @header = split(/\|/, $header_line );
-               warn "# header ", dump( @header );
-               while ( my $line = shift @lines ) {
-                       $line =~ s/\^//g;
-                       chomp $line;
-                       my @v = split(/\|/, $line);
-                       while ( @lines && $#v < $#header ) {
-                               $line = $lines[0];
-                               $line =~ s/\^//g;
-                               chomp $line;
-                               my @more_v = split(/\|/, $line);
-                               if ( $#v + $#more_v > $#header ) {
-                                       warn "short line: ",dump( @v );
-                                       last;
-                               }
-                               shift @lines;
-                               $v[ $#v ] .= shift @more_v if @more_v;
-                               push @v, @more_v if @more_v;
-
-                               if ( $#v > $#header ) {
-                                       die "# splice $#header ", dump( @v );
-                                       @v = splice @v, 0, $#header;
-                               }
-                       }
-                       my $item;
-                       foreach my $i ( 0 .. $#v ) {
-                               $item->{ $header[$i] || "f_$i" } = [ $v[$i] ];
-                       }
-                       push @{ $data->{items} }, $item;
-               }
+       my $data;
+       if ( -f $full_path ) {
+               $data = MojoFacets::Import::File->new( full_path => $full_path, path => $path )->data;
+       } elsif ( -d $full_path && $full_path =~ m/.html/ ) {
+               $data = MojoFacets::Import::HTMLTable->new( dir => $full_path )->data;
        } else {
-               warn "file format unknown $path";
+               die "can't load $full_path";
        }
 
-       my $stats = __stats( $data->{items} );
+       my @header;
 
-       if ( ! @header ) {
-               if ( defined $data->{header} ) {
-                       if ( ref $data->{header} eq 'ARRAY' ) {
-                               @header = @{ $data->{header} };
-                       } else {
-                               warn "header not array ", dump( $data->{header} );
-                       }
+       if ( defined $data->{header} ) {
+               if ( ref $data->{header} eq 'ARRAY' ) {
+                       @header = @{ $data->{header} };
+               } else {
+                       warn "header not array ", dump( $data->{header} );
                }
        }
 
+       my $stats = __stats( $data->{items} );
+
        @header =
                sort { $stats->{$b}->{count} <=> $stats->{$a}->{count} }
                grep { defined $stats->{$_}->{count} } keys %$stats
@@ -261,9 +223,10 @@ sub load {
 
        my $path = $self->param('path') || $self->redirect_to( '/data/index' );
        warn "# path $path\n";
-       $self->session('path' => $path);
        $self->_load_path( $path );
 
+       $self->session( 'path' => $path );
+
        my $redirect_to = '/data/items';
 
        $self->session( 'header' => $loaded->{$path}->{header} );
@@ -289,16 +252,33 @@ sub _loaded {
        my ( $self, $name ) = @_;
        my $path = $self->session('path') || $self->param('path');
        $self->redirect_to('/data/index') unless $path;
+
+       if ( defined $loaded->{$path}->{modified} && $loaded->{$path}->{modified} > 1 ) {
+               my $caller = (caller(1))[3];
+               if ( $caller =~ m/::edit/ ) {
+                       warn "rebuild stats for $path ignored caller $caller\n";
+               } else {
+                       warn "rebuild stats for $path FORCED by modified caller $caller\n";
+                       $loaded->{$path}->{stats} = __stats( $loaded->{$path}->{data}->{items} );
+                       $loaded->{$path}->{modified} = 1;
+               }
+       }
+
        if ( ! defined $loaded->{$path}->{$name} ) {
                warn "$path $name isn't loaded\n";
                $self->_load_path( $path );
-               $self->redirect_to('/data/index')
-                       unless defined $loaded->{$path}->{$name};
                if ( ! defined $loaded->{$path}->{stats} ) {
                        warn "rebuild stats for $path\n";
                        $loaded->{$path}->{stats} = __stats( $loaded->{$path}->{data}->{items} );
                }
+               if ( ! defined $loaded->{$path}->{$name} ) {
+                       warn "MISSING $name for $path\n";
+                       $self->redirect_to('/data/index')
+               }
        }
+
+       $self->session( 'modified' => $loaded->{$path}->{modified} );
+
        return $loaded->{$path}->{$name};
 }
 
@@ -320,10 +300,12 @@ sub _permanent_path {
 sub _export_path {
        my $self = shift;
        my $path = $self->_param_or_session('path');
-       my $dir = $self->app->home->rel_dir('public') . '/export/';
-       mkdir $dir unless -e $dir;
-       $dir .= $path;
-       mkdir $dir unless -e $dir;
+       if ( ! $path ) { # no 'path' value available: warn and return nothing
+               warn "no path in param or session";
+               return;
+       }
+       my $dir = $self->app->home->rel_dir('public') . "/export/$path"; # export directory for this path
+       mkpath $dir unless -e $dir; # File::Path::mkpath also creates missing parent directories
        $dir . '/' . join('.', @_);
 }
 
@@ -390,7 +372,7 @@ sub _param_scalar {
                $self->session($name => $scalar);
        }
 
-       warn "# _perm_scalar $name ",dump $scalar;
+       warn "# _param_scalar $name ",dump $scalar;
        return $scalar;
 }
 
@@ -441,7 +423,9 @@ sub _filter_on_data {
        foreach my $i ( 0 .. $#$items ) {
 
                if ( defined $items->[$i]->{$name} ) {
-                       foreach my $v ( @{ $items->[$i]->{$name} } ) {
+                       my $row = $items->[$i]->{$name};
+                       $row = [ $row ] unless ref $row eq 'ARRAY'; # FIXME probably wrong place
+                       foreach my $v ( @$row ) {
                                if ( defined $filter_hash->{ $v } ) {
                                        $filtered_items->{$i}++;
                                }
@@ -538,7 +522,7 @@ sub _data_sorted_by {
                }
        } map {
                [ $nr++, exists $_->{$order} ? join('', @{$_->{$order}}) : $missing ]
-       } @{ $data->{items} }
+       } grep { ref $_->{$order} eq 'ARRAY' } @{ $data->{items} }
        ;
 
        warn "sorted: $order numeric: $numeric items: ", $#sorted + 1, "\n";
@@ -551,8 +535,12 @@ sub _data_sorted_by {
 sub items {
        my $self = shift;
 
+       if ( my $show = $self->param('id') ) {
+               $self->param('show', $show);
+               warn "show $show\n";
+       }
+
        my $path = $self->session('path');
-       $self->redirect_to('/data/index') unless defined $loaded->{ $path };
 
        my @columns = $self->_param_array('columns');
        $self->redirect_to('/data/columns') unless @columns;
@@ -565,6 +553,10 @@ sub items {
        # fix offset when changing limit
        $offset = int( $offset / $limit ) * $limit;
 
+       if ( ! grep { /^\Q$order\E$/ } @columns ) {
+               $order = $columns[0];
+               $self->session( order => $order );
+       }
        my $sorted = $self->_data_sorted_by( $order );
 
        my @filter_names;
@@ -619,17 +611,64 @@ sub items {
 
        warn "all_filters $all_filters produced ", $#$filtered + 1, " items\n" if $filtered;
 
-       my $sorted_items;
        my $data = $self->_loaded('data');
+
+       my $code = $self->_param_scalar('code','');
+       $code =~ s{[\r\n]+$}{}s;
+
+       my $commit = $self->param('commit');
+       my $test = $self->param('test');
+
+       my $cols_changed;
+
+       if ( $code && ( $test || $commit ) ) {
+               # XXX find columns used in code snippet and show them to user
+               foreach my $column ( $code =~ m/\$row->{(.+?)}/g ) {
+                       if ( $column =~ s/^(['"])// ) {
+                               $column =~ s/$1$//;
+                       }
+                       $cols_changed->{$column}++;
+                       next if grep { /$column/ } @columns;
+                       $cols_changed->{$column}++;
+                       unshift @columns, $column;
+                       if ( $commit ) {
+                               $self->session('columns', [ @columns ]);
+                               $loaded->{$path}->{columns} = [ @columns ];
+                               __path_modified( $path, 2 );
+                       }
+               }
+       }
+
+       if ( $commit ) {
+               warn "# commit on ", $#$filtered + 1, " items:\n$code\n";
+               foreach ( 0 .. $#$filtered ) {
+                       my $i = $filtered->[$_];
+                       my $row = $data->{items}->[$i];
+                       eval $code;
+               }
+               $code = '';
+       }
+
+       my $sorted_items;
        my $from_end = $sort eq 'd' ? $#$filtered : 0;
        foreach ( 0 .. $limit ) {
                my $i = $_ + $offset;
                last unless defined $filtered->[$i];
                $i = $from_end - $i if $from_end;
                my $id = $filtered->[$i];
-               push @$sorted_items,
-               my $item = $data->{items}->[ $id ];
-               $item->{_row_id} ||= $id;
+               my $row = $data->{items}->[ $id ];
+               if ( $code && $test ) {
+                       $row = Storable::dclone $row;
+                       eval $code;
+                       if ( $@ ) {
+                               warn "ERROR evaling\n$code\n$@";
+                               $self->stash('eval_error', $@) if $@;
+                       } else {
+                               warn "EVAL ",dump($row);
+                       }
+               }
+               $row->{_row_id} ||= $id;
+               push @$sorted_items, $row;
        }
 
        warn "# sorted_items ", $#$sorted_items + 1, " offset $offset limit $limit order $sort";
@@ -643,6 +682,8 @@ sub items {
                rows => $#$filtered + 1,
                numeric => { map { $_, $self->_is_numeric($_) } @columns },
                filters => $self->_current_filters,
+               code => $code,
+               cols_changed => $cols_changed,
        );
 
 }
@@ -762,9 +803,34 @@ sub facet {
        );
 }
 
+
+sub __invalidate_path_column {
+       my ( $path, $name ) = @_;
+       # drop the cached sort order and cached filter results that involve column $name
+       if ( defined $loaded->{$path}->{sorted}->{$name} ) {
+               delete $loaded->{$path}->{sorted}->{$name};
+               warn "# invalidate $path sorted $name\n";
+       }
+       # \Q..\E: column names are data, not patterns (may contain regex metacharacters)
+       foreach ( grep { m/\Q$name\E/ } keys %{ $loaded->{$path}->{filtered} } ) {
+               delete $loaded->{$path}->{filtered}->{$_};
+               warn "# invalidate $path filtered $_\n";
+       }
+}
+
+sub __path_modified {
+       my $path  = shift;
+       my $value = shift;
+       # store the modification marker for $path; it defaults to 1 when no value is passed
+       $value = 1 if ! defined $value;
+       $loaded->{$path}->{modified} = $value;
+       warn "# __path_modified $path $value\n";
+}
+
 sub edit {
        my $self = shift;
-       my $content = $self->param('content');
+       my $new_content = $self->param('new_content');
+       $new_content  ||= $self->param('content'); # backward compatibility with old actions
 
        my $i = $self->param('_row_id');
        die "invalid _row_id ",dump($i) unless $i =~ m/^\d+$/;
@@ -775,21 +841,21 @@ sub edit {
        my $data = $self->_loaded('data');
 
        if ( defined $loaded->{$path}->{data}->{items}->[$i] ) {
-               $content =~ s/^\s+//s;
-               $content =~ s/\s+$//s;
+               $new_content =~ s/^\s+//s;
+               $new_content =~ s/\s+$//s;
                my $v;
-               if ( $content =~ /\xB6/ ) {     # para
-                       $v = [ split(/\s*\xB6\s*/, $content) ];
+               if ( $new_content =~ /\xB6/ ) { # para
+                       $v = [ split(/\s*\xB6\s*/, $new_content) ];
                } else {
-                       $v = [ $content ];
+                       $v = [ $new_content ];
                }
 
                my $old = dump $loaded->{$path}->{data}->{items}->[$i]->{$name};
                my $new = dump $v;
                if ( $old ne $new
-                       && ! ( $old eq 'undef' && length($content) == 0 ) # new value empty, previous undef
+                       && ! ( $old eq 'undef' && length($new_content) == 0 ) # new value empty, previous undef
                ) {
-                       my $edit = {
+                       my $change = {
                                path => $path,
                                column => $name,
                                pos => $i,
@@ -803,43 +869,38 @@ sub edit {
                                        keys %{ $loaded->{$path}->{stats} }
                                },
                        };
-                       my $edit_path = $self->_permanent_path( 'edits' );
-                       mkdir $edit_path unless -d $edit_path;
-                       $edit_path .= '/' . $edit->{time};
-                       store $edit, $edit_path;
-                       utime $edit->{time}, $edit->{time}, $edit_path;
-                       warn "# $edit_path ", dump($edit);
-
-                       warn "# edit $path $i $old -> $new\n";
+                       my $change_path = $self->_permanent_path( 'changes' );
+                       mkdir $change_path unless -d $change_path;
+                       $change_path .= '/' . $change->{time};
+                       store $change, $change_path;
+                       utime $change->{time}, $change->{time}, $change_path;
+                       warn "# $change_path ", dump($change);
+
+                       warn "# change $path $i $old -> $new\n";
                        $loaded->{$path}->{data}->{items}->[$i]->{$name} = $v;
 
-                       if ( defined $loaded->{$path}->{sorted}->{$name} ) {
-                           delete $loaded->{$path}->{sorted}->{$name};
-                               warn "# invalidate $path sorted $name\n";
-                       }
-
-                       foreach ( grep { m/$name/ } keys %{ $loaded->{$path}->{filtered} } ) {
-                           delete $loaded->{$path}->{filtered}->{$_};
-                               warn "# invalidate $path filtered $_\n";
-                       }
+                       __invalidate_path_column( $path, $name );
 
                        $status = 201; # created
-                       $self->session('save_path' => $path);
+                       # modified = 2 -- force rebuild of stats
+                       __path_modified( $path, 2 );
+       
+                       $new_content = join("\xB6",@$v);
 
                } else {
                        warn "# unchanged $path $i $old\n";
                        $status = 304;
                }
        } else {
-               $content = "$path $i $name doesn't exist\n";
+               $new_content = "$path $i $name doesn't exist\n";
                $status = 404;
        }
 
-       warn "# edit $status $content";
+       warn "# edit $status ", dump $new_content;
 
        $self->render(
                status => $status,
-               content => $content,
+               new_content => scalar $new_content,
        );
 }
 
@@ -848,15 +909,30 @@ sub save {
        my $self = shift;
        my $path = $self->_param_or_session('path');
        my $dump_path = $self->_save( $path );
-       $self->session('save_path' => 0);
+       __path_modified( $path, 0 );
 
        $self->redirect_to( '/data/items' );
 }
 
 sub export {
        my $self = shift;
+
+       if ( my $import = $self->param('import') ) {
+
+               if ( $import =~ m{/filter\.(.+?)\..+} ) {
+                       my $name = $1;
+                       my @vals = map { chomp; $_ }
+                               read_file $self->app->home->rel_dir('public') . "/export/$import";
+                       $self->_remove_filter( $name );
+                       $self->_filter_on_data( $name, @vals );
+                       $self->session( 'offset' => 0 );
+                       $self->redirect_to('/data/items');
+               } else {
+                       warn "UNKNOWN IMPORT $import";
+               }
+       }
+
        $self->render( export => [
-               map { s{^.+/public/export/}{}; $_ }
                glob( $self->_export_path . '*' )
        ] );
 }