rebuild stats link on columns page
[MojoFacets.git] / lib / MojoFacets / Data.pm
index a5ae43f..bbe311c 100644 (file)
@@ -11,6 +11,8 @@ use JSON;
 use Encode;
 use locale;
 use File::Find;
+use Storable;
+use Time::HiRes qw(time);
 
 our $loaded;
 our $filters;
@@ -22,11 +24,14 @@ sub index {
        die "no data dir $path" unless -d $path;
 
        my @files;
+       my $edits;
        find( sub {
                my $file = $File::Find::name;
                if ( -f $file && $file =~ m/\.(js(on)?|txt)$/ ) {
                        $file =~ s/$path\/*//;
                        push @files, $file;
+               } elsif ( -f $file && $file =~ m/([^\/]+)\.edits\/(\d+\.\d+.+)/ ) {
+                       push @{ $edits->{$1} }, $2
                } else {
                        warn "IGNORE: $file\n";
                }
@@ -36,22 +41,137 @@ sub index {
        my $size;
        $size->{$_} = -s "$path/$_" foreach @files;
 
+       if ( my $save_path = $self->session('save_path') ) {
+               $self->session( 'save_path' => 0 )
+               if ! defined $loaded->{$save_path};
+       }
+
        $self->render(
                files => [ @files ],
                size => $size,
                loaded => $loaded,
                filters => $filters,
+               dump_path => { map { $_ => $self->_dump_path($_) } @files },
+               edits => $edits,
        );
 }
 
-sub _load_path {
+# Map a data file name to the path of its Storable dump in /tmp.
+# A leading data directory is stripped from the name and every run of
+# slashes is replaced with a single underscore.
+sub _dump_path {
+       my ( $self, $name ) = @_;
+       my $dir = $self->app->home->rel_dir('data');
+       # \Q..\E: $dir is a filesystem path, not a pattern, so any regex
+       # metacharacters in it must be matched literally
+       $name =~ s/^\Q$dir\E//;
+       $name =~ s/\/+/_/g;
+       return '/tmp/mojo_facets.' . $name . '.storable';
+}
+
+sub _save {
        my ( $self, $path ) = @_;
 
-       return if defined $loaded->{$path}->{data};
+       my $dump_path = $self->_dump_path( $path );
+       my $first_load = ! -e $dump_path;
+       warn "save loaded to $dump_path";
+       my $info = $loaded->{$path};
+       store $info, $dump_path;
+
+       if ( $first_load ) {
+               my $mtime = $loaded->{$path}->{mtime};
+               utime $mtime, $mtime, $dump_path;
+               warn "sync time to $path at $mtime\n";
+       }
+
+       warn $dump_path, ' ', -s $dump_path, " bytes\n";
+       return $dump_path;
+}
+
+
+# Build per-column statistics for the list of items passed as the first
+# argument (array reference of hash references).
+#
+# Returns a hash reference keyed by column name with counters:
+#   count   - number of items which have this column
+#   array   - number of array elements summed over all items; removed
+#             when it is equal to count
+#   numeric - number of values matching a plain decimal number
+#   empty   - number of zero-length (or undefined) values
+#   unique  - 1 when the column is in every item and no first value
+#             repeats
+sub __stats {
+
+       my $stats;
+
+       my $nr_items = $#{ $_[0] } + 1;
+
+       warn "__stats $nr_items\n";
+
+       foreach my $e ( @{ $_[0] } ) {
+               foreach my $n ( keys %$e ) {
+                       $stats->{$n}->{count}++;
+                       my @v;
+                       if ( ref $e->{$n} eq 'ARRAY' ) {
+                               $stats->{$n}->{array} += $#{ $e->{$n} } + 1;
+                               @v = @{ $e->{$n} };
+                       } else {
+                               @v = ( $e->{$n} );
+                       }
+
+                       foreach my $x ( @v ) {
+                               # undef (e.g. a null decoded from JSON) is counted
+                               # as empty without tripping uninitialized warnings
+                               if ( ! defined $x || length $x == 0 ) {
+                                       $stats->{$n}->{empty}++;
+                                       next;
+                               }
+                               $stats->{$n}->{numeric}++
+                                       if $x =~ m/^[-+]?([0-9]*\.[0-9]+|[0-9]+)$/;
+                       }
+
+               }
+       }
+
+       foreach my $n ( keys %$stats ) {
+               my $s = $stats->{$n};
+               next unless defined $s->{array};
+               if ( $s->{array} == $s->{count} ) {
+                       delete $s->{array};
+                       if ( $s->{count} == $nr_items ) {
+                               warn "check $n for uniqeness\n";
+                               my $unique;
+                               foreach my $e ( @{ $_[0] } ) {
+                                       # a column can mix arrays and plain scalars;
+                                       # dereferencing a plain scalar as an array
+                                       # is fatal when strict refs is in effect
+                                       my $first = ref $e->{$n} eq 'ARRAY' ? $e->{$n}->[0] : $e->{$n};
+                                       $first = '' unless defined $first;
+                                       if ( ++$unique->{ $first } == 2 ) {
+                                               $unique = 0;
+                                               last;
+                                       }
+                               }
+                               if ( $unique ) {
+                                       $stats->{$n}->{unique} = 1;
+                                       warn "# $n unique ",dump( $unique );
+                               }
+                       }
+               }
+       }
+
+       warn "# __stats ",dump($stats);
+
+       return $stats;
+}
+
+# Look a value up by name: a true request parameter wins, otherwise the
+# value stored in the session under the same name is returned.
+sub _param_or_session {
+       my ( $self, $name ) = @_;
+       my $from_param = $self->param( $name );
+       return $from_param if $from_param;
+       return $self->session( $name );
+}
+
+# Drop the cached statistics of the current path and redirect to
+# /data/columns; _loaded rebuilds them when they are requested again.
+sub stats {
+       my $self = shift;
+       my $path = $self->_param_or_session('path');
+       warn "stats ", ( defined $path ? $path : '' ), "\n";
+       # only touch entries which exist, so an unknown or missing path
+       # does not autovivify an empty record in $loaded
+       delete $loaded->{$path}->{stats}
+               if defined $path && exists $loaded->{$path};
+       $self->redirect_to( '/data/columns' );
+}
+
+
+sub _load_path {
+       my ( $self, $path ) = @_;
 
        my $full_path = $self->app->home->rel_file( 'data/' . $path );
        die "$full_path $!" unless -r $full_path;
 
+       my $dump_path = $self->_dump_path( $path );
+
+       if ( defined $loaded->{$path}->{data} ) {
+               my $mtime = (stat($full_path))[9];
+               return if $loaded->{$path}->{mtime} == $mtime;
+               warn "reload $full_path, modified ", time() - $mtime, " seconds ago\n";
+       } elsif ( -e $dump_path ) {
+               warn "dump_path $dump_path ", -s $dump_path, " bytes loading...\n";
+               my $info = retrieve $dump_path;
+               $loaded->{ $path } = $info;
+               return;
+       }
+
        # we could use Mojo::JSON here, but it's too slow
 #      $data = from_json read_file $path;
        my $data = read_file $full_path;
@@ -91,41 +211,16 @@ sub _load_path {
                                }
                        }
                        my $item;
-                       $item->{ $header[$_] || "f_$_" } = [ $v[$_] ] foreach ( 0 .. $#v );
+                       foreach my $i ( 0 .. $#v ) {
+                               $item->{ $header[$i] || "f_$i" } = [ $v[$i] ];
+                       }
                        push @{ $data->{items} }, $item;
                }
        } else {
                warn "file format unknown $path";
        }
 
-       my $stats;
-
-       foreach my $e ( @{ $data->{items} } ) {
-               foreach my $n ( keys %$e ) {
-                       $stats->{$n}->{count}++;
-                       my @v;
-                       if ( ref $e->{$n} eq 'ARRAY' ) {
-                               $stats->{$n}->{array} += $#{ $e->{$n} } + 1;
-                               @v = @{ $e->{$n} };
-                       } else {
-                               @v = ( $e->{$n} );
-                       }
-
-                       foreach my $x ( @v ) {
-                               $stats->{$n}->{numeric}++
-                                       if $x =~ m/^[-+]?([0-9]*\.[0-9]+|[0-9]+)$/;
-                               $stats->{$n}->{empty}++
-                                       if length $x == 0; # faster than $x =~ m/^\s*$/;
-                       }
-
-               }
-       }
-
-       foreach my $n ( keys %$stats ) {
-               next unless defined $stats->{$n}->{array};
-               delete $stats->{$n}->{array}
-                       if $stats->{$n}->{array} == $stats->{$n}->{count};
-       }
+       my $stats = __stats( $data->{items} );
 
        if ( ! @header ) {
                if ( defined $data->{header} ) {
@@ -142,16 +237,18 @@ sub _load_path {
                grep { defined $stats->{$_}->{count} } keys %$stats
                unless @header;
 
-       warn dump($stats);
-
-       $loaded->{ $path } = {
+       my $info = {
                header => [ @header ],
                stats  => $stats,
                full_path => $full_path,
                size => -s $full_path,
+               mtime => (stat($full_path))[9],
                data => $data,
        };
 
+       $loaded->{ $path } = $info;
+       $self->_save( $path );
+
 }
 
 
@@ -167,24 +264,41 @@ sub load {
        $self->session('path' => $path);
        $self->_load_path( $path );
 
+       my $redirect_to = '/data/items';
+
        $self->session( 'header' => $loaded->{$path}->{header} );
        if ( ! defined $loaded->{$path}->{columns} ) {
-               $self->session( 'columns' => $loaded->{$path}->{header} );
-               $self->session( 'order'   => $loaded->{$path}->{header}->[0] );
-               $self->redirect_to( '/data/columns' );
-       } else {
-               $self->session( 'columns' => $loaded->{$path}->{columns} );
-               $self->session( 'order'   => $loaded->{$path}->{columns}->[0] );
-               $self->redirect_to( '/data/items' );
+               my $columns_path = $self->_permanent_path( 'columns' );
+               if ( -e $columns_path ) {
+                       my @columns = map { s/[\r\n]+$//; $_ } read_file $columns_path;
+                       $loaded->{$path}->{columns} = [ @columns ];
+                       warn "# columns_path $columns_path ",dump(@columns);
+               } else {
+                       $loaded->{$path}->{columns} = $loaded->{$path}->{header}
+               }
+
+               $redirect_to = '/data/columns';
        }
+       $self->session( 'columns' => $loaded->{$path}->{columns} );
+       $self->session( 'order'   => $loaded->{$path}->{columns}->[0] );
+       $self->redirect_to( $redirect_to );
 }
 
 
 sub _loaded {
        my ( $self, $name ) = @_;
-       my $path = $self->session('path');
-       die "$path $name doesn't exist in loaded ",dump( $loaded )
-               unless defined $loaded->{$path}->{$name};
+       my $path = $self->session('path') || $self->param('path');
+       $self->redirect_to('/data/index') unless $path;
+       if ( ! defined $loaded->{$path}->{$name} ) {
+               warn "$path $name isn't loaded\n";
+               $self->_load_path( $path );
+               $self->redirect_to('/data/index')
+                       unless defined $loaded->{$path}->{$name};
+               if ( ! defined $loaded->{$path}->{stats} ) {
+                       warn "rebuild stats for $path\n";
+                       $loaded->{$path}->{stats} = __stats( $loaded->{$path}->{data}->{items} );
+               }
+       }
        return $loaded->{$path}->{$name};
 }
 
@@ -197,16 +311,32 @@ sub _checked {
        return $checked;
 }
 
+# File name inside the data directory for the current path (request
+# parameter or session) with any extra arguments appended as
+# dot-separated suffixes.
+sub _permanent_path {
+       my ( $self, @suffixes ) = @_;
+       my $path = $self->_param_or_session('path');
+       my $data_dir = $self->app->home->rel_dir('data');
+       my $file = join( '.', $path, @suffixes );
+       return $data_dir . '/' . $file;
+}
+
+# Return the file name below public/export/<path>/ built from the
+# arguments joined with dots, creating the directory when it is missing.
+sub _export_path {
+       my $self = shift;
+       my $path = $self->_param_or_session('path');
+       my $dir = $self->app->home->rel_dir('public') . '/export/' . $path;
+       if ( ! -e $dir ) {
+               # $path may contain sub-directories, so create every level
+               # instead of a single mkdir which would fail silently
+               require File::Path;
+               File::Path::make_path( $dir, { error => \my $errors } );
+               warn "can't create $dir\n" if $errors && @$errors;
+       }
+       $dir . '/' . join('.', @_);
+}
 
 sub columns {
     my $self = shift;
 
        if ( $self->param('columns') ) {
-               $self->_perm_array('columns');
+               my @columns = $self->_param_array('columns');
+               write_file( $self->_permanent_path( 'columns' ), map { "$_\n" } @columns );
                $self->redirect_to('/data/items');
        }
 
-       my $stats = $self->_loaded( 'stats' ); # || $self->redirect_to( '/data/index' );
+       my $stats = $self->_loaded( 'stats' );
 
        my @columns;
        @columns = grep { defined $stats->{$_}->{count} } @{ $self->session('columns') } if $self->session('columns');
@@ -219,11 +349,11 @@ sub columns {
                message => 'Select columns to display',
                stats => $stats,
                columns => \@columns,
-               checked => $self->_checked( $self->_perm_array('columns') ),
+               checked => $self->_checked( $self->_param_array('columns') ),
        );
 }
 
-sub _perm_array {
+sub _param_array {
     my ($self,$name) = @_;
 
        my @array = $self->param($name);
@@ -243,7 +373,7 @@ sub _perm_array {
        return @array;
 }
 
-sub _perm_scalar {
+sub _param_scalar {
     my ($self,$name,$default) = @_;
 
        my $scalar = $self->param($name);
@@ -271,7 +401,14 @@ sub filter {
        my @vals = $self->param('filter_vals');
 
        $self->_remove_filter( $name );
-       $self->_filter_on_data( $name, @vals ) if @vals;
+       if ( @vals ) {
+               $self->_filter_on_data( $name, @vals );
+               if ( my $permanent = $self->param('_permanent') ) {
+                       my $permanent_path = $self->_export_path( 'filter', $name, $permanent );
+                       write_file $permanent_path, map { "$_\n" } @vals;
+                       warn "permanent filter $permanent_path ", -s $permanent_path;
+               }
+       }
 
        $self->session( 'offset' => 0 );
 
@@ -417,13 +554,13 @@ sub items {
        my $path = $self->session('path');
        $self->redirect_to('/data/index') unless defined $loaded->{ $path };
 
-       my @columns = $self->_perm_array('columns');
+       my @columns = $self->_param_array('columns');
        $self->redirect_to('/data/columns') unless @columns;
-       my $order   = $self->_perm_scalar('order', $columns[0]);
-       my $sort    = $self->_perm_scalar('sort', 'a');
-       my $offset  = $self->_perm_scalar('offset', 0);
-       my $limit   = $self->_perm_scalar('limit', 20);
-       $self->_perm_scalar('show', 'table');
+       my $order   = $self->_param_scalar('order', $columns[0]);
+       my $sort    = $self->_param_scalar('sort', 'a');
+       my $offset  = $self->_param_scalar('offset', 0);
+       my $limit   = $self->_param_scalar('limit', 20);
+       $self->_param_scalar('show', 'table');
 
        # fix offset when changing limit
        $offset = int( $offset / $limit ) * $limit;
@@ -489,7 +626,10 @@ sub items {
                my $i = $_ + $offset;
                last unless defined $filtered->[$i];
                $i = $from_end - $i if $from_end;
-               push @$sorted_items, $data->{items}->[ $filtered->[$i] ];
+               my $id = $filtered->[$i];
+               push @$sorted_items,
+               my $item = $data->{items}->[ $id ];
+               $item->{_row_id} ||= $id;
        }
 
        warn "# sorted_items ", $#$sorted_items + 1, " offset $offset limit $limit order $sort";
@@ -559,7 +699,7 @@ sub facet {
        my $facet;
        my $name = $self->param('name') || die "no name";
 
-       my $all = $self->_perm_scalar('all', 1);
+       my $all = $self->_param_scalar('all', 1);
        my $data = $self->_loaded('data');
 
        my $filters = $self->_current_filters;
@@ -598,19 +738,23 @@ sub facet {
        my $numeric = $self->_is_numeric($name);
 
        my $sort = $self->param('sort');
-       $sort ||= $numeric ? 'a' : 'c';
+       # sort numeric facets with more than 5 values ascending
+       $sort ||= $numeric && $#facet_names > 4 ? 'a' : 'c';
 
        @facet_names = sort {
-               if ( $sort =~ m/a/i ) {
-                       $numeric ? $a <=> $b : lc $a cmp lc $b;
-               } elsif ( $sort =~ m/d/i ) {
-                       $numeric ? $b <=> $a : lc $b cmp lc $a;
-               } elsif ( $sort =~ m/c/i ) {
-                       ( $facet->{$b} || -1 ) <=> ( $facet->{$a} || -1 )
+               my $result;
+               if ( $sort eq 'a' ) {
+                       $result = $numeric ? $a <=> $b : lc $a cmp lc $b;
+               } elsif ( $sort eq 'd' ) {
+                       $result = $numeric ? $b <=> $a : lc $b cmp lc $a;
+               } elsif ( $sort eq 'c' ) {
+                       $result = ( $facet->{$b} || -1 ) <=> ( $facet->{$a} || -1 )
                } else {
                        warn "unknown sort: $sort";
-                       $a cmp $b;
+                       $result = $a cmp $b;
                }
+               $result = $a cmp $b unless defined $result; # FIXME cludge for numeric facets with invalid data
+               $result;
        } @facet_names;
 
        $self->render( name => $name, facet => $facet, checked => $checked,
@@ -622,9 +766,99 @@ sub edit {
        my $self = shift;
        my $content = $self->param('content');
 
+       my $i = $self->param('_row_id');
+       die "invalid _row_id ",dump($i) unless $i =~ m/^\d+$/;
+       my $path = $self->param('path') || die "no path";
+       my $name = $self->param('name') || die "no name";
+       my $status = 200; # 200 = OK, 201 = Created
+
+       my $data = $self->_loaded('data');
+
+       if ( defined $loaded->{$path}->{data}->{items}->[$i] ) {
+               $content =~ s/^\s+//s;
+               $content =~ s/\s+$//s;
+               my $v;
+               if ( $content =~ /\xB6/ ) {     # para
+                       $v = [ split(/\s*\xB6\s*/, $content) ];
+               } else {
+                       $v = [ $content ];
+               }
+
+               my $old = dump $loaded->{$path}->{data}->{items}->[$i]->{$name};
+               my $new = dump $v;
+               if ( $old ne $new
+                       && ! ( $old eq 'undef' && length($content) == 0 ) # new value empty, previous undef
+               ) {
+                       my $edit = {
+                               path => $path,
+                               column => $name,
+                               pos => $i,
+                               old => $loaded->{$path}->{data}->{items}->[$i]->{$name},
+                               new => $v,
+                               time => $self->param('time') || time(),
+                               user => $self->param('user') || $ENV{'LOGNAME'},
+                               unique => {
+                                       map { $_ => $loaded->{$path}->{data}->{items}->[$i]->{$_}->[0] }
+                                       grep { defined $loaded->{$path}->{stats}->{$_}->{unique} }
+                                       keys %{ $loaded->{$path}->{stats} }
+                               },
+                       };
+                       my $edit_path = $self->_permanent_path( 'edits' );
+                       mkdir $edit_path unless -d $edit_path;
+                       $edit_path .= '/' . $edit->{time};
+                       store $edit, $edit_path;
+                       utime $edit->{time}, $edit->{time}, $edit_path;
+                       warn "# $edit_path ", dump($edit);
+
+                       warn "# edit $path $i $old -> $new\n";
+                       $loaded->{$path}->{data}->{items}->[$i]->{$name} = $v;
+
+                       if ( defined $loaded->{$path}->{sorted}->{$name} ) {
+                           delete $loaded->{$path}->{sorted}->{$name};
+                               warn "# invalidate $path sorted $name\n";
+                       }
+
+                       foreach ( grep { m/$name/ } keys %{ $loaded->{$path}->{filtered} } ) {
+                           delete $loaded->{$path}->{filtered}->{$_};
+                               warn "# invalidate $path filtered $_\n";
+                       }
+
+                       $status = 201; # created
+                       $self->session('save_path' => $path);
+
+               } else {
+                       warn "# unchanged $path $i $old\n";
+                       $status = 304;
+               }
+       } else {
+               $content = "$path $i $name doesn't exist\n";
+               $status = 404;
+       }
+
+       warn "# edit $status $content";
+
        $self->render(
-               content => $content
+               status => $status,
+               content => $content,
        );
 }
 
+
+# Store the loaded data of the current path with _save, clear the
+# save_path marker in the session and redirect to /data/items.
+sub save {
+       my $self = shift;
+       my $path = $self->_param_or_session('path');
+       $self->_save( $path );
+       $self->session('save_path' => 0);
+
+       $self->redirect_to( '/data/items' );
+}
+
+# Render the list of files found in the export directory of the current
+# path, with everything up to and including /public/export/ removed
+# from each name.
+sub export {
+       my $self = shift;
+       my @exported;
+       foreach my $file ( glob( $self->_export_path . '*' ) ) {
+               $file =~ s{^.+/public/export/}{};
+               push @exported, $file;
+       }
+       $self->render( export => [ @exported ] );
+}
+
 1;