X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=lib%2FMojoFacets%2FData.pm;h=d74763fccb3c4edc51ead1ed888d2dcfda1800fb;hb=eecf7c8888bde22f9518b84b1ad9b8438f88306e;hp=dffc48bc25addbee8663092ed36bb049a56024f7;hpb=2e6d14413341c393e2f39b3eae1c045ad9de8d17;p=MojoFacets.git diff --git a/lib/MojoFacets/Data.pm b/lib/MojoFacets/Data.pm index dffc48b..d74763f 100644 --- a/lib/MojoFacets/Data.pm +++ b/lib/MojoFacets/Data.pm @@ -7,12 +7,15 @@ use base 'Mojolicious::Controller'; use Data::Dump qw(dump); use File::Slurp; -use JSON; use Encode; use locale; use File::Find; use Storable; use Time::HiRes qw(time); +use File::Path qw(mkpath); + +use MojoFacets::Import::File; +use MojoFacets::Import::HTMLTable; our $loaded; our $filters; @@ -20,31 +23,30 @@ our $filters; sub index { my $self = shift; - my $path = $self->app->home->rel_dir('data'); - die "no data dir $path" unless -d $path; + my $data_dir = $self->app->home->rel_dir('data'); + die "no data dir $data_dir" unless -d $data_dir; my @files; - my $edits; + my $changes; find( sub { my $file = $File::Find::name; if ( -f $file && $file =~ m/\.(js(on)?|txt)$/ ) { - $file =~ s/$path\/*//; + $file =~ s/$data_dir\/*//; + push @files, $file; + } elsif ( -f $file && $file =~ m/([^\/]+)\.changes\/(\d+\.\d+.+)/ ) { + push @{ $changes->{$1} }, $2 + } elsif ( -d $file && $file =~ m/\.html$/ ) { + $file =~ s/$data_dir\/*//; push @files, $file; - } elsif ( -f $file && $file =~ m/([^\/]+)\.edits\/(\d+\.\d+.+)/ ) { - push @{ $edits->{$1} }, $2 } else { - warn "IGNORE: $file\n"; + #warn "IGNORE: $file\n"; } - }, $path); + }, $data_dir); - @files = sort { lc $a cmp lc $b } @files; + @files = sort { $loaded->{$b}->{mtime} <=> $loaded->{$a}->{mtime} || lc $a cmp lc $b } @files, + grep { defined $loaded->{$_}->{generated} } keys %$loaded; my $size; - $size->{$_} = -s "$path/$_" foreach @files; - - if ( my $save_path = $self->session('save_path') ) { - $self->session( 'save_path' => 0 ) - if ! defined $loaded->{$save_path}; - } + $size->{$_} = -s "$data_dir/$_" foreach @files; $self->render( files => [ @files ], @@ -52,7 +54,7 @@ sub index { loaded => $loaded, filters => $filters, dump_path => { map { $_ => $self->_dump_path($_) } @files }, - edits => $edits, + changes => $changes, ); } @@ -90,6 +92,8 @@ sub __stats { my $nr_items = $#{ $_[0] } + 1; + warn "__stats $nr_items\n"; + foreach my $e ( @{ $_[0] } ) { foreach my $n ( keys %$e ) { $stats->{$n}->{count}++; @@ -127,7 +131,7 @@ sub __stats { } if ( $unique ) { $stats->{$n}->{unique} = 1; - warn "# $n unique ",dump( $unique ); + #warn "# $n unique ",dump( $unique ); } } } @@ -145,7 +149,8 @@ sub _param_or_session { sub stats { my $self = shift; my $path = $self->_param_or_session('path'); - $loaded->{$path}->{stats} = __stats( $loaded->{$path}->{data}->{items} ); + warn "stats $path\n"; + delete $loaded->{$path}->{stats}; $self->redirect_to( '/data/columns' ); } @@ -153,6 +158,8 @@ sub stats { sub _load_path { my ( $self, $path ) = @_; + return if defined $loaded->{$path}->{'generated'}; + my $full_path = $self->app->home->rel_file( 'data/' . $path ); die "$full_path $!" unless -r $full_path; @@ -169,66 +176,27 @@ sub _load_path { return; } - # we could use Mojo::JSON here, but it's too slow -# $data = from_json read_file $path; - my $data = read_file $full_path; - warn "# data snippet: ", substr($data,0,200); - my @header; - if ( $path =~ m/\.js/ ) { - Encode::_utf8_on($data); - $data = from_json $data; - } elsif ( $path =~ m/\.txt/ ) { - my @lines = split(/\r?\n/, $data); - $data = { items => [] }; - - my $header_line = shift @lines; - my $multiline = $header_line =~ s/\^//g; - @header = split(/\|/, $header_line ); - warn "# header ", dump( @header ); - while ( my $line = shift @lines ) { - $line =~ s/\^//g; - chomp $line; - my @v = split(/\|/, $line); - while ( @lines && $#v < $#header ) { - $line = $lines[0]; - $line =~ s/\^//g; - chomp $line; - my @more_v = split(/\|/, $line); - if ( $#v + $#more_v > $#header ) { - warn "short line: ",dump( @v ); - last; - } - shift @lines; - $v[ $#v ] .= shift @more_v if @more_v; - push @v, @more_v if @more_v; - - if ( $#v > $#header ) { - die "# splice $#header ", dump( @v ); - @v = splice @v, 0, $#header; - } - } - my $item; - foreach my $i ( 0 .. $#v ) { - $item->{ $header[$i] || "f_$i" } = [ $v[$i] ]; - } - push @{ $data->{items} }, $item; - } + my $data; + if ( -f $full_path ) { + $data = MojoFacets::Import::File->new( full_path => $full_path, path => $path )->data; + } elsif ( -d $full_path && $full_path =~ m/.html/ ) { + $data = MojoFacets::Import::HTMLTable->new( dir => $full_path )->data; } else { - warn "file format unknown $path"; + die "can't load $full_path"; } - my $stats = __stats( $data->{items} ); + my @header; - if ( ! @header ) { - if ( defined $data->{header} ) { - if ( ref $data->{header} eq 'ARRAY' ) { - @header = @{ $data->{header} }; - } else { - warn "header not array ", dump( $data->{header} ); - } + if ( defined $data->{header} ) { + if ( ref $data->{header} eq 'ARRAY' ) { + @header = @{ $data->{header} }; + } else { + warn "header not array ", dump( $data->{header} ); } } + my $stats = __stats( $data->{items} ); + @header = sort { $stats->{$b}->{count} <=> $stats->{$a}->{count} } grep { defined $stats->{$_}->{count} } keys %$stats @@ -252,15 +220,24 @@ sub _load_path { sub load { my $self = shift; + my $path = $self->param('path') || $self->redirect_to( '/data/index' ); + my @paths = $self->param('paths'); warn "# paths ", dump @paths; + + foreach my $p ( keys %$loaded ) { + next if grep { /^\Q$p\E$/ } @paths; + warn "remove $p from memory\n"; + delete $loaded->{$p}; + } + $self->_load_path( $_ ) foreach @paths; - my $path = $self->param('path') || $self->redirect_to( '/data/index' ); warn "# path $path\n"; - $self->session('path' => $path); $self->_load_path( $path ); + $self->session( 'path' => $path ); + my $redirect_to = '/data/items'; $self->session( 'header' => $loaded->{$path}->{header} ); @@ -286,12 +263,33 @@ sub _loaded { my ( $self, $name ) = @_; my $path = $self->session('path') || $self->param('path'); $self->redirect_to('/data/index') unless $path; + + if ( defined $loaded->{$path}->{modified} && $loaded->{$path}->{modified} > 1 ) { + my $caller = (caller(1))[3]; + if ( $caller =~ m/::edit/ ) { + warn "rebuild stats for $path ignored caller $caller\n"; + } else { + warn "rebuild stats for $path FORCED by modified caller $caller\n"; + $loaded->{$path}->{stats} = __stats( $loaded->{$path}->{data}->{items} ); + $loaded->{$path}->{modified} = 1; + } + } + if ( ! defined $loaded->{$path}->{$name} ) { warn "$path $name isn't loaded\n"; $self->_load_path( $path ); - $self->redirect_to('/data/index') - unless defined $loaded->{$path}->{$name}; + if ( ! defined $loaded->{$path}->{stats} ) { + warn "rebuild stats for $path\n"; + $loaded->{$path}->{stats} = __stats( $loaded->{$path}->{data}->{items} ); + } + if ( ! defined $loaded->{$path}->{$name} ) { + warn "MISSING $name for $path\n"; + $self->redirect_to('/data/index') + } } + + $self->session( 'modified' => $loaded->{$path}->{modified} ); + return $loaded->{$path}->{$name}; } @@ -313,10 +311,12 @@ sub _permanent_path { sub _export_path { my $self = shift; my $path = $self->_param_or_session('path'); - my $dir = $self->app->home->rel_dir('public') . '/export/'; - mkdir $dir unless -e $dir; - $dir .= $path; - mkdir $dir unless -e $dir; + if ( ! $path ) { + warn "no path in param or session"; + return; + } + my $dir = $self->app->home->rel_dir('public') . "/export/$path"; + mkpath $dir unless -e $dir; $dir . '/' . join('.', @_); } @@ -383,7 +383,7 @@ sub _param_scalar { $self->session($name => $scalar); } - warn "# _perm_scalar $name ",dump $scalar; + warn "# _param_scalar $name ",dump $scalar; return $scalar; } @@ -434,7 +434,9 @@ sub _filter_on_data { foreach my $i ( 0 .. $#$items ) { if ( defined $items->[$i]->{$name} ) { - foreach my $v ( @{ $items->[$i]->{$name} } ) { + my $row = $items->[$i]->{$name}; + $row = [ $row ] unless ref $row eq 'ARRAY'; # FIXME probably wrong place + foreach my $v ( @$row ) { if ( defined $filter_hash->{ $v } ) { $filtered_items->{$i}++; } @@ -450,58 +452,19 @@ sub _filter_on_data { warn "filter $name with ", scalar keys %$filtered_items, " items created\n"; } -sub _data_items { - my ( $self, $all ) = @_; - my $data = $self->_loaded( 'data' ); - - return @{ $data->{items} } if $all == 1; - - my $filters = $self->_current_filters; - my $filter_value; - foreach my $f ( keys %$filters ) { - foreach my $n ( @{ $filters->{$f} } ) { - $filter_value->{$f}->{$n} = 1; - } - } - my @items = @{ $data->{items} }; - @items = grep { - my $i = $_; - my $pass = 1; - foreach my $n ( keys %$filter_value ) { - if ( ! exists $i->{$n} ) { - if ( defined $filter_value->{$n}->{_missing} ) { - $pass = 1; - next; - } else { - $pass = 0; - last; - } - } - # and match any of values in element - my $have_values = 0; - foreach my $v ( @{ $i->{$n} } ) { # FIXME not array? - $have_values ||= 1 if defined $filter_value->{$n}->{$v}; - } - if ( ! $have_values ) { - $pass = 0; - last; - } - } - $pass; - } @items if $filter_value; - return @items; -} - sub _current_filters { my $self = shift; my $current_filters; + my $columns = $self->_loaded('header'); + if ( my $sc = $self->session('columns') ) { + $columns = $sc; + } + $current_filters->{ $_ } = $filters->{ $_ } - foreach ( - grep { defined $filters->{ $_ } } - @{ $self->_loaded('header') } - ); - #warn "# current_filters ",dump($current_filters); + foreach ( grep { defined $filters->{ $_ } } @$columns ) + ; + warn "# _current_filters ",dump($columns); return $current_filters; } @@ -530,7 +493,15 @@ sub _data_sorted_by { $a->[1] cmp $b->[1] } } map { - [ $nr++, exists $_->{$order} ? join('', @{$_->{$order}}) : $missing ] + my $v; + if ( ! exists $_->{$order} ) { + $v = $missing; + } elsif ( ref $_->{$order} eq 'ARRAY' ) { + $v = join('', @{$_->{$order}}); + } else { + $v = $_->{$order}; + } + [ $nr++, $v ] } @{ $data->{items} } ; @@ -541,11 +512,20 @@ sub _data_sorted_by { } +sub __all_filters { + my $order = pop @_; + join(',', sort(@_), 'order', $order); +} + sub items { my $self = shift; + if ( my $show = $self->param('id') ) { + $self->param('show', $show); + warn "show $show\n"; + } + my $path = $self->session('path'); - $self->redirect_to('/data/index') unless defined $loaded->{ $path }; my @columns = $self->_param_array('columns'); $self->redirect_to('/data/columns') unless @columns; @@ -558,6 +538,10 @@ sub items { # fix offset when changing limit $offset = int( $offset / $limit ) * $limit; + if ( ! grep { /^\Q$order\E$/ } @columns ) { + $order = $columns[0]; + $self->session( order => $order ); + } my $sorted = $self->_data_sorted_by( $order ); my @filter_names; @@ -576,7 +560,7 @@ sub items { } } - my $all_filters = join(' ',sort @filter_names,'order:',$order); + my $all_filters = __all_filters( @filter_names,$order ); # warn "# all_filters $all_filters ", dump( $loaded->{$path}->{filtered}->{$all_filters} ); @@ -584,7 +568,7 @@ sub items { my $path_filters = $loaded->{$path}->{filters}; - warn "create combined filter for $all_filters\n"; + warn "create combined filter for $all_filters from ", $#$sorted + 1, " items\n"; my @filtered; foreach my $i ( 0 .. $#$sorted ) { @@ -612,21 +596,129 @@ sub items { warn "all_filters $all_filters produced ", $#$filtered + 1, " items\n" if $filtered; - my $sorted_items; my $data = $self->_loaded('data'); + + my $code = $self->_param_scalar('code',''); + $code =~ s{[\r\n]+$}{}s; + + my $commit = $self->param('commit'); + my $test = $self->param('test'); + + my $cols_changed; + + if ( $code && ( $test || $commit ) ) { + # XXX find columns used in code snippet and show them to user + my $order = 0; + foreach my $column ( $code =~ m/\$row->{([^}]+)}/g ) { + if ( $column =~ s/^(['"])// ) { + $column =~ s/$1$//; + } + next if $column =~ m/\$/; # hide columns with vars in them + $cols_changed->{$column} = 0; + } + } + + my $code_path = $self->app->home->rel_dir('public') . "/code"; + if ( $commit ) { + warn "# commit on ", $#$filtered + 1, " items:\n$code\n"; + my $out; + foreach ( 0 .. $#$filtered ) { + my $i = $filtered->[$_]; + my $row = $data->{items}->[$i]; + eval $code; + } + if ( my $description = $self->param('code_description') ) { + my $depends = $self->param('code_depends') || die "no code_depends?"; + my $path = "$code_path/$depends.$description.pl"; + if ( -e $path && ! $self->param('overwrite') ) { + warn "# code $path not saved\n"; + } else { + write_file $path, $code; + warn "code $path ", -s $path, " bytes saved\n"; + } + } + $code = ''; + if ( $out ) { + my $commit_dataset = join('.' + , $self->param('code_depends') + , $self->param('code_description') + ); + my $key = $self->param('code_depends'); + $key =~ s/,.+$//; + $key ||= 'key'; + my $items; + foreach my $n ( keys %$out ) { + my $i = { $key => [ $n ] }; + $i->{$_} = [ $out->{$n}->{$_} ] foreach keys %{ $out->{$n} }; + push @$items, $i; + }; + undef $out; + my $stats = __stats( $items ); + my @columns = grep { ! m/^\Q$key\E$/ } sort keys %$stats; + unshift @columns, $key; + + $loaded->{$commit_dataset} = { + header => [ @columns ], + columns => [ @columns ], + mtime => time(), + data => { items => $items }, + stats => $stats, + generated => 1, + }; + warn "# loaded out ", dump( $loaded->{$commit_dataset} ); + $self->session('path', $commit_dataset); + $self->session('columns', [ @columns ]); + $self->session('order', $key); + $self->redirect_to('/data/items'); + } + } + + my $sorted_items; my $from_end = $sort eq 'd' ? $#$filtered : 0; + my $out; foreach ( 0 .. $limit ) { my $i = $_ + $offset; last unless defined $filtered->[$i]; $i = $from_end - $i if $from_end; my $id = $filtered->[$i]; - push @$sorted_items, - my $item = $data->{items}->[ $id ]; - $item->{_row_id} ||= $id; + my $row = $data->{items}->[ $id ]; + my $old = { map { $_ => 1 } keys %$row }; + if ( $code && $test ) { + $row = Storable::dclone $row; + eval $code; + if ( $@ ) { + warn "ERROR evaling\n$code\n$@"; + $self->stash('eval_error', $@) if $@; + } else { + warn "EVAL ",dump($row); + $old->{$_}-- foreach keys %$row; + warn "columns changed ",dump($old); + $cols_changed->{$_}++ foreach grep { $old->{$_} == -1 } keys %$old; + } + } + $row->{_row_id} ||= $id; + push @$sorted_items, $row; } + my @added_columns = sort grep { $cols_changed->{$_} > 0 } keys %$cols_changed; + unshift @columns, @added_columns; + + if ( $commit ) { + $self->session('columns', [ @columns ]); + $loaded->{$path}->{columns} = [ @columns ]; + __path_modified( $path, 2 ); + } warn "# sorted_items ", $#$sorted_items + 1, " offset $offset limit $limit order $sort"; + my $code_depends = $self->param('code_depends')|| + join(',', sort grep { $cols_changed->{$_} == 0 } keys %$cols_changed ); + my $code_description = $self->param('code_description') || + join(',', @added_columns); + + $code_depends ||= $code_description; # self-modifing + + warn "# cols_changed ",dump( $cols_changed, $code_depends, $code_description ); + $self->render( order => $order, offset => $offset, @@ -635,7 +727,14 @@ sub items { columns => [ @columns ], rows => $#$filtered + 1, numeric => { map { $_, $self->_is_numeric($_) } @columns }, + unique => { map { $_, $self->_is_unique( $_) } @columns }, filters => $self->_current_filters, + code => $code, + cols_changed => $cols_changed, + code_depends => $code_depends, + code_description => $code_description, + code_path => $code_path, + out => $out, ); } @@ -660,6 +759,12 @@ sub _is_numeric { $stats->{$name}->{numeric} > $count / 2; } +sub _is_unique { + my ( $self, $name ) = @_; + my $stats = $self->_loaded( 'stats' ); + defined $stats->{$name}->{unique}; +} + sub _remove_filter { my ($self,$name) = @_; warn "_remove_filter $name\n"; @@ -671,7 +776,7 @@ sub _remove_filter { warn "filters left: ", keys %{ $loaded->{$path}->{filters} }; foreach ( - grep { /\b$name\b/ } + grep { /\Q$name\E/ } keys %{ $loaded->{$path}->{filtered} } ) { delete $loaded->{$path}->{filtered}->{$_}; @@ -696,7 +801,7 @@ sub facet { my $data = $self->_loaded('data'); my $filters = $self->_current_filters; - my $all_filters = join(' ',sort keys %$filters,'order:',$self->session('order')); + my $all_filters = __all_filters( keys %$filters,$self->session('order') ); my $filtered = $loaded->{$path}->{filtered}->{$all_filters} if defined $loaded->{$path}->{filtered}->{$all_filters}; @@ -755,9 +860,34 @@ sub facet { ); } + +sub __invalidate_path_column { + my ( $path, $name ) = @_; + + if ( defined $loaded->{$path}->{sorted}->{$name} ) { + delete $loaded->{$path}->{sorted}->{$name}; + warn "# invalidate $path sorted $name\n"; + } + + foreach ( grep { m/\Q$name\E/ } keys %{ $loaded->{$path}->{filtered} } ) { + delete $loaded->{$path}->{filtered}->{$_}; + warn "# invalidate $path filtered $_\n"; + } +} + +sub __path_modified { + my ( $path, $value ) = @_; + $value = 1 unless defined $value; + + $loaded->{$path}->{modified} = $value; + + warn "# __path_modified $path $value\n"; +} + sub edit { my $self = shift; - my $content = $self->param('content'); + my $new_content = $self->param('new_content'); + $new_content ||= $self->param('content'); # backward compatibility with old actions my $i = $self->param('_row_id'); die "invalid _row_id ",dump($i) unless $i =~ m/^\d+$/; @@ -768,21 +898,21 @@ sub edit { my $data = $self->_loaded('data'); if ( defined $loaded->{$path}->{data}->{items}->[$i] ) { - $content =~ s/^\s+//s; - $content =~ s/\s+$//s; + $new_content =~ s/^\s+//s; + $new_content =~ s/\s+$//s; my $v; - if ( $content =~ /\xB6/ ) { # para - $v = [ split(/\s*\xB6\s*/, $content) ]; + if ( $new_content =~ /\xB6/ ) { # para + $v = [ split(/\s*\xB6\s*/, $new_content) ]; } else { - $v = [ $content ]; + $v = [ $new_content ]; } my $old = dump $loaded->{$path}->{data}->{items}->[$i]->{$name}; my $new = dump $v; if ( $old ne $new - && ! ( $old eq 'undef' && length($content) == 0 ) # new value empty, previous undef + && ! ( $old eq 'undef' && length($new_content) == 0 ) # new value empty, previous undef ) { - my $edit = { + my $change = { path => $path, column => $name, pos => $i, @@ -796,43 +926,38 @@ sub edit { keys %{ $loaded->{$path}->{stats} } }, }; - my $edit_path = $self->_permanent_path( 'edits' ); - mkdir $edit_path unless -d $edit_path; - $edit_path .= '/' . $edit->{time}; - store $edit, $edit_path; - utime $edit->{time}, $edit->{time}, $edit_path; - warn "# $edit_path ", dump($edit); - - warn "# edit $path $i $old -> $new\n"; + my $change_path = $self->_permanent_path( 'changes' ); + mkdir $change_path unless -d $change_path; + $change_path .= '/' . $change->{time}; + store $change, $change_path; + utime $change->{time}, $change->{time}, $change_path; + warn "# $change_path ", dump($change); + + warn "# change $path $i $old -> $new\n"; $loaded->{$path}->{data}->{items}->[$i]->{$name} = $v; - if ( defined $loaded->{$path}->{sorted}->{$name} ) { - delete $loaded->{$path}->{sorted}->{$name}; - warn "# invalidate $path sorted $name\n"; - } - - foreach ( grep { m/$name/ } keys %{ $loaded->{$path}->{filtered} } ) { - delete $loaded->{$path}->{filtered}->{$_}; - warn "# invalidate $path filtered $_\n"; - } + __invalidate_path_column( $path, $name ); $status = 201; # created - $self->session('save_path' => $path); + # modified = 2 -- force rebuild of stats + __path_modified( $path, 2 ); + + $new_content = join("\xB6",@$v); } else { warn "# unchanged $path $i $old\n"; $status = 304; } } else { - $content = "$path $i $name doesn't exist\n"; + $new_content = "$path $i $name doesn't exist\n"; $status = 404; } - warn "# edit $status $content"; + warn "# edit $status ", dump $new_content; $self->render( status => $status, - content => $content, + new_content => scalar $new_content, ); } @@ -841,17 +966,38 @@ sub save { my $self = shift; my $path = $self->_param_or_session('path'); my $dump_path = $self->_save( $path ); - $self->session('save_path' => 0); + __path_modified( $path, 0 ); $self->redirect_to( '/data/items' ); } sub export { my $self = shift; + + if ( my $import = $self->param('import') ) { + + if ( $import =~ m{/filter\.(.+?)\..+} ) { + my $name = $1; + my @vals = map { chomp; $_ } + read_file $self->app->home->rel_dir('public') . "/export/$import"; + $self->_remove_filter( $name ); + $self->_filter_on_data( $name, @vals ); + $self->session( 'offset' => 0 ); + $self->redirect_to('/data/items'); + } else { + warn "UNKNOWN IMPORT $import"; + } + } + $self->render( export => [ - map { s{^.+/public/export/}{}; $_ } glob( $self->_export_path . '*' ) ] ); } +sub __loaded_paths { + return + grep { defined $loaded->{$_}->{data} } + keys %$loaded; +} + 1;