X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=lib%2FMojoFacets%2FData.pm;h=a02a563bd5ee9287e262c216399ed031b6cf02ec;hb=28ed3834b792af09728ca90e984735877b50455c;hp=d32aaecaeacbbc4e9e4805281a70d2cab7f47780;hpb=11daddd40a3ca0ea057aa681f6a220c08e69468f;p=MojoFacets.git diff --git a/lib/MojoFacets/Data.pm b/lib/MojoFacets/Data.pm index d32aaec..a02a563 100644 --- a/lib/MojoFacets/Data.pm +++ b/lib/MojoFacets/Data.pm @@ -13,9 +13,11 @@ use File::Find; use Storable; use Time::HiRes qw(time); use File::Path qw(mkpath); +use Text::Unaccent::PurePerl; use MojoFacets::Import::File; use MojoFacets::Import::HTMLTable; +use MojoFacets::Import::CSV; our $loaded; our $filters; @@ -38,12 +40,16 @@ sub index { } elsif ( -d $file && $file =~ m/\.html$/ ) { $file =~ s/$data_dir\/*//; push @files, $file; + } elsif ( -f $file && $file =~ m/\.csv$/i ) { + $file =~ s/$data_dir\/*//; + push @files, $file; } else { - warn "IGNORE: $file\n"; + #warn "IGNORE: $file\n"; } }, $data_dir); - @files = sort { lc $a cmp lc $b } @files; + @files = sort { $loaded->{$b}->{mtime} <=> $loaded->{$a}->{mtime} || lc $a cmp lc $b } @files, + grep { defined $loaded->{$_}->{generated} } keys %$loaded; my $size; $size->{$_} = -s "$data_dir/$_" foreach @files; @@ -157,8 +163,10 @@ sub stats { sub _load_path { my ( $self, $path ) = @_; + return if defined $loaded->{$path}->{'generated'}; + my $full_path = $self->app->home->rel_file( 'data/' . $path ); - die "$full_path $!" unless -r $full_path; + $self->redirect_to('/data/index') unless -r $full_path; my $dump_path = $self->_dump_path( $path ); @@ -175,7 +183,11 @@ sub _load_path { my $data; if ( -f $full_path ) { - $data = MojoFacets::Import::File->new( full_path => $full_path, path => $path )->data; + if ( $full_path =~ m/.csv/i ) { + $data = MojoFacets::Import::CSV->new( full_path => $full_path )->data; + } else { + $data = MojoFacets::Import::File->new( full_path => $full_path, path => $path )->data; + } } elsif ( -d $full_path && $full_path =~ m/.html/ ) { $data = MojoFacets::Import::HTMLTable->new( dir => $full_path )->data; } else { @@ -217,11 +229,19 @@ sub _load_path { sub load { my $self = shift; + my $path = $self->param('path') || $self->redirect_to( '/data/index' ); + my @paths = $self->param('paths'); warn "# paths ", dump @paths; + + foreach my $p ( keys %$loaded ) { + next if grep { /^\Q$p\E$/ } @paths; + warn "remove $p from memory\n"; + delete $loaded->{$p}; + } + $self->_load_path( $_ ) foreach @paths; - my $path = $self->param('path') || $self->redirect_to( '/data/index' ); warn "# path $path\n"; $self->_load_path( $path ); @@ -233,7 +253,7 @@ sub load { if ( ! defined $loaded->{$path}->{columns} ) { my $columns_path = $self->_permanent_path( 'columns' ); if ( -e $columns_path ) { - my @columns = map { s/[\r\n]+$//; $_ } read_file $columns_path; + my @columns = map { s/[\r\n]+$//; $_ } read_file $columns_path, binmode => ':utf8'; $loaded->{$path}->{columns} = [ @columns ]; warn "# columns_path $columns_path ",dump(@columns); } else { @@ -253,7 +273,7 @@ sub _loaded { my $path = $self->session('path') || $self->param('path'); $self->redirect_to('/data/index') unless $path; - if ( $loaded->{$path}->{modified} > 1 ) { + if ( defined $loaded->{$path}->{modified} && $loaded->{$path}->{modified} > 1 ) { my $caller = (caller(1))[3]; if ( $caller =~ m/::edit/ ) { warn "rebuild stats for $path ignored caller $caller\n"; @@ -267,12 +287,14 @@ sub _loaded { if ( ! defined $loaded->{$path}->{$name} ) { warn "$path $name isn't loaded\n"; $self->_load_path( $path ); - $self->redirect_to('/data/index') - unless defined $loaded->{$path}->{$name}; if ( ! defined $loaded->{$path}->{stats} ) { warn "rebuild stats for $path\n"; $loaded->{$path}->{stats} = __stats( $loaded->{$path}->{data}->{items} ); } + if ( ! defined $loaded->{$path}->{$name} ) { + warn "MISSING $name for $path\n"; + $self->redirect_to('/data/index') + } } $self->session( 'modified' => $loaded->{$path}->{modified} ); @@ -304,7 +326,7 @@ sub _export_path { } my $dir = $self->app->home->rel_dir('public') . "/export/$path"; mkpath $dir unless -e $dir; - $dir . '/' . join('.', @_); + $dir . '/' . unac_string( join('.', @_) ); } sub columns { @@ -312,7 +334,7 @@ sub columns { if ( $self->param('columns') ) { my @columns = $self->_param_array('columns'); - write_file( $self->_permanent_path( 'columns' ), map { "$_\n" } @columns ); + write_file( $self->_permanent_path( 'columns' ), { binmode => ':utf8' }, map { "$_\n" } @columns ); $self->redirect_to('/data/items'); } @@ -385,7 +407,7 @@ sub filter { $self->_filter_on_data( $name, @vals ); if ( my $permanent = $self->param('_permanent') ) { my $permanent_path = $self->_export_path( 'filter', $name, $permanent ); - write_file $permanent_path, map { "$_\n" } @vals; + write_file $permanent_path, { binmode => ':utf8' }, map { "$_\n" } @vals; warn "permanent filter $permanent_path ", -s $permanent_path; } } @@ -421,7 +443,9 @@ sub _filter_on_data { foreach my $i ( 0 .. $#$items ) { if ( defined $items->[$i]->{$name} ) { - foreach my $v ( @{ $items->[$i]->{$name} } ) { + my $row = $items->[$i]->{$name}; + $row = [ $row ] unless ref $row eq 'ARRAY'; # FIXME probably wrong place + foreach my $v ( @$row ) { if ( defined $filter_hash->{ $v } ) { $filtered_items->{$i}++; } @@ -437,58 +461,19 @@ sub _filter_on_data { warn "filter $name with ", scalar keys %$filtered_items, " items created\n"; } -sub _data_items { - my ( $self, $all ) = @_; - my $data = $self->_loaded( 'data' ); - - return @{ $data->{items} } if $all == 1; - - my $filters = $self->_current_filters; - my $filter_value; - foreach my $f ( keys %$filters ) { - foreach my $n ( @{ $filters->{$f} } ) { - $filter_value->{$f}->{$n} = 1; - } - } - my @items = @{ $data->{items} }; - @items = grep { - my $i = $_; - my $pass = 1; - foreach my $n ( keys %$filter_value ) { - if ( ! exists $i->{$n} ) { - if ( defined $filter_value->{$n}->{_missing} ) { - $pass = 1; - next; - } else { - $pass = 0; - last; - } - } - # and match any of values in element - my $have_values = 0; - foreach my $v ( @{ $i->{$n} } ) { # FIXME not array? - $have_values ||= 1 if defined $filter_value->{$n}->{$v}; - } - if ( ! $have_values ) { - $pass = 0; - last; - } - } - $pass; - } @items if $filter_value; - return @items; -} - sub _current_filters { my $self = shift; my $current_filters; + my $columns = $self->_loaded('header'); + if ( my $sc = $self->session('columns') ) { + $columns = $sc; + } + $current_filters->{ $_ } = $filters->{ $_ } - foreach ( - grep { defined $filters->{ $_ } } - @{ $self->_loaded('header') } - ); - #warn "# current_filters ",dump($current_filters); + foreach ( grep { defined $filters->{ $_ } } @$columns ) + ; + warn "# _current_filters ",dump($columns); return $current_filters; } @@ -517,8 +502,16 @@ sub _data_sorted_by { $a->[1] cmp $b->[1] } } map { - [ $nr++, exists $_->{$order} ? join('', @{$_->{$order}}) : $missing ] - } grep { ref $_->{$order} eq 'ARRAY' } @{ $data->{items} } + my $v; + if ( ! exists $_->{$order} ) { + $v = $missing; + } elsif ( ref $_->{$order} eq 'ARRAY' ) { + $v = join('', @{$_->{$order}}); + } else { + $v = $_->{$order}; + } + [ $nr++, $v ] + } @{ $data->{items} } ; warn "sorted: $order numeric: $numeric items: ", $#sorted + 1, "\n"; @@ -528,6 +521,11 @@ sub _data_sorted_by { } +sub __all_filters { + my $order = pop @_; + join(',', sort(@_), 'order', $order); +} + sub items { my $self = shift; @@ -537,7 +535,6 @@ sub items { } my $path = $self->session('path'); - $self->redirect_to('/data/index') unless defined $loaded->{ $path }; my @columns = $self->_param_array('columns'); $self->redirect_to('/data/columns') unless @columns; @@ -550,6 +547,10 @@ sub items { # fix offset when changing limit $offset = int( $offset / $limit ) * $limit; + if ( ! grep { /^\Q$order\E$/ } @columns ) { + $order = $columns[0]; + $self->session( order => $order ); + } my $sorted = $self->_data_sorted_by( $order ); my @filter_names; @@ -568,7 +569,7 @@ sub items { } } - my $all_filters = join(' ',sort @filter_names,'order:',$order); + my $all_filters = __all_filters( @filter_names,$order ); # warn "# all_filters $all_filters ", dump( $loaded->{$path}->{filtered}->{$all_filters} ); @@ -576,7 +577,7 @@ sub items { my $path_filters = $loaded->{$path}->{filters}; - warn "create combined filter for $all_filters\n"; + warn "create combined filter for $all_filters from ", $#$sorted + 1, " items\n"; my @filtered; foreach my $i ( 0 .. $#$sorted ) { @@ -607,52 +608,171 @@ sub items { my $data = $self->_loaded('data'); my $code = $self->_param_scalar('code',''); - $code =~ s{\n+$}{}s; + $code =~ s{[\r\n]+$}{}s; my $commit = $self->param('commit'); my $test = $self->param('test'); + my $commit_changed; + if ( $code && ( $test || $commit ) ) { - my $new_col = 'stoljece'; - if ( ! grep { /$new_col/ } @columns ) { - unshift @columns, $new_col; - $self->session('columns', [ @columns ]) if $commit; + # XXX find columns used in code snippet and show them to user + my $order = 0; + foreach my $column ( $code =~ m/\$row->{([^}]+)}/g ) { + if ( $column =~ s/^(['"])// ) { + $column =~ s/$1$//; + } + next if $column =~ m/\$/; # hide columns with vars in them + $commit_changed->{$column} = 0; } } + my $code_path = $self->app->home->rel_dir('public') . "/code"; if ( $commit ) { + warn "# commit on ", $#$filtered + 1, " items:\n$code\n"; + my $out; foreach ( 0 .. $#$filtered ) { my $i = $filtered->[$_]; - my $rec = $data->{items}->[$i]; + my $row = $data->{items}->[$i]; + my $update; eval $code; + foreach ( keys %$update ) { + $commit_changed->{$_}++; + $row->{$_} = $update->{$_}; + } + } + if ( my $description = $self->param('code_description') ) { + my $depends = $self->param('code_depends') || die "no code_depends?"; + my $path = "$code_path/$depends.$description.pl"; + if ( -e $path && ! $self->param('overwrite') ) { + warn "# code $path not saved\n"; + } else { + write_file( $path, { binmode => ':utf8' }, $code ); + warn "code $path ", -s $path, " bytes saved\n"; + } + } + $code = ''; + if ( $out ) { + my $commit_dataset = join('.' + , $self->param('code_depends') + , $self->param('code_description') + , time() + ); + my $key = $self->param('code_depends'); + $key =~ s/,.+$//; + $key ||= 'key'; + my $items; + foreach my $n ( keys %$out ) { + my $i = { $key => [ $n ] }; + my $ref = ref $out->{$n}; + if ( $ref eq 'HASH' ) { + $i->{$_} = [ $out->{$n}->{$_} ] foreach keys %{ $out->{$n} }; + } elsif ( $ref eq 'ARRAY' ) { + $i->{$_} = $out->{$n}; + } elsif ( ! $ref ) { + $i->{value} = [ $out->{$n} ]; + } else { + $i->{_error} = [ dump($out->{$n}) ]; + } + push @$items, $i; + }; + undef $out; + my $stats = __stats( $items ); + my @columns = grep { ! m/^\Q$key\E$/ } sort keys %$stats; + unshift @columns, $key; + + $loaded->{$commit_dataset} = { + header => [ @columns ], + columns => [ @columns ], + mtime => time(), + data => { items => $items }, + stats => $stats, + generated => 1, + }; + warn "# loaded out ", dump( $loaded->{$commit_dataset} ); + $self->session('path', $commit_dataset); + $self->session('columns', [ @columns ]); + $self->session('order', $key); + $self->redirect_to('/data/items'); + return; # FIXME needed to correctly show columns } + + # this might move before $out to recalculate stats on source dataset? + __path_modified( $path, 2 ); + my $c = { map { $_ => 1 } @columns }; + my @added_columns = sort grep { ! $c->{$_} } keys %$commit_changed; + warn "# added_columns ",dump( @added_columns ); + unshift @columns, @added_columns; + + $self->session('columns', [ @columns ]); + $loaded->{$path}->{columns} = [ @columns ]; + warn "# new columns ",dump( @columns ); + + __invalidate_path_column( $path, $_ ) foreach keys %$commit_changed; } my $sorted_items; my $from_end = $sort eq 'd' ? $#$filtered : 0; + my $test_changed; + my $out; foreach ( 0 .. $limit ) { my $i = $_ + $offset; last unless defined $filtered->[$i]; $i = $from_end - $i if $from_end; my $id = $filtered->[$i]; - my $rec = $data->{items}->[ $id ]; - $rec->{_row_id} ||= $id; + my $row = Storable::dclone $data->{items}->[ $id ]; if ( $code && $test ) { - $rec = Storable::dclone $rec; + my $update; eval $code; if ( $@ ) { warn "ERROR evaling\n$code\n$@"; $self->stash('eval_error', $@) if $@; } else { - warn "EVAL ",dump($rec); + warn "EVAL ",dump($update); + foreach ( keys %$update ) { + $test_changed->{$_}++; + $row->{$_} = $update->{$_}; + } } } - push @$sorted_items, $rec; + $row->{_row_id} ||= $id; + push @$sorted_items, $row; + } + + if ( $self->param('export') ) { + my $export_path = $self->_export_path( 'items', @columns); + open(my $fh, '>', $export_path) || warn "ERROR: can't open $export_path: $!"; + foreach my $f ( 0 .. $#$filtered ) { + print $fh join("\t", map { + my $i = $data->{items}->[ $filtered->[$f] ]; + if ( ref $i->{$_} eq 'ARRAY' ) { + join(',', @{ $i->{$_} }); + } else { + dump $i->{$_}; + } + } @columns),"\n"; + } + close($fh); + warn "export $export_path ", -s $export_path, " bytes\n"; } + warn "# test_changed ",dump( $test_changed ); + my $c = { map { $_ => 1 } @columns }; + my @added_columns = sort grep { ! $c->{$_} } keys %$test_changed; + unshift @columns, @added_columns; + warn "# sorted_items ", $#$sorted_items + 1, " offset $offset limit $limit order $sort"; + my $code_depends = $self->param('code_depends')|| + join(',', sort grep { $test_changed->{$_} == 0 } keys %$test_changed ); + my $code_description = $self->param('code_description') || + join(',', @added_columns); + + $code_depends ||= $code_description; # self-modifing + + warn "# test_changed ",dump( $test_changed, $code_depends, $code_description ); + $self->render( order => $order, offset => $offset, @@ -661,8 +781,14 @@ sub items { columns => [ @columns ], rows => $#$filtered + 1, numeric => { map { $_, $self->_is_numeric($_) } @columns }, + unique => { map { $_, $self->_is_unique( $_) } @columns }, filters => $self->_current_filters, code => $code, + cols_changed => $commit ? $commit_changed : $test_changed, + code_depends => $code_depends, + code_description => $code_description, + code_path => $code_path, + out => $out, ); } @@ -687,6 +813,12 @@ sub _is_numeric { $stats->{$name}->{numeric} > $count / 2; } +sub _is_unique { + my ( $self, $name ) = @_; + my $stats = $self->_loaded( 'stats' ); + defined $stats->{$name}->{unique}; +} + sub _remove_filter { my ($self,$name) = @_; warn "_remove_filter $name\n"; @@ -698,7 +830,7 @@ sub _remove_filter { warn "filters left: ", keys %{ $loaded->{$path}->{filters} }; foreach ( - grep { /\b$name\b/ } + grep { /\Q$name\E/ } keys %{ $loaded->{$path}->{filtered} } ) { delete $loaded->{$path}->{filtered}->{$_}; @@ -723,7 +855,7 @@ sub facet { my $data = $self->_loaded('data'); my $filters = $self->_current_filters; - my $all_filters = join(' ',sort keys %$filters,'order:',$self->session('order')); + my $all_filters = __all_filters( keys %$filters,$self->session('order') ); my $filtered = $loaded->{$path}->{filtered}->{$all_filters} if defined $loaded->{$path}->{filtered}->{$all_filters}; @@ -791,7 +923,7 @@ sub __invalidate_path_column { warn "# invalidate $path sorted $name\n"; } - foreach ( grep { m/$name/ } keys %{ $loaded->{$path}->{filtered} } ) { + foreach ( grep { m/\Q$name\E/ } keys %{ $loaded->{$path}->{filtered} } ) { delete $loaded->{$path}->{filtered}->{$_}; warn "# invalidate $path filtered $_\n"; } @@ -901,7 +1033,7 @@ sub export { if ( $import =~ m{/filter\.(.+?)\..+} ) { my $name = $1; my @vals = map { chomp; $_ } - read_file $self->app->home->rel_dir('public') . "/export/$import"; + read_file $self->app->home->rel_dir('public') . "/export/$import", binmode => ':utf8'; $self->_remove_filter( $name ); $self->_filter_on_data( $name, @vals ); $self->session( 'offset' => 0 ); @@ -911,9 +1043,10 @@ sub export { } } - $self->render( export => [ - glob( $self->_export_path . '*' ) - ] ); + my @files = glob( $self->_export_path . '*' ); + my $mtime = { map { $_ => (stat($_))[9] } @files }; + @files = sort { $mtime->{$b} <=> $mtime->{$a} } @files; + $self->render( export => [ @files ] ); } sub __loaded_paths {