X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=lib%2FMojoFacets%2FData.pm;h=c2283b875ba123bfc243dbf0e2e4d0e3e66524d8;hb=83132a946e365cb2b134f26bed859f60bcdb2eda;hp=5d40d8e279134c8ef3e2bb8c91a677e4707a3f46;hpb=7ed8a4cc53db2b5d5391a46f5a1a5960a5a25613;p=MojoFacets.git diff --git a/lib/MojoFacets/Data.pm b/lib/MojoFacets/Data.pm index 5d40d8e..c2283b8 100644 --- a/lib/MojoFacets/Data.pm +++ b/lib/MojoFacets/Data.pm @@ -7,10 +7,15 @@ use base 'Mojolicious::Controller'; use Data::Dump qw(dump); use File::Slurp; -use JSON; use Encode; use locale; use File::Find; +use Storable; +use Time::HiRes qw(time); +use File::Path qw(mkpath); + +use MojoFacets::Import::File; +use MojoFacets::Import::HTMLTable; our $loaded; our $filters; @@ -18,89 +23,77 @@ our $filters; sub index { my $self = shift; - my $path = $self->app->home->rel_dir('data'); - die "no data dir $path" unless -d $path; + my $data_dir = $self->app->home->rel_dir('data'); + die "no data dir $data_dir" unless -d $data_dir; my @files; + my $changes; find( sub { my $file = $File::Find::name; if ( -f $file && $file =~ m/\.(js(on)?|txt)$/ ) { - $file =~ s/$path\/*//; + $file =~ s/$data_dir\/*//; + push @files, $file; + } elsif ( -f $file && $file =~ m/([^\/]+)\.changes\/(\d+\.\d+.+)/ ) { + push @{ $changes->{$1} }, $2 + } elsif ( -d $file && $file =~ m/\.html$/ ) { + $file =~ s/$data_dir\/*//; push @files, $file; } else { warn "IGNORE: $file\n"; } - }, $path); + }, $data_dir); @files = sort { lc $a cmp lc $b } @files; my $size; - $size->{$_} = -s "$path/$_" foreach @files; + $size->{$_} = -s "$data_dir/$_" foreach @files; $self->render( files => [ @files ], size => $size, loaded => $loaded, filters => $filters, + dump_path => { map { $_ => $self->_dump_path($_) } @files }, + changes => $changes, ); } -sub _load_path { +sub _dump_path { + my ( $self, $name ) = @_; + my $dir = $self->app->home->rel_dir('data'); + $name =~ s/^$dir//; + $name =~ s/\/+/_/g; + return '/tmp/mojo_facets.' . $name . '.storable'; +} + +sub _save { my ( $self, $path ) = @_; - return if defined $loaded->{$path}->{data}; + my $dump_path = $self->_dump_path( $path ); + my $first_load = ! -e $dump_path; + warn "save loaded to $dump_path"; + my $info = $loaded->{$path}; + store $info, $dump_path; - my $full_path = $self->app->home->rel_file( 'data/' . $path ); - die "$full_path $!" unless -r $full_path; + if ( $first_load ) { + my $mtime = $loaded->{$path}->{mtime}; + utime $mtime, $mtime, $dump_path; + warn "sync time to $path at $mtime\n"; + } - # we could use Mojo::JSON here, but it's too slow -# $data = from_json read_file $path; - my $data = read_file $full_path; - warn "# data snippet: ", substr($data,0,200); - my @header; - if ( $path =~ m/\.js/ ) { - Encode::_utf8_on($data); - $data = from_json $data; - } elsif ( $path =~ m/\.txt/ ) { - my @lines = split(/\r?\n/, $data); - $data = { items => [] }; - - my $header_line = shift @lines; - my $multiline = $header_line =~ s/\^//g; - @header = split(/\|/, $header_line ); - warn "# header ", dump( @header ); - while ( my $line = shift @lines ) { - $line =~ s/\^//g; - chomp $line; - my @v = split(/\|/, $line); - while ( @lines && $#v < $#header ) { - $line = $lines[0]; - $line =~ s/\^//g; - chomp $line; - my @more_v = split(/\|/, $line); - if ( $#v + $#more_v > $#header ) { - warn "short line: ",dump( @v ); - last; - } - shift @lines; - $v[ $#v ] .= shift @more_v if @more_v; - push @v, @more_v if @more_v; + warn $dump_path, ' ', -s $dump_path, " bytes\n"; + return $dump_path; +} - if ( $#v > $#header ) { - die "# splice $#header ", dump( @v ); - @v = splice @v, 0, $#header; - } - } - my $item; - $item->{ $header[$_] || "f_$_" } = [ $v[$_] ] foreach ( 0 .. $#v ); - push @{ $data->{items} }, $item; - } - } else { - warn "file format unknown $path"; - } + +sub __stats { my $stats; - foreach my $e ( @{ $data->{items} } ) { + my $nr_items = $#{ $_[0] } + 1; + + warn "__stats $nr_items\n"; + + foreach my $e ( @{ $_[0] } ) { foreach my $n ( keys %$e ) { $stats->{$n}->{count}++; my @v; @@ -122,36 +115,102 @@ sub _load_path { } foreach my $n ( keys %$stats ) { - next unless defined $stats->{$n}->{array}; - delete $stats->{$n}->{array} - if $stats->{$n}->{array} == $stats->{$n}->{count}; + my $s = $stats->{$n}; + next unless defined $s->{array}; + if ( $s->{array} == $s->{count} ) { + delete $s->{array}; + if ( $s->{count} == $nr_items ) { + warn "check $n for uniqeness\n"; + my $unique; + foreach my $e ( @{ $_[0] } ) { + if ( ++$unique->{ $e->{$n}->[0] } == 2 ) { + $unique = 0; + last; + } + } + if ( $unique ) { + $stats->{$n}->{unique} = 1; + #warn "# $n unique ",dump( $unique ); + } + } + } } - if ( ! @header ) { - if ( defined $data->{header} ) { - if ( ref $data->{header} eq 'ARRAY' ) { - @header = @{ $data->{header} }; - } else { - warn "header not array ", dump( $data->{header} ); - } + warn "# __stats ",dump($stats); + + return $stats; +} + +sub _param_or_session { + $_[0]->param( $_[1] ) || $_[0]->session( $_[1] ) +} + +sub stats { + my $self = shift; + my $path = $self->_param_or_session('path'); + warn "stats $path\n"; + delete $loaded->{$path}->{stats}; + $self->redirect_to( '/data/columns' ); +} + + +sub _load_path { + my ( $self, $path ) = @_; + + my $full_path = $self->app->home->rel_file( 'data/' . $path ); + die "$full_path $!" unless -r $full_path; + + my $dump_path = $self->_dump_path( $path ); + + if ( defined $loaded->{$path}->{data} ) { + my $mtime = (stat($full_path))[9]; + return if $loaded->{$path}->{mtime} == $mtime; + warn "reload $full_path, modified ", time() - $mtime, " seconds ago\n"; + } elsif ( -e $dump_path ) { + warn "dump_path $dump_path ", -s $dump_path, " bytes loading...\n"; + my $info = retrieve $dump_path; + $loaded->{ $path } = $info; + return; + } + + my $data; + if ( -f $full_path ) { + $data = MojoFacets::Import::File->new( full_path => $full_path, path => $path )->data; + } elsif ( -d $full_path && $full_path =~ m/.html/ ) { + $data = MojoFacets::Import::HTMLTable->new( dir => $full_path )->data; + } else { + die "can't load $full_path"; + } + + my @header; + + if ( defined $data->{header} ) { + if ( ref $data->{header} eq 'ARRAY' ) { + @header = @{ $data->{header} }; + } else { + warn "header not array ", dump( $data->{header} ); } } + my $stats = __stats( $data->{items} ); + @header = sort { $stats->{$b}->{count} <=> $stats->{$a}->{count} } grep { defined $stats->{$_}->{count} } keys %$stats unless @header; - warn dump($stats); - - $loaded->{ $path } = { + my $info = { header => [ @header ], stats => $stats, full_path => $full_path, size => -s $full_path, + mtime => (stat($full_path))[9], data => $data, }; + $loaded->{ $path } = $info; + $self->_save( $path ); + } @@ -164,27 +223,62 @@ sub load { my $path = $self->param('path') || $self->redirect_to( '/data/index' ); warn "# path $path\n"; - $self->session('path' => $path); $self->_load_path( $path ); + $self->session( 'path' => $path ); + + my $redirect_to = '/data/items'; + $self->session( 'header' => $loaded->{$path}->{header} ); if ( ! defined $loaded->{$path}->{columns} ) { - $self->session( 'columns' => $loaded->{$path}->{header} ); - $self->session( 'order' => $loaded->{$path}->{header}->[0] ); - $self->redirect_to( '/data/columns' ); - } else { - $self->session( 'columns' => $loaded->{$path}->{columns} ); - $self->session( 'order' => $loaded->{$path}->{columns}->[0] ); - $self->redirect_to( '/data/items' ); + my $columns_path = $self->_permanent_path( 'columns' ); + if ( -e $columns_path ) { + my @columns = map { s/[\r\n]+$//; $_ } read_file $columns_path; + $loaded->{$path}->{columns} = [ @columns ]; + warn "# columns_path $columns_path ",dump(@columns); + } else { + $loaded->{$path}->{columns} = $loaded->{$path}->{header} + } + + $redirect_to = '/data/columns'; } + $self->session( 'columns' => $loaded->{$path}->{columns} ); + $self->session( 'order' => $loaded->{$path}->{columns}->[0] ); + $self->redirect_to( $redirect_to ); } sub _loaded { my ( $self, $name ) = @_; - my $path = $self->session('path'); - die "$path $name doesn't exist in loaded ",dump( $loaded ) - unless defined $loaded->{$path}->{$name}; + my $path = $self->session('path') || $self->param('path'); + $self->redirect_to('/data/index') unless $path; + + if ( defined $loaded->{$path}->{modified} && $loaded->{$path}->{modified} > 1 ) { + my $caller = (caller(1))[3]; + if ( $caller =~ m/::edit/ ) { + warn "rebuild stats for $path ignored caller $caller\n"; + } else { + warn "rebuild stats for $path FORCED by modified caller $caller\n"; + $loaded->{$path}->{stats} = __stats( $loaded->{$path}->{data}->{items} ); + $loaded->{$path}->{modified} = 1; + } + } + + if ( ! defined $loaded->{$path}->{$name} ) { + warn "$path $name isn't loaded\n"; + $self->_load_path( $path ); + if ( ! defined $loaded->{$path}->{stats} ) { + warn "rebuild stats for $path\n"; + $loaded->{$path}->{stats} = __stats( $loaded->{$path}->{data}->{items} ); + } + if ( ! defined $loaded->{$path}->{$name} ) { + warn "MISSING $name for $path\n"; + $self->redirect_to('/data/index') + } + } + + $self->session( 'modified' => $loaded->{$path}->{modified} ); + return $loaded->{$path}->{$name}; } @@ -197,16 +291,34 @@ sub _checked { return $checked; } +sub _permanent_path { + my $self = shift; + my $path = $self->_param_or_session('path'); + $self->app->home->rel_dir('data') . '/' . join('.', $path, @_); +} + +sub _export_path { + my $self = shift; + my $path = $self->_param_or_session('path'); + if ( ! $path ) { + warn "no path in param or session"; + return; + } + my $dir = $self->app->home->rel_dir('public') . "/export/$path"; + mkpath $dir unless -e $dir; + $dir . '/' . join('.', @_); +} sub columns { my $self = shift; if ( $self->param('columns') ) { - $self->_perm_array('columns'); + my @columns = $self->_param_array('columns'); + write_file( $self->_permanent_path( 'columns' ), map { "$_\n" } @columns ); $self->redirect_to('/data/items'); } - my $stats = $self->_loaded( 'stats' ); # || $self->redirect_to( '/data/index' ); + my $stats = $self->_loaded( 'stats' ); my @columns; @columns = grep { defined $stats->{$_}->{count} } @{ $self->session('columns') } if $self->session('columns'); @@ -219,11 +331,11 @@ sub columns { message => 'Select columns to display', stats => $stats, columns => \@columns, - checked => $self->_checked( $self->_perm_array('columns') ), + checked => $self->_checked( $self->_param_array('columns') ), ); } -sub _perm_array { +sub _param_array { my ($self,$name) = @_; my @array = $self->param($name); @@ -243,7 +355,7 @@ sub _perm_array { return @array; } -sub _perm_scalar { +sub _param_scalar { my ($self,$name,$default) = @_; my $scalar = $self->param($name); @@ -271,7 +383,14 @@ sub filter { my @vals = $self->param('filter_vals'); $self->_remove_filter( $name ); - $self->_filter_on_data( $name, @vals ) if @vals; + if ( @vals ) { + $self->_filter_on_data( $name, @vals ); + if ( my $permanent = $self->param('_permanent') ) { + my $permanent_path = $self->_export_path( 'filter', $name, $permanent ); + write_file $permanent_path, map { "$_\n" } @vals; + warn "permanent filter $permanent_path ", -s $permanent_path; + } + } $self->session( 'offset' => 0 ); @@ -401,11 +520,11 @@ sub _data_sorted_by { } } map { [ $nr++, exists $_->{$order} ? join('', @{$_->{$order}}) : $missing ] - } @{ $data->{items} } + } grep { ref $_->{$order} eq 'ARRAY' } @{ $data->{items} } ; warn "sorted: $order numeric: $numeric items: ", $#sorted + 1, "\n"; - warn "# sorted ",dump( @sorted ); + #warn "# sorted ",dump( @sorted ); $loaded->{$path}->{sorted}->{$order} = [ @sorted ]; } @@ -414,16 +533,20 @@ sub _data_sorted_by { sub items { my $self = shift; + if ( my $show = $self->param('id') ) { + $self->param('show', $show); + warn "show $show\n"; + } + my $path = $self->session('path'); - $self->redirect_to('/data/index') unless defined $loaded->{ $path }; - my @columns = $self->_perm_array('columns'); + my @columns = $self->_param_array('columns'); $self->redirect_to('/data/columns') unless @columns; - my $order = $self->_perm_scalar('order', $columns[0]); - my $sort = $self->_perm_scalar('sort', 'a'); - my $offset = $self->_perm_scalar('offset', 0); - my $limit = $self->_perm_scalar('limit', 20); - $self->_perm_scalar('show', 'table'); + my $order = $self->_param_scalar('order', $columns[0]); + my $sort = $self->_param_scalar('sort', 'a'); + my $offset = $self->_param_scalar('offset', 0); + my $limit = $self->_param_scalar('limit', 20); + $self->_param_scalar('show', 'table'); # fix offset when changing limit $offset = int( $offset / $limit ) * $limit; @@ -489,7 +612,10 @@ sub items { my $i = $_ + $offset; last unless defined $filtered->[$i]; $i = $from_end - $i if $from_end; - push @$sorted_items, $data->{items}->[ $filtered->[$i] ]; + my $id = $filtered->[$i]; + push @$sorted_items, + my $item = $data->{items}->[ $id ]; + $item->{_row_id} ||= $id; } warn "# sorted_items ", $#$sorted_items + 1, " offset $offset limit $limit order $sort"; @@ -559,7 +685,7 @@ sub facet { my $facet; my $name = $self->param('name') || die "no name"; - my $all = $self->_perm_scalar('all', 1); + my $all = $self->_param_scalar('all', 1); my $data = $self->_loaded('data'); my $filters = $self->_current_filters; @@ -598,19 +724,23 @@ sub facet { my $numeric = $self->_is_numeric($name); my $sort = $self->param('sort'); - $sort ||= $numeric ? 'a' : 'c'; + # sort numeric facets with more than 5 values ascending + $sort ||= $numeric && $#facet_names > 4 ? 'a' : 'c'; @facet_names = sort { - if ( $sort =~ m/a/i ) { - $numeric ? $a <=> $b : lc $a cmp lc $b; - } elsif ( $sort =~ m/d/i ) { - $numeric ? $b <=> $a : lc $b cmp lc $a; - } elsif ( $sort =~ m/c/i ) { - ( $facet->{$b} || -1 ) <=> ( $facet->{$a} || -1 ) + my $result; + if ( $sort eq 'a' ) { + $result = $numeric ? $a <=> $b : lc $a cmp lc $b; + } elsif ( $sort eq 'd' ) { + $result = $numeric ? $b <=> $a : lc $b cmp lc $a; + } elsif ( $sort eq 'c' ) { + $result = ( $facet->{$b} || -1 ) <=> ( $facet->{$a} || -1 ) } else { warn "unknown sort: $sort"; - $a cmp $b; + $result = $a cmp $b; } + $result = $a cmp $b unless defined $result; # FIXME cludge for numeric facets with invalid data + $result; } @facet_names; $self->render( name => $name, facet => $facet, checked => $checked, @@ -618,13 +748,144 @@ sub facet { ); } + +sub __invalidate_path_column { + my ( $path, $name ) = @_; + + if ( defined $loaded->{$path}->{sorted}->{$name} ) { + delete $loaded->{$path}->{sorted}->{$name}; + warn "# invalidate $path sorted $name\n"; + } + + foreach ( grep { m/$name/ } keys %{ $loaded->{$path}->{filtered} } ) { + delete $loaded->{$path}->{filtered}->{$_}; + warn "# invalidate $path filtered $_\n"; + } +} + +sub __path_modified { + my ( $path, $value ) = @_; + $value = 1 unless defined $value; + + $loaded->{$path}->{modified} = $value; + + warn "# __path_modified $path $value\n"; +} + sub edit { my $self = shift; - my $content = $self->param('content'); + my $new_content = $self->param('new_content'); + $new_content ||= $self->param('content'); # backward compatibility with old actions + + my $i = $self->param('_row_id'); + die "invalid _row_id ",dump($i) unless $i =~ m/^\d+$/; + my $path = $self->param('path') || die "no path"; + my $name = $self->param('name') || die "no name"; + my $status = 200; # 200 = OK, 201 = Created + + my $data = $self->_loaded('data'); + + if ( defined $loaded->{$path}->{data}->{items}->[$i] ) { + $new_content =~ s/^\s+//s; + $new_content =~ s/\s+$//s; + my $v; + if ( $new_content =~ /\xB6/ ) { # para + $v = [ split(/\s*\xB6\s*/, $new_content) ]; + } else { + $v = [ $new_content ]; + } + + my $old = dump $loaded->{$path}->{data}->{items}->[$i]->{$name}; + my $new = dump $v; + if ( $old ne $new + && ! ( $old eq 'undef' && length($new_content) == 0 ) # new value empty, previous undef + ) { + my $change = { + path => $path, + column => $name, + pos => $i, + old => $loaded->{$path}->{data}->{items}->[$i]->{$name}, + new => $v, + time => $self->param('time') || time(), + user => $self->param('user') || $ENV{'LOGNAME'}, + unique => { + map { $_ => $loaded->{$path}->{data}->{items}->[$i]->{$_}->[0] } + grep { defined $loaded->{$path}->{stats}->{$_}->{unique} } + keys %{ $loaded->{$path}->{stats} } + }, + }; + my $change_path = $self->_permanent_path( 'changes' ); + mkdir $change_path unless -d $change_path; + $change_path .= '/' . $change->{time}; + store $change, $change_path; + utime $change->{time}, $change->{time}, $change_path; + warn "# $change_path ", dump($change); + + warn "# change $path $i $old -> $new\n"; + $loaded->{$path}->{data}->{items}->[$i]->{$name} = $v; + + __invalidate_path_column( $path, $name ); + + $status = 201; # created + # modified = 2 -- force rebuild of stats + __path_modified( $path, 2 ); + + $new_content = join("\xB6",@$v); + + } else { + warn "# unchanged $path $i $old\n"; + $status = 304; + } + } else { + $new_content = "$path $i $name doesn't exist\n"; + $status = 404; + } + + warn "# edit $status ", dump $new_content; $self->render( - content => $content + status => $status, + new_content => scalar $new_content, ); } + +sub save { + my $self = shift; + my $path = $self->_param_or_session('path'); + my $dump_path = $self->_save( $path ); + __path_modified( $path, 0 ); + + $self->redirect_to( '/data/items' ); +} + +sub export { + my $self = shift; + + if ( my $import = $self->param('import') ) { + + if ( $import =~ m{/filter\.(.+?)\..+} ) { + my $name = $1; + my @vals = map { chomp; $_ } + read_file $self->app->home->rel_dir('public') . "/export/$import"; + $self->_remove_filter( $name ); + $self->_filter_on_data( $name, @vals ); + $self->session( 'offset' => 0 ); + $self->redirect_to('/data/items'); + } else { + warn "UNKNOWN IMPORT $import"; + } + } + + $self->render( export => [ + glob( $self->_export_path . '*' ) + ] ); +} + +sub __loaded_paths { + return + grep { defined $loaded->{$_}->{data} } + keys %$loaded; +} + 1;