use base 'Mojolicious::Controller';
-use Data::Dump qw(dump);
+#use Data::Dump qw(dump); # broken with Mojo::JSON, see https://rt.cpan.org/Public/Bug/Display.html?id=86592
+use Data::Dumper;
+use subs 'dump';
+sub dump { Dumper(@_) };
+
use File::Slurp;
use Encode;
use locale;
use Storable;
use Time::HiRes qw(time);
use File::Path qw(mkpath);
-use Text::Unaccent::PurePerl;
+use Text::Unaccent;
use Digest::MD5;
+use Statistics::Descriptive;
+
+# Registry of import modules, filled when this file is loaded:
+#   $imports->{ $priority || 'file' }->{ $ext } = $module
+# $ext and $priority are whatever the module's ext() class method returns.
+# The glob is relative, so modules are only found when the current
+# directory contains lib/.
+our $imports;
+foreach my $module ( glob('lib/MojoFacets/Import/*.pm') ) {
+	# lib/MojoFacets/Import/Foo.pm -> MojoFacets::Import::Foo
+	$module =~ s{lib/(\w+)/(\w+)/(.*)\.pm}{$1::$2::$3};
+	eval "use $module";
+	# a failed string eval reports its error in $@; $! is just the errno of
+	# the last failed system call and may be set even if the module loaded
+	die "$module: $@" if $@;
+	my ( $ext, $priority ) = $module->ext;
+	$imports->{$priority || 'file'}->{$ext} = $module;
+	warn "# import $ext $module\n";
+}
+
+warn "# import loaded ",dump( $imports );
+
+# Return the name of the import module registered for $full_path, or
+# nothing (undef / empty list) when no importer matches.
+#
+# Each registered $ext is used as an unanchored, case-insensitive regular
+# expression against the whole path. Importers registered under 'file' are
+# tried before those under 'directory'; inside one group the order follows
+# keys() and is therefore not stable.
+sub import_module {
+	my $full_path = shift;
+
+#	warn "# import_module $full_path\n";
-use MojoFacets::Import::File;
-use MojoFacets::Import::HTMLTable;
-use MojoFacets::Import::CSV;
+	return if $full_path =~ m/\.columns$/;
+
+	# NOTE(review): both groups only ever match plain files (-f), exactly as
+	# before -- confirm that 'directory' importers should not test -d instead
+	return unless -f $full_path;
+
+	foreach my $priority (qw( file directory )) {
+		foreach my $ext ( keys %{ $imports->{$priority} } ) {
+			return $imports->{$priority}->{$ext} if $full_path =~ m/$ext/i;
+		}
+	}
+
+	# explicit "no match": the value of a sub which falls off the end of a
+	# loop is unspecified, and callers test this result as a boolean
+	return;
+}
our $loaded;
our $filters;
my @files;
my $changes;
+
find( sub {
my $file = $File::Find::name;
- if ( -f $file && $file =~ m/\.(js(on)?|txt)$/ ) {
- $file =~ s/$data_dir\/*//;
- push @files, $file;
- } elsif ( -f $file && $file =~ m/([^\/]+)\.changes\/(\d+\.\d+.+)/ ) {
+ if ( -f $file && $file =~ m/([^\/]+)\.changes\/(\d+\.\d+.+)/ ) {
push @{ $changes->{$1} }, $2
- } elsif ( -d $file && $file =~ m/\.html$/ ) {
- $file =~ s/$data_dir\/*//;
- push @files, $file;
- } elsif ( -f $file && $file =~ m/\.csv$/i ) {
- $file =~ s/$data_dir\/*//;
- push @files, $file;
- } elsif ( -f $file && $file =~ m/\.storable/i ) {
+ } elsif ( import_module( $file ) ) {
$file =~ s/$data_dir\/*//;
push @files, $file;
} else {
no warnings qw(uninitialized); # mtime
@files = sort { $loaded->{$b}->{mtime} <=> $loaded->{$a}->{mtime} || lc $a cmp lc $b } @files,
- grep { defined $loaded->{$_}->{generated} } keys %$loaded;
+ grep { defined $loaded->{$_}->{generated} } keys %$loaded;
+
my $size;
$size->{$_} = -s "$data_dir/$_" foreach @files;
warn "__stats $nr_items\n";
- foreach my $e ( @{ $_[0] } ) {
+# foreach my $e ( @{ $_[0] } ) {
+ foreach my $i ( 0 .. $#{$_[0]} ) {
+ print STDERR " $i" if $i % 5000;
+ my $e = $_[0]->[$i];
foreach my $n ( keys %$e ) {
$stats->{$n}->{count}++;
my @v;
}
foreach my $x ( @v ) {
+ if ( ! defined $x ) { # FIXME really null
+ $stats->{$n}->{empty}++;
+ next;
+ }
+
$stats->{$n}->{numeric}++
if $x =~ m/^[-+]?([0-9]*\.[0-9]+|[0-9]+)$/;
$stats->{$n}->{empty}++
my $path = $self->_param_or_session('path');
warn "stats $path\n";
delete $loaded->{$path}->{stats};
- $self->redirect_to( '/data/columns' );
+ return $self->redirect_to( '/data/columns' );
}
return if defined $loaded->{$path}->{generated};
my $full_path = $self->app->home->rel_file( 'data/' . $path );
- $self->redirect_to('/data/index') unless -r $full_path;
+ return $self->redirect_to('/data/index') unless -r $full_path;
my $dump_path = $self->_dump_path( $path );
}
my $data;
- if ( -f $full_path ) {
- if ( $full_path =~ m/.csv/i ) {
- $data = MojoFacets::Import::CSV->new( full_path => $full_path )->data;
- } elsif ( $full_path =~ m/.storable/ ) {
- warn "open $full_path ", -s $full_path, " bytes";
- open(my $pipe, "<", $full_path) || die $!;
- while ( my $o = eval { Storable::fd_retrieve $pipe } ) {
- if ( defined $o->{item} ) {
- push @{ $data->{items} }, $o->{item};
- } else {
- warn "SKIP ",dump($o);
- }
- }
- close($pipe);
- warn "loaded ", $#{ $data->{items} } + 1, " items from $full_path\n";
- $data->{generated}++;
- } else {
- $data = MojoFacets::Import::File->new( full_path => $full_path, path => $path )->data;
- }
- } elsif ( -d $full_path && $full_path =~ m/.html/ ) {
- $data = MojoFacets::Import::HTMLTable->new( dir => $full_path )->data;
+ if ( my $module = import_module( $full_path ) ) {
+ $data = $module->new( full_path => $full_path )->data;
} else {
die "can't load $full_path";
}
sub load {
my $self = shift;
- my $path = $self->param('path') || $self->redirect_to( '/data/index' );
-
- my @paths = $self->param('paths');
+ my @paths = @{ $self->every_param('paths') };
warn "# paths ", dump @paths;
foreach my $p ( keys %$loaded ) {
$self->_load_path( $_ ) foreach @paths;
+ my $path = $self->param('path') || $self->session('path') || $paths[0] || $self->redirect_to('/data/index');
+
warn "# path $path\n";
$self->_load_path( $path );
}
$self->session( 'columns' => $loaded->{$path}->{columns} );
$self->session( 'order' => $loaded->{$path}->{columns}->[0] );
- $self->redirect_to( $redirect_to );
+ return $self->redirect_to( $redirect_to );
}
sub _loaded {
my ( $self, $name ) = @_;
my $path = $self->session('path') || $self->param('path');
- $self->redirect_to('/data/index') unless $path;
+ return $self->redirect_to('/data/index') unless $path;
if ( defined $loaded->{$path}->{modified} && $loaded->{$path}->{modified} > 1 ) {
my $caller = (caller(1))[3];
}
if ( ! defined $loaded->{$path}->{$name} ) {
warn "MISSING $name for $path\n";
- $self->redirect_to('/data/index')
+ return $self->redirect_to('/data/index')
}
}
sub __unac {
my $n = shift;
- $n = unac_string($n);
+ $n = unac_string('utf-8',$n);
$n =~ s/\W+/_/g;
return $n;
}
}
sub _export_path {
- my $max_length = 80;
-
my $self = shift;
my $path = $self->_param_or_session('path');
if ( ! $path ) {
}
my $dir = $self->app->home->rel_dir('public') . "/export/$path";
mkpath $dir unless -e $dir;
+ my $name = __export_path_name( $path, @_ );
+ my $full = $dir . '/' . $name;
+ $full =~ s/\/+$// if -d $full; # strip trailing slash for dirs
+ return $full;
+}
+
+sub __export_path_name {
+ my $max_length = 80;
+
+ my $path = shift;
my $name = join('.', map { __unac($_) } @_ );
if ( length($name) > $max_length ) {
$name = substr($name,0,$max_length) . Digest::MD5::md5_hex substr($name,$max_length);
}
- my $full = $dir . '/' . $name;
- $full =~ s/\/+$// if -d $full; # strip trailing slash for dirs
- return $full;
+ return $name;
}
sub columns {
my $self = shift;
+ my $view_path = $self->_permanent_path( 'views' );
+
if ( $self->param('columns') ) {
my @columns = $self->_param_array('columns');
write_file( $self->_permanent_path( 'columns' ), { binmode => ':utf8' }, map { "$_\n" } @columns );
- $self->redirect_to('/data/items');
+ if ( my $view = $self->param('view') ) {
+ mkdir $view_path unless -e $view_path;
+ write_file( "$view_path/$view", { binmode => ':utf8' }, map { "$_\n" } @columns );
+ warn "view $view_path/$view ", -s "$view_path/$view", " bytes\n";
+ }
+
+ return $self->redirect_to('/data/items');
+
+ } elsif ( ! $self->session('header') ) {
+ return $self->redirect_to('/data/load');
+ }
+
+ if ( my $id = $self->param('id') ) {
+ my $view_full = "$view_path/$id";
+ if ( -f $view_full ) {
+ my @columns = map { chomp; $_ } read_file $view_full, binmode => ':utf8';
+ warn "view $view_full loaded ", dump @columns;
+ $self->session( 'columns' => [ @columns ] );
+ return $self->redirect_to('/data/items');
+ }
}
my $stats = $self->_loaded( 'stats' );
push @columns, $c unless grep { /^\Q$c\E$/ } @columns;
}
+ my @views;
+ if ( -d $view_path ) {
+ @views = map { s{^\Q$view_path\E/*}{}; $_ } glob "$view_path/*";
+ warn "# views ",dump @views;
+ }
+
$self->render(
message => 'Select columns to display',
stats => $stats,
columns => \@columns,
checked => $self->_checked( $self->_param_array('columns') ),
+ views => \@views,
);
}
sub _param_array {
my ($self,$name) = @_;
- my @array = $self->param($name);
+ my @array = @{ $self->every_param($name) };
my $path = $self->session('path');
if ( @array ) {
if ( ! defined $scalar ) {
$scalar = $default;
- die "no default for $name" unless defined $scalar;
- $self->session($name => $scalar);
+ if ( defined $scalar ) {
+ $self->session($name => $scalar);
+ } else {
+ warn "no default for $name";
+ }
}
warn "# _param_scalar $name ",dump $scalar;
my $self = shift;
my $name = $self->param('filter_name') || die "name?";
- my @vals = $self->param('filter_vals');
+ my @vals = @{ $self->every_param('filter_vals') };
$self->_remove_filter( $name );
if ( @vals ) {
$self->session( 'offset' => 0 );
- $self->redirect_to('/data/items');
+ return $self->redirect_to('/data/items');
}
sub _filter_on_data {
#warn "# filter $name ",dump($filtered_items);
+ # invalidate filters on other datasets
+ foreach my $dataset ( grep { exists $loaded->{$_}->{filters}->{$name} } keys %$loaded ) {
+ delete $loaded->{$dataset}->{filters}->{$name};
+ delete $loaded->{$dataset}->{filtered};
+ }
+
$loaded->{$path}->{filters}->{$name} = $filtered_items;
warn "filter $name with ", scalar keys %$filtered_items, " items created\n";
}
our ($out, $key,$value);
+our $lookup_path_col;
+our $on;
+
+# Clear the state shared with lookup() ($lookup_path_col index and the
+# current $on item) before a code snippet is tested or committed.
+sub __commit_begin {
+	warn "__commit_begin";
+	undef $lookup_path_col;
+	undef $on;
+}
+
+# Counterpart of __commit_begin: drop the lookup() index and the last
+# $on item again so that they do not stay in memory.
+sub __commit_end {
+	warn "__commit_end";
+	undef $lookup_path_col; # cleanup memory
+	undef $on;
+}
+
+# lookup( $vals, $on_path, $on_col, $code [, $stat_code ] )
+#
+# For every value in $vals (a scalar or an array ref) find the items of the
+# loaded dataset $on_path whose column $on_col holds that value (scalar
+# column) or contains it (array column), set the package variable $on to
+# each matching item and call $code->( $stat ).
+#
+# $on_path may be a substring of a loaded dataset path as long as it
+# matches exactly one dataset.
+#
+# If $stat_code is given, one Statistics::Descriptive::Full object is
+# created per call, passed to $code as $stat, and $stat_code->( $stat ) is
+# called after each value of $vals (the object is not reset in between).
+#
+# The value => item-index table is cached in $lookup_path_col.
+#
+# Dies if $code is not a code ref, if the dataset can't be resolved or has
+# no items, or if a column value is a reference other than an array.
+sub lookup {
+	warn "# lookup ",dump @_;
+	my ( $vals, $on_path, $on_col, $code, $stat_code ) = @_;
+	die "code is not sub{ ... } but ", dump $code unless ref $code eq 'CODE';
+
+	if ( ! exists $loaded->{$on_path} ) {
+		my @possible_paths = grep { /\Q$on_path\E/ } keys %$loaded;
+		die "more than one dataset available for '$on_path' ",dump @possible_paths if $#possible_paths > 0;
+		# stop here instead of going on with an undefined path, which would
+		# autovivify a bogus entry in %$loaded
+		die "no dataset available for '$on_path'" unless @possible_paths;
+		$on_path = shift @possible_paths;
+		warn "## fuzzy selected path $on_path";
+	}
+
+	# "|| {}" avoids creating an empty {data} on a dataset which has none;
+	# _switch_dataset would list such a dataset as available
+	my $items = ( $loaded->{$on_path}->{data} || {} )->{items} || die "no items for $on_path";
+
+	my $index = $lookup_path_col->{$on_path}->{$on_col};
+	if ( ! $index ) {
+		warn "create lookup_path_col $on_path $on_col";
+		# store the index before filling it, so that a column which no item
+		# has is scanned once and not again on every call
+		$index = $lookup_path_col->{$on_path}->{$on_col} = {};
+		foreach my $i ( 0 .. $#$items ) {
+			my $item = $items->[$i];
+			next unless exists $item->{$on_col};
+			if ( ref $item->{$on_col} eq 'ARRAY' ) {
+				push @{ $index->{$_} }, $i foreach @{ $item->{$on_col} };
+			} elsif ( ! ref $item->{$on_col} ) { # scalar
+				push @{ $index->{ $item->{$on_col} } }, $i;
+			} else {
+				die "unknown type of ",dump $item->{$on_col};
+			}
+		}
+		warn "XXX ",dump $index if $ENV{DEBUG};
+	}
+
+	my $stat;
+	$stat = Statistics::Descriptive::Full->new() if $stat_code;
+
+	foreach my $v ( ref $vals eq 'ARRAY' ? @$vals : ( $vals ) ) {
+		# "|| []" keeps values without a match from autovivifying empty
+		# entries in the cached index
+		foreach my $i ( @{ $index->{$v} || [] } ) {
+			$on = $items->[$i];
+			warn "XXX lookup code $v $i ",dump $on if $ENV{DEBUG};
+			$code->($stat);
+		}
+		$stat_code->( $stat ) if $stat_code;
+	}
+}
+
sub __commit_path_code {
my ( $path, $i, $code, $commit_changed ) = @_;
#warn "__commit_path_code $path $i ",dump( $update );
}
+# uses templates/admin.html.ep
+#
+# Put the paths of all datasets which currently have data in memory into
+# the stash as 'datasets' (undef if there are none).
+sub _switch_dataset {
+	my $self = shift;
+
+	my @with_data = grep { exists $loaded->{$_}->{data} } keys %$loaded;
+	my $datasets = @with_data ? \@with_data : undef;
+
+	warn "# datasets ",dump($datasets);
+
+	$self->stash( 'datasets' => $datasets );
+}
+
sub items {
my $self = shift;
+ $self->_switch_dataset;
+
if ( my $show = $self->param('id') ) {
$self->param('show', $show);
warn "show $show\n";
}
- my $path = $self->session('path');
+ my $path = $self->_param_scalar('path');
my @columns = $self->_param_array('columns');
- $self->redirect_to('/data/columns') unless @columns;
+ return $self->redirect_to('/data/columns') unless @columns;
my $order = $self->_param_scalar('order', $columns[0]);
my $sort = $self->_param_scalar('sort', 'a');
my $offset = $self->_param_scalar('offset', 0);
$code =~ s{\r}{}gs;
$code =~ s{\n+$}{\n}s;
+ # XXX convert @row->{foo} into @{$row->{foo}}
+ $code =~ s|\@(row->{[^}]+})|\@{\$$1}|gs;
+
my $commit = $self->param('commit');
my $test = $self->param('test');
my $commit_changed;
+ __commit_begin;
if ( $code && ( $test || $commit ) ) {
# XXX find columns used in code snippet and show them to user
my $code_path = $self->app->home->rel_dir('public') . "/code";
if ( $commit ) {
+ __path_modified( $path, 'commit' );
+
warn "# commit on ", $#$filtered + 1, " items:\n$code\n";
( $key, $value, $out ) = ( 'key', 'value' );
foreach ( 0 .. $#$filtered ) {
__commit_path_code( $path, $i, $code, \$commit_changed );
}
+ # this might move before $out to recalculate stats on source dataset?
+ __path_rebuild_stats( $path );
+ my $c = { map { $_ => 1 } @columns };
+ my @added_columns = sort grep { ! $c->{$_} } keys %$commit_changed;
+ warn "# added_columns ",dump( @added_columns );
+ unshift @columns, @added_columns;
+
+ $loaded->{$path}->{columns} = [ @columns ];
+ warn "# new columns ",dump( @columns );
+
+ __invalidate_path_column( $path, $_ ) foreach keys %$commit_changed;
+
$self->_save_change({
path => $path,
time => $self->param('time') || time(),
$self->session('path', $commit_dataset);
$self->session('columns', [ @columns ]);
$self->session('order', $key);
- $self->redirect_to('/data/items');
- return; # FIXME needed to correctly show columns
+ return $self->redirect_to('/data/items');
}
- # this might move before $out to recalculate stats on source dataset?
- __path_rebuild_stats( $path );
- my $c = { map { $_ => 1 } @columns };
- my @added_columns = sort grep { ! $c->{$_} } keys %$commit_changed;
- warn "# added_columns ",dump( @added_columns );
- unshift @columns, @added_columns;
-
$self->session('columns', [ @columns ]);
- $loaded->{$path}->{columns} = [ @columns ];
- warn "# new columns ",dump( @columns );
-
- __invalidate_path_column( $path, $_ ) foreach keys %$commit_changed;
}
my $sorted_items;
if ( $self->param('export') ) {
my $export_path = $self->_export_path( 'items', @columns);
open(my $fh, '>', $export_path) || die "ERROR: can't open $export_path: $!";
+ print $fh "#",join("\t",@columns),"\n";
foreach my $f ( 0 .. $#$filtered ) {
print $fh join("\t", map {
my $i = $data->{items}->[ $filtered->[$f] ];
warn "export $export_path ", -s $export_path, " bytes\n";
}
- warn "# test_changed ",dump( $test_changed );
- my $c = { map { $_ => 1 } @columns };
- my @added_columns = sort grep { ! $c->{$_} } keys %$test_changed;
- unshift @columns, @added_columns;
+ my ( $code_depends, $code_description );
- warn "# sorted_items ", $#$sorted_items + 1, " offset $offset limit $limit order $sort";
+ if ( $test ) {
- my $code_depends = $self->param('code_depends')||
- join(',', sort grep { $test_changed->{$_} == 0 } keys %$test_changed );
- my $code_description = $self->param('code_description') ||
- join(',', @added_columns);
+ warn "# test_changed ",dump( $test_changed );
+ my $c = { map { $_ => 1 } @columns };
+ my @added_columns = sort grep { ! $c->{$_} } keys %$test_changed;
+ unshift @columns, @added_columns;
- $code_depends ||= $code_description; # self-modifing
- if ( ! $code_depends && $out ) {
- $code_depends = $key;
- $code_description = $value;
- }
+ warn "# sorted_items ", $#$sorted_items + 1, " offset $offset limit $limit order $sort";
+
+ my $depends_on;
+ my $tmp = $code; $tmp =~ s/\$row->{(['"]?)([\w\s]+)\1/$depends_on->{$2}++/gse;
+ warn "# depends_on ",dump $depends_on;
- warn "# test_changed ",dump( $test_changed, $code_depends, $code_description );
+ my $test_added = Storable::dclone $test_changed;
+ delete $test_added->{$_} foreach keys %$depends_on;
+
+ $code_depends = $self->param('code_depends')
+ || join(',', keys %$depends_on);
+
+ $code_description = $self->param('code_description') ||
+ join(',', keys %$test_added);
+
+ $code_depends ||= $code_description; # self-modifing
+ if ( ! $code_depends && $out ) {
+ $code_depends = $key;
+ $code_description = $value;
+ }
+
+ warn "# test_changed ",dump( $test_changed, $code_depends, $code_description );
+
+ } # test?
+
+ __commit_end;
$self->render(
order => $order,
my $self = shift;
$self->session('order', $self->param('order'));
$self->session('sort', $self->param('sort'));
- $self->redirect_to('/data/items');
+ return $self->redirect_to('/data/items');
}
sub _is_numeric {
sub facet {
my $self = shift;
- my $path = $self->session('path') || $self->redirect_to( '/data/index' );
+ my $path = $self->session('path') || return $self->redirect_to( '/data/index' );
if ( my $name = $self->param('remove') ) {
$self->_remove_filter( $name );
- $self->redirect_to( '/data/items' );
+ return $self->redirect_to( '/data/items' );
}
my $facet;
foreach my $i ( @$filtered ) {
my $item = $data->{items}->[$i];
- if ( ! exists $item->{$name} ) {
+ if ( ! exists $item->{$name} || ! defined $item->{$name} ) {
$facet->{ _missing }++;
} elsif ( ref $item->{$name} eq 'ARRAY' ) {
$facet->{$_}++ foreach @{ $item->{$name} };
my $dump_path = $self->_save( $path );
__path_modified( $path, 0 );
- $self->redirect_to( '/data/items' );
+ return $self->redirect_to( '/data/items' );
}
sub export {
$self->_remove_filter( $name );
$self->_filter_on_data( $name, @vals );
$self->session( 'offset' => 0 );
- $self->redirect_to('/data/items');
+ return $self->redirect_to('/data/items');
} else {
warn "UNKNOWN IMPORT $import";
}
unlink $path if -e $path;
}
- my $path = $self->_export_path || $self->redirect_to('/data/index');
+ my $path = $self->_export_path || return $self->redirect_to('/data/index');
my @files = grep { ! /\.png$/ } glob "$path/*";
my $mtime = { map { $_ => (stat($_))[9] } @files };
keys %$loaded;
}
+# Remove the dataset given by the 'path' parameter (see remove) and
+# redirect to /data/load with the same path so that it is loaded again.
+sub reload {
+	my $self = shift;
+	# tells remove() to skip its own redirect
+	$self->stash( reload => 1 );
+	$self->remove;
+#	$self->_load_path( $self->param('path') );
+	my $path = $self->param('path');
+	$path = '' unless defined $path;
+	# build the query with Mojo::URL so that the path is URL-escaped
+	# (a plain concatenation breaks on '&', '#', '+' or spaces)
+	return $self->redirect_to(
+		$self->url_for('/data/load')->query( path => $path )
+	);
+}
+
+# Remove a temporary dataset: unlink the file given by the 'path'
+# parameter (only if it is below /tmp/mojo_facets.*) and, if the 'name'
+# parameter is given, delete that dataset from memory.
+# Redirects to /data/load, unless the 'reload' stash value is set (as
+# reload() does), in which case it just returns.
+sub remove {
+	my $self = shift;
+	my $path = $self->param('path');
+	$path = '' unless defined $path;
+	# 'path' comes from the request: besides the prefix check refuse any
+	# '..' component, which would allow unlinking files outside of /tmp
+	if ( $path =~ m{^/tmp/mojo_facets\.} && $path !~ m{(?:^|/)\.\.(?:/|$)} ) {
+		if ( unlink $path ) {
+			warn "# unlink $path";
+		} else {
+			warn "WARNING: $path unlink failed: $!";
+		}
+		if ( my $name = $self->param('name') ) {
+			delete $loaded->{$name};
+			warn "# remove $name from memory";
+		}
+	} else {
+		warn "WARNING: $path unlink ignored";
+	}
+	return if $self->stash('reload');
+	return $self->redirect_to( '/data/load' );
+}
+
1;