allow spaces in $row->{'field with space'}
[MojoFacets.git] / lib / MojoFacets / Data.pm
index 86d71bc..6a55286 100644 (file)
@@ -14,10 +14,13 @@ use Storable;
 use Time::HiRes qw(time);
 use File::Path qw(mkpath);
 use Text::Unaccent::PurePerl;
+use Digest::MD5;
 
 use MojoFacets::Import::File;
 use MojoFacets::Import::HTMLTable;
 use MojoFacets::Import::CSV;
+use MojoFacets::Import::CouchDB;
+use MojoFacets::Import::SQL;
 
 our $loaded;
 our $filters;
@@ -40,10 +43,7 @@ sub index {
                } elsif ( -d $file && $file =~ m/\.html$/ ) {
                        $file =~ s/$data_dir\/*//;
                        push @files, $file;
-               } elsif ( -f $file && $file =~ m/\.csv$/i ) {
-                       $file =~ s/$data_dir\/*//;
-                       push @files, $file;
-               } elsif ( -f $file && $file =~ m/\.storable/i ) {
+               } elsif ( -f $file && $file =~ m/\.(csv|storabe|couchdb|sql)$/i ) {
                        $file =~ s/$data_dir\/*//;
                        push @files, $file;
                } else {
@@ -160,17 +160,17 @@ sub stats {
        my $path = $self->_param_or_session('path');
        warn "stats $path\n";
        delete $loaded->{$path}->{stats};
-       $self->redirect_to( '/data/columns' );
+       return $self->redirect_to( '/data/columns' );
 }
 
 
 sub _load_path {
        my ( $self, $path ) = @_;
 
-       return if defined $loaded->{$path}->{'generated'};
+       return if defined $loaded->{$path}->{generated};
 
        my $full_path = $self->app->home->rel_file( 'data/' . $path );
-       $self->redirect_to('/data/index') unless -r $full_path;
+       return $self->redirect_to('/data/index') unless -r $full_path;
 
        my $dump_path = $self->_dump_path( $path );
 
@@ -189,6 +189,10 @@ sub _load_path {
        if ( -f $full_path ) {
                if ( $full_path =~ m/.csv/i ) {
                        $data = MojoFacets::Import::CSV->new( full_path => $full_path )->data;
+               } elsif ( $full_path =~ m/.sql/i ) {
+                       $data = MojoFacets::Import::SQL->new( full_path => $full_path )->data;
+               } elsif ( $full_path =~ m/.couchdb/i ) {
+                       $data = MojoFacets::Import::CouchDB->new( full_path => $full_path )->data;
                } elsif ( $full_path =~ m/.storable/ ) {
                        warn "open $full_path ", -s $full_path, " bytes";
                        open(my $pipe, "<", $full_path) || die $!;
@@ -201,6 +205,7 @@ sub _load_path {
                        }
                        close($pipe);
                        warn "loaded ", $#{ $data->{items} } + 1, " items from $full_path\n";
+                       $data->{generated}++;
                } else {
                        $data = MojoFacets::Import::File->new( full_path => $full_path, path => $path )->data;
                }
@@ -234,10 +239,11 @@ sub _load_path {
                size => -s $full_path,
                mtime => (stat($full_path))[9],
                data => $data,
+               defined $data->{generated} ? ( generated => 1 ) : (),
        };
 
        $loaded->{ $path } = $info;
-       $self->_save( $path );
+       $self->_save( $path ) unless $info->{generated};
 
 }
 
@@ -245,7 +251,7 @@ sub _load_path {
 sub load {
        my $self = shift;
 
-       my $path = $self->param('path') || $self->redirect_to( '/data/index' );
+       my $path = $self->param('path') || return $self->redirect_to( '/data/index' );
 
        my @paths = $self->param('paths');
        warn "# paths ", dump @paths;
@@ -280,14 +286,14 @@ sub load {
        }
        $self->session( 'columns' => $loaded->{$path}->{columns} );
        $self->session( 'order'   => $loaded->{$path}->{columns}->[0] );
-       $self->redirect_to( $redirect_to );
+       return $self->redirect_to( $redirect_to );
 }
 
 
 sub _loaded {
        my ( $self, $name ) = @_;
        my $path = $self->session('path') || $self->param('path');
-       $self->redirect_to('/data/index') unless $path;
+       return $self->redirect_to('/data/index') unless $path;
 
        if ( defined $loaded->{$path}->{modified} && $loaded->{$path}->{modified} > 1 ) {
                my $caller = (caller(1))[3];
@@ -316,7 +322,7 @@ sub _loaded {
                }
                if ( ! defined $loaded->{$path}->{$name} ) {
                        warn "MISSING $name for $path\n";
-                       $self->redirect_to('/data/index')
+                       return $self->redirect_to('/data/index')
                }
        }
 
@@ -365,19 +371,51 @@ sub _export_path {
        }
        my $dir = $self->app->home->rel_dir('public') . "/export/$path";
        mkpath $dir unless -e $dir;
-       my $name = join('.', map { __unac($_) } @_ );
+       my $name = __export_path_name( $path, @_ );
        my $full = $dir . '/' . $name;
        $full =~ s/\/+$// if -d $full; # strip trailing slash for dirs
        return $full;
 }
 
+# Build the file name used for an export from the given name parts.
+#
+# Arguments: the dataset path (accepted for call compatibility, it is not
+# part of the resulting name) followed by the name parts. Each part is
+# passed through __unac and the results are joined with '.'.
+#
+# Returns the joined name. A name longer than $max_length characters is cut
+# to its first $max_length characters followed by the MD5 hex digest of the
+# cut-off remainder.
+sub __export_path_name {
+       my $max_length = 80;
+
+       my $path = shift;
+       my $name = join('.', map { __unac($_) } @_ );
+       return $name if length($name) <= $max_length;
+
+       my $tail = substr($name, $max_length);
+       # md5_hex croaks with "Wide character" on code points above 255;
+       # encode only in that case so digests of names that already worked
+       # stay the same
+       utf8::encode($tail) if $tail =~ m/[^\x00-\xFF]/;
+
+       return substr($name, 0, $max_length) . Digest::MD5::md5_hex($tail);
+}
+
 sub columns {
     my $self = shift;
 
+       my $view_path = $self->_permanent_path( 'views' );
+
        if ( $self->param('columns') ) {
                my @columns = $self->_param_array('columns');
                write_file( $self->_permanent_path( 'columns' ), { binmode => ':utf8' }, map { "$_\n" } @columns );
-               $self->redirect_to('/data/items');
+               if ( my $view = $self->param('view') ) {
+                       mkdir $view_path unless -e $view_path;
+                       write_file( "$view_path/$view", { binmode => ':utf8' }, map { "$_\n" } @columns );
+                       warn "view $view_path/$view ", -s "$view_path/$view", " bytes\n";
+               }
+
+               return $self->redirect_to('/data/items');
+
+       } elsif ( ! $self->session('header') ) {
+               return $self->redirect_to('/data/load');
+       }
+
+       if ( my $id = $self->param('id') ) {
+               my $view_full = "$view_path/$id";
+               if ( -f $view_full ) {
+                       my @columns = map { chomp; $_ } read_file $view_full, binmode => ':utf8';
+                       warn "view $view_full loaded ", dump @columns;
+                       $self->session( 'columns' => [ @columns ] );
+                       return $self->redirect_to('/data/items');
+               }
        }
 
        my $stats = $self->_loaded( 'stats' );
@@ -389,11 +427,18 @@ sub columns {
                push @columns, $c unless grep { /^\Q$c\E$/ } @columns;
        }
 
+       my @views;
+       if ( -d $view_path ) {
+               @views = map { s{^\Q$view_path\E/*}{}; $_ } glob "$view_path/*";
+               warn "# views ",dump @views;
+       }
+
     $self->render(
                message => 'Select columns to display',
                stats => $stats,
                columns => \@columns,
                checked => $self->_checked( $self->_param_array('columns') ),
+               views => \@views,
        );
 }
 
@@ -456,7 +501,7 @@ sub filter {
 
        $self->session( 'offset' => 0 );
 
-       $self->redirect_to('/data/items');
+       return $self->redirect_to('/data/items');
 }
 
 sub _filter_on_data {
@@ -499,6 +544,12 @@ sub _filter_on_data {
 
        #warn "# filter $name ",dump($filtered_items);
 
+       # invalidate filters on other datasets
+       foreach my $dataset ( grep { exists $loaded->{$_}->{filters}->{$name} } keys %$loaded ) {
+               delete $loaded->{$dataset}->{filters}->{$name};
+               delete $loaded->{$dataset}->{filtered};
+       }
+
        $loaded->{$path}->{filters}->{$name} = $filtered_items;
        warn "filter $name with ", scalar keys %$filtered_items, " items created\n";
 }
@@ -581,18 +632,36 @@ sub __commit_path_code {
        #warn "__commit_path_code $path $i ",dump( $update );
 }
 
+# Put the list of loaded datasets into the stash under 'datasets'.
+# uses templates/admin.html.ep
+#
+# A path is listed when its entry in $loaded has a 'data' key. The stashed
+# value is an array reference, or undef when no path qualifies.
+sub _switch_dataset {
+       my ( $self ) = @_;
+
+       my @with_data = grep { exists $loaded->{$_}->{data} } keys %$loaded;
+       my $datasets  = @with_data ? \@with_data : undef;
+
+       warn "# datasets ",dump($datasets);
+
+       $self->stash( 'datasets' => $datasets );
+}
+
 sub items {
        my $self = shift;
 
+       $self->_switch_dataset;
+
        if ( my $show = $self->param('id') ) {
                $self->param('show', $show);
                warn "show $show\n";
        }
 
-       my $path = $self->session('path');
+       my $path = $self->_param_scalar('path');
 
        my @columns = $self->_param_array('columns');
-       $self->redirect_to('/data/columns') unless @columns;
+       return $self->redirect_to('/data/columns') unless @columns;
        my $order   = $self->_param_scalar('order', $columns[0]);
        my $sort    = $self->_param_scalar('sort', 'a');
        my $offset  = $self->_param_scalar('offset', 0);
@@ -693,6 +762,18 @@ sub items {
                        __commit_path_code( $path, $i, $code, \$commit_changed );
                }
 
+               # this might move before $out to recalculate stats on source dataset?
+               __path_rebuild_stats( $path );
+               my $c = { map { $_ => 1 } @columns };
+               my @added_columns = sort grep { ! $c->{$_} } keys %$commit_changed;
+               warn "# added_columns ",dump( @added_columns );
+               unshift @columns, @added_columns;
+
+               $loaded->{$path}->{columns} = [ @columns ];
+               warn "# new columns ",dump( @columns );
+
+               __invalidate_path_column( $path, $_ ) foreach keys %$commit_changed;
+
                $self->_save_change({
                        path => $path,
                        time => $self->param('time') || time(),
@@ -755,22 +836,10 @@ sub items {
                        $self->session('path', $commit_dataset);
                        $self->session('columns', [ @columns ]);
                        $self->session('order', $key);
-                       $self->redirect_to('/data/items');
-                       return; # FIXME needed to correctly show columns
+                       return $self->redirect_to('/data/items');
                }
 
-               # this might move before $out to recalculate stats on source dataset?
-               __path_rebuild_stats( $path );
-               my $c = { map { $_ => 1 } @columns };
-               my @added_columns = sort grep { ! $c->{$_} } keys %$commit_changed;
-               warn "# added_columns ",dump( @added_columns );
-               unshift @columns, @added_columns;
-
                $self->session('columns', [ @columns ]);
-               $loaded->{$path}->{columns} = [ @columns ];
-               warn "# new columns ",dump( @columns );
-
-               __invalidate_path_column( $path, $_ ) foreach keys %$commit_changed;
        }
 
        my $sorted_items;
@@ -803,7 +872,8 @@ sub items {
 
        if ( $self->param('export') ) {
                my $export_path = $self->_export_path( 'items', @columns);
-               open(my $fh, '>', $export_path) || warn "ERROR: can't open $export_path: $!";
+               open(my $fh, '>', $export_path) || die "ERROR: can't open $export_path: $!";
+               print $fh "#",join("\t",@columns),"\n";
                foreach my $f ( 0 .. $#$filtered ) {
                        print $fh join("\t", map {
                                my $i = $data->{items}->[ $filtered->[$f] ];
@@ -813,6 +883,8 @@ sub items {
                                } elsif ( ref $i->{$_} eq 'ARRAY' ) {
                                        $v =join(',', @{ $i->{$_} });
                                        $v = '\N' if length($v) == 0;
+                               } elsif ( ! ref $i->{$_} ) {
+                                       $v = $i->{$_};
                                } else {
                                        $v = dump $i->{$_};
                                }
@@ -823,25 +895,39 @@ sub items {
                warn "export $export_path ", -s $export_path, " bytes\n";
        }
 
-       warn "# test_changed ",dump( $test_changed );
-       my $c = { map { $_ => 1 } @columns };
-       my @added_columns = sort grep { ! $c->{$_} } keys %$test_changed;
-       unshift @columns, @added_columns;
+       my ( $code_depends, $code_description );
 
-       warn "# sorted_items ", $#$sorted_items + 1, " offset $offset limit $limit order $sort";
+       if ( $test ) {
 
-       my $code_depends = $self->param('code_depends')||
-       join(',', sort grep { $test_changed->{$_} == 0 } keys %$test_changed );
-       my $code_description = $self->param('code_description') ||
-       join(',', @added_columns);
+               warn "# test_changed ",dump( $test_changed );
+               my $c = { map { $_ => 1 } @columns };
+               my @added_columns = sort grep { ! $c->{$_} } keys %$test_changed;
+               unshift @columns, @added_columns;
 
-       $code_depends ||= $code_description; # self-modifing
-       if ( ! $code_depends && $out ) {
-               $code_depends = $key;
-               $code_description = $value;
-       }
+               warn "# sorted_items ", $#$sorted_items + 1, " offset $offset limit $limit order $sort";
+
+               my $depends_on;
+               my $tmp = $code; $tmp =~ s/\$row->{(['"]?)([\w\s]+)\1/$depends_on->{$2}++/gse;
+               warn "# depends_on ",dump $depends_on;
+
+               my $test_added = Storable::dclone $test_changed;
+               delete $test_added->{$_} foreach keys %$depends_on;
 
-       warn "# test_changed ",dump( $test_changed, $code_depends, $code_description );
+               $code_depends = $self->param('code_depends')
+               || join(',', keys %$depends_on);
+
+               $code_description = $self->param('code_description') ||
+               join(',', keys %$test_added);
+
+               $code_depends ||= $code_description; # self-modifing
+               if ( ! $code_depends && $out ) {
+                       $code_depends = $key;
+                       $code_description = $value;
+               }
+
+               warn "# test_changed ",dump( $test_changed, $code_depends, $code_description );
+
+       } # test?
 
        $self->render(
                order => $order,
@@ -868,7 +954,7 @@ sub order {
        my $self = shift;
        $self->session('order', $self->param('order'));
        $self->session('sort', $self->param('sort'));
-       $self->redirect_to('/data/items');
+       return $self->redirect_to('/data/items');
 }
 
 sub _is_numeric {
@@ -911,11 +997,11 @@ sub _remove_filter {
 sub facet {
        my $self = shift;
 
-       my $path = $self->session('path') || $self->redirect_to( '/data/index' );
+       my $path = $self->session('path') || return $self->redirect_to( '/data/index' );
 
        if ( my $name = $self->param('remove') ) {
                $self->_remove_filter( $name );
-               $self->redirect_to( '/data/items' );
+               return $self->redirect_to( '/data/items' );
        }
 
        my $facet;
@@ -947,15 +1033,21 @@ sub facet {
                }
        }
 
+       my $checked_values = $self->_checked( @{ $filters->{$name} } ) if defined $filters->{$name};
+
        if ( my $code = $self->param('code') ) {
                my $out;
-               foreach my $v ( keys %$facet ) {
-                       my $c = $facet->{$v};
+               foreach my $value ( keys %$facet ) {
+                       my $count = $facet->{$value};
+                       my $checked = $checked_values->{$value};
                        eval $code;
                        if ( $@ ) {
                                $out = $@;
                                warn "ERROR: $@\n$code\n";
                                last;
+                       } elsif ( $checked != $checked_values->{$value} ) {
+                               warn "checked $value $count -> $checked\n";
+                               $checked_values->{$value} = $checked;
                        }
                }
                warn "out ",dump( $out );
@@ -964,14 +1056,11 @@ sub facet {
 
 #      warn "# facet $name ",dump $facet;
 
-       my $checked;
        my @facet_names =
                  $all                      ? keys %$facet
                : defined $filters->{$name} ? @{ $filters->{$name} }
                : keys %$facet;
 
-       $checked = $self->_checked( @{ $filters->{$name} } ) if defined $filters->{$name};
-
        my $numeric = $self->_is_numeric($name);
 
        my $sort = $self->param('sort');
@@ -994,7 +1083,7 @@ sub facet {
                $result;
        } @facet_names;
 
-       $self->render( name => $name, facet => $facet, checked => $checked,
+       $self->render( name => $name, facet => $facet, checked => $checked_values,
                facet_names => \@facet_names, sort => $sort, numeric => $numeric,
        );
 }
@@ -1113,7 +1202,7 @@ sub save {
        my $dump_path = $self->_save( $path );
        __path_modified( $path, 0 );
 
-       $self->redirect_to( '/data/items' );
+       return $self->redirect_to( '/data/items' );
 }
 
 sub export {
@@ -1132,7 +1221,7 @@ sub export {
                        $self->_remove_filter( $name );
                        $self->_filter_on_data( $name, @vals );
                        $self->session( 'offset' => 0 );
-                       $self->redirect_to('/data/items');
+                       return $self->redirect_to('/data/items');
                } else {
                        warn "UNKNOWN IMPORT $import";
                }
@@ -1145,7 +1234,7 @@ sub export {
                unlink $path if -e $path;
        }
 
-       my $path = $self->_export_path || $self->redirect_to('/data/index');
+       my $path = $self->_export_path || return $self->redirect_to('/data/index');
 
        my @files = grep { ! /\.png$/ } glob "$path/*";
        my $mtime = { map { $_ => (stat($_))[9] } @files };
@@ -1159,4 +1248,20 @@ sub __loaded_paths {
                keys %$loaded;
 }
 
+# Delete a temporary file and optionally drop a dataset from memory.
+#
+# Request parameters:
+#   path - file to delete; only paths starting with /tmp/mojo_facets. and
+#          containing no '..' component are accepted
+#   name - optional key to delete from $loaded once the path was accepted
+#
+# Always redirects to /data/load.
+sub remove {
+       my $self = shift;
+
+       # a missing parameter must not trigger "uninitialized" warnings below
+       my $path = $self->param('path');
+       $path = '' unless defined $path;
+
+       # the prefix check alone would let /tmp/mojo_facets.x/../../etc/passwd
+       # through to unlink, so also refuse any '..' path component
+       my $allowed = $path =~ m{^/tmp/mojo_facets\.}
+               && $path !~ m{(?:^|/)\.\.(?:/|$)};
+
+       if ( $allowed ) {
+               if ( unlink $path ) {
+                       warn "# unlink $path";
+               } else {
+                       warn "WARNING: $path unlink failed: $!";
+               }
+               if ( my $name = $self->param('name') ) {
+                       delete $loaded->{$name};
+                       warn "# remove $name from memory";
+               }
+       } else {
+               warn "WARNING: $path unlink ignored";
+       }
+       return $self->redirect_to( '/data/load' );
+}
+
 1;