X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=lib%2FMojoFacets%2FData.pm;h=e69c16e5fbac008e77afa35f88b6fbe7a2a862c5;hb=2c8d9972340cb72f8eb5c0eb1385eb32ae657144;hp=1780fee258a7e78a30acf9d8aea788eb0e3d5bec;hpb=b87950c02420a4c266229094cd9f21ead4a9162c;p=MojoFacets.git diff --git a/lib/MojoFacets/Data.pm b/lib/MojoFacets/Data.pm index 1780fee..e69c16e 100644 --- a/lib/MojoFacets/Data.pm +++ b/lib/MojoFacets/Data.pm @@ -47,11 +47,15 @@ sub index { sub _load_path { my ( $self, $path ) = @_; - return if defined $loaded->{$path}->{data}; - my $full_path = $self->app->home->rel_file( 'data/' . $path ); die "$full_path $!" unless -r $full_path; + if ( defined $loaded->{$path}->{data} ) { + my $mtime = (stat($full_path))[9]; + return if $loaded->{$path}->{mtime} == $mtime; + warn "reload $full_path, modified ", time() - $mtime, " seconds ago\n"; + } + # we could use Mojo::JSON here, but it's too slow # $data = from_json read_file $path; my $data = read_file $full_path; @@ -91,7 +95,12 @@ sub _load_path { } } my $item; - $item->{ $header[$_] || "f_$_" } = [ $v[$_] ] foreach ( 0 .. $#v ); + foreach my $i ( 0 .. $#v ) { + my $v = $v[$i]; + # unpack numeric values separated by commas + my $a = $v =~ m/\d+\s*,\s*\d+/ ? [ split(/\,\s*/,$v) ] : [ $v ]; + $item->{ $header[$i] || "f_$i" } = $a; + } push @{ $data->{items} }, $item; } } else { @@ -115,7 +124,7 @@ sub _load_path { $stats->{$n}->{numeric}++ if $x =~ m/^[-+]?([0-9]*\.[0-9]+|[0-9]+)$/; $stats->{$n}->{empty}++ - if $x =~ m/^\s*$/; + if length $x == 0; # faster than $x =~ m/^\s*$/; } } @@ -127,6 +136,16 @@ sub _load_path { if $stats->{$n}->{array} == $stats->{$n}->{count}; } + if ( ! @header ) { + if ( defined $data->{header} ) { + if ( ref $data->{header} eq 'ARRAY' ) { + @header = @{ $data->{header} }; + } else { + warn "header not array ", dump( $data->{header} ); + } + } + } + @header = sort { $stats->{$b}->{count} <=> $stats->{$a}->{count} } grep { defined $stats->{$_}->{count} } keys %$stats @@ -139,6 +158,7 @@ sub _load_path { stats => $stats, full_path => $full_path, size => -s $full_path, + mtime => (stat($full_path))[9], data => $data, }; @@ -160,12 +180,13 @@ sub load { $self->session( 'header' => $loaded->{$path}->{header} ); if ( ! defined $loaded->{$path}->{columns} ) { $self->session( 'columns' => $loaded->{$path}->{header} ); + $self->session( 'order' => $loaded->{$path}->{header}->[0] ); $self->redirect_to( '/data/columns' ); } else { $self->session( 'columns' => $loaded->{$path}->{columns} ); + $self->session( 'order' => $loaded->{$path}->{columns}->[0] ); $self->redirect_to( '/data/items' ); } - } @@ -190,6 +211,11 @@ sub _checked { sub columns { my $self = shift; + if ( $self->param('columns') ) { + $self->_perm_array('columns'); + $self->redirect_to('/data/items'); + } + my $stats = $self->_loaded( 'stats' ); # || $self->redirect_to( '/data/index' ); my @columns; @@ -223,7 +249,7 @@ sub _perm_array { die "$name not array ",dump($session); } } - warn "# $name ",dump @array; + #warn "# $name ",dump @array; return @array; } @@ -254,30 +280,61 @@ sub filter { my $name = $self->param('filter_name') || die "name?"; my @vals = $self->param('filter_vals'); - warn "# filter $name vals ",dump(@vals); - - if ( @vals ) { - $filters->{$name} = [ @vals ]; - warn "# filter + $name $#vals\n"; - } else { - warn "# filter - $name\n"; - delete $filters->{$name}; - } - - warn "# filters ",dump($filters); + $self->_remove_filter( $name ); + $self->_filter_on_data( $name, @vals ) if @vals; $self->session( 'offset' => 0 ); $self->redirect_to('/data/items'); } +sub _filter_on_data { + my ( $self, $name, @vals ) = @_; + + my $path = $self->session('path'); + + if ( ! defined $loaded->{$path}->{stats}->{ $name } ) { + warn "filter $name not found in data set"; + return; + } + + $filters->{$name} = [ @vals ]; + warn "_filter_on_data $name ", $#vals + 1, " values on $path\n"; + + my $filter_hash; + $filter_hash->{$_}++ foreach @vals; + + #warn "# filter_hash ",dump( $filter_hash ); + + my $items = $self->_loaded('data')->{items}; + + my $include_missing = defined $filter_hash->{_missing}; + my $filtered_items; + + foreach my $i ( 0 .. $#$items ) { + + if ( defined $items->[$i]->{$name} ) { + foreach my $v ( @{ $items->[$i]->{$name} } ) { + if ( defined $filter_hash->{ $v } ) { + $filtered_items->{$i}++; + } + } + } elsif ( $include_missing ) { + $filtered_items->{$i}++; + } + } + + #warn "# filter $name ",dump($filtered_items); + + $loaded->{$path}->{filters}->{$name} = $filtered_items; + warn "filter $name with ", scalar keys %$filtered_items, " items created\n"; +} sub _data_items { - my ( $self, $all ) = shift; - my $path = $self->session('path') || $self->redirect_to( '/data/index' ); + my ( $self, $all ) = @_; my $data = $self->_loaded( 'data' ); - return @{ $data->{items} } if $all; + return @{ $data->{items} } if $all == 1; my $filters = $self->_current_filters; my $filter_value; @@ -324,10 +381,45 @@ sub _current_filters { grep { defined $filters->{ $_ } } @{ $self->_loaded('header') } ); - warn "# current_filters ",dump($current_filters); + #warn "# current_filters ",dump($current_filters); return $current_filters; } +sub _data_sorted_by { + my ( $self, $order ) = @_; + + my $path = $self->session('path'); + + warn "_data_sorted_by $order from $path"; + + if ( defined $loaded->{$path}->{sorted}->{$order} ) { + return $loaded->{$path}->{sorted}->{$order}; + } + + my $data = $self->_loaded( 'data' ); + my $numeric = $self->_is_numeric($order); + my $missing = $numeric ? 0 : ''; + no warnings qw(numeric); + my $nr = 0; + my @sorted = map { + $_->[0] + } sort { + if ( $numeric ) { + $a->[1] <=> $b->[1] + } else { + $a->[1] cmp $b->[1] + } + } map { + [ $nr++, exists $_->{$order} ? join('', @{$_->{$order}}) : $missing ] + } @{ $data->{items} } + ; + + warn "sorted: $order numeric: $numeric items: ", $#sorted + 1, "\n"; + #warn "# sorted ",dump( @sorted ); + + $loaded->{$path}->{sorted}->{$order} = [ @sorted ]; +} + sub items { my $self = shift; @@ -346,28 +438,79 @@ sub items { # fix offset when changing limit $offset = int( $offset / $limit ) * $limit; - # FIXME - multi-level sort - my $numeric = $self->_is_numeric($order); - my $missing = $numeric ? 0 : ''; - no warnings qw(numeric); - my @sorted = sort { - my $v1 = exists $a->{$order} ? join('', @{$a->{$order}}) : $missing; - my $v2 = exists $b->{$order} ? join('', @{$b->{$order}}) : $missing; - ($v1,$v2) = ($v2,$v1) if $sort eq 'd'; - $numeric ? $v1 <=> $v2 : $v1 cmp $v2 ; - } $self->_data_items; + my $sorted = $self->_data_sorted_by( $order ); + + my @filter_names; + if ( $filters ) { + foreach my $name ( keys %$filters ) { + if ( ! defined $loaded->{$path}->{stats}->{ $name } ) { + warn "skip filter $name not found in $path\n"; + next; + } + push @filter_names, $name; + } + warn "filter_names ",dump( @filter_names ); + foreach my $name ( @filter_names ) { + next if ref $loaded->{$path}->{filters}->{$name} eq 'ARRAY'; + $self->_filter_on_data( $name, @{ $filters->{$name} } ); + } + } + + my $all_filters = join(' ',sort @filter_names,'order:',$order); + +# warn "# all_filters $all_filters ", dump( $loaded->{$path}->{filtered}->{$all_filters} ); + + if ( ! defined $loaded->{$path}->{filtered}->{$all_filters} ) { + + my $path_filters = $loaded->{$path}->{filters}; + + warn "create combined filter for $all_filters\n"; + + my @filtered; + foreach my $i ( 0 .. $#$sorted ) { + my $pos = $sorted->[$i]; + + if ( $#filter_names == -1 ) { + push @filtered, $pos; + next; + } + + my $skip = 0; + foreach ( @filter_names ) { + $skip ||= 1 if ! defined $path_filters->{$_}->{$pos}; + } + next if $skip; + + push @filtered, $pos; + } + + $loaded->{$path}->{filtered}->{$all_filters} = [ @filtered ]; + } -# warn "# sorted ", dump @sorted; + my $filtered = $loaded->{$path}->{filtered}->{$all_filters} + if defined $loaded->{$path}->{filtered}->{$all_filters}; - my $rows = $#sorted + 1; + warn "all_filters $all_filters produced ", $#$filtered + 1, " items\n" if $filtered; + + my $sorted_items; + my $data = $self->_loaded('data'); + my $from_end = $sort eq 'd' ? $#$filtered : 0; + foreach ( 0 .. $limit ) { + my $i = $_ + $offset; + last unless defined $filtered->[$i]; + $i = $from_end - $i if $from_end; + push @$sorted_items, $data->{items}->[ $filtered->[$i] ]; + } + + warn "# sorted_items ", $#$sorted_items + 1, " offset $offset limit $limit order $sort"; $self->render( order => $order, offset => $offset, limit => $limit, - sorted => [ splice @sorted, $offset, $limit ], + sorted => $sorted_items, columns => [ @columns ], - rows => $rows, + rows => $#$filtered + 1, numeric => { map { $_, $self->_is_numeric($_) } @columns }, filters => $self->_current_filters, ); @@ -394,28 +537,61 @@ sub _is_numeric { $stats->{$name}->{numeric} > $count / 2; } +sub _remove_filter { + my ($self,$name) = @_; + warn "_remove_filter $name\n"; + + my $path = $self->session('path'); + + delete $filters->{$name}; + delete $loaded->{$path}->{filters}->{$name}; + warn "filters left: ", keys %{ $loaded->{$path}->{filters} }; + + foreach ( + grep { /\b$name\b/ } + keys %{ $loaded->{$path}->{filtered} } + ) { + delete $loaded->{$path}->{filtered}->{$_}; + warn "remove filtered cache $_"; + } +} + sub facet { my $self = shift; my $path = $self->session('path') || $self->redirect_to( '/data/index' ); - if ( my $remove = $self->param('remove') ) { - delete $filters->{$remove}; + if ( my $name = $self->param('remove') ) { + $self->_remove_filter( $name ); $self->redirect_to( '/data/items' ); } my $facet; my $name = $self->param('name') || die "no name"; - my $all = $self->_perm_scalar('all', 0); + my $all = $self->_perm_scalar('all', 1); + my $data = $self->_loaded('data'); + + my $filters = $self->_current_filters; + my $all_filters = join(' ',sort keys %$filters,'order:',$self->session('order')); + my $filtered = $loaded->{$path}->{filtered}->{$all_filters} + if defined $loaded->{$path}->{filtered}->{$all_filters}; - foreach my $i ( $self->_data_items($all) ) { - if ( ! exists $i->{$name} ) { + if ( ! $filtered || $all ) { + $filtered = [ 0 .. $#{ $data->{items} } ]; + warn "filter all values\n"; + } else { + warn "filter using $all_filters\n"; + } + + foreach my $i ( @$filtered ) { + my $item = $data->{items}->[$i]; + if ( ! exists $item->{$name} ) { $facet->{ _missing }++; - } elsif ( ref $i->{$name} eq 'ARRAY' ) { - $facet->{$_}++ foreach @{ $i->{$name} }; + } elsif ( ref $item->{$name} eq 'ARRAY' ) { + $facet->{$_}++ foreach @{ $item->{$name} }; } else { - $facet->{ $i->{$name} }++; + $facet->{ $item->{$name} }++; } } @@ -429,22 +605,26 @@ sub facet { $checked = $self->_checked( @{ $filters->{$name} } ) if defined $filters->{$name}; - my $sort = $self->param('sort') || 'c'; - - # sort facet numerically if more >50% elements are numeric my $numeric = $self->_is_numeric($name); + my $sort = $self->param('sort'); + # sort numeric facets with more than 5 values ascending + $sort ||= $numeric && $#facet_names > 4 ? 'a' : 'c'; + @facet_names = sort { - if ( $sort =~ m/a/i ) { - $numeric ? $a <=> $b : lc $a cmp lc $b; - } elsif ( $sort =~ m/d/i ) { - $numeric ? $b <=> $a : lc $b cmp lc $a; - } elsif ( $sort =~ m/c/i ) { - $facet->{$b} <=> $facet->{$a}; + my $result; + if ( $sort eq 'a' ) { + $result = $numeric ? $a <=> $b : lc $a cmp lc $b; + } elsif ( $sort eq 'd' ) { + $result = $numeric ? $b <=> $a : lc $b cmp lc $a; + } elsif ( $sort eq 'c' ) { + $result = ( $facet->{$b} || -1 ) <=> ( $facet->{$a} || -1 ) } else { warn "unknown sort: $sort"; - $a cmp $b; + $result = $a cmp $b; } + $result = $a cmp $b unless defined $result; # FIXME cludge for numeric facets with invalid data + $result; } @facet_names; $self->render( name => $name, facet => $facet, checked => $checked, @@ -452,5 +632,13 @@ sub facet { ); } +sub edit { + my $self = shift; + my $content = $self->param('content'); + + $self->render( + content => $content + ); +} 1;