From: Dobrica Pavlinusic Date: Tue, 30 Oct 2007 22:46:51 +0000 (+0000) Subject: r1386@llin: dpavlin | 2007-10-30 23:46:53 +0100 X-Git-Url: http://git.rot13.org/?p=webpac2;a=commitdiff_plain;h=957ed4e3ce3f0f8b0ec04ae0402e41dd11ef4785 r1386@llin: dpavlin | 2007-10-30 23:46:53 +0100 new WebPAC::Output::Sorted to create huge sorted lists using Sort::External (to keep memory under controll) git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@920 07558da8-63fa-0310-ba24-9fe276d99e06 --- diff --git a/Makefile.PL b/Makefile.PL index 249d1f0..00ff13d 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -38,6 +38,7 @@ WriteMakefile( 'Class::Accessor' => 0, 'JSON' => 0, 'File::Spec' => 0, + 'Sort::External' => 0, }, dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, clean => { FILES => 'WebPAC-* pod2html Makefile tags' }, diff --git a/lib/WebPAC/Output/Sorted.pm b/lib/WebPAC/Output/Sorted.pm new file mode 100644 index 0000000..a5cc924 --- /dev/null +++ b/lib/WebPAC/Output/Sorted.pm @@ -0,0 +1,174 @@ +package WebPAC::Output::Sorted; + +use warnings; +use strict; + +use base qw/WebPAC::Common WebPAC::Output Class::Accessor/; +__PACKAGE__->mk_accessors(qw( + path + database + + sortex +)); + +use Sort::External; +use File::Path; +use Data::Dump qw/dump/; + +=head1 NAME + +WebPAC::Output::Sorted - create sorted lists + +=head1 VERSION + +Version 0.01 + +=cut + +our $VERSION = '0.01'; + +=head1 SYNOPSIS + +Create sorted with from data with type C. + +=head1 FUNCTIONS + +=head2 new + + my $output = new WebPAC::Output::Sorted({ + path => '/path/to/sorted_dir', + database => 'demo', + }); + +=head2 init + + $output->init; + +=cut + +sub init { + my $self = shift; + + my $log = $self->_get_logger; + + foreach my $p (qw/path database/) { + $log->logdie("need $p") unless ($self->$p); + } + + if ( ! -e $self->path ) { + mkpath $self->path || $log->logdie("can't create ", $self->path,": $!"); + $log->info("created ", $self->path); + } + +} + + +=head2 add + +Adds one entry + + $est->add( 42, $ds ); + +=cut + +sub add { + my $self = shift; + + my ( $id, $ds ) = @_; + + my $log = $self->_get_logger; + $log->logdie("need id") unless defined $id; + $log->logdie("need ds") unless $ds; + + $log->debug("id: $id ds = ",dump($ds)); + + my $hash = $self->ds_to_hash( $ds, 'sorted' ) || return; + + warn "add( $id, ",dump($ds)," ) => ", dump( $hash ); + + foreach my $f ( keys %$hash ) { + + my $sortex = $self->{sortex}->{$f}; + + if ( ! $sortex ) { + + my $sortscheme = sub { $Sort::External::b <=> $Sort::External::a }; + $sortex = Sort::External->new( + -mem_threshold => 2**24, # default: 2**20 (1Mb) + -cache_size => 100_000, # default: undef (disabled) +# -sortsub => $sortscheme, # default sort: standard lexical +# -working_dir => $tmp, + ); + + $log->logdie("can't create sorted list for $f: $!") unless $sortex; + + $log->info("created sorted list for $f"); + + $self->{sortex}->{$f} = $sortex; + + }; + + my @v; + + if ( ref( $hash->{$f} ) eq 'ARRAY' ) { + @v = @{ $hash->{$f} }; + } else { + @v = $hash->{$f} ; + } + + # we want LF in output file :-) + @v = map { "$_\n" } @v; + + $self->{sortex}->{$f}->feed( @v ); + + } + + return 1; +} + +=head2 finish + +Close index + + $index->finish; + +=cut + +sub finish { + my $self = shift; + + my $log = $self->_get_logger(); + + $log->info("finish sorted lists"); + + foreach my $list ( keys %{ $self->{sortex} } ) { + + my $path = $self->path . '/' . $list . '.txt'; + $log->info("saving $list to $path"); + + use Fcntl; + $self->{sortex}->{$list}->finish( + -outfile => $path, + -flags => (O_CREAT | O_WRONLY), + ); + + } + + $log->info("over with sorted lists"); +} + + +=head1 AUTHOR + +Dobrica Pavlinusic, C<< >> + +=head1 COPYRIGHT & LICENSE + +Copyright 2007 Dobrica Pavlinusic, All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + +=cut + +1; diff --git a/t/5-output-sorted.t b/t/5-output-sorted.t new file mode 100755 index 0000000..b43c9be --- /dev/null +++ b/t/5-output-sorted.t @@ -0,0 +1,59 @@ +#!/usr/bin/perl -w + +use Test::More tests => 13; +use Test::Exception; +use Cwd qw/abs_path/; +use Data::Dump qw/dump/; +use blib; +use strict; + +BEGIN { +use_ok( 'WebPAC::Output::Sorted' ); +} + +my $debug = shift @ARGV; + +ok(my $abs_path = abs_path($0), "abs_path"); +$abs_path =~ s#/[^/]*$#/#; # +diag "abs_path: $abs_path"; +my $path = "$abs_path/sorted/"; + +ok(my $out = new WebPAC::Output::Sorted({ + path => $path, + database => 'test', + clean => 1, + debug => $debug, +}), "new"); + +ok( $out->init, 'init' ); + +my $ds = { + 'Source' => { + 'name' => 'Izvor: ', + 'sorted' => [ 'foo' ] + }, + 'ID' => { + 'sorted' => 'id', + }, + 'Array' => { + 'sorted' => [ qw/a1 a2 s3 a4 a5/ ], + }, +}; + +throws_ok { $out->add( ) } qr/need id/, 'add without params'; +throws_ok { $out->add( 42 ) } qr/need ds/, 'add without ds'; + +ok( $out->add( 42, $ds ), 'add 42' ); + +ok( $out->add( 99, { foo => { sorted => 'bar' } } ), 'add 99' ); + +ok( $out->add( 100, { foo => { sorted => [ qw/foo bar baz/ ] } } ), 'add 100' ); + +ok( -e $out->path, "created $path" ); + +diag $out->path," eq ",$path; +cmp_ok( $out->path, 'eq', $path, 'path' ); + +ok( $out->finish, 'finish' ); + +ok( -e "$path/test.txt", 'list' );