r1386@llin: dpavlin | 2007-10-30 23:46:53 +0100
[webpac2] / lib / WebPAC / Output / Sorted.pm
diff --git a/lib/WebPAC/Output/Sorted.pm b/lib/WebPAC/Output/Sorted.pm
new file mode 100644 (file)
index 0000000..a5cc924
--- /dev/null
@@ -0,0 +1,174 @@
+package WebPAC::Output::Sorted;
+
+use warnings;
+use strict;
+
+use base qw/WebPAC::Common WebPAC::Output Class::Accessor/;
+__PACKAGE__->mk_accessors(qw(
+       path
+       database
+
+       sortex
+));
+
+use Sort::External;
+use File::Path;
+use Data::Dump qw/dump/;
+
+=head1 NAME
+
+WebPAC::Output::Sorted - create sorted lists
+
+=head1 VERSION
+
+Version 0.01
+
+=cut
+
+our $VERSION = '0.01';
+
+=head1 SYNOPSIS
+
+Create sorted with from data with type C<sorted>.
+
+=head1 FUNCTIONS
+
+=head2 new
+
+ my $output = new WebPAC::Output::Sorted({
+       path => '/path/to/sorted_dir',
+       database => 'demo',
+ });
+
+=head2 init
+
+ $output->init;
+
+=cut
+
+sub init {
+       my $self = shift;
+
+       my $log = $self->_get_logger;
+
+       foreach my $p (qw/path database/) {
+               $log->logdie("need $p") unless ($self->$p);
+       }
+
+       if ( ! -e $self->path ) {
+               mkpath $self->path || $log->logdie("can't create ", $self->path,": $!");
+               $log->info("created ", $self->path);
+       }
+
+}
+
+
+=head2 add
+
+Adds one entry
+
+  $est->add( 42, $ds );
+
+=cut
+
+sub add {
+       my $self = shift;
+
+       my ( $id, $ds ) = @_;
+
+       my $log = $self->_get_logger;
+       $log->logdie("need id") unless defined $id;
+       $log->logdie("need ds") unless $ds;
+
+       $log->debug("id: $id ds = ",dump($ds));
+
+       my $hash = $self->ds_to_hash( $ds, 'sorted' ) || return;
+
+       warn "add( $id, ",dump($ds)," ) => ", dump( $hash );
+
+       foreach my $f ( keys %$hash ) {
+
+               my $sortex = $self->{sortex}->{$f};
+
+               if ( ! $sortex ) {
+
+                       my $sortscheme = sub { $Sort::External::b <=> $Sort::External::a };
+                       $sortex = Sort::External->new(
+                               -mem_threshold   => 2**24,            # default: 2**20 (1Mb)
+                               -cache_size      => 100_000,          # default: undef (disabled) 
+#                              -sortsub         => $sortscheme,      # default sort: standard lexical
+#                              -working_dir     => $tmp,
+                       );
+
+                       $log->logdie("can't create sorted list for $f: $!") unless $sortex;
+
+                       $log->info("created sorted list for $f");
+
+                       $self->{sortex}->{$f} = $sortex;
+
+               };
+
+               my @v;
+
+               if ( ref( $hash->{$f} ) eq 'ARRAY' ) {
+                       @v = @{ $hash->{$f} };
+               } else {
+                       @v =    $hash->{$f}  ;
+               }
+
+               # we want LF in output file :-)
+               @v = map { "$_\n" } @v;
+
+               $self->{sortex}->{$f}->feed( @v );
+
+       }
+
+       return 1;
+}
+
+=head2 finish
+
+Close index
+
+ $index->finish;
+
+=cut
+
+sub finish {
+       my $self = shift;
+
+       my $log = $self->_get_logger();
+
+       $log->info("finish sorted lists");
+
+       foreach my $list ( keys %{ $self->{sortex} } ) {
+
+               my $path = $self->path . '/' . $list . '.txt';
+               $log->info("saving $list to $path");
+
+               use Fcntl;
+               $self->{sortex}->{$list}->finish( 
+                       -outfile => $path,
+                       -flags => (O_CREAT | O_WRONLY),
+               );
+       
+       }
+
+       $log->info("over with sorted lists");
+}
+
+
+=head1 AUTHOR
+
+Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
+
+1;