- version bump [0.06]
author: Dobrica Pavlinusic <dpavlin@rot13.org>
Tue, 9 Jun 2009 21:41:12 +0000 (21:41 +0000)
committer: Dobrica Pavlinusic <dpavlin@rot13.org>
Tue, 9 Jun 2009 21:41:12 +0000 (21:41 +0000)
- produce column names and labels for vhost/webpac2.cgi
- implement normalize callback which puts Excel data into _rows and search values

git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1222 07558da8-63fa-0310-ba24-9fe276d99e06

lib/WebPAC/Input/Excel.pm

index 6143791..0bb8ae2 100644 (file)
@@ -6,6 +6,8 @@ use strict;
 use Spreadsheet::ParseExcel;
 use Spreadsheet::ParseExcel::Utility qw/int2col/;
 use base qw/WebPAC::Common/;
+use Text::Unaccent::PurePerl qw/unac_string/;
+use Data::Dump qw/dump/;
 
 =head1 NAME
 
@@ -13,7 +15,7 @@ WebPAC::Input::Excel - support for Microsoft Excel and compatibile files
 
 =cut
 
-our $VERSION = '0.05';
+our $VERSION = '0.06';
 
 
 =head1 SYNOPSIS
@@ -113,9 +115,8 @@ sub fetch_rec {
        $log->debug("fetch_rec( $mfn ) row: $row cols: ",$sheet->{MinCol}," - ",$sheet->{MaxCol});
 
        foreach my $col ( $sheet->{MinCol} ... $sheet->{MaxCol} ) {
-               if (my $v = $sheet->{Cells}->[$row]->[$col]->{_Value}) {        ## XXX _Value = formatted | Val = unformated !
-                       $rec->{ int2col($col) } = $v;
-               }
+               my $v = $sheet->{Cells}->[$row]->[$col]->{_Value};      ## XXX _Value = formatted | Val = unformated !
+               $rec->{ int2col($col) } = $v if defined $v;
        }
 
        # add mfn only to records with data
@@ -137,8 +138,6 @@ sub size {
        return $self->{size};
 }
 
-sub default_encoding { 'UTF-16' }
-
 our @labels;
 our @names;
 
@@ -149,17 +148,59 @@ sub normalize {
 
        my $sheet = $self->{sheet};
 
+       my $ds;
+
        if ( ! @labels ) {
-               push @labels, $sheet->{Cells}->[0]->[$_]->{_Value}
-               foreach ( $sheet->{MinCol} ... $sheet->{MaxCol} )
-               ;
-               @names = map { s{\W+}{_}; $_ } @labels;
-               $log->loginfo("column labels:", @labels, @names);
+
+               my $labels;
+
+               foreach ( $sheet->{MinCol} ... $sheet->{MaxCol} ) {
+                       my $label = $sheet->{Cells}->[0]->[$_]->{_Value};
+                       last if length($label) == 0;
+                       push @labels, $label;
+               }
+               @names = map {
+                       my $t = unac_string($_);
+                       $t =~ s{[^a-z0-9]+}{_}gi;
+                       $t =~ s{_+$}{};
+                       $t =~ s{^_+}{};
+                       $t = lc($t);
+                       $labels .= "$t\t$_\n";
+                       $t;
+               } @labels;
+
+               $log->info("columns = ", dump( @names ), " labels = ", dump( @labels ) );
+
+               $ds = {
+                       '_labels' => [ @labels ],
+                       '_names' => [ @names ],
+               };
+
+               my $path = $self->{labels} || 'var/labels.txt';
+               {
+warn $labels;
+                       open(my $fh, '>:raw', $path) || die "$path: $!";
+                       print $fh $labels;
+                       close $fh;
+               }
+               $log->info("created labels $path ", -s $path, " bytes");
        }
 
+
        my $row = $self->{from} + $mfn - 1;
 
-       
+       my $data;
+       foreach ( $sheet->{MinCol} ... $sheet->{MaxCol} ) {
+               my $name = $names[$_];
+               next unless $name;
+               my $v = $sheet->{Cells}->[$row]->[$_]->{_Value};
+               $data->{ $name } = $v;
+               $ds->{ $name } = { search => [ $v ] } if defined $v;
+       }
+
+       $ds->{'_rows'} = { $self->{sheet}->{Name} => [ $data ] };
+
+       return $ds;
 }
 
 =head1 AUTHOR