1 package WebPAC::Input::Excel;
6 use Spreadsheet::ParseExcel;
7 use Spreadsheet::ParseExcel::Utility qw/int2col/;
8 use base qw/WebPAC::Common/;
9 use Text::Unaccent::PurePerl qw/unac_string/;
10 use Data::Dump qw/dump/;
14 WebPAC::Input::Excel - support for Microsoft Excel and compatibile files
18 our $VERSION = '0.06';
23 Open Microsoft Excell, or compatibile format (for e.g. from OpenOffice.org
24 or Gnuemeric) in C<.xls> format.
30 Returns handle to database and size
32 my $excel = new WebPAC::Input::Excel(
33 path => '/path/to/workbook.xls'
34 worksheet => 'name of sheet',
39 C<worksheet> is case and white-space insensitive name of worksheet in Excel
40 file to use. If not specified, name of input is used. If none of those
41 methods returned sheet, first worksheet in file is used instead.
43 C<from> and C<to> specify row numbers to start and finish import.
52 my $log = $self->_get_logger();
54 $log->logdie("can't open excel file $self->{path}: $!") unless (-r $self->{path} && -f $self->{path});
56 my $workbook = Spreadsheet::ParseExcel::Workbook->Parse($self->{path});
59 my $wanted_worksheet = $self->{worksheet} || $self->{name};
61 if ($wanted_worksheet) {
64 $sheet = shift @{ $workbook->{Worksheet} };
65 $log->logdie("can't find sheet '$wanted_worksheet' in $self->{path}\n") unless (defined($sheet));
66 $name = $sheet->{Name};
68 } until ($name =~ m/^\s*\Q$wanted_worksheet\E\s*$/i);
72 $sheet ||= shift @{ $workbook->{Worksheet} };
74 $self->{sheet} = $sheet;
76 $self->{from} ||= $sheet->{MinRow};
77 $self->{to} ||= $sheet->{MaxRow};
79 my $size = $self->{to} - $self->{from};
80 $self->{size} = $size;
82 $log->warn("opening Excel file '$self->{path}', using ",
83 $wanted_worksheet ? '' : 'first ',
84 "worksheet: $sheet->{Name} [$size rows]"
87 $self ? return $self : return undef;
92 Return record with ID C<$mfn> from database
94 my $rec = $self->fetch_rec( $mfn );
96 Columns are named C<A>, C<B> and so on...
105 my $log = $self->_get_logger();
107 my $sheet = $self->{sheet};
108 $log->logdie("can't find sheet hash") unless (defined($sheet));
109 $log->logdie("sheet hash isn't Spreadsheet::ParseExcel::Worksheet") unless ($sheet->isa('Spreadsheet::ParseExcel::Worksheet'));
113 my $row = $self->{from} + $mfn - 1;
115 $log->debug("fetch_rec( $mfn ) row: $row cols: ",$sheet->{MinCol}," - ",$sheet->{MaxCol});
117 foreach my $col ( $sheet->{MinCol} ... $sheet->{MaxCol} ) {
118 my $v = $sheet->{Cells}->[$row]->[$col]->{_Value}; ## XXX _Value = formatted | Val = unformated !
119 $rec->{ int2col($col) } = $v if defined $v;
122 # add mfn only to records with data
123 $rec->{'000'} = [ $mfn ] if ($rec);
130 Return number of records in database
132 my $size = $isis->size;
138 return $self->{size};
145 my ($self,$mfn) = @_;
147 my $log = $self->_get_logger();
149 my $sheet = $self->{sheet};
157 foreach ( $sheet->{MinCol} ... $sheet->{MaxCol} ) {
158 my $label = $sheet->{Cells}->[0]->[$_]->{_Value};
159 last if length($label) == 0;
160 push @labels, $label;
163 my $t = unac_string($_);
164 $t =~ s{[^a-z0-9]+}{_}gi;
168 $labels .= "$t\t$_\n";
172 $log->info("columns = ", dump( @names ), " labels = ", dump( @labels ) );
175 '_labels' => [ @labels ],
176 '_names' => [ @names ],
179 my $path = $self->{labels} || 'var/labels.txt';
182 open(my $fh, '>:raw', $path) || die "$path: $!";
186 $log->info("created labels $path ", -s $path, " bytes");
190 my $row = $self->{from} + $mfn - 1;
193 foreach ( $sheet->{MinCol} ... $sheet->{MaxCol} ) {
194 my $name = $names[$_];
196 my $v = $sheet->{Cells}->[$row]->[$_]->{_Value};
197 $data->{ $name } = $v;
198 $ds->{ $name } = { search => [ $v ] } if defined $v;
201 $ds->{'_rows'} = { $self->{sheet}->{Name} => [ $data ] };
208 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
210 =head1 COPYRIGHT & LICENSE
212 Copyright 2005-2006 Dobrica Pavlinusic, All Rights Reserved.
214 This program is free software; you can redistribute it and/or modify it
215 under the same terms as Perl itself.
219 1; # End of WebPAC::Input::Excel