added header_first to WebPAC::Input::CSV
[webpac2] / lib / WebPAC / Input / CSV.pm
1 package WebPAC::Input::CSV;
2
3 use warnings;
4 use strict;
5
6 use WebPAC::Input;
7 use base qw/WebPAC::Common/;
8
9 use Text::CSV;
10 use Encode;
11 use Data::Dump qw/dump/;
12
13 =head1 NAME
14
15 WebPAC::Input::CSV - support for CSV Export Format
16
17 =cut
18
19 our $VERSION = '0.02';
20
21 =head1 FUNCTIONS
22
23 =head2 new
24
25 Returns new low-level input API object
26
27   my $input = new WebPAC::Input::CSV(
28         path => '/path/to/records.csv',
29         header_first => 1,
30   );
31
32 Options:
33
34 =over 4
35
36 =item path
37
38 path to CSV file
39
40 =back
41
42 Default encoding of input file is C<utf-8>
43
44 C<header_first> will use first line as header names.
45
46 =cut
47
48 sub new {
49         my $class = shift;
50         my $self = {@_};
51         bless($self, $class);
52
53         my $arg = {@_};
54
55         my $log = $self->_get_logger();
56
57         open( my $fh, '<:raw', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!");
58
59         my $csv = Text::CSV->new({ binary => 1 });
60
61         $self->{size} = 0;
62
63         if ( $self->{header_first} ) {
64                 my $line = $csv->getline( $fh );
65                 $self->{header_names} = $line;
66                 $self->debug( "header_names = ",dump( $self->{header_names} ) );
67         }
68
69         while ( 1 ) {
70                 my $line = $csv->getline( $fh );
71                 last if $csv->eof;
72
73                 $log->logdie( "can't parse CSV file ", $csv->error_diag ) unless $line;
74
75                 my $rec;
76                 $rec->{'000'} = [ ++$self->{size} ];
77
78                 my $col = 'A';
79                 my $header_pos = 0;
80
81                 foreach my $cell ( @$line ) {
82                         my $str = eval { Encode::decode_utf8( $cell ) };
83                         if ( $@ ) {
84                                 if ( $@ =~ m/Cannot decode string with wide characters/ ) {
85                                         $str = $cell;
86                                 } else {
87                                         die "ERROR: $@ in line ",dump( $line );
88                                 }
89                         } else {
90                                 utf8::upgrade( $cell );
91                                 $str = $cell;
92                         }
93                                 
94                         $rec->{ $col++ } = $str;
95
96                         if ( $self->{header_names} ) {
97                                 $rec->{ $self->{header_names}->[$header_pos] } = $str;
98                                 $header_pos++;
99                         }
100                 }
101
102                 push @{ $self->{_rec} }, $rec;
103
104         };
105
106         $log->debug("loaded ", $self->size, " records");
107
108         $self ? return $self : return undef;
109 }
110
111 =head2 fetch_rec
112
113 Return record with ID C<$mfn> from database
114
115   my $rec = $input->fetch_rec( $mfn, $filter_coderef );
116
117 =cut
118
119 sub fetch_rec {
120         my ( $self, $mfn, $filter_coderef ) = @_;
121
122         return $self->{_rec}->[$mfn-1];
123 }
124
125
126 =head2 size
127
128 Return number of records in database
129
130   my $size = $input->size;
131
132 =cut
133
134 sub size {
135         my $self = shift;
136         return $self->{size};
137 }
138
139 =head1 AUTHOR
140
141 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
142
143 =head1 COPYRIGHT & LICENSE
144
145 Copyright 2009 Dobrica Pavlinusic, All Rights Reserved.
146
147 This program is free software; you can redistribute it and/or modify it
148 under the same terms as Perl itself.
149
150 =cut
151
152 1; # End of WebPAC::Input::CSV