1 package WebPAC::Input::XML;
7 use base qw/WebPAC::Common/;
13 use Data::Dump qw/dump/;
17 WebPAC::Input::XML - support for reading XML files
21 our $VERSION = '0.02';
27 Returns new low-level input API object
29 my $input = new WebPAC::Input::XML(
30 path => '/path/to/XML/records.txt'
32 my ($l,$field_nr) = @_;
33 # do something with $l which is line of input file
36 mungle => 'conf/mungle/xml-mungle.pl',
45 path to directory with xml files ending in C<.xml>
49 path to perl data_structure mungler which will be called to pre-normalize
50 hash produced by this module.
52 It's ugly and souldn't be really here, but I didn't wanted to write separate
53 input module for each possible XML in the face of the earth, and having perl
54 power to transform hash is just... Best solution :-)
67 #warn "#### arg = ",dump( $arg );
69 my $log = $self->_get_logger();
71 $log->logdie("can't find path ", $arg->{path}, ": $!\n") unless -d $arg->{path};
73 $log->info("collecting xml files from ", $arg->{path});
79 my $path = $File::Find::name;
80 return unless -f $path && $path =~ m/\.xml$/i;
86 $log->info("found ", $#files + 1, " XML files in ", $arg->{path});
88 $self->{_files} = [ sort @files ];
90 if ( my $path = $arg->{mungle} ) {
91 $log->logdie("can't find $path: $!") unless -r $path;
92 $log->info("using $path as mungle rules");
94 $self->{mungle_rules} = read_file( $path ) || $log->logdie("can't open $path: $!");
97 $self ? return $self : return undef;
102 Return record with ID C<$mfn> from database
104 my $rec = $input->fetch_rec( $mfn, $filter_coderef );
111 my ( $mfn, $filter_coderef ) = @_;
113 my $path = $self->{_files}->[ $mfn - 1 ] || return;
115 my $log = $self->_get_logger();
122 # SuppressEmpty => 1, # '' undef
124 ) || $log->logdie("can't open $path: $!");
126 $log->debug("fetch_rec( $mfn ) => $path => xml is ",sub { dump($xml) });
130 if ( my $rules = $self->{mungle_rules} ) {
133 # warn "### get_ds xml = ",dump($xml);
138 # warn "### set_ds hash = ",dump($hash);
139 foreach my $f ( keys %$hash ) {
140 # warn "+++ $f ", dump( $hash->{$f} ),"\n";
141 $ds->{$f} = $hash->{$f};
143 # warn "### set_ds mungle_ds = ",dump($ds);
146 $log->logdie("mungle rules $path error: $@") if $@;
148 # warn "### set_ds after mungle_rules ds = ",dump($ds);
156 $ds->{'000'} = [ $mfn ];
164 Return number of records in database
166 my $size = $input->size;
172 return $#{$self->{_files}} + 1;
177 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
179 =head1 COPYRIGHT & LICENSE
181 Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
183 This program is free software; you can redistribute it and/or modify it
184 under the same terms as Perl itself.