+ apply regex on records from input to fix-up character encodings [2.11]
+ add support for KinoSearch search library [2.12]
+ added new set-based normalizer which is pure perl code [2.13]
++ added --stats to report field and subfield usage [2.14]
- support arrays for normalize/path and lookup
- add Excel input format
- add dBase input format
=head1 VERSION
-Version 2.13
+Version 2.14
=cut
-our $VERSION = '2.13';
+our $VERSION = '2.14';
=head1 SYNOPSIS
use warnings;
use strict;
+use blib;
+
use WebPAC::Common;
use base qw/WebPAC::Common/;
use Text::Iconv;
Dump statistics about field and subfield usage
- print Dumper( $input->stats );
+ print $input->stats;
=cut
sub stats { # builds a plain-text report of field/subfield usage from $self->{_stats}, one line per field
my $self = shift;
- return $self->{_stats};
+
+ my $log = $self->_get_logger();
+
+ my $s = $self->{_stats};
+ if (! $s) { # nothing stored in $self->{_stats}: warn and return nothing
+ $log->warn("called stats, but there is no statistics collected");
+ return;
+ }
+
+ my $max_fld = 0; # largest per-field count seen; updated below but not read again in this sub
+
+ my $out = join("\n",
+ map {
+ my $f = $_ || die "no field"; # NOTE(review): a field key of "0" is false in Perl and would die here
+ my $v = $s->{fld}->{$f} || die "no s->{fld}->{$f}"; # a missing or zero count is treated as fatal
+ $max_fld = $v if ($v > $max_fld);
+
+ my $o = sprintf("%4d %d ~", $f, $v); # each line starts "<field> <count> ~"; field is formatted as an integer
+
+ if (defined($s->{sf}->{$f})) {
+ map { # appends " <subfield>:<count>" for every subfield, in sorted key order
+ $o .= sprintf(" %s:%d", $_, $s->{sf}->{$f}->{$_});
+ } sort keys %{ $s->{sf}->{$f} };
+ }
+
+ if (my $v_r = $s->{repeatable}->{$f}) {
+ $o .= " ($v_r)" if ($v_r != $v); # the repeatable value is shown only when it differs from the field count
+ }
+
+ $o;
+ } sort { $a <=> $b } keys %{ $s->{fld} } # field keys are compared numerically
+ );
+
+ $log->debug( sub { Dumper($s) } ); # the raw structure is handed to the debug log as a sub
+
+ return $out;
}
=head1 MEMORY USAGE
force conversion C<< normalize->path >> in C<config.yml> from
C<.xml> to C<.pl>
+=item --stats
+
+dump statistics about used fields and subfields in each input
+
=back
=cut
my $debug = 0;
my $only_db_name;
my $force_set = 0;
+my $stats = 0; # set by the --stats command-line flag below
GetOptions(
"limit=i" => \$limit,
"config" => \$config, # NOTE(review): no "=s" in this spec, so Getopt::Long treats --config as a boolean flag — confirm this is intended
"debug" => \$debug,
"force-set" => \$force_set,
+ "stats" => \$stats,
);
$config = LoadFile($config);
offset => $offset,
lookup => $lookup,
recode => $input->{recode},
+ stats => $stats,
);
$log->logdie("can't create input using $input_module") unless ($input);
$total_rows++;
}
+ $log->info("statistics of fields usage:\n", $input_db->stats) if ($stats);
+
};
eval { $indexer->finish } if ($indexer->can('finish'));
test_start_limit($input, 1, $size + 2, $size);
ok(my $s = $input->stats, 'stats');
-diag "stats: ",Dumper($s);
+diag "stats:\n$s";
$module = 'WebPAC::Input::MARC';
diag "testing with $module";