X-Git-Url: http://git.rot13.org/?p=MARC-Fast;a=blobdiff_plain;f=Fast.pm;h=aef3d5b5baead4b41eb5ec183f6196696f87c52f;hp=f944654aa902d6e30a897618a30238f334ecb4cd;hb=cabfd93a9ff6f82e8b67e3050ca3c2ea1ef0af37;hpb=5fc18887422d2dd62c716b73d81a226f3a295e5b diff --git a/Fast.pm b/Fast.pm index f944654..aef3d5b 100644 --- a/Fast.pm +++ b/Fast.pm @@ -7,7 +7,7 @@ use Data::Dump qw/dump/; BEGIN { use Exporter (); use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); - $VERSION = 0.09; + $VERSION = 0.10; @ISA = qw (Exporter); #Give a hoot don't pollute, do not export more than needed by default @EXPORT = qw (); @@ -87,7 +87,8 @@ sub new { my $len = read($self->{fh}, $leader, 24); if ($len < 24) { - carp "short read of leader, aborting\n"; + warn "short read of leader, aborting\n"; + $self->{count}--; last; } @@ -310,14 +311,14 @@ sub to_hash { my $row = $self->fetch($mfn) || return; - foreach my $rec_nr (keys %{$row}) { - foreach my $l (@{$row->{$rec_nr}}) { + foreach my $tag (keys %{$row}) { + foreach my $l (@{$row->{$tag}}) { # remove end marker $l =~ s/\x1E$//; # filter output - $l = $self->{'hash_filter'}->($l, $rec_nr) if ($self->{'hash_filter'}); + $l = $self->{'hash_filter'}->($l, $tag) if ($self->{'hash_filter'}); my $val; @@ -350,7 +351,7 @@ sub to_hash { $val = $l; } - push @{$rec->{$rec_nr}}, $val; + push @{$rec->{$tag}}, $val; } } @@ -384,6 +385,27 @@ sub to_ascii { 1; __END__ +=head1 UTF-8 ENCODING + +This module does nothing with encoding. But, since MARC format is byte +oriented even when using UTF-8 which has variable number of bytes for each +character, file is opened in binary mode. + +As a result, all scalars returned to perl don't have utf-8 flag. Solution is +to use C<hash_filter> and L<Encode> to decode utf-8 encoding like this: + + use Encode; + + my $marc = new MARC::Fast( + marcdb => 'utf8.marc', + hash_filter => sub { + Encode::decode( 'utf-8', $_[0] ); + }, + ); + +This will affect C<to_hash>, but C<fetch> will still return binary representation +since it doesn't support C<hash_filter>.
+ =head1 AUTHOR Dobrica Pavlinusic