X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;ds=sidebyside;f=Fast.pm;h=6485b55397d4190d5b115285d1e4662c1984ada2;hb=c9df7a8379b2016ab44b0427198a193c4a98cf2e;hp=f944654aa902d6e30a897618a30238f334ecb4cd;hpb=5fc18887422d2dd62c716b73d81a226f3a295e5b;p=MARC-Fast diff --git a/Fast.pm b/Fast.pm index f944654..6485b55 100644 --- a/Fast.pm +++ b/Fast.pm @@ -87,7 +87,8 @@ sub new { my $len = read($self->{fh}, $leader, 24); if ($len < 24) { - carp "short read of leader, aborting\n"; + warn "short read of leader, aborting\n"; + $self->{count}--; last; } @@ -384,6 +385,27 @@ sub to_ascii { 1; __END__ +=head1 UTF-8 ENCODING + +This module does nothing with encoding. But, since MARC format is byte +oriented even when using UTF-8 which has variable number of bytes for each +character, the file is opened in binary mode. + +As a result, all scalars returned to Perl don't have the utf-8 flag. The solution is +to use C<hash_filter> and L<Encode> to decode utf-8 encoding like this: + + use Encode; + + my $marc = new MARC::Fast( + marcdb => 'utf8.marc', + hash_filter => sub { + Encode::decode( 'utf-8', $_[0] ); + }, + ); + +This will affect C<to_hash>, but C<fetch> will still return binary representation +since it doesn't support C<hash_filter>. + =head1 AUTHOR Dobrica Pavlinusic