X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;ds=sidebyside;f=Fast.pm;h=6485b55397d4190d5b115285d1e4662c1984ada2;hb=c9df7a8379b2016ab44b0427198a193c4a98cf2e;hp=f944654aa902d6e30a897618a30238f334ecb4cd;hpb=5fc18887422d2dd62c716b73d81a226f3a295e5b;p=MARC-Fast

diff --git a/Fast.pm b/Fast.pm
index f944654..6485b55 100644
--- a/Fast.pm
+++ b/Fast.pm
@@ -87,7 +87,8 @@ sub new {
 		my $len = read($self->{fh}, $leader, 24);
 
 		if ($len < 24) {
-			carp "short read of leader, aborting\n";
+			warn "short read of leader, aborting\n";
+			$self->{count}--;
 			last;
 		}
 
@@ -384,6 +385,27 @@ sub to_ascii {
 1;
 __END__
 
+=head1 UTF-8 ENCODING
+
+This module does not do any encoding conversion itself. Since the MARC format
+is byte-oriented, even when it holds UTF-8 data in which a character can span
+a variable number of bytes, the file is opened in binary mode.
+
+As a result, all scalars returned to perl don't have the utf-8 flag set. The
+solution is to use C<hash_filter> and L<Encode> to decode utf-8 like this:
+
+  use Encode;
+
+  my $marc = new MARC::Fast(
+  	marcdb => 'utf8.marc',
+	hash_filter => sub {
+		Encode::decode( 'utf-8', $_[0] );
+	},
+  );
+
+This will affect C<to_hash>, but C<fetch> will still return the binary
+representation since it doesn't support C<hash_filter>.
+
 =head1 AUTHOR
 
 	Dobrica Pavlinusic