From e4c102564496b8bc44b63bec5b6e7b36875bd66f Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Mon, 27 Jul 2009 16:24:41 +0000 Subject: [PATCH] force utf-8 encoding on all data coming from file git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1254 07558da8-63fa-0310-ba24-9fe276d99e06 --- lib/WebPAC/Input/CSV.pm | 11 +++++------ t/2-input-csv.t | 12 +++++++++--- t/data/records-utf8.csv | 4 ++++ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/lib/WebPAC/Input/CSV.pm b/lib/WebPAC/Input/CSV.pm index 112b9f0..7dcf582 100644 --- a/lib/WebPAC/Input/CSV.pm +++ b/lib/WebPAC/Input/CSV.pm @@ -7,6 +7,7 @@ use WebPAC::Input; use base qw/WebPAC::Common/; use Text::CSV; +use Encode; use Data::Dump qw/dump/; =head1 NAME @@ -15,7 +16,7 @@ WebPAC::Input::CSV - support for CSV Export Format =cut -our $VERSION = '0.01'; +our $VERSION = '0.02'; =head1 FUNCTIONS @@ -50,7 +51,7 @@ sub new { my $log = $self->_get_logger(); - open( my $fh, '<:encoding(utf-8)', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!"); + open( my $fh, '<:raw', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!"); my $csv = Text::CSV->new({ binary => 1 }); @@ -66,7 +67,7 @@ sub new { $rec->{'000'} = [ ++$self->{size} ]; my $col = 'A'; - $rec->{ $col++ } = $_ foreach @$line; + $rec->{ $col++ } = Encode::decode_utf8( $_ ) foreach @$line; push @{ $self->{_rec} }, $rec; @@ -86,9 +87,7 @@ Return record with ID C<$mfn> from database =cut sub fetch_rec { - my $self = shift; - - my ( $mfn, $filter_coderef ) = @_; + my ( $self, $mfn, $filter_coderef ) = @_; return $self->{_rec}->[$mfn-1]; } diff --git a/t/2-input-csv.t b/t/2-input-csv.t index 096a207..2ed084e 100755 --- a/t/2-input-csv.t +++ b/t/2-input-csv.t @@ -1,13 +1,15 @@ #!/usr/bin/perl -w use strict; -use blib; +use lib 'lib'; -use Test::More tests => 27; +use Test::More tests => 63; BEGIN { use_ok( 'WebPAC::Test' ); use_ok( 'WebPAC::Input' ); +use_ok( 'Encode' ); +use_ok( 'Devel::Peek' ); } my $module = 
'WebPAC::Input::CSV'; @@ -23,13 +25,17 @@ ok(my $db = $input->open( path => "$abs_path/data/records-utf8.csv" ), "open"); ok(my $size = $input->size, "size"); -cmp_ok( $size, '==', 7, 'size ok' ); +cmp_ok( $size, '==', 11, 'size ok' ); foreach my $mfn ( 1 ... $size ) { my $rec = $input->fetch; ok($rec, "fetch $mfn"); cmp_ok($rec->{'000'}->[0], '==', $mfn, 'has mfn'); cmp_ok($input->pos, '==', $mfn, "pos $mfn"); + + ok( my $txt = $rec->{'E'}, 'E' ); + diag Dump( $txt ) if $debug; + ok( Encode::is_utf8( $txt, 1 ), 'utf8' ); diag "rec: ", dump($rec), "\n" if $debug; } diff --git a/t/data/records-utf8.csv b/t/data/records-utf8.csv index a8728c3..fd25760 100644 --- a/t/data/records-utf8.csv +++ b/t/data/records-utf8.csv @@ -5,3 +5,7 @@ FFSF,2205,50112,,"Akzente : [Zeitschrift für literatur]",0002-3957,"Jhr.54(2007 FFSF,2221,50113,RARITETI,"Luna : Belletristisches Beiblatt der Agramer Zeitung",1333-5820,"Knj. 1838 - 1857" ,,50116,,"Godišnjak Ogranka Matice hrvatske Beli Manastir",1845-044X, FFAN,,50304,,Scrutiny,, +FFSF,387,50060,,"Lingvist : Časopis za strane jezike Saveza stručnih prevodilaca Jugoslavije",,"God.1(1963) - 2-3(1964-1965)" +FFSF,97,50061,,"Naše řeč",0027-8203,"Roč.49(1966) ; 52(1969) ; 54(1971)-55(1972) ; 57(1974)-72-73(1989-90) ; 76(1993)/7972" +FFSF,389,50065,,"Glossy : Poświęcone kulturze i piśmiennictwu",,"Rok 1(1939)-Zes.1-2,3" +FFSF,220,50093,,"Libra : Zbornik SC Književnog kluba",0352-9126,Sv.4(1996)/8311 -- 2.20.1