#!/usr/bin/perl
+
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
#
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
-use strict;
-use C4::Interface::CGI::Output; #
+use Modern::Perl;
-use vars qw( @tests );
-use vars qw( $loaded );
+use Test::More tests => 16;
+use Encode qw( is_utf8 );
+
+use MARC::Record;
+
+use utf8;
+use open ':std', ':encoding(utf8)';
BEGIN {
- @tests = (
- [
- 'Normal HTML without meta tag',
- sub { guesscharset($_[0]) },
- undef,
- <<EOF
-<title>control case</title>
-EOF
- ], [
- 'Result of guesscharset with normal HTML with irrelevant meta tag',
- sub { guesscharset($_[0]) },
- undef,
- <<EOF
-<meta http-equiv="Content-Language" content="zh-TW">
-EOF
- ], [
- 'Result of guesstype with normal HTML with irrelevant meta tag',
- sub { guesstype($_[0]) },
- 'text/html',
- <<EOF
-<meta http-equiv="Content-Language" content="zh-TW">
-EOF
- ], [
- 'Result of guesscharset with normal HTML with relevant meta tag',
- sub { guesscharset($_[0]) },
- 'big5',
- <<EOF
-<meta http-equiv="Content-Type" content="text/html; charset=big5">
-EOF
- ], [
- 'Result of guesstype with normal HTML with relevant meta tag',
- sub { guesstype($_[0]) },
- 'text/html; charset=big5',
- <<EOF
-<meta http-equiv="Content-Type" content="text/html; charset=big5">
-EOF
- ], [
- 'Variant 1 using single quotes',
- sub { guesstype($_[0]) },
- 'text/html; charset=iso-2022-jp',
- <<EOF
-<meta http-equiv="Content-Type" content='text/html; charset=iso-2022-jp'>
-EOF
- ], [
- 'Variant 2 using single quotes',
- sub { guesstype($_[0]) },
- 'text/html; charset=utf-8',
- <<EOF
-<meta http-equiv='Content-Type' content="text/html; charset=utf-8">
-EOF
- ], [
- 'Unquoted Content-Type',
- sub { guesstype($_[0]) },
- 'text/html; charset=big5',
- <<EOF
-<meta http-equiv=Content-Type content="text/html; charset=big5">
-EOF
- ], [
- 'XML syntax',
- sub { guesstype($_[0]) },
- 'text/html; charset=iso-8859-2',
- <<EOF
-<meta http-equiv=Content-Type content="text/html; charset=iso-8859-2" />
-EOF
- ], [
- 'Expected attributes in reverse order',
- sub { guesstype($_[0]) },
- 'text/html; charset=big5',
- <<EOF
-<meta content="text/html; charset=big5" http-equiv="Content-Type">
-EOF
- ], [
- 'Extra whitespace at end',
- sub { guesstype($_[0]) },
- 'text/html; charset=big5',
- <<EOF
-<meta http-equiv="Content-Type" content="text/html; charset=big5" >
-EOF
- ], [
- 'Multiple lines',
- sub { guesstype($_[0]) },
- 'text/html; charset=big5',
- <<EOF
-<meta
-http-equiv="Content-Type"
-content="text/html; charset=big5"
->
-EOF
- ], [
- # FIXME - THIS IS NOT A WELL-WRITTEN TEST CASE!!!
- 'With surrounding HTML',
- sub { guesstype($_[0]) },
- 'text/html; charset=us-ascii',
- <<EOF
-<html>
-<head>
-<title>Test case with surrounding HTML</title>
-<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
-</head>
-<body>
-The return value should not be contaiminated with any surround HTML
-FIXME: Auth.pm returns in code that can contaminate the charset
-FIXME: if we do not explicitly disallow whitespace in the charset
-</body>
-</html>
-EOF
- ],
-);
+ use_ok('C4::Charset');
}
-BEGIN { $| = 1; printf "1..%d\n", scalar(@tests); }
-END {print "not ok 1\n" unless $loaded;}
-$loaded = 1;
-
-
-# Run all tests in sequence
-for (my $i = 1; $i <= scalar @tests; $i += 1) {
- my $test = $tests[$i - 1];
- my($title, $f, $expected, $input) = @$test;
- die "not ok $i (malformed test case)\n"
- unless @$test == 4 && ref $f eq 'CODE';
-
- my $output = &$f($input);
- if (
- (!defined $output && !defined $expected)
- || (defined $output && defined $expected && $output eq $expected)
- ) {
- print "ok $i - $title\n";
- } else {
- print "not ok $i - $title: got ",
- (defined $output? "\"$output\"": 'undef'),
- ', expected ',
- (defined $expected? "\"$expected\"": 'undef'),
- "\n";
- }
-}
+my $string;
+ok(!defined(NormalizeString($string,undef,1)),'Uninitialized string case 1 normalizes to uninitialized string.');
+
+$string = 'Sample';
+ok(defined(NormalizeString($string,undef,0)), 'Initialized string case 1 normalizes to some string.');
+ok(defined(NormalizeString($string,undef,1)), 'Initialized string case 2 normalizes to some string.');
+ok(defined(NormalizeString($string,1,0)), 'Initialized string case 3 normalizes to some string.');
+ok(defined(NormalizeString($string,1,1)), 'Initialized string case 4 normalizes to some string.');
+
+my $octets = "abc";
+ok(IsStringUTF8ish($octets), "verify octets are valid UTF-8 (ASCII)");
+$octets = "flamb\xc3\xa9";
+ok(!Encode::is_utf8($octets), "verify that string does not have Perl UTF-8 flag on");
+ok(IsStringUTF8ish($octets), "verify octets are valid UTF-8 (LATIN SMALL LETTER E WITH ACUTE)");
+ok(!Encode::is_utf8($octets), "verify that IsStringUTF8ish does not magically turn Perl UTF-8 flag on");
+$octets = "a\xc2" . "c";
+ok(!IsStringUTF8ish($octets), "verify octets are not valid UTF-8");
+
+ok( !SetUTF8Flag(), 'SetUTF8Flag returns undef if no record passed' );
+
+my $record = MARC::Record->new();
+ok( !SetUTF8Flag($record), 'SetUTF8Flag returns undef if the record has no subfields' );
+# Add some fields/subfields
+$record->append_fields(
+ MARC::Field->new('100', ' ', ' ', a => 'Julio Cortazar'),
+ MARC::Field->new('245', ' ', ' ', a => 'Rayuela'),
+);
+# Verify our data serves its purpose
+ok( !Encode::is_utf8($record->subfield('100','a')) &&
+ !Encode::is_utf8($record->subfield('245','a')),
+ 'Verify that the subfields are NOT set the UTF-8 flag yet' );
+SetUTF8Flag($record);
+ok( Encode::is_utf8($record->subfield('100','a')) &&
+ Encode::is_utf8($record->subfield('245','a')),
+ 'SetUTF8Flag sets the UTF-8 flag to all subfields' );
+is( nsb_clean("\98Le\9c Moyen Âge"), "Le Moyen Âge", "nsb_clean removes \98 and \9c" );