C4/Charset.pm

   1 package C4::Charset;
   2
   3 # Copyright (C) 2008 LibLime
   4 #
   5 # This file is part of Koha.
   6 #
   7 # Koha is free software; you can redistribute it and/or modify it under the
   8 # terms of the GNU General Public License as published by the Free Software
   9 # Foundation; either version 2 of the License, or (at your option) any later
  10 # version.
  11 #
  12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  14 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License along with
  17 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  18 # Suite 330, Boston, MA  02111-1307 USA
  19
  20 use strict;
  21 use warnings;
  22
  23 use MARC::Charset qw/marc8_to_utf8/;
  24 use Text::Iconv;
  25
  26 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  27
  28 BEGIN {
  29     # set the version for version checking
  30     $VERSION = 3.01;
  31     require Exporter;
  32     @ISA    = qw(Exporter);
  33     @EXPORT = qw(
  34         IsStringUTF8ish
  35         MarcToUTF8Record
  36         SetMarcUnicodeFlag
  37         StripNonXmlChars
  38     );
  39 }
  40
  41 =head1 NAME
  42
  43 C4::Charset - utilities for handling character set conversions.
  44
  45 =head1 SYNOPSIS
  46
  47 use C4::Charset;
  48
  49 =head1 DESCRIPTION
  50
  51 This module contains routines for dealing with character set
  52 conversions, particularly for MARC records.
  53
  54 A variety of character encodings are in use by various MARC
  55 standards, and even more character encodings are used by
  56 non-standard MARC records.  The various MARC formats generally
  57 do not do a good job of advertising a given record's character
  58 encoding, and even when a record does advertise its encoding,
  59 e.g., via the Leader/09, experience has shown that one cannot
  60 trust it.
  61
  62 Ultimately, all MARC records are stored in Koha in UTF-8 and
  63 must be converted from whatever the source character encoding is.
  64 The goal of this module is to ensure that these conversions
  65 take place accurately.  When a character conversion cannot take
  66 place, or at least not accurately, the module was provide
  67 enough information to allow user-facing code to inform the user
  68 on how to deal with the situation.
  69
  70 =cut
  71
  72 =head1 FUNCTIONS
  73
  74 =head2 IsStringUTF8ish
  75
  76 =over 4
  77
  78 my $is_utf8 = IsStringUTF8ish($str);
  79
  80 =back
  81
  82 Determines if C<$str> is valid UTF-8.  This can mean
  83 one of two things:
  84
  85 =over 2
  86
  87 =item *
  88
  89 The Perl UTF-8 flag is set and the string contains valid UTF-8.
  90
  91 =item *
  92
  93 The Perl UTF-8 flag is B<not> set, but the octets contain
  94 valid UTF-8.
  95
  96 =back
  97
  98 The function is named C<IsStringUTF8ish> instead of C<IsStringUTF8>
  99 because in one could be presented with a MARC blob that is
 100 not actually in UTF-8 but whose sequence of octets appears to be
 101 valid UTF-8.  The rest of the MARC character conversion functions
 102 will assume that this situation occur does not very often.
 103
 104 =cut
 105
 106 sub IsStringUTF8ish {
 107     my $str = shift;
 108
 109     return 1 if utf8::is_utf8($str);
 110     return utf8::decode($str);
 111 }
 112
 113 =head2 MarcToUTF8Record
 114
 115 =over 4
 116
 117 ($marc_record, $converted_from, $errors_arrayref) = MarcToUTF8Record($marc_blob, $marc_flavour, [, $source_encoding]);
 118
 119 =back
 120
 121 Given a MARC blob or a C<MARC::Record>, the MARC flavour, and an
 122 optional source encoding, return a C<MARC::Record> that is
 123 converted to UTF-8.
 124
 125 The returned C<$marc_record> is guaranteed to be in valid UTF-8, but
 126 is not guaranteed to have been converted correctly.  Specifically,
 127 if C<$converted_from> is 'failed', the MARC record returned failed
 128 character conversion and had each of its non-ASCII octets changed
 129 to the Unicode replacement character.
 130
 131 If the source encoding was not specified, this routine will
 132 try to guess it; the character encoding used for a successful
 133 conversion is returned in C<$converted_from>.
 134
 135 =cut
 136
 137 sub MarcToUTF8Record {
 138     my $marc = shift;
 139     my $marc_flavour = shift;
 140     my $source_encoding = shift;
 141
 142     my $marc_record;
 143     my $marc_blob_is_utf8 = 0;
 144     if (ref($marc) eq 'MARC::Record') {
 145         my $marc_blob = $marc->as_usmarc();
 146         $marc_blob_is_utf8 = IsStringUTF8ish($marc_blob);
 147         $marc_record = $marc;
 148     } else {
 149         # dealing with a MARC blob
 150
 151         # remove any ersatz whitespace from the beginning and
 152         # end of the MARC blob -- these can creep into MARC
 153         # files produced by several sources -- caller really
 154         # should be doing this, however
 155         $marc =~ s/^\s+//;
 156         $marc =~ s/\s+$//;
 157         $marc_blob_is_utf8 = IsStringUTF8ish($marc);
 158         eval {
 159             $marc_record = MARC::Record->new_from_usmarc($marc);
 160         };
 161         if ($@) {
 162             # if we fail the first time, one likely problem
 163             # is that we have a MARC21 record that says that it's
 164             # UTF-8 (Leader/09 = 'a') but contains non-UTF-8 characters.
 165             # We'll try parsing it again.
 166             substr($marc, 9, 1) = ' ';
 167             eval {
 168                 $marc_record = MARC::Record->new_from_usmarc($marc);
 169             };
 170             if ($@) {
 171                 # it's hopeless; return an empty MARC::Record
 172                 return MARC::Record->new(), 'failed', ['could not parse MARC blob'];
 173             }
 174         }
 175     }
 176
 177     # If we do not know the source encoding, try some guesses
 178     # as follows:
 179     #   1. Record is UTF-8 already.
 180     #   2. If MARC flavor is MARC21, then
 181     #      a. record is MARC-8
 182     #      b. record is ISO-8859-1
 183     #   3. If MARC flavor is UNIMARC, then
 184     if (not defined $source_encoding) {
 185         if ($marc_blob_is_utf8) {
 186             # note that for MARC21 we are not bothering to check
 187             # if the Leader/09 is set to 'a' or not -- because
 188             # of problems with various ILSs (including Koha in the
 189             # past, alas), this just is not trustworthy.
 190             SetMarcUnicodeFlag($marc_record, $marc_flavour);
 191             return $marc_record, 'UTF-8', [];
 192         } else {
 193             if ($marc_flavour eq 'MARC21') {
 194                 return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
 195             } elsif ($marc_flavour eq 'UNIMARC') {
 196                 return _default_unimarc_charconv_to_utf8($marc_record, $marc_flavour);
 197             } else {
 198                 return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
 199             }
 200         }
 201     } else {
 202         # caller knows the character encoding
 203         my $original_marc_record = $marc_record->clone();
 204         my @errors;
 205         if ($source_encoding =~ /utf-?8/i) {
 206             if ($marc_blob_is_utf8) {
 207                 SetMarcUnicodeFlag($marc_record, $marc_flavour);
 208                 return $marc_record, 'UTF-8', [];
 209             } else {
 210                 push @errors, 'specified UTF-8 => UTF-8 conversion, but record is not in UTF-8';
 211             }
 212         } elsif ($source_encoding =~ /marc-?8/i) {
 213             @errors = _marc_marc8_to_utf8($marc_record, $marc_flavour);
 214         } elsif ($source_encoding =~ /5426/) {
 215             @errors = _marc_iso5426_to_utf8($marc_record, $marc_flavour);
 216         } else {
 217             # assume any other character encoding is for Text::Iconv
 218             @errors = _marc_to_utf8_via_text_iconv($marc_record, $marc_flavour, 'iso-8859-1');
 219         }
 220
 221         if (@errors) {
 222             _marc_to_utf8_replacement_char($original_marc_record, $marc_flavour);
 223             return $original_marc_record, 'failed', \@errors;
 224         } else {
 225             return $marc_record, $source_encoding, [];
 226         }
 227     }
 228
 229 }
 230
 231 =head2 SetMarcUnicodeFlag
 232
 233 =over 4
 234
 235 SetMarcUnicodeFlag($marc_record, $marc_flavour);
 236
 237 =back
 238
 239 Set both the internal MARC::Record encoding flag
 240 and the appropriate Leader/09 (MARC21) or
 241 100/26-29 (UNIMARC) to indicate that the record
 242 is in UTF-8.  Note that this does B<not> do
 243 any actual character conversion.
 244
 245 =cut
 246
 247 sub SetMarcUnicodeFlag {
 248     my $marc_record = shift;
 249     my $marc_flavour = shift; # || C4::Context->preference("marcflavour");
 250
 251     $marc_record->encoding('UTF-8');
 252     if ($marc_flavour eq 'MARC21') {
 253         my $leader = $marc_record->leader();
 254         substr($leader, 9, 1) = 'a';
 255         $marc_record->leader($leader);
 256     } elsif ($marc_flavour eq "UNIMARC") {
 257         if (my $field = $marc_record->field('100')) {
 258             my $sfa = $field->subfield('a');
 259
 260             my $subflength = 36;
 261             # fix the length of the field
 262             $sfa = substr $sfa, 0, $subflength if (length($sfa) > $subflength);
 263             $sfa = sprintf( "%-*s", 35, $sfa ) if (length($sfa) < $subflength);
 264
 265             substr($sfa, 26, 4) = '50  ';
 266             $field->update('a' => $sfa);
 267         }
 268     } else {
 269         warn "Unrecognized marcflavour: $marc_flavour";
 270     }
 271 }
 272
 273 =head2 StripNonXmlChars
 274
 275 =over 4
 276
 277 my $new_str = StripNonXmlChars($old_str);
 278
 279 =back
 280
 281 Given a string, return a copy with the
 282 characters that are illegal in XML
 283 removed.
 284
 285 This function exists to work around a problem
 286 that can occur with badly-encoded MARC records.
 287 Specifically, if a UTF-8 MARC record also
 288 has excape (\x1b) characters, MARC::File::XML
 289 will let the escape characters pass through
 290 when as_xml() or as_xml_record() is called.  The
 291 problem is that the escape character is not
 292 legal in well-formed XML documents, so when
 293 MARC::File::XML attempts to parse such a record,
 294 the XML parser will fail.
 295
 296 Stripping such characters will allow a
 297 MARC::Record->new_from_xml()
 298 to work, at the possible risk of some data loss.
 299
 300 =cut
 301
 302 sub StripNonXmlChars {
 303     my $str = shift;
 304     if (!defined($str) || $str eq ""){
 305         return "";
 306     }
 307     $str =~ s/[^\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]//g;
 308     return $str;
 309 }
 310
 311 =head1 INTERNAL FUNCTIONS
 312
 313 =head2 _default_marc21_charconv_to_utf8
 314
 315 =over 4
 316
 317 my ($new_marc_record, $guessed_charset) = _default_marc21_charconv_to_utf8($marc_record);
 318
 319 =back
 320
 321 Converts a C<MARC::Record> of unknown character set to UTF-8,
 322 first by trying a MARC-8 to UTF-8 conversion, then ISO-8859-1
 323 to UTF-8, then a default conversion that replaces each non-ASCII
 324 character with the replacement character.
 325
 326 The C<$guessed_charset> return value contains the character set
 327 that resulted in a conversion to valid UTF-8; note that
 328 if the MARC-8 and ISO-8859-1 conversions failed, the value of
 329 this is 'failed'.
 330
 331 =cut
 332
 333 sub _default_marc21_charconv_to_utf8 {
 334     my $marc_record = shift;
 335     my $marc_flavour = shift;
 336
 337     my $trial_marc8 = $marc_record->clone();
 338     my @all_errors = ();
 339     my @errors = _marc_marc8_to_utf8($trial_marc8, $marc_flavour);
 340     unless (@errors) {
 341         return $trial_marc8, 'MARC-8', [];
 342     }
 343     push @all_errors, @errors;
 344
 345     my $trial_8859_1 = $marc_record->clone();
 346     @errors = _marc_to_utf8_via_text_iconv($trial_8859_1, $marc_flavour, 'iso-8859-1');
 347     unless (@errors) {
 348         return $trial_8859_1, 'iso-8859-1', []; # note -- we could return \@all_errors
 349                                                 # instead if we wanted to report details
 350                                                 # of the failed attempt at MARC-8 => UTF-8
 351     }
 352     push @all_errors, @errors;
 353
 354     my $default_converted = $marc_record->clone();
 355     _marc_to_utf8_replacement_char($default_converted, $marc_flavour);
 356     return $default_converted, 'failed', \@all_errors;
 357 }
 358
 359 =head2 _default_unimarc_charconv_to_utf8
 360
 361 =over 4
 362
 363 my ($new_marc_record, $guessed_charset) = _default_unimarc_charconv_to_utf8($marc_record);
 364
 365 =back
 366
 367 Converts a C<MARC::Record> of unknown character set to UTF-8,
 368 first by trying a ISO-5426 to UTF-8 conversion, then ISO-8859-1
 369 to UTF-8, then a default conversion that replaces each non-ASCII
 370 character with the replacement character.
 371
 372 The C<$guessed_charset> return value contains the character set
 373 that resulted in a conversion to valid UTF-8; note that
 374 if the MARC-8 and ISO-8859-1 conversions failed, the value of
 375 this is 'failed'.
 376
 377 =cut
 378
 379 sub _default_unimarc_charconv_to_utf8 {
 380     my $marc_record = shift;
 381     my $marc_flavour = shift;
 382
 383     my $trial_marc8 = $marc_record->clone();
 384     my @all_errors = ();
 385     my @errors = _marc_iso5426_to_utf8($trial_marc8, $marc_flavour);
 386     unless (@errors) {
 387         return $trial_marc8, 'iso-5426';
 388     }
 389     push @all_errors, @errors;
 390
 391     my $trial_8859_1 = $marc_record->clone();
 392     @errors = _marc_to_utf8_via_text_iconv($trial_8859_1, $marc_flavour, 'iso-8859-1');
 393     unless (@errors) {
 394         return $trial_8859_1, 'iso-8859-1';
 395     }
 396     push @all_errors, @errors;
 397
 398     my $default_converted = $marc_record->clone();
 399     _marc_to_utf8_replacement_char($default_converted, $marc_flavour);
 400     return $default_converted, 'failed', \@all_errors;
 401 }
 402
 403 =head2 _marc_marc8_to_utf8
 404
 405 =over 4
 406
 407 my @errors = _marc_marc8_to_utf8($marc_record, $marc_flavour, $source_encoding);
 408
 409 =back
 410
 411 Convert a C<MARC::Record> to UTF-8 in-place from MARC-8.
 412 If the conversion fails for some reason, an
 413 appropriate messages will be placed in the returned
 414 C<@errors> array.
 415
 416 =cut
 417
 418 sub _marc_marc8_to_utf8 {
 419     my $marc_record = shift;
 420     my $marc_flavour = shift;
 421
 422     my $prev_ignore = MARC::Charset->ignore_errors();
 423     MARC::Charset->ignore_errors(1);
 424
 425     # trap warnings raised by MARC::Charset
 426     my @errors = ();
 427     local $SIG{__WARN__} = sub {
 428         my $msg = $_[0];
 429         if ($msg =~ /MARC.Charset/) {
 430             # FIXME - purpose of this regexp is to strip out the
 431             # line reference to MARC/Charset.pm, but as it
 432             # exists probably won't work quite on Windows --
 433             # some sort of minimal-bunch back-tracking RE
 434             # would be helpful here
 435             $msg =~ s/at [\/].*?.MARC.Charset\.pm line \d+\.\n$//;
 436             push @errors, $msg;
 437         } else {
 438             # if warning doesn't come from MARC::Charset, just
 439             # pass it on
 440             warn $msg;
 441         }
 442     };
 443
 444     foreach my $field ($marc_record->fields()) {
 445         if ($field->is_control_field()) {
 446             ; # do nothing -- control fields should not contain non-ASCII characters
 447         } else {
 448             my @converted_subfields;
 449             foreach my $subfield ($field->subfields()) {
 450                 my $utf8sf = MARC::Charset::marc8_to_utf8($subfield->[1]);
 451                 unless (IsStringUTF8ish($utf8sf)) {
 452                     # Because of a bug in MARC::Charset 0.98, if the string
 453                     # has (a) one or more diacritics that (b) are only in character positions
 454                     # 128 to 255 inclusive, the resulting converted string is not in
 455                     # UTF-8, but the legacy 8-bit encoding (e.g., ISO-8859-1).  If that
 456                     # occurs, upgrade the string in place.  Moral of the story seems to be
 457                     # that pack("U", ...) is better than chr(...) if you need to guarantee
 458                     # that the resulting string is UTF-8.
 459                     utf8::upgrade($utf8sf);
 460                 }
 461                 push @converted_subfields, $subfield->[0], $utf8sf;
 462             }
 463
 464             $field->replace_with(MARC::Field->new(
 465                 $field->tag(), $field->indicator(1), $field->indicator(2),
 466                 @converted_subfields)
 467             );
 468         }
 469     }
 470
 471     MARC::Charset->ignore_errors($prev_ignore);
 472
 473     SetMarcUnicodeFlag($marc_record, $marc_flavour);
 474
 475     return @errors;
 476 }
 477
 478 =head2 _marc_iso5426_to_utf8
 479
 480 =over 4
 481
 482 my @errors = _marc_iso5426_to_utf8($marc_record, $marc_flavour, $source_encoding);
 483
 484 =back
 485
 486 Convert a C<MARC::Record> to UTF-8 in-place from ISO-5426.
 487 If the conversion fails for some reason, an
 488 appropriate messages will be placed in the returned
 489 C<@errors> array.
 490
 491 FIXME - is ISO-5426 equivalent enough to MARC-8
 492 that C<MARC::Charset> can be used instead?
 493
 494 =cut
 495
 496 sub _marc_iso5426_to_utf8 {
 497     my $marc_record = shift;
 498     my $marc_flavour = shift;
 499
 500     my @errors = ();
 501
 502     foreach my $field ($marc_record->fields()) {
 503         if ($field->is_control_field()) {
 504             ; # do nothing -- control fields should not contain non-ASCII characters
 505         } else {
 506             my @converted_subfields;
 507             foreach my $subfield ($field->subfields()) {
 508                 my $utf8sf = char_decode5426($subfield->[1]);
 509                 push @converted_subfields, $subfield->[0], $utf8sf;
 510             }
 511
 512             $field->replace_with(MARC::Field->new(
 513                 $field->tag(), $field->indicator(1), $field->indicator(2),
 514                 @converted_subfields)
 515             );
 516         }
 517     }
 518
 519     SetMarcUnicodeFlag($marc_record, $marc_flavour);
 520
 521     return @errors;
 522 }
 523
 524 =head2 _marc_to_utf8_via_text_iconv
 525
 526 =over 4
 527
 528 my @errors = _marc_to_utf8_via_text_iconv($marc_record, $marc_flavour, $source_encoding);
 529
 530 =back
 531
 532 Convert a C<MARC::Record> to UTF-8 in-place using the
 533 C<Text::Iconv> CPAN module.  Any source encoding accepted
 534 by the user's iconv installation should work.  If
 535 the source encoding is not recognized on the user's
 536 server or the conversion fails for some reason,
 537 appropriate messages will be placed in the returned
 538 C<@errors> array.
 539
 540 =cut
 541
 542 sub _marc_to_utf8_via_text_iconv {
 543     my $marc_record = shift;
 544     my $marc_flavour = shift;
 545     my $source_encoding = shift;
 546
 547     my @errors = ();
 548     my $decoder;
 549     eval { $decoder = Text::Iconv->new($source_encoding, 'utf8'); };
 550     if ($@) {
 551         push @errors, "Could not initialze $source_encoding => utf8 converter: $@";
 552         return @errors;
 553     }
 554
 555     my $prev_raise_error = Text::Iconv->raise_error();
 556     Text::Iconv->raise_error(1);
 557
 558     foreach my $field ($marc_record->fields()) {
 559         if ($field->is_control_field()) {
 560             ; # do nothing -- control fields should not contain non-ASCII characters
 561         } else {
 562             my @converted_subfields;
 563             foreach my $subfield ($field->subfields()) {
 564                 my $converted_value;
 565                 my $conversion_ok = 1;
 566                 eval { $converted_value = $decoder->convert($subfield->[1]); };
 567                 if ($@) {
 568                     $conversion_ok = 0;
 569                     push @errors, $@;
 570                 } elsif (not defined $converted_value) {
 571                     $conversion_ok = 0;
 572                     push @errors, "Text::Iconv conversion failed - retval is " . $decoder->retval();
 573                 }
 574
 575                 if ($conversion_ok) {
 576                     push @converted_subfields, $subfield->[0], $converted_value;
 577                 } else {
 578                     $converted_value = $subfield->[1];
 579                     $converted_value =~ s/[\200-\377]/\xef\xbf\xbd/g;
 580                     push @converted_subfields, $subfield->[0], $converted_value;
 581                 }
 582             }
 583
 584             $field->replace_with(MARC::Field->new(
 585                 $field->tag(), $field->indicator(1), $field->indicator(2),
 586                 @converted_subfields)
 587             );
 588         }
 589     }
 590
 591     SetMarcUnicodeFlag($marc_record, $marc_flavour);
 592     Text::Iconv->raise_error($prev_raise_error);
 593
 594     return @errors;
 595 }
 596
 597 =head2 _marc_to_utf8_replacement_char
 598
 599 =over 4
 600
 601 _marc_to_utf8_replacement_char($marc_record, $marc_flavour);
 602
 603 =back
 604
 605 Convert a C<MARC::Record> to UTF-8 in-place, adopting the
 606 unsatisfactory method of replacing all non-ASCII (e.g.,
 607 where the eight bit is set) octet with the Unicode
 608 replacement character.  This is meant as a last-ditch
 609 method, and would be best used as part of a UI that
 610 lets a cataloguer pick various character conversions
 611 until he or she finds the right one.
 612
 613 =cut
 614
 615 sub _marc_to_utf8_replacement_char {
 616     my $marc_record = shift;
 617     my $marc_flavour = shift;
 618
 619     foreach my $field ($marc_record->fields()) {
 620         if ($field->is_control_field()) {
 621             ; # do nothing -- control fields should not contain non-ASCII characters
 622         } else {
 623             my @converted_subfields;
 624             foreach my $subfield ($field->subfields()) {
 625                 my $value = $subfield->[1];
 626                 $value =~ s/[\200-\377]/\xef\xbf\xbd/g;
 627                 push @converted_subfields, $subfield->[0], $value;
 628             }
 629
 630             $field->replace_with(MARC::Field->new(
 631                 $field->tag(), $field->indicator(1), $field->indicator(2),
 632                 @converted_subfields)
 633             );
 634         }
 635     }
 636
 637     SetMarcUnicodeFlag($marc_record, $marc_flavour);
 638 }
 639
 640 =head2 char_decode5426
 641
 642 =over 4
 643
 644 my $utf8string = char_decode5426($iso_5426_string);
 645
 646 =back
 647
 648 Converts a string from ISO-5426 to UTF-8.
 649
 650 =cut
 651
 652
 653 my %chars;
 654 $chars{0xb0}=0x0101;#3/0ayn[ain]
 655 $chars{0xb1}=0x0623;#3/1alif/hamzah[alefwithhamzaabove]
 656 #$chars{0xb2}=0x00e0;#'à';
 657 $chars{0xb2}=0x00e0;#3/2leftlowsinglequotationmark
 658 #$chars{0xb3}=0x00e7;#'ç';
 659 $chars{0xb3}=0x00e7;#3/2leftlowsinglequotationmark
 660 # $chars{0xb4}='è';
 661 $chars{0xb4}=0x00e8;
 662 # $chars{0xb5}='é';
 663 $chars{0xb5}=0x00e9;
 664 $chars{0x97}=0x003c;#3/2leftlowsinglequotationmark
 665 $chars{0x98}=0x003e;#3/2leftlowsinglequotationmark
 666 $chars{0xfa}=0x0153;#oe
 667 $chars{0x81d1}=0x00b0;
 668
 669 ####
 670 ## combined characters iso5426
 671
 672 $chars{0xc041}=0x1ea2; # capital a with hook above
 673 $chars{0xc045}=0x1eba; # capital e with hook above
 674 $chars{0xc049}=0x1ec8; # capital i with hook above
 675 $chars{0xc04f}=0x1ece; # capital o with hook above
 676 $chars{0xc055}=0x1ee6; # capital u with hook above
 677 $chars{0xc059}=0x1ef6; # capital y with hook above
 678 $chars{0xc061}=0x1ea3; # small a with hook above
 679 $chars{0xc065}=0x1ebb; # small e with hook above
 680 $chars{0xc069}=0x1ec9; # small i with hook above
 681 $chars{0xc06f}=0x1ecf; # small o with hook above
 682 $chars{0xc075}=0x1ee7; # small u with hook above
 683 $chars{0xc079}=0x1ef7; # small y with hook above
 684
 685         # 4/1 grave accent
 686 $chars{0xc141}=0x00c0; # capital a with grave accent
 687 $chars{0xc145}=0x00c8; # capital e with grave accent
 688 $chars{0xc149}=0x00cc; # capital i with grave accent
 689 $chars{0xc14f}=0x00d2; # capital o with grave accent
 690 $chars{0xc155}=0x00d9; # capital u with grave accent
 691 $chars{0xc157}=0x1e80; # capital w with grave
 692 $chars{0xc159}=0x1ef2; # capital y with grave
 693 $chars{0xc161}=0x00e0; # small a with grave accent
 694 $chars{0xc165}=0x00e8; # small e with grave accent
 695 $chars{0xc169}=0x00ec; # small i with grave accent
 696 $chars{0xc16f}=0x00f2; # small o with grave accent
 697 $chars{0xc175}=0x00f9; # small u with grave accent
 698 $chars{0xc177}=0x1e81; # small w with grave
 699 $chars{0xc179}=0x1ef3; # small y with grave
 700         # 4/2 acute accent
 701 $chars{0xc241}=0x00c1; # capital a with acute accent
 702 $chars{0xc243}=0x0106; # capital c with acute accent
 703 $chars{0xc245}=0x00c9; # capital e with acute accent
 704 $chars{0xc247}=0x01f4; # capital g with acute
 705 $chars{0xc249}=0x00cd; # capital i with acute accent
 706 $chars{0xc24b}=0x1e30; # capital k with acute
 707 $chars{0xc24c}=0x0139; # capital l with acute accent
 708 $chars{0xc24d}=0x1e3e; # capital m with acute
 709 $chars{0xc24e}=0x0143; # capital n with acute accent
 710 $chars{0xc24f}=0x00d3; # capital o with acute accent
 711 $chars{0xc250}=0x1e54; # capital p with acute
 712 $chars{0xc252}=0x0154; # capital r with acute accent
 713 $chars{0xc253}=0x015a; # capital s with acute accent
 714 $chars{0xc255}=0x00da; # capital u with acute accent
 715 $chars{0xc257}=0x1e82; # capital w with acute
 716 $chars{0xc259}=0x00dd; # capital y with acute accent
 717 $chars{0xc25a}=0x0179; # capital z with acute accent
 718 $chars{0xc261}=0x00e1; # small a with acute accent
 719 $chars{0xc263}=0x0107; # small c with acute accent
 720 $chars{0xc265}=0x00e9; # small e with acute accent
 721 $chars{0xc267}=0x01f5; # small g with acute
 722 $chars{0xc269}=0x00ed; # small i with acute accent
 723 $chars{0xc26b}=0x1e31; # small k with acute
 724 $chars{0xc26c}=0x013a; # small l with acute accent
 725 $chars{0xc26d}=0x1e3f; # small m with acute
 726 $chars{0xc26e}=0x0144; # small n with acute accent
 727 $chars{0xc26f}=0x00f3; # small o with acute accent
 728 $chars{0xc270}=0x1e55; # small p with acute
 729 $chars{0xc272}=0x0155; # small r with acute accent
 730 $chars{0xc273}=0x015b; # small s with acute accent
 731 $chars{0xc275}=0x00fa; # small u with acute accent
 732 $chars{0xc277}=0x1e83; # small w with acute
 733 $chars{0xc279}=0x00fd; # small y with acute accent
 734 $chars{0xc27a}=0x017a; # small z with acute accent
 735 $chars{0xc2e1}=0x01fc; # capital ae with acute
 736 $chars{0xc2f1}=0x01fd; # small ae with acute
 737        # 4/3 circumflex accent
 738 $chars{0xc341}=0x00c2; # capital a with circumflex accent
 739 $chars{0xc343}=0x0108; # capital c with circumflex
 740 $chars{0xc345}=0x00ca; # capital e with circumflex accent
 741 $chars{0xc347}=0x011c; # capital g with circumflex
 742 $chars{0xc348}=0x0124; # capital h with circumflex
 743 $chars{0xc349}=0x00ce; # capital i with circumflex accent
 744 $chars{0xc34a}=0x0134; # capital j with circumflex
 745 $chars{0xc34f}=0x00d4; # capital o with circumflex accent
 746 $chars{0xc353}=0x015c; # capital s with circumflex
 747 $chars{0xc355}=0x00db; # capital u with circumflex
 748 $chars{0xc357}=0x0174; # capital w with circumflex
 749 $chars{0xc359}=0x0176; # capital y with circumflex
 750 $chars{0xc35a}=0x1e90; # capital z with circumflex
 751 $chars{0xc361}=0x00e2; # small a with circumflex accent
 752 $chars{0xc363}=0x0109; # small c with circumflex
 753 $chars{0xc365}=0x00ea; # small e with circumflex accent
 754 $chars{0xc367}=0x011d; # small g with circumflex
 755 $chars{0xc368}=0x0125; # small h with circumflex
 756 $chars{0xc369}=0x00ee; # small i with circumflex accent
 757 $chars{0xc36a}=0x0135; # small j with circumflex
 758 $chars{0xc36e}=0x00f1; # small n with tilde
 759 $chars{0xc36f}=0x00f4; # small o with circumflex accent
 760 $chars{0xc373}=0x015d; # small s with circumflex
 761 $chars{0xc375}=0x00fb; # small u with circumflex
 762 $chars{0xc377}=0x0175; # small w with circumflex
 763 $chars{0xc379}=0x0177; # small y with circumflex
 764 $chars{0xc37a}=0x1e91; # small z with circumflex
 765         # 4/4 tilde
 766 $chars{0xc441}=0x00c3; # capital a with tilde
 767 $chars{0xc445}=0x1ebc; # capital e with tilde
 768 $chars{0xc449}=0x0128; # capital i with tilde
 769 $chars{0xc44e}=0x00d1; # capital n with tilde
 770 $chars{0xc44f}=0x00d5; # capital o with tilde
 771 $chars{0xc455}=0x0168; # capital u with tilde
 772 $chars{0xc456}=0x1e7c; # capital v with tilde
 773 $chars{0xc459}=0x1ef8; # capital y with tilde
 774 $chars{0xc461}=0x00e3; # small a with tilde
 775 $chars{0xc465}=0x1ebd; # small e with tilde
 776 $chars{0xc469}=0x0129; # small i with tilde
 777 $chars{0xc46e}=0x00f1; # small n with tilde
 778 $chars{0xc46f}=0x00f5; # small o with tilde
 779 $chars{0xc475}=0x0169; # small u with tilde
 780 $chars{0xc476}=0x1e7d; # small v with tilde
 781 $chars{0xc479}=0x1ef9; # small y with tilde
 782     # 4/5 macron
 783 $chars{0xc541}=0x0100; # capital a with macron
 784 $chars{0xc545}=0x0112; # capital e with macron
 785 $chars{0xc547}=0x1e20; # capital g with macron
 786 $chars{0xc549}=0x012a; # capital i with macron
 787 $chars{0xc54f}=0x014c; # capital o with macron
 788 $chars{0xc555}=0x016a; # capital u with macron
 789 $chars{0xc561}=0x0101; # small a with macron
 790 $chars{0xc565}=0x0113; # small e with macron
 791 $chars{0xc567}=0x1e21; # small g with macron
 792 $chars{0xc569}=0x012b; # small i with macron
 793 $chars{0xc56f}=0x014d; # small o with macron
 794 $chars{0xc575}=0x016b; # small u with macron
 795 $chars{0xc572}=0x0159; # small r with macron
 796 $chars{0xc5e1}=0x01e2; # capital ae with macron
 797 $chars{0xc5f1}=0x01e3; # small ae with macron
 798         # 4/6 breve
 799 $chars{0xc641}=0x0102; # capital a with breve
 800 $chars{0xc645}=0x0114; # capital e with breve
 801 $chars{0xc647}=0x011e; # capital g with breve
 802 $chars{0xc649}=0x012c; # capital i with breve
 803 $chars{0xc64f}=0x014e; # capital o with breve
 804 $chars{0xc655}=0x016c; # capital u with breve
 805 $chars{0xc661}=0x0103; # small a with breve
 806 $chars{0xc665}=0x0115; # small e with breve
 807 $chars{0xc667}=0x011f; # small g with breve
 808 $chars{0xc669}=0x012d; # small i with breve
 809 $chars{0xc66f}=0x014f; # small o with breve
 810 $chars{0xc675}=0x016d; # small u with breve
 811         # 4/7 dot above
 812 $chars{0xc7b0}=0x01e1; # Ain with dot above
 813 $chars{0xc742}=0x1e02; # capital b with dot above
 814 $chars{0xc743}=0x010a; # capital c with dot above
 815 $chars{0xc744}=0x1e0a; # capital d with dot above
 816 $chars{0xc745}=0x0116; # capital e with dot above
 817 $chars{0xc746}=0x1e1e; # capital f with dot above
 818 $chars{0xc747}=0x0120; # capital g with dot above
 819 $chars{0xc748}=0x1e22; # capital h with dot above
 820 $chars{0xc749}=0x0130; # capital i with dot above
 821 $chars{0xc74d}=0x1e40; # capital m with dot above
 822 $chars{0xc74e}=0x1e44; # capital n with dot above
 823 $chars{0xc750}=0x1e56; # capital p with dot above
 824 $chars{0xc752}=0x1e58; # capital r with dot above
 825 $chars{0xc753}=0x1e60; # capital s with dot above
 826 $chars{0xc754}=0x1e6a; # capital t with dot above
 827 $chars{0xc757}=0x1e86; # capital w with dot above
 828 $chars{0xc758}=0x1e8a; # capital x with dot above
 829 $chars{0xc759}=0x1e8e; # capital y with dot above
 830 $chars{0xc75a}=0x017b; # capital z with dot above
 831 $chars{0xc761}=0x0227; # small b with dot above
 832 $chars{0xc762}=0x1e03; # small b with dot above
 833 $chars{0xc763}=0x010b; # small c with dot above
 834 $chars{0xc764}=0x1e0b; # small d with dot above
 835 $chars{0xc765}=0x0117; # small e with dot above
 836 $chars{0xc766}=0x1e1f; # small f with dot above
 837 $chars{0xc767}=0x0121; # small g with dot above
 838 $chars{0xc768}=0x1e23; # small h with dot above
 839 $chars{0xc76d}=0x1e41; # small m with dot above
 840 $chars{0xc76e}=0x1e45; # small n with dot above
 841 $chars{0xc770}=0x1e57; # small p with dot above
 842 $chars{0xc772}=0x1e59; # small r with dot above
 843 $chars{0xc773}=0x1e61; # small s with dot above
 844 $chars{0xc774}=0x1e6b; # small t with dot above
 845 $chars{0xc777}=0x1e87; # small w with dot above
 846 $chars{0xc778}=0x1e8b; # small x with dot above
 847 $chars{0xc779}=0x1e8f; # small y with dot above
 848 $chars{0xc77a}=0x017c; # small z with dot above
 849         # 4/8 trema, diaresis
 850 $chars{0xc820}=0x00a8; # diaeresis
 851 $chars{0xc841}=0x00c4; # capital a with diaeresis
 852 $chars{0xc845}=0x00cb; # capital e with diaeresis
 853 $chars{0xc848}=0x1e26; # capital h with diaeresis
 854 $chars{0xc849}=0x00cf; # capital i with diaeresis
 855 $chars{0xc84f}=0x00d6; # capital o with diaeresis
 856 $chars{0xc855}=0x00dc; # capital u with diaeresis
 857 $chars{0xc857}=0x1e84; # capital w with diaeresis
 858 $chars{0xc858}=0x1e8c; # capital x with diaeresis
 859 $chars{0xc859}=0x0178; # capital y with diaeresis
 860 $chars{0xc861}=0x00e4; # small a with diaeresis
 861 $chars{0xc865}=0x00eb; # small e with diaeresis
 862 $chars{0xc868}=0x1e27; # small h with diaeresis
 863 $chars{0xc869}=0x00ef; # small i with diaeresis
 864 $chars{0xc86f}=0x00f6; # small o with diaeresis
 865 $chars{0xc874}=0x1e97; # small t with diaeresis
 866 $chars{0xc875}=0x00fc; # small u with diaeresis
 867 $chars{0xc877}=0x1e85; # small w with diaeresis
 868 $chars{0xc878}=0x1e8d; # small x with diaeresis
 869 $chars{0xc879}=0x00ff; # small y with diaeresis
 870         # 4/9 umlaut
 871 $chars{0xc920}=0x00a8; # [diaeresis]
 872 $chars{0xc961}=0x00e4; # a with umlaut
 873 $chars{0xc965}=0x00eb; # e with umlaut
 874 $chars{0xc969}=0x00ef; # i with umlaut
 875 $chars{0xc96f}=0x00f6; # o with umlaut
 876 $chars{0xc975}=0x00fc; # u with umlaut
 877         # 4/10 circle above
 878 $chars{0xca41}=0x00c5; # capital a with ring above
 879 $chars{0xcaad}=0x016e; # capital u with ring above
 880 $chars{0xca61}=0x00e5; # small a with ring above
 881 $chars{0xca75}=0x016f; # small u with ring above
 882 $chars{0xca77}=0x1e98; # small w with ring above
 883 $chars{0xca79}=0x1e99; # small y with ring above
 884         # 4/11 high comma off centre
 885         # 4/12 inverted high comma centred
 886         # 4/13 double acute accent
 887 $chars{0xcd4f}=0x0150; # capital o with double acute
 888 $chars{0xcd55}=0x0170; # capital u with double acute
 889 $chars{0xcd6f}=0x0151; # small o with double acute
 890 $chars{0xcd75}=0x0171; # small u with double acute
 891         # 4/14 horn
 892 $chars{0xce54}=0x01a0; # latin capital letter o with horn
 893 $chars{0xce55}=0x01af; # latin capital letter u with horn
 894 $chars{0xce74}=0x01a1; # latin small letter o with horn
 895 $chars{0xce75}=0x01b0; # latin small letter u with horn
 896         # 4/15 caron (hacek
 897 $chars{0xcf41}=0x01cd; # capital a with caron
 898 $chars{0xcf43}=0x010c; # capital c with caron
 899 $chars{0xcf44}=0x010e; # capital d with caron
 900 $chars{0xcf45}=0x011a; # capital e with caron
 901 $chars{0xcf47}=0x01e6; # capital g with caron
 902 $chars{0xcf49}=0x01cf; # capital i with caron
 903 $chars{0xcf4b}=0x01e8; # capital k with caron
 904 $chars{0xcf4c}=0x013d; # capital l with caron
 905 $chars{0xcf4e}=0x0147; # capital n with caron
 906 $chars{0xcf4f}=0x01d1; # capital o with caron
 907 $chars{0xcf52}=0x0158; # capital r with caron
 908 $chars{0xcf53}=0x0160; # capital s with caron
 909 $chars{0xcf54}=0x0164; # capital t with caron
 910 $chars{0xcf55}=0x01d3; # capital u with caron
 911 $chars{0xcf5a}=0x017d; # capital z with caron
 912 $chars{0xcf61}=0x01ce; # small a with caron
 913 $chars{0xcf63}=0x010d; # small c with caron
 914 $chars{0xcf64}=0x010f; # small d with caron
 915 $chars{0xcf65}=0x011b; # small e with caron
 916 $chars{0xcf67}=0x01e7; # small g with caron
 917 $chars{0xcf69}=0x01d0; # small i with caron
 918 $chars{0xcf6a}=0x01f0; # small j with caron
 919 $chars{0xcf6b}=0x01e9; # small k with caron
 920 $chars{0xcf6c}=0x013e; # small l with caron
 921 $chars{0xcf6e}=0x0148; # small n with caron
 922 $chars{0xcf6f}=0x01d2; # small o with caron
 923 $chars{0xcf72}=0x0159; # small r with caron
 924 $chars{0xcf73}=0x0161; # small s with caron
 925 $chars{0xcf74}=0x0165; # small t with caron
 926 $chars{0xcf75}=0x01d4; # small u with caron
 927 $chars{0xcf7a}=0x017e; # small z with caron
 928         # 5/0 cedilla
 929 $chars{0xd020}=0x00b8; # cedilla
 930 $chars{0xd043}=0x00c7; # capital c with cedilla
 931 $chars{0xd044}=0x1e10; # capital d with cedilla
 932 $chars{0xd047}=0x0122; # capital g with cedilla
 933 $chars{0xd048}=0x1e28; # capital h with cedilla
 934 $chars{0xd04b}=0x0136; # capital k with cedilla
 935 $chars{0xd04c}=0x013b; # capital l with cedilla
 936 $chars{0xd04e}=0x0145; # capital n with cedilla
 937 $chars{0xd052}=0x0156; # capital r with cedilla
 938 $chars{0xd053}=0x015e; # capital s with cedilla
 939 $chars{0xd054}=0x0162; # capital t with cedilla
 940 $chars{0xd063}=0x00e7; # small c with cedilla
 941 $chars{0xd064}=0x1e11; # small d with cedilla
 942 $chars{0xd065}=0x0119; # small e with cedilla
 943 $chars{0xd067}=0x0123; # small g with cedilla
 944 $chars{0xd068}=0x1e29; # small h with cedilla
 945 $chars{0xd06b}=0x0137; # small k with cedilla
 946 $chars{0xd06c}=0x013c; # small l with cedilla
 947 $chars{0xd06e}=0x0146; # small n with cedilla
 948 $chars{0xd072}=0x0157; # small r with cedilla
 949 $chars{0xd073}=0x015f; # small s with cedilla
 950 $chars{0xd074}=0x0163; # small t with cedilla
 951         # 5/1 rude
 952         # 5/2 hook to left
 953         # 5/3 ogonek (hook to right
 954 $chars{0xd320}=0x02db; # ogonek
 955 $chars{0xd341}=0x0104; # capital a with ogonek
 956 $chars{0xd345}=0x0118; # capital e with ogonek
 957 $chars{0xd349}=0x012e; # capital i with ogonek
 958 $chars{0xd34f}=0x01ea; # capital o with ogonek
 959 $chars{0xd355}=0x0172; # capital u with ogonek
 960 $chars{0xd361}=0x0105; # small a with ogonek
 961 $chars{0xd365}=0x0119; # small e with ogonek
 962 $chars{0xd369}=0x012f; # small i with ogonek
 963 $chars{0xd36f}=0x01eb; # small o with ogonek
 964 $chars{0xd375}=0x0173; # small u with ogonek
 965         # 5/4 circle below
 966 $chars{0xd441}=0x1e00; # capital a with ring below
 967 $chars{0xd461}=0x1e01; # small a with ring below
 968         # 5/5 half circle below
 969 $chars{0xf948}=0x1e2a; # capital h with breve below
 970 $chars{0xf968}=0x1e2b; # small h with breve below
 971         # 5/6 dot below
 972 $chars{0xd641}=0x1ea0; # capital a with dot below
 973 $chars{0xd642}=0x1e04; # capital b with dot below
 974 $chars{0xd644}=0x1e0c; # capital d with dot below
 975 $chars{0xd645}=0x1eb8; # capital e with dot below
 976 $chars{0xd648}=0x1e24; # capital h with dot below
 977 $chars{0xd649}=0x1eca; # capital i with dot below
 978 $chars{0xd64b}=0x1e32; # capital k with dot below
 979 $chars{0xd64c}=0x1e36; # capital l with dot below
 980 $chars{0xd64d}=0x1e42; # capital m with dot below
 981 $chars{0xd64e}=0x1e46; # capital n with dot below
 982 $chars{0xd64f}=0x1ecc; # capital o with dot below
 983 $chars{0xd652}=0x1e5a; # capital r with dot below
 984 $chars{0xd653}=0x1e62; # capital s with dot below
 985 $chars{0xd654}=0x1e6c; # capital t with dot below
 986 $chars{0xd655}=0x1ee4; # capital u with dot below
 987 $chars{0xd656}=0x1e7e; # capital v with dot below
 988 $chars{0xd657}=0x1e88; # capital w with dot below
 989 $chars{0xd659}=0x1ef4; # capital y with dot below
 990 $chars{0xd65a}=0x1e92; # capital z with dot below
 991 $chars{0xd661}=0x1ea1; # small a with dot below
 992 $chars{0xd662}=0x1e05; # small b with dot below
 993 $chars{0xd664}=0x1e0d; # small d with dot below
 994 $chars{0xd665}=0x1eb9; # small e with dot below
 995 $chars{0xd668}=0x1e25; # small h with dot below
 996 $chars{0xd669}=0x1ecb; # small i with dot below
 997 $chars{0xd66b}=0x1e33; # small k with dot below
 998 $chars{0xd66c}=0x1e37; # small l with dot below
 999 $chars{0xd66d}=0x1e43; # small m with dot below
1000 $chars{0xd66e}=0x1e47; # small n with dot below
1001 $chars{0xd66f}=0x1ecd; # small o with dot below
1002 $chars{0xd672}=0x1e5b; # small r with dot below
1003 $chars{0xd673}=0x1e63; # small s with dot below
1004 $chars{0xd674}=0x1e6d; # small t with dot below
1005 $chars{0xd675}=0x1ee5; # small u with dot below
1006 $chars{0xd676}=0x1e7f; # small v with dot below
1007 $chars{0xd677}=0x1e89; # small w with dot below
1008 $chars{0xd679}=0x1ef5; # small y with dot below
1009 $chars{0xd67a}=0x1e93; # small z with dot below
1010         # 5/7 double dot below
1011 $chars{0xd755}=0x1e72; # capital u with diaeresis below
1012 $chars{0xd775}=0x1e73; # small u with diaeresis below
1013         # 5/8 underline
1014 $chars{0xd820}=0x005f; # underline
1015         # 5/9 double underline
1016 $chars{0xd920}=0x2017; # double underline
1017         # 5/10 small low vertical bar
1018 $chars{0xda20}=0x02cc; #
1019         # 5/11 circumflex below
1020         # 5/12 (this position shall not be used)
1021         # 5/13 left half of ligature sign and of double tilde
1022         # 5/14 right half of ligature sign
1023         # 5/15 right half of double tilde
1024 #     map {printf "%x :%x\n",$_,$chars{$_};}keys %chars;
1025
1026 sub char_decode5426 {
1027     my ( $string) = @_;
1028     my $result;
1029
1030     my @data = unpack("C*", $string);
1031     my @characters;
1032     my $length=scalar(@data);
1033     for (my $i = 0; $i < scalar(@data); $i++) {
1034       my $char= $data[$i];
1035       if ($char >= 0x00 && $char <= 0x7F){
1036         #IsAscii
1037
1038           push @characters,$char unless ($char<0x02 ||$char== 0x0F);
1039       }elsif (($char >= 0xC0 && $char <= 0xDF)) {
1040         #Combined Char
1041         my $convchar ;
1042         if ($chars{$char*256+$data[$i+1]}) {
1043           $convchar= $chars{$char * 256 + $data[$i+1]};
1044           $i++;
1045 #           printf "char %x $char, char to convert %x , converted %x\n",$char,$char * 256 + $data[$i - 1],$convchar;
1046         } elsif ($chars{$char})  {
1047           $convchar= $chars{$char};
1048 #           printf "0xC char %x, converted %x\n",$char,$chars{$char};
1049         }else {
1050           $convchar=$char;
1051         }
1052         push @characters,$convchar;
1053       } else {
1054         my $convchar;
1055         if ($chars{$char})  {
1056           $convchar= $chars{$char};
1057 #            printf "char %x,  converted %x\n",$char,$chars{$char};
1058         }else {
1059 #            printf "char %x $char\n",$char;
1060           $convchar=$char;
1061         }
1062         push @characters,$convchar;
1063       }
1064     }
1065     $result=pack "U*",@characters;
1066 #     $result=~s/\x01//;
1067 #     $result=~s/\x00//;
1068      $result=~s/\x0f//;
1069      $result=~s/\x1b.//;
1070      $result=~s/\x0e//;
1071      $result=~s/\x1b\x5b//;
1072 #   map{printf "%x",$_} @characters;
1073 #   printf "\n";
1074   return $result;
1075 }
1076
1077 1;
1078
1079
1080 =head1 AUTHOR
1081
1082 Koha Development Team <info@koha.org>
1083
1084 Galen Charlton <galen.charlton@liblime.com>
1085
1086 =cut