C4/Charset.pm

   1 package C4::Charset;
   2
   3 # Copyright (C) 2008 LibLime
   4 #
   5 # This file is part of Koha.
   6 #
   7 # Koha is free software; you can redistribute it and/or modify it under the
   8 # terms of the GNU General Public License as published by the Free Software
   9 # Foundation; either version 2 of the License, or (at your option) any later
  10 # version.
  11 #
  12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  14 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License along with
  17 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  18 # Suite 330, Boston, MA  02111-1307 USA
  19
  20 use strict;
  21 use MARC::Charset qw/marc8_to_utf8/;
  22 use Text::Iconv;
  23
  24 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  25
  26 BEGIN {
  27     # set the version for version checking
  28     $VERSION = 3.01;
  29     require Exporter;
  30     @ISA    = qw(Exporter);
  31     @EXPORT = qw(
  32         IsStringUTF8ish
  33         MarcToUTF8Record
  34         SetMarcUnicodeFlag
  35         StripNonXmlChars
  36     );
  37 }
  38
  39 =head1 NAME
  40
  41 C4::Charset - utilities for handling character set conversions.
  42
  43 =head1 SYNOPSIS
  44
  45 use C4::Charset;
  46
  47 =head1 DESCRIPTION
  48
  49 This module contains routines for dealing with character set
  50 conversions, particularly for MARC records.
  51
  52 A variety of character encodings are in use by various MARC
  53 standards, and even more character encodings are used by
  54 non-standard MARC records.  The various MARC formats generally
  55 do not do a good job of advertising a given record's character
  56 encoding, and even when a record does advertise its encoding,
  57 e.g., via the Leader/09, experience has shown that one cannot
  58 trust it.
  59
  60 Ultimately, all MARC records are stored in Koha in UTF-8 and
  61 must be converted from whatever the source character encoding is.
  62 The goal of this module is to ensure that these conversions
  63 take place accurately.  When a character conversion cannot take
  64 place, or at least not accurately, the module was provide
  65 enough information to allow user-facing code to inform the user
  66 on how to deal with the situation.
  67
  68 =cut
  69
  70 =head1 FUNCTIONS
  71
  72 =head2 IsStringUTF8ish
  73
  74 =over 4
  75
  76 my $is_utf8 = IsStringUTF8ish($str);
  77
  78 =back
  79
  80 Determines if C<$str> is valid UTF-8.  This can mean
  81 one of two things:
  82
  83 =over 2
  84
  85 =item *
  86
  87 The Perl UTF-8 flag is set and the string contains valid UTF-8.
  88
  89 =item *
  90
  91 The Perl UTF-8 flag is B<not> set, but the octets contain
  92 valid UTF-8.
  93
  94 =back
  95
  96 The function is named C<IsStringUTF8ish> instead of C<IsStringUTF8>
  97 because in one could be presented with a MARC blob that is
  98 not actually in UTF-8 but whose sequence of octets appears to be
  99 valid UTF-8.  The rest of the MARC character conversion functions
 100 will assume that this situation occur does not very often.
 101
 102 =cut
 103
 104 sub IsStringUTF8ish {
 105     my $str = shift;
 106
 107     return 1 if utf8::is_utf8($str);
 108     return utf8::decode($str);
 109 }
 110
 111 =head2 MarcToUTF8Record
 112
 113 =over 4
 114
 115 ($marc_record, $converted_from, $errors_arrayref) = MarcToUTF8Record($marc_blob, $marc_flavour, [, $source_encoding]);
 116
 117 =back
 118
 119 Given a MARC blob or a C<MARC::Record>, the MARC flavour, and an
 120 optional source encoding, return a C<MARC::Record> that is
 121 converted to UTF-8.
 122
 123 The returned C<$marc_record> is guaranteed to be in valid UTF-8, but
 124 is not guaranteed to have been converted correctly.  Specifically,
 125 if C<$converted_from> is 'failed', the MARC record returned failed
 126 character conversion and had each of its non-ASCII octets changed
 127 to the Unicode replacement character.
 128
 129 If the source encoding was not specified, this routine will
 130 try to guess it; the character encoding used for a successful
 131 conversion is returned in C<$converted_from>.
 132
 133 =cut
 134
 135 sub MarcToUTF8Record {
 136     my $marc = shift;
 137     my $marc_flavour = shift;
 138     my $source_encoding = shift;
 139
 140     my $marc_record;
 141     my $marc_blob_is_utf8 = 0;
 142     if (ref($marc) eq 'MARC::Record') {
 143         my $marc_blob = $marc->as_usmarc();
 144         $marc_blob_is_utf8 = IsStringUTF8ish($marc_blob);
 145         $marc_record = $marc;
 146     } else {
 147         # dealing with a MARC blob
 148
 149         # remove any ersatz whitespace from the beginning and
 150         # end of the MARC blob -- these can creep into MARC
 151         # files produced by several sources -- caller really
 152         # should be doing this, however
 153         $marc =~ s/^\s+//;
 154         $marc =~ s/\s+$//;
 155         $marc_blob_is_utf8 = IsStringUTF8ish($marc);
 156         eval {
 157             $marc_record = MARC::Record->new_from_usmarc($marc);
 158         };
 159         if ($@) {
 160             # if we fail the first time, one likely problem
 161             # is that we have a MARC21 record that says that it's
 162             # UTF-8 (Leader/09 = 'a') but contains non-UTF-8 characters.
 163             # We'll try parsing it again.
 164             substr($marc, 9, 1) = ' ';
 165             eval {
 166                 $marc_record = MARC::Record->new_from_usmarc($marc);
 167             };
 168             if ($@) {
 169                 # it's hopeless; return an empty MARC::Record
 170                 return MARC::Record->new(), 'failed', ['could not parse MARC blob'];
 171             }
 172         }
 173     }
 174
 175     # If we do not know the source encoding, try some guesses
 176     # as follows:
 177     #   1. Record is UTF-8 already.
 178     #   2. If MARC flavor is MARC21, then
 179     #      a. record is MARC-8
 180     #      b. record is ISO-8859-1
 181     #   3. If MARC flavor is UNIMARC, then
 182     if (not defined $source_encoding) {
 183         if ($marc_blob_is_utf8) {
 184             # note that for MARC21 we are not bothering to check
 185             # if the Leader/09 is set to 'a' or not -- because
 186             # of problems with various ILSs (including Koha in the
 187             # past, alas), this just is not trustworthy.
 188             SetMarcUnicodeFlag($marc_record, $marc_flavour);
 189             return $marc_record, 'UTF-8', [];
 190         } else {
 191             if ($marc_flavour eq 'MARC21') {
 192                 return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
 193             } elsif ($marc_flavour eq 'UNIMARC') {
 194                 return _default_unimarc_charconv_to_utf8($marc_record, $marc_flavour);
 195             } else {
 196                 return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
 197             }
 198         }
 199     } else {
 200         # caller knows the character encoding
 201         my $original_marc_record = $marc_record->clone();
 202         my @errors;
 203         if ($source_encoding =~ /utf-?8/i) {
 204             if ($marc_blob_is_utf8) {
 205                 SetMarcUnicodeFlag($marc_record, $marc_flavour);
 206                 return $marc_record, 'UTF-8', [];
 207             } else {
 208                 push @errors, 'specified UTF-8 => UTF-8 conversion, but record is not in UTF-8';
 209             }
 210         } elsif ($source_encoding =~ /marc-?8/i) {
 211             @errors = _marc_marc8_to_utf8($marc_record, $marc_flavour);
 212         } elsif ($source_encoding =~ /5426/) {
 213             @errors = _marc_iso5426_to_utf8($marc_record, $marc_flavour);
 214         } else {
 215             # assume any other character encoding is for Text::Iconv
 216             @errors = _marc_to_utf8_via_text_iconv($marc_record, $marc_flavour, 'iso-8859-1');
 217         }
 218
 219         if (@errors) {
 220             _marc_to_utf8_replacement_char($original_marc_record, $marc_flavour);
 221             return $original_marc_record, 'failed', \@errors;
 222         } else {
 223             return $marc_record, $source_encoding, [];
 224         }
 225     }
 226
 227 }
 228
 229 =head2 SetMarcUnicodeFlag
 230
 231 =over 4
 232
 233 SetMarcUnicodeFlag($marc_record, $marc_flavour);
 234
 235 =back
 236
 237 Set both the internal MARC::Record encoding flag
 238 and the appropriate Leader/09 (MARC21) or
 239 100/26-29 (UNIMARC) to indicate that the record
 240 is in UTF-8.  Note that this does B<not> do
 241 any actual character conversion.
 242
 243 =cut
 244
 245 sub SetMarcUnicodeFlag {
 246     my $marc_record = shift;
 247     my $marc_flavour = shift; # || C4::Context->preference("marcflavour");
 248
 249     $marc_record->encoding('UTF-8');
 250     if ($marc_flavour eq 'MARC21') {
 251         my $leader = $marc_record->leader();
 252         substr($leader, 9, 1) = 'a';
 253         $marc_record->leader($leader);
 254     } elsif ($marc_flavour eq "UNIMARC") {
 255         if (my $field = $marc_record->field('100')) {
 256             my $sfa = $field->subfield('a');
 257
 258             my $subflength = 36;
 259             # fix the length of the field
 260             $sfa = substr $sfa, 0, $subflength if (length($sfa) > $subflength);
 261             $sfa = sprintf( "%-*s", 35, $sfa ) if (length($sfa) < $subflength);
 262
 263             substr($sfa, 26, 4) = '50  ';
 264             $field->update('a' => $sfa);
 265         }
 266     } else {
 267         warn "Unrecognized marcflavour: $marc_flavour";
 268     }
 269 }
 270
 271 =head2 StripNonXmlChars
 272
 273 =over 4
 274
 275 my $new_str = StripNonXmlChars($old_str);
 276
 277 =back
 278
 279 Given a string, return a copy with the
 280 characters that are illegal in XML
 281 removed.
 282
 283 This function exists to work around a problem
 284 that can occur with badly-encoded MARC records.
 285 Specifically, if a UTF-8 MARC record also
 286 has excape (\x1b) characters, MARC::File::XML
 287 will let the escape characters pass through
 288 when as_xml() or as_xml_record() is called.  The
 289 problem is that the escape character is not
 290 legal in well-formed XML documents, so when
 291 MARC::File::XML attempts to parse such a record,
 292 the XML parser will fail.
 293
 294 Stripping such characters will allow a
 295 MARC::Record->new_from_xml()
 296 to work, at the possible risk of some data loss.
 297
 298 =cut
 299
 300 sub StripNonXmlChars {
 301     my $str = shift;
 302     $str =~ s/[^\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]//g;
 303     return $str;
 304 }
 305
 306 =head1 INTERNAL FUNCTIONS
 307
 308 =head2 _default_marc21_charconv_to_utf8
 309
 310 =over 4
 311
 312 my ($new_marc_record, $guessed_charset) = _default_marc21_charconv_to_utf8($marc_record);
 313
 314 =back
 315
 316 Converts a C<MARC::Record> of unknown character set to UTF-8,
 317 first by trying a MARC-8 to UTF-8 conversion, then ISO-8859-1
 318 to UTF-8, then a default conversion that replaces each non-ASCII
 319 character with the replacement character.
 320
 321 The C<$guessed_charset> return value contains the character set
 322 that resulted in a conversion to valid UTF-8; note that
 323 if the MARC-8 and ISO-8859-1 conversions failed, the value of
 324 this is 'failed'.
 325
 326 =cut
 327
 328 sub _default_marc21_charconv_to_utf8 {
 329     my $marc_record = shift;
 330     my $marc_flavour = shift;
 331
 332     my $trial_marc8 = $marc_record->clone();
 333     my @all_errors = ();
 334     my @errors = _marc_marc8_to_utf8($trial_marc8, $marc_flavour);
 335     unless (@errors) {
 336         return $trial_marc8, 'MARC-8', [];
 337     }
 338     push @all_errors, @errors;
 339
 340     my $trial_8859_1 = $marc_record->clone();
 341     @errors = _marc_to_utf8_via_text_iconv($trial_8859_1, $marc_flavour, 'iso-8859-1');
 342     unless (@errors) {
 343         return $trial_8859_1, 'iso-8859-1', []; # note -- we could return \@all_errors
 344                                                 # instead if we wanted to report details
 345                                                 # of the failed attempt at MARC-8 => UTF-8
 346     }
 347     push @all_errors, @errors;
 348
 349     my $default_converted = $marc_record->clone();
 350     _marc_to_utf8_replacement_char($default_converted, $marc_flavour);
 351     return $default_converted, 'failed', \@all_errors;
 352 }
 353
 354 =head2 _default_unimarc_charconv_to_utf8
 355
 356 =over 4
 357
 358 my ($new_marc_record, $guessed_charset) = _default_unimarc_charconv_to_utf8($marc_record);
 359
 360 =back
 361
 362 Converts a C<MARC::Record> of unknown character set to UTF-8,
 363 first by trying a ISO-5426 to UTF-8 conversion, then ISO-8859-1
 364 to UTF-8, then a default conversion that replaces each non-ASCII
 365 character with the replacement character.
 366
 367 The C<$guessed_charset> return value contains the character set
 368 that resulted in a conversion to valid UTF-8; note that
 369 if the MARC-8 and ISO-8859-1 conversions failed, the value of
 370 this is 'failed'.
 371
 372 =cut
 373
 374 sub _default_unimarc_charconv_to_utf8 {
 375     my $marc_record = shift;
 376     my $marc_flavour = shift;
 377
 378     my $trial_marc8 = $marc_record->clone();
 379     my @all_errors = ();
 380     my @errors = _marc_iso5426_to_utf8($trial_marc8, $marc_flavour);
 381     unless (@errors) {
 382         return $trial_marc8, 'iso-5426';
 383     }
 384     push @all_errors, @errors;
 385
 386     my $trial_8859_1 = $marc_record->clone();
 387     @errors = _marc_to_utf8_via_text_iconv($trial_8859_1, $marc_flavour, 'iso-8859-1');
 388     unless (@errors) {
 389         return $trial_8859_1, 'iso-8859-1';
 390     }
 391     push @all_errors, @errors;
 392
 393     my $default_converted = $marc_record->clone();
 394     _marc_to_utf8_replacement_char($default_converted, $marc_flavour);
 395     return $default_converted, 'failed', \@all_errors;
 396 }
 397
 398 =head2 _marc_marc8_to_utf8
 399
 400 =over 4
 401
 402 my @errors = _marc_marc8_to_utf8($marc_record, $marc_flavour, $source_encoding);
 403
 404 =back
 405
 406 Convert a C<MARC::Record> to UTF-8 in-place from MARC-8.
 407 If the conversion fails for some reason, an
 408 appropriate messages will be placed in the returned
 409 C<@errors> array.
 410
 411 =cut
 412
 413 sub _marc_marc8_to_utf8 {
 414     my $marc_record = shift;
 415     my $marc_flavour = shift;
 416
 417     my $prev_ignore = MARC::Charset->ignore_errors();
 418     MARC::Charset->ignore_errors(1);
 419
 420     # trap warnings raised by MARC::Charset
 421     my @errors = ();
 422     local $SIG{__WARN__} = sub {
 423         my $msg = $_[0];
 424         if ($msg =~ /MARC.Charset/) {
 425             # FIXME - purpose of this regexp is to strip out the
 426             # line reference to MARC/Charset.pm, but as it
 427             # exists probably won't work quite on Windows --
 428             # some sort of minimal-bunch back-tracking RE
 429             # would be helpful here
 430             $msg =~ s/at [\/].*?.MARC.Charset\.pm line \d+\.\n$//;
 431             push @errors, $msg;
 432         } else {
 433             # if warning doesn't come from MARC::Charset, just
 434             # pass it on
 435             warn $msg;
 436         }
 437     };
 438
 439     foreach my $field ($marc_record->fields()) {
 440         if ($field->is_control_field()) {
 441             ; # do nothing -- control fields should not contain non-ASCII characters
 442         } else {
 443             my @converted_subfields;
 444             foreach my $subfield ($field->subfields()) {
 445                 my $utf8sf = MARC::Charset::marc8_to_utf8($subfield->[1]);
 446                 unless (IsStringUTF8ish($utf8sf)) {
 447                     # Because of a bug in MARC::Charset 0.98, if the string
 448                     # has (a) one or more diacritics that (b) are only in character positions
 449                     # 128 to 255 inclusive, the resulting converted string is not in
 450                     # UTF-8, but the legacy 8-bit encoding (e.g., ISO-8859-1).  If that
 451                     # occurs, upgrade the string in place.  Moral of the story seems to be
 452                     # that pack("U", ...) is better than chr(...) if you need to guarantee
 453                     # that the resulting string is UTF-8.
 454                     utf8::upgrade($utf8sf);
 455                 }
 456                 push @converted_subfields, $subfield->[0], $utf8sf;
 457             }
 458
 459             $field->replace_with(MARC::Field->new(
 460                 $field->tag(), $field->indicator(1), $field->indicator(2),
 461                 @converted_subfields)
 462             );
 463         }
 464     }
 465
 466     MARC::Charset->ignore_errors($prev_ignore);
 467
 468     SetMarcUnicodeFlag($marc_record, $marc_flavour);
 469
 470     return @errors;
 471 }
 472
 473 =head2 _marc_iso5426_to_utf8
 474
 475 =over 4
 476
 477 my @errors = _marc_iso5426_to_utf8($marc_record, $marc_flavour, $source_encoding);
 478
 479 =back
 480
 481 Convert a C<MARC::Record> to UTF-8 in-place from ISO-5426.
 482 If the conversion fails for some reason, an
 483 appropriate messages will be placed in the returned
 484 C<@errors> array.
 485
 486 FIXME - is ISO-5426 equivalent enough to MARC-8
 487 that C<MARC::Charset> can be used instead?
 488
 489 =cut
 490
 491 sub _marc_iso5426_to_utf8 {
 492     my $marc_record = shift;
 493     my $marc_flavour = shift;
 494
 495     my @errors = ();
 496
 497     foreach my $field ($marc_record->fields()) {
 498         if ($field->is_control_field()) {
 499             ; # do nothing -- control fields should not contain non-ASCII characters
 500         } else {
 501             my @converted_subfields;
 502             foreach my $subfield ($field->subfields()) {
 503                 my $utf8sf = char_decode5426($subfield->[1]);
 504                 push @converted_subfields, $subfield->[0], $utf8sf;
 505             }
 506
 507             $field->replace_with(MARC::Field->new(
 508                 $field->tag(), $field->indicator(1), $field->indicator(2),
 509                 @converted_subfields)
 510             );
 511         }
 512     }
 513
 514     SetMarcUnicodeFlag($marc_record, $marc_flavour);
 515
 516     return @errors;
 517 }
 518
 519 =head2 _marc_to_utf8_via_text_iconv
 520
 521 =over 4
 522
 523 my @errors = _marc_to_utf8_via_text_iconv($marc_record, $marc_flavour, $source_encoding);
 524
 525 =back
 526
 527 Convert a C<MARC::Record> to UTF-8 in-place using the
 528 C<Text::Iconv> CPAN module.  Any source encoding accepted
 529 by the user's iconv installation should work.  If
 530 the source encoding is not recognized on the user's
 531 server or the conversion fails for some reason,
 532 appropriate messages will be placed in the returned
 533 C<@errors> array.
 534
 535 =cut
 536
 537 sub _marc_to_utf8_via_text_iconv {
 538     my $marc_record = shift;
 539     my $marc_flavour = shift;
 540     my $source_encoding = shift;
 541
 542     my @errors = ();
 543     my $decoder;
 544     eval { $decoder = Text::Iconv->new($source_encoding, 'utf8'); };
 545     if ($@) {
 546         push @errors, "Could not initialze $source_encoding => utf8 converter: $@";
 547         return @errors;
 548     }
 549
 550     my $prev_raise_error = Text::Iconv->raise_error();
 551     Text::Iconv->raise_error(1);
 552
 553     foreach my $field ($marc_record->fields()) {
 554         if ($field->is_control_field()) {
 555             ; # do nothing -- control fields should not contain non-ASCII characters
 556         } else {
 557             my @converted_subfields;
 558             foreach my $subfield ($field->subfields()) {
 559                 my $converted_value;
 560                 my $conversion_ok = 1;
 561                 eval { $converted_value = $decoder->convert($subfield->[1]); };
 562                 if ($@) {
 563                     $conversion_ok = 0;
 564                     push @errors, $@;
 565                 } elsif (not defined $converted_value) {
 566                     $conversion_ok = 0;
 567                     push @errors, "Text::Iconv conversion failed - retval is " . $decoder->retval();
 568                 }
 569
 570                 if ($conversion_ok) {
 571                     push @converted_subfields, $subfield->[0], $converted_value;
 572                 } else {
 573                     $converted_value = $subfield->[1];
 574                     $converted_value =~ s/[\200-\377]/\xef\xbf\xbd/g;
 575                     push @converted_subfields, $subfield->[0], $converted_value;
 576                 }
 577             }
 578
 579             $field->replace_with(MARC::Field->new(
 580                 $field->tag(), $field->indicator(1), $field->indicator(2),
 581                 @converted_subfields)
 582             );
 583         }
 584     }
 585
 586     SetMarcUnicodeFlag($marc_record, $marc_flavour);
 587     Text::Iconv->raise_error($prev_raise_error);
 588
 589     return @errors;
 590 }
 591
 592 =head2 _marc_to_utf8_replacement_char
 593
 594 =over 4
 595
 596 _marc_to_utf8_replacement_char($marc_record, $marc_flavour);
 597
 598 =back
 599
 600 Convert a C<MARC::Record> to UTF-8 in-place, adopting the
 601 unsatisfactory method of replacing all non-ASCII (e.g.,
 602 where the eight bit is set) octet with the Unicode
 603 replacement character.  This is meant as a last-ditch
 604 method, and would be best used as part of a UI that
 605 lets a cataloguer pick various character conversions
 606 until he or she finds the right one.
 607
 608 =cut
 609
 610 sub _marc_to_utf8_replacement_char {
 611     my $marc_record = shift;
 612     my $marc_flavour = shift;
 613
 614     foreach my $field ($marc_record->fields()) {
 615         if ($field->is_control_field()) {
 616             ; # do nothing -- control fields should not contain non-ASCII characters
 617         } else {
 618             my @converted_subfields;
 619             foreach my $subfield ($field->subfields()) {
 620                 my $value = $subfield->[1];
 621                 $value =~ s/[\200-\377]/\xef\xbf\xbd/g;
 622                 push @converted_subfields, $subfield->[0], $value;
 623             }
 624
 625             $field->replace_with(MARC::Field->new(
 626                 $field->tag(), $field->indicator(1), $field->indicator(2),
 627                 @converted_subfields)
 628             );
 629         }
 630     }
 631
 632     SetMarcUnicodeFlag($marc_record, $marc_flavour);
 633 }
 634
 635 =head2 char_decode5426
 636
 637 =over 4
 638
 639 my $utf8string = char_decode5426($iso_5426_string);
 640
 641 =back
 642
 643 Converts a string from ISO-5426 to UTF-8.
 644
 645 =cut
 646
 647
 648 my %chars;
 649 $chars{0xb0}=0x0101;#3/0ayn[ain]
 650 $chars{0xb1}=0x0623;#3/1alif/hamzah[alefwithhamzaabove]
 651 #$chars{0xb2}=0x00e0;#'à';
 652 $chars{0xb2}=0x00e0;#3/2leftlowsinglequotationmark
 653 #$chars{0xb3}=0x00e7;#'ç';
 654 $chars{0xb3}=0x00e7;#3/2leftlowsinglequotationmark
 655 # $chars{0xb4}='è';
 656 $chars{0xb4}=0x00e8;
 657 # $chars{0xb5}='é';
 658 $chars{0xb5}=0x00e9;
 659 $chars{0x97}=0x003c;#3/2leftlowsinglequotationmark
 660 $chars{0x98}=0x003e;#3/2leftlowsinglequotationmark
 661 $chars{0xfa}=0x0153;#oe
 662 $chars{0x81d1}=0x00b0;
 663
 664 ####
 665 ## combined characters iso5426
 666
 667 $chars{0xc041}=0x1ea2; # capital a with hook above
 668 $chars{0xc045}=0x1eba; # capital e with hook above
 669 $chars{0xc049}=0x1ec8; # capital i with hook above
 670 $chars{0xc04f}=0x1ece; # capital o with hook above
 671 $chars{0xc055}=0x1ee6; # capital u with hook above
 672 $chars{0xc059}=0x1ef6; # capital y with hook above
 673 $chars{0xc061}=0x1ea3; # small a with hook above
 674 $chars{0xc065}=0x1ebb; # small e with hook above
 675 $chars{0xc069}=0x1ec9; # small i with hook above
 676 $chars{0xc06f}=0x1ecf; # small o with hook above
 677 $chars{0xc075}=0x1ee7; # small u with hook above
 678 $chars{0xc079}=0x1ef7; # small y with hook above
 679
 680         # 4/1 grave accent
 681 $chars{0xc141}=0x00c0; # capital a with grave accent
 682 $chars{0xc145}=0x00c8; # capital e with grave accent
 683 $chars{0xc149}=0x00cc; # capital i with grave accent
 684 $chars{0xc14f}=0x00d2; # capital o with grave accent
 685 $chars{0xc155}=0x00d9; # capital u with grave accent
 686 $chars{0xc157}=0x1e80; # capital w with grave
 687 $chars{0xc159}=0x1ef2; # capital y with grave
 688 $chars{0xc161}=0x00e0; # small a with grave accent
 689 $chars{0xc165}=0x00e8; # small e with grave accent
 690 $chars{0xc169}=0x00ec; # small i with grave accent
 691 $chars{0xc16f}=0x00f2; # small o with grave accent
 692 $chars{0xc175}=0x00f9; # small u with grave accent
 693 $chars{0xc177}=0x1e81; # small w with grave
 694 $chars{0xc179}=0x1ef3; # small y with grave
 695         # 4/2 acute accent
 696 $chars{0xc241}=0x00c1; # capital a with acute accent
 697 $chars{0xc243}=0x0106; # capital c with acute accent
 698 $chars{0xc245}=0x00c9; # capital e with acute accent
 699 $chars{0xc247}=0x01f4; # capital g with acute
 700 $chars{0xc249}=0x00cd; # capital i with acute accent
 701 $chars{0xc24b}=0x1e30; # capital k with acute
 702 $chars{0xc24c}=0x0139; # capital l with acute accent
 703 $chars{0xc24d}=0x1e3e; # capital m with acute
 704 $chars{0xc24e}=0x0143; # capital n with acute accent
 705 $chars{0xc24f}=0x00d3; # capital o with acute accent
 706 $chars{0xc250}=0x1e54; # capital p with acute
 707 $chars{0xc252}=0x0154; # capital r with acute accent
 708 $chars{0xc253}=0x015a; # capital s with acute accent
 709 $chars{0xc255}=0x00da; # capital u with acute accent
 710 $chars{0xc257}=0x1e82; # capital w with acute
 711 $chars{0xc259}=0x00dd; # capital y with acute accent
 712 $chars{0xc25a}=0x0179; # capital z with acute accent
 713 $chars{0xc261}=0x00e1; # small a with acute accent
 714 $chars{0xc263}=0x0107; # small c with acute accent
 715 $chars{0xc265}=0x00e9; # small e with acute accent
 716 $chars{0xc267}=0x01f5; # small g with acute
 717 $chars{0xc269}=0x00ed; # small i with acute accent
 718 $chars{0xc26b}=0x1e31; # small k with acute
 719 $chars{0xc26c}=0x013a; # small l with acute accent
 720 $chars{0xc26d}=0x1e3f; # small m with acute
 721 $chars{0xc26e}=0x0144; # small n with acute accent
 722 $chars{0xc26f}=0x00f3; # small o with acute accent
 723 $chars{0xc270}=0x1e55; # small p with acute
 724 $chars{0xc272}=0x0155; # small r with acute accent
 725 $chars{0xc273}=0x015b; # small s with acute accent
 726 $chars{0xc275}=0x00fa; # small u with acute accent
 727 $chars{0xc277}=0x1e83; # small w with acute
 728 $chars{0xc279}=0x00fd; # small y with acute accent
 729 $chars{0xc27a}=0x017a; # small z with acute accent
 730 $chars{0xc2e1}=0x01fc; # capital ae with acute
 731 $chars{0xc2f1}=0x01fd; # small ae with acute
 732        # 4/3 circumflex accent
 733 $chars{0xc341}=0x00c2; # capital a with circumflex accent
 734 $chars{0xc343}=0x0108; # capital c with circumflex
 735 $chars{0xc345}=0x00ca; # capital e with circumflex accent
 736 $chars{0xc347}=0x011c; # capital g with circumflex
 737 $chars{0xc348}=0x0124; # capital h with circumflex
 738 $chars{0xc349}=0x00ce; # capital i with circumflex accent
 739 $chars{0xc34a}=0x0134; # capital j with circumflex
 740 $chars{0xc34f}=0x00d4; # capital o with circumflex accent
 741 $chars{0xc353}=0x015c; # capital s with circumflex
 742 $chars{0xc355}=0x00db; # capital u with circumflex
 743 $chars{0xc357}=0x0174; # capital w with circumflex
 744 $chars{0xc359}=0x0176; # capital y with circumflex
 745 $chars{0xc35a}=0x1e90; # capital z with circumflex
 746 $chars{0xc361}=0x00e2; # small a with circumflex accent
 747 $chars{0xc363}=0x0109; # small c with circumflex
 748 $chars{0xc365}=0x00ea; # small e with circumflex accent
 749 $chars{0xc367}=0x011d; # small g with circumflex
 750 $chars{0xc368}=0x0125; # small h with circumflex
 751 $chars{0xc369}=0x00ee; # small i with circumflex accent
 752 $chars{0xc36a}=0x0135; # small j with circumflex
 753 $chars{0xc36e}=0x00f1; # small n with tilde
 754 $chars{0xc36f}=0x00f4; # small o with circumflex accent
 755 $chars{0xc373}=0x015d; # small s with circumflex
 756 $chars{0xc375}=0x00fb; # small u with circumflex
 757 $chars{0xc377}=0x0175; # small w with circumflex
 758 $chars{0xc379}=0x0177; # small y with circumflex
 759 $chars{0xc37a}=0x1e91; # small z with circumflex
 760         # 4/4 tilde
 761 $chars{0xc441}=0x00c3; # capital a with tilde
 762 $chars{0xc445}=0x1ebc; # capital e with tilde
 763 $chars{0xc449}=0x0128; # capital i with tilde
 764 $chars{0xc44e}=0x00d1; # capital n with tilde
 765 $chars{0xc44f}=0x00d5; # capital o with tilde
 766 $chars{0xc455}=0x0168; # capital u with tilde
 767 $chars{0xc456}=0x1e7c; # capital v with tilde
 768 $chars{0xc459}=0x1ef8; # capital y with tilde
 769 $chars{0xc461}=0x00e3; # small a with tilde
 770 $chars{0xc465}=0x1ebd; # small e with tilde
 771 $chars{0xc469}=0x0129; # small i with tilde
 772 $chars{0xc46e}=0x00f1; # small n with tilde
 773 $chars{0xc46f}=0x00f5; # small o with tilde
 774 $chars{0xc475}=0x0169; # small u with tilde
 775 $chars{0xc476}=0x1e7d; # small v with tilde
 776 $chars{0xc479}=0x1ef9; # small y with tilde
 777     # 4/5 macron
 778 $chars{0xc541}=0x0100; # capital a with macron
 779 $chars{0xc545}=0x0112; # capital e with macron
 780 $chars{0xc547}=0x1e20; # capital g with macron
 781 $chars{0xc549}=0x012a; # capital i with macron
 782 $chars{0xc54f}=0x014c; # capital o with macron
 783 $chars{0xc555}=0x016a; # capital u with macron
 784 $chars{0xc561}=0x0101; # small a with macron
 785 $chars{0xc565}=0x0113; # small e with macron
 786 $chars{0xc567}=0x1e21; # small g with macron
 787 $chars{0xc569}=0x012b; # small i with macron
 788 $chars{0xc56f}=0x014d; # small o with macron
 789 $chars{0xc575}=0x016b; # small u with macron
 790 $chars{0xc572}=0x0159; # small r with macron
 791 $chars{0xc5e1}=0x01e2; # capital ae with macron
 792 $chars{0xc5f1}=0x01e3; # small ae with macron
 793         # 4/6 breve
 794 $chars{0xc641}=0x0102; # capital a with breve
 795 $chars{0xc645}=0x0114; # capital e with breve
 796 $chars{0xc647}=0x011e; # capital g with breve
 797 $chars{0xc649}=0x012c; # capital i with breve
 798 $chars{0xc64f}=0x014e; # capital o with breve
 799 $chars{0xc655}=0x016c; # capital u with breve
 800 $chars{0xc661}=0x0103; # small a with breve
 801 $chars{0xc665}=0x0115; # small e with breve
 802 $chars{0xc667}=0x011f; # small g with breve
 803 $chars{0xc669}=0x012d; # small i with breve
 804 $chars{0xc66f}=0x014f; # small o with breve
 805 $chars{0xc675}=0x016d; # small u with breve
 806         # 4/7 dot above
 807 $chars{0xc7b0}=0x01e1; # Ain with dot above
 808 $chars{0xc742}=0x1e02; # capital b with dot above
 809 $chars{0xc743}=0x010a; # capital c with dot above
 810 $chars{0xc744}=0x1e0a; # capital d with dot above
 811 $chars{0xc745}=0x0116; # capital e with dot above
 812 $chars{0xc746}=0x1e1e; # capital f with dot above
 813 $chars{0xc747}=0x0120; # capital g with dot above
 814 $chars{0xc748}=0x1e22; # capital h with dot above
 815 $chars{0xc749}=0x0130; # capital i with dot above
 816 $chars{0xc74d}=0x1e40; # capital m with dot above
 817 $chars{0xc74e}=0x1e44; # capital n with dot above
 818 $chars{0xc750}=0x1e56; # capital p with dot above
 819 $chars{0xc752}=0x1e58; # capital r with dot above
 820 $chars{0xc753}=0x1e60; # capital s with dot above
 821 $chars{0xc754}=0x1e6a; # capital t with dot above
 822 $chars{0xc757}=0x1e86; # capital w with dot above
 823 $chars{0xc758}=0x1e8a; # capital x with dot above
 824 $chars{0xc759}=0x1e8e; # capital y with dot above
 825 $chars{0xc75a}=0x017b; # capital z with dot above
 826 $chars{0xc761}=0x0227; # small b with dot above
 827 $chars{0xc762}=0x1e03; # small b with dot above
 828 $chars{0xc763}=0x010b; # small c with dot above
 829 $chars{0xc764}=0x1e0b; # small d with dot above
 830 $chars{0xc765}=0x0117; # small e with dot above
 831 $chars{0xc766}=0x1e1f; # small f with dot above
 832 $chars{0xc767}=0x0121; # small g with dot above
 833 $chars{0xc768}=0x1e23; # small h with dot above
 834 $chars{0xc76d}=0x1e41; # small m with dot above
 835 $chars{0xc76e}=0x1e45; # small n with dot above
 836 $chars{0xc770}=0x1e57; # small p with dot above
 837 $chars{0xc772}=0x1e59; # small r with dot above
 838 $chars{0xc773}=0x1e61; # small s with dot above
 839 $chars{0xc774}=0x1e6b; # small t with dot above
 840 $chars{0xc777}=0x1e87; # small w with dot above
 841 $chars{0xc778}=0x1e8b; # small x with dot above
 842 $chars{0xc779}=0x1e8f; # small y with dot above
 843 $chars{0xc77a}=0x017c; # small z with dot above
 844         # 4/8 trema, diaresis
 845 $chars{0xc820}=0x00a8; # diaeresis
 846 $chars{0xc841}=0x00c4; # capital a with diaeresis
 847 $chars{0xc845}=0x00cb; # capital e with diaeresis
 848 $chars{0xc848}=0x1e26; # capital h with diaeresis
 849 $chars{0xc849}=0x00cf; # capital i with diaeresis
 850 $chars{0xc84f}=0x00d6; # capital o with diaeresis
 851 $chars{0xc855}=0x00dc; # capital u with diaeresis
 852 $chars{0xc857}=0x1e84; # capital w with diaeresis
 853 $chars{0xc858}=0x1e8c; # capital x with diaeresis
 854 $chars{0xc859}=0x0178; # capital y with diaeresis
 855 $chars{0xc861}=0x00e4; # small a with diaeresis
 856 $chars{0xc865}=0x00eb; # small e with diaeresis
 857 $chars{0xc868}=0x1e27; # small h with diaeresis
 858 $chars{0xc869}=0x00ef; # small i with diaeresis
 859 $chars{0xc86f}=0x00f6; # small o with diaeresis
 860 $chars{0xc874}=0x1e97; # small t with diaeresis
 861 $chars{0xc875}=0x00fc; # small u with diaeresis
 862 $chars{0xc877}=0x1e85; # small w with diaeresis
 863 $chars{0xc878}=0x1e8d; # small x with diaeresis
 864 $chars{0xc879}=0x00ff; # small y with diaeresis
 865         # 4/9 umlaut
 866 $chars{0xc920}=0x00a8; # [diaeresis]
 867 $chars{0xc961}=0x00e4; # a with umlaut
 868 $chars{0xc965}=0x00eb; # e with umlaut
 869 $chars{0xc969}=0x00ef; # i with umlaut
 870 $chars{0xc96f}=0x00f6; # o with umlaut
 871 $chars{0xc975}=0x00fc; # u with umlaut
 872         # 4/10 circle above
 873 $chars{0xca41}=0x00c5; # capital a with ring above
 874 $chars{0xcaad}=0x016e; # capital u with ring above
 875 $chars{0xca61}=0x00e5; # small a with ring above
 876 $chars{0xca75}=0x016f; # small u with ring above
 877 $chars{0xca77}=0x1e98; # small w with ring above
 878 $chars{0xca79}=0x1e99; # small y with ring above
 879         # 4/11 high comma off centre
 880         # 4/12 inverted high comma centred
 881         # 4/13 double acute accent
 882 $chars{0xcd4f}=0x0150; # capital o with double acute
 883 $chars{0xcd55}=0x0170; # capital u with double acute
 884 $chars{0xcd6f}=0x0151; # small o with double acute
 885 $chars{0xcd75}=0x0171; # small u with double acute
 886         # 4/14 horn
 887 $chars{0xce54}=0x01a0; # latin capital letter o with horn
 888 $chars{0xce55}=0x01af; # latin capital letter u with horn
 889 $chars{0xce74}=0x01a1; # latin small letter o with horn
 890 $chars{0xce75}=0x01b0; # latin small letter u with horn
 891         # 4/15 caron (hacek
 892 $chars{0xcf41}=0x01cd; # capital a with caron
 893 $chars{0xcf43}=0x010c; # capital c with caron
 894 $chars{0xcf44}=0x010e; # capital d with caron
 895 $chars{0xcf45}=0x011a; # capital e with caron
 896 $chars{0xcf47}=0x01e6; # capital g with caron
 897 $chars{0xcf49}=0x01cf; # capital i with caron
 898 $chars{0xcf4b}=0x01e8; # capital k with caron
 899 $chars{0xcf4c}=0x013d; # capital l with caron
 900 $chars{0xcf4e}=0x0147; # capital n with caron
 901 $chars{0xcf4f}=0x01d1; # capital o with caron
 902 $chars{0xcf52}=0x0158; # capital r with caron
 903 $chars{0xcf53}=0x0160; # capital s with caron
 904 $chars{0xcf54}=0x0164; # capital t with caron
 905 $chars{0xcf55}=0x01d3; # capital u with caron
 906 $chars{0xcf5a}=0x017d; # capital z with caron
 907 $chars{0xcf61}=0x01ce; # small a with caron
 908 $chars{0xcf63}=0x010d; # small c with caron
 909 $chars{0xcf64}=0x010f; # small d with caron
 910 $chars{0xcf65}=0x011b; # small e with caron
 911 $chars{0xcf67}=0x01e7; # small g with caron
 912 $chars{0xcf69}=0x01d0; # small i with caron
 913 $chars{0xcf6a}=0x01f0; # small j with caron
 914 $chars{0xcf6b}=0x01e9; # small k with caron
 915 $chars{0xcf6c}=0x013e; # small l with caron
 916 $chars{0xcf6e}=0x0148; # small n with caron
 917 $chars{0xcf6f}=0x01d2; # small o with caron
 918 $chars{0xcf72}=0x0159; # small r with caron
 919 $chars{0xcf73}=0x0161; # small s with caron
 920 $chars{0xcf74}=0x0165; # small t with caron
 921 $chars{0xcf75}=0x01d4; # small u with caron
 922 $chars{0xcf7a}=0x017e; # small z with caron
 923         # 5/0 cedilla
 924 $chars{0xd020}=0x00b8; # cedilla
 925 $chars{0xd043}=0x00c7; # capital c with cedilla
 926 $chars{0xd044}=0x1e10; # capital d with cedilla
 927 $chars{0xd047}=0x0122; # capital g with cedilla
 928 $chars{0xd048}=0x1e28; # capital h with cedilla
 929 $chars{0xd04b}=0x0136; # capital k with cedilla
 930 $chars{0xd04c}=0x013b; # capital l with cedilla
 931 $chars{0xd04e}=0x0145; # capital n with cedilla
 932 $chars{0xd052}=0x0156; # capital r with cedilla
 933 $chars{0xd053}=0x015e; # capital s with cedilla
 934 $chars{0xd054}=0x0162; # capital t with cedilla
 935 $chars{0xd063}=0x00e7; # small c with cedilla
 936 $chars{0xd064}=0x1e11; # small d with cedilla
 937 $chars{0xd065}=0x0119; # small e with cedilla
 938 $chars{0xd067}=0x0123; # small g with cedilla
 939 $chars{0xd068}=0x1e29; # small h with cedilla
 940 $chars{0xd06b}=0x0137; # small k with cedilla
 941 $chars{0xd06c}=0x013c; # small l with cedilla
 942 $chars{0xd06e}=0x0146; # small n with cedilla
 943 $chars{0xd072}=0x0157; # small r with cedilla
 944 $chars{0xd073}=0x015f; # small s with cedilla
 945 $chars{0xd074}=0x0163; # small t with cedilla
 946         # 5/1 rude
 947         # 5/2 hook to left
 948         # 5/3 ogonek (hook to right
 949 $chars{0xd320}=0x02db; # ogonek
 950 $chars{0xd341}=0x0104; # capital a with ogonek
 951 $chars{0xd345}=0x0118; # capital e with ogonek
 952 $chars{0xd349}=0x012e; # capital i with ogonek
 953 $chars{0xd34f}=0x01ea; # capital o with ogonek
 954 $chars{0xd355}=0x0172; # capital u with ogonek
 955 $chars{0xd361}=0x0105; # small a with ogonek
 956 $chars{0xd365}=0x0119; # small e with ogonek
 957 $chars{0xd369}=0x012f; # small i with ogonek
 958 $chars{0xd36f}=0x01eb; # small o with ogonek
 959 $chars{0xd375}=0x0173; # small u with ogonek
 960         # 5/4 circle below
 961 $chars{0xd441}=0x1e00; # capital a with ring below
 962 $chars{0xd461}=0x1e01; # small a with ring below
 963         # 5/5 half circle below
 964 $chars{0xf948}=0x1e2a; # capital h with breve below
 965 $chars{0xf968}=0x1e2b; # small h with breve below
 966         # 5/6 dot below
 967 $chars{0xd641}=0x1ea0; # capital a with dot below
 968 $chars{0xd642}=0x1e04; # capital b with dot below
 969 $chars{0xd644}=0x1e0c; # capital d with dot below
 970 $chars{0xd645}=0x1eb8; # capital e with dot below
 971 $chars{0xd648}=0x1e24; # capital h with dot below
 972 $chars{0xd649}=0x1eca; # capital i with dot below
 973 $chars{0xd64b}=0x1e32; # capital k with dot below
 974 $chars{0xd64c}=0x1e36; # capital l with dot below
 975 $chars{0xd64d}=0x1e42; # capital m with dot below
 976 $chars{0xd64e}=0x1e46; # capital n with dot below
 977 $chars{0xd64f}=0x1ecc; # capital o with dot below
 978 $chars{0xd652}=0x1e5a; # capital r with dot below
 979 $chars{0xd653}=0x1e62; # capital s with dot below
 980 $chars{0xd654}=0x1e6c; # capital t with dot below
 981 $chars{0xd655}=0x1ee4; # capital u with dot below
 982 $chars{0xd656}=0x1e7e; # capital v with dot below
 983 $chars{0xd657}=0x1e88; # capital w with dot below
 984 $chars{0xd659}=0x1ef4; # capital y with dot below
 985 $chars{0xd65a}=0x1e92; # capital z with dot below
 986 $chars{0xd661}=0x1ea1; # small a with dot below
 987 $chars{0xd662}=0x1e05; # small b with dot below
 988 $chars{0xd664}=0x1e0d; # small d with dot below
 989 $chars{0xd665}=0x1eb9; # small e with dot below
 990 $chars{0xd668}=0x1e25; # small h with dot below
 991 $chars{0xd669}=0x1ecb; # small i with dot below
 992 $chars{0xd66b}=0x1e33; # small k with dot below
 993 $chars{0xd66c}=0x1e37; # small l with dot below
 994 $chars{0xd66d}=0x1e43; # small m with dot below
 995 $chars{0xd66e}=0x1e47; # small n with dot below
 996 $chars{0xd66f}=0x1ecd; # small o with dot below
 997 $chars{0xd672}=0x1e5b; # small r with dot below
 998 $chars{0xd673}=0x1e63; # small s with dot below
 999 $chars{0xd674}=0x1e6d; # small t with dot below
1000 $chars{0xd675}=0x1ee5; # small u with dot below
1001 $chars{0xd676}=0x1e7f; # small v with dot below
1002 $chars{0xd677}=0x1e89; # small w with dot below
1003 $chars{0xd679}=0x1ef5; # small y with dot below
1004 $chars{0xd67a}=0x1e93; # small z with dot below
1005         # 5/7 double dot below
1006 $chars{0xd755}=0x1e72; # capital u with diaeresis below
1007 $chars{0xd775}=0x1e73; # small u with diaeresis below
1008         # 5/8 underline
1009 $chars{0xd820}=0x005f; # underline
1010         # 5/9 double underline
1011 $chars{0xd920}=0x2017; # double underline
1012         # 5/10 small low vertical bar
1013 $chars{0xda20}=0x02cc; #
1014         # 5/11 circumflex below
1015         # 5/12 (this position shall not be used)
1016         # 5/13 left half of ligature sign and of double tilde
1017         # 5/14 right half of ligature sign
1018         # 5/15 right half of double tilde
1019 #     map {printf "%x :%x\n",$_,$chars{$_};}keys %chars;
1020
1021 sub char_decode5426 {
1022     my ( $string) = @_;
1023     my $result;
1024
1025     my @data = unpack("C*", $string);
1026     my @characters;
1027     my $length=scalar(@data);
1028     for (my $i = 0; $i < scalar(@data); $i++) {
1029       my $char= $data[$i];
1030       if ($char >= 0x00 && $char <= 0x7F){
1031         #IsAscii
1032
1033           push @characters,$char unless ($char<0x02 ||$char== 0x0F);
1034       }elsif (($char >= 0xC0 && $char <= 0xDF)) {
1035         #Combined Char
1036         my $convchar ;
1037         if ($chars{$char*256+$data[$i+1]}) {
1038           $convchar= $chars{$char * 256 + $data[$i+1]};
1039           $i++;
1040 #           printf "char %x $char, char to convert %x , converted %x\n",$char,$char * 256 + $data[$i - 1],$convchar;
1041         } elsif ($chars{$char})  {
1042           $convchar= $chars{$char};
1043 #           printf "0xC char %x, converted %x\n",$char,$chars{$char};
1044         }else {
1045           $convchar=$char;
1046         }
1047         push @characters,$convchar;
1048       } else {
1049         my $convchar;
1050         if ($chars{$char})  {
1051           $convchar= $chars{$char};
1052 #            printf "char %x,  converted %x\n",$char,$chars{$char};
1053         }else {
1054 #            printf "char %x $char\n",$char;
1055           $convchar=$char;
1056         }
1057         push @characters,$convchar;
1058       }
1059     }
1060     $result=pack "U*",@characters;
1061 #     $result=~s/\x01//;
1062 #     $result=~s/\x00//;
1063      $result=~s/\x0f//;
1064      $result=~s/\x1b.//;
1065      $result=~s/\x0e//;
1066      $result=~s/\x1b\x5b//;
1067 #   map{printf "%x",$_} @characters;
1068 #   printf "\n";
1069   return $result;
1070 }
1071
1072 1;
1073
1074
1075 =head1 AUTHOR
1076
1077 Koha Development Team <info@koha.org>
1078
1079 Galen Charlton <galen.charlton@liblime.com>
1080
1081 =cut