From b5a106078808677b538291266efa791365ec0db8 Mon Sep 17 00:00:00 2001 From: Henri-Damien LAURENT Date: Tue, 8 Jan 2008 16:43:22 -0600 Subject: [PATCH] Improving encoding Support for z3950 clients. Adding encoding for z3950 server information. Uses Text::Iconv for conversion (ISO6937 and ISO_5428 and ISO5427) For ISO 5426 (ANSEL or MARC-8) new char_decode5426 based on marc4j tool. Not Tested on LOC or any USMARC z3950 source. But tested OK on BNF and SUDOC. Signed-off-by: Chris Cormack Signed-off-by: Joshua Ferraro --- C4/Biblio.pm | 202 +----- C4/Breeding.pm | 7 +- C4/Koha.pm | 573 +++++++++++++++--- admin/z3950servers.pl | 20 +- installer/data/mysql/kohastructure.sql | 1 + .../prog/en/modules/admin/z3950servers.tmpl | 14 +- 6 files changed, 530 insertions(+), 287 deletions(-) diff --git a/C4/Biblio.pm b/C4/Biblio.pm index e96e1e5a4f..3c60dc1881 100755 --- a/C4/Biblio.pm +++ b/C4/Biblio.pm @@ -40,6 +40,7 @@ BEGIN { @ISA = qw( Exporter ); # to add biblios +# EXPORTED FUNCTIONS. push @EXPORT, qw( &AddBiblio ); @@ -60,7 +61,6 @@ BEGIN { &GetMarcSeries GetMarcUrls &GetUsedMarcStructure - &GetXmlBiblio &GetAuthorisedValueDesc @@ -77,12 +77,10 @@ BEGIN { &ModBiblioframework &ModZebra ); - # To delete something push @EXPORT, qw( &DelBiblio ); - # Internal functions # those functions are exported but should not be used # they are usefull is few circumstances, so are exported. @@ -90,7 +88,6 @@ BEGIN { push @EXPORT, qw( &ModBiblioMarc ); - # Others functions push @EXPORT, qw( &TransformMarcToKoha @@ -98,7 +95,6 @@ BEGIN { &TransformHtmlToMarc &TransformHtmlToXml &PrepareItemrecordDisplay - &char_decode &GetNoZebraIndexes ); } @@ -1724,202 +1720,6 @@ sub TransformMarcToKohaOneField { =head1 OTHER FUNCTIONS -=head2 char_decode - -=over 4 - -my $string = char_decode( $string, $encoding ); - -converts ISO 5426 coded string to UTF-8 -sloppy code : should be improved in next issue - -=back - -=cut - -sub char_decode { - my ( $string, $encoding ) = @_; - $_ = $string; - - $encoding = C4::Context->preference("marcflavour") unless $encoding; - if ( $encoding eq "UNIMARC" ) { - - # s/\xe1/Æ/gm; - s/\xe2/Ğ/gm; - s/\xe9/Ø/gm; - s/\xec/ş/gm; - s/\xf1/æ/gm; - s/\xf3/ğ/gm; - s/\xf9/ø/gm; - s/\xfb/ß/gm; - s/\xc1\x61/à/gm; - s/\xc1\x65/è/gm; - s/\xc1\x69/ì/gm; - s/\xc1\x6f/ò/gm; - s/\xc1\x75/ù/gm; - s/\xc1\x41/À/gm; - s/\xc1\x45/È/gm; - s/\xc1\x49/Ì/gm; - s/\xc1\x4f/Ò/gm; - s/\xc1\x55/Ù/gm; - s/\xc2\x41/Á/gm; - s/\xc2\x45/É/gm; - s/\xc2\x49/Í/gm; - s/\xc2\x4f/Ó/gm; - s/\xc2\x55/Ú/gm; - s/\xc2\x59/İ/gm; - s/\xc2\x61/á/gm; - s/\xc2\x65/é/gm; - s/\xc2\x69/í/gm; - s/\xc2\x6f/ó/gm; - s/\xc2\x75/ú/gm; - s/\xc2\x79/ı/gm; - s/\xc3\x41/Â/gm; - s/\xc3\x45/Ê/gm; - s/\xc3\x49/Î/gm; - s/\xc3\x4f/Ô/gm; - s/\xc3\x55/Û/gm; - s/\xc3\x61/â/gm; - s/\xc3\x65/ê/gm; - s/\xc3\x69/î/gm; - s/\xc3\x6f/ô/gm; - s/\xc3\x75/û/gm; - s/\xc4\x41/Ã/gm; - s/\xc4\x4e/Ñ/gm; - s/\xc4\x4f/Õ/gm; - s/\xc4\x61/ã/gm; - s/\xc4\x6e/ñ/gm; - s/\xc4\x6f/õ/gm; - s/\xc8\x41/Ä/gm; - s/\xc8\x45/Ë/gm; - s/\xc8\x49/Ï/gm; - s/\xc8\x61/ä/gm; - s/\xc8\x65/ë/gm; - s/\xc8\x69/ï/gm; - s/\xc8\x6F/ö/gm; - s/\xc8\x75/ü/gm; - s/\xc8\x76/ÿ/gm; - s/\xc9\x41/Ä/gm; - s/\xc9\x45/Ë/gm; - s/\xc9\x49/Ï/gm; - s/\xc9\x4f/Ö/gm; - s/\xc9\x55/Ü/gm; - s/\xc9\x61/ä/gm; - s/\xc9\x6f/ö/gm; - s/\xc9\x75/ü/gm; - s/\xca\x41/Å/gm; - s/\xca\x61/å/gm; - s/\xd0\x43/Ç/gm; - s/\xd0\x63/ç/gm; - - # this handles non-sorting blocks (if implementation requires this) - $string = nsb_clean($_); - } - elsif ( $encoding eq "USMARC" || $encoding eq "MARC21" ) { - ##MARC-8 to UTF-8 - - s/\xe1\x61/à/gm; - s/\xe1\x65/è/gm; - s/\xe1\x69/ì/gm; - s/\xe1\x6f/ò/gm; - s/\xe1\x75/ù/gm; - s/\xe1\x41/À/gm; - s/\xe1\x45/È/gm; - s/\xe1\x49/Ì/gm; - s/\xe1\x4f/Ò/gm; - s/\xe1\x55/Ù/gm; - s/\xe2\x41/Á/gm; - s/\xe2\x45/É/gm; - s/\xe2\x49/Í/gm; - s/\xe2\x4f/Ó/gm; - s/\xe2\x55/Ú/gm; - s/\xe2\x59/İ/gm; - s/\xe2\x61/á/gm; - s/\xe2\x65/é/gm; - s/\xe2\x69/í/gm; - s/\xe2\x6f/ó/gm; - s/\xe2\x75/ú/gm; - s/\xe2\x79/ı/gm; - s/\xe3\x41/Â/gm; - s/\xe3\x45/Ê/gm; - s/\xe3\x49/Î/gm; - s/\xe3\x4f/Ô/gm; - s/\xe3\x55/Û/gm; - s/\xe3\x61/â/gm; - s/\xe3\x65/ê/gm; - s/\xe3\x69/î/gm; - s/\xe3\x6f/ô/gm; - s/\xe3\x75/û/gm; - s/\xe4\x41/Ã/gm; - s/\xe4\x4e/Ñ/gm; - s/\xe4\x4f/Õ/gm; - s/\xe4\x61/ã/gm; - s/\xe4\x6e/ñ/gm; - s/\xe4\x6f/õ/gm; - s/\xe6\x41/Ă/gm; - s/\xe6\x45/Ĕ/gm; - s/\xe6\x65/ĕ/gm; - s/\xe6\x61/ă/gm; - s/\xe8\x45/Ë/gm; - s/\xe8\x49/Ï/gm; - s/\xe8\x65/ë/gm; - s/\xe8\x69/ï/gm; - s/\xe8\x76/ÿ/gm; - s/\xe9\x41/A/gm; - s/\xe9\x4f/O/gm; - s/\xe9\x55/U/gm; - s/\xe9\x61/a/gm; - s/\xe9\x6f/o/gm; - s/\xe9\x75/u/gm; - s/\xea\x41/A/gm; - s/\xea\x61/a/gm; - - #Additional Turkish characters - s/\x1b//gm; - s/\x1e//gm; - s/(\xf0)s/\xc5\x9f/gm; - s/(\xf0)S/\xc5\x9e/gm; - s/(\xf0)c/ç/gm; - s/(\xf0)C/Ç/gm; - s/\xe7\x49/\\xc4\xb0/gm; - s/(\xe6)G/\xc4\x9e/gm; - s/(\xe6)g/ğ\xc4\x9f/gm; - s/\xB8/ı/gm; - s/\xB9/£/gm; - s/(\xe8|\xc8)o/ö/gm; - s/(\xe8|\xc8)O/Ö/gm; - s/(\xe8|\xc8)u/ü/gm; - s/(\xe8|\xc8)U/Ü/gm; - s/\xc2\xb8/\xc4\xb1/gm; - s/¸/\xc4\xb1/gm; - - # this handles non-sorting blocks (if implementation requires this) - $string = nsb_clean($_); - } - return ($string); -} - -=head2 nsb_clean - -=over 4 - -my $string = nsb_clean( $string, $encoding ); - -=back - -=cut - -sub nsb_clean { - my $NSB = '\x88'; # NSB : begin Non Sorting Block - my $NSE = '\x89'; # NSE : Non Sorting Block end - # handles non sorting blocks - my ($string) = @_; - $_ = $string; - s/$NSB/(/gm; - s/[ ]{0,1}$NSE/) /gm; - $string = $_; - return ($string); -} =head2 PrepareItemrecordDisplay diff --git a/C4/Breeding.pm b/C4/Breeding.pm index 9f09c26e40..7e8ca8afac 100644 --- a/C4/Breeding.pm +++ b/C4/Breeding.pm @@ -86,7 +86,7 @@ sub ImportBreeding { # FIXME -- not sure that this kind of checking is actually needed my $searchbreeding = $dbh->prepare("select import_record_id from import_biblios where isbn=? and title=?"); - $encoding = C4::Context->preference("marcflavour") unless $encoding; +# $encoding = C4::Context->preference("marcflavour") unless $encoding; # fields used for import results my $imported=0; my $alreadyindb = 0; @@ -94,8 +94,9 @@ sub ImportBreeding { my $notmarcrecord = 0; my $breedingid; for (my $i=0;$i<=$#marcarray;$i++) { - my $marcrecord = FixEncoding($marcarray[$i]."\x1D"); - + my $marcrecord = FixEncoding($marcarray[$i]."\x1D",$encoding); +# warn "$i : $marcarray[$i]"; +# warn "FixEncoding : ".$marcrecord->as_formatted; # FIXME - currently this does nothing my @warnings = $marcrecord->warnings(); diff --git a/C4/Koha.pm b/C4/Koha.pm index 8db25fff57..4239fa6cba 100644 --- a/C4/Koha.pm +++ b/C4/Koha.pm @@ -74,6 +74,7 @@ BEGIN { =over 2 +=cut =head2 slashifyDate $slash_date = &slashifyDate($dash_date); @@ -835,80 +836,6 @@ sub GetAuthorisedValues { return \@results; #$data; } -=item fixEncoding - - $marcrecord = &fixEncoding($marcblob); - -Returns a well encoded marcrecord. - -=cut -sub FixEncoding { - my $marc=shift; - my $encoding=shift; - my $record = MARC::Record->new_from_usmarc($marc); - if (C4::Context->preference("marcflavour") eq "UNIMARC"){ - my $targetcharset="utf8"; - if ($encoding && $targetcharset ne $encoding){ - my $newRecord=MARC::Record->new(); - if ($encoding!~/5426/){ - use Text::Iconv; - my $decoder = Text::Iconv->new($encoding,$targetcharset); - my $newRecord=MARC::Record->new(); - foreach my $field ($record->fields()){ - if ($field->tag()<'010'){ - $newRecord->insert_grouped_field($field); - } else { - my $newField; - my $createdfield=0; - foreach my $subfield ($field->subfields()){ - if ($createdfield){ - if (($newField->tag eq '100')) { - substr($subfield->[1],26,2,"0103") if ($targetcharset eq "latin1"); - substr($subfield->[1],26,4,"5050") if ($targetcharset eq "utf8"); - } - map {$decoder->convert($_)} @$subfield; - $newField->add_subfields($subfield->[0]=>$subfield->[1]); - } else { - map {$decoder->convert($_)} @$subfield; - $newField=MARC::Field->new($field->tag(),$field->indicator(1),$field->indicator(2),$subfield->[0]=>$subfield->[1]); - $createdfield=1; - } - } - $newRecord->insert_grouped_field($newField); - } - } - }elsif ($encoding=~/5426/){ - use MARC::Charset; - my $newRecord=MARC::Record->new(); - foreach my $field ($record->fields()){ - if ($field->tag()<'010'){ - $newRecord->insert_grouped_field($field); - } else { - my $newField; - my $createdfield=0; - foreach my $subfield ($field->subfields()){ - if ($createdfield){ - if (($newField->tag eq '100')) { - substr($subfield->[1],26,4,"5050"); - } - $newField->add_subfields($subfield->[0]=>MARC::Charset::marc8_to_utf8($subfield->[1])); - } else { - $newField=MARC::Field->new($field->tag(),$field->indicator(1),$field->indicator(2),$subfield->[0]=>MARC::Charset::marc8_to_utf8($subfield->[1])); - $createdfield=1; - } - } - $newRecord->insert_grouped_field($newField); - } - } - } - # warn $newRecord->as_formatted(); - return $newRecord; - } - return $record; - } - return $record; -} - =head2 GetKohaAuthorisedValues Takes $dbh , $kohafield as parameters. @@ -976,6 +903,504 @@ ORDER BY marc_subfield_structure.tagfield, tagsubfield|); return $data; } + +=item fixEncoding + + $marcrecord = &fixEncoding($marcblob); + +Returns a well encoded marcrecord. + +=cut +sub FixEncoding { + my $marc=shift; + my $encoding=shift; + my $record = MARC::Record->new_from_usmarc($marc); + if (C4::Context->preference("marcflavour") eq "UNIMARC"){ + my $targetcharset="utf8"; + if ($encoding && $targetcharset ne $encoding){ + my $newRecord=MARC::Record->new(); + if ($encoding!~/5426/){ + use Text::Iconv; + my $decoder = Text::Iconv->new($encoding,$targetcharset); + my $newRecord=MARC::Record->new(); + foreach my $field ($record->fields()){ + if ($field->tag()<'010'){ + $newRecord->insert_grouped_field($field); + } else { + my $newField; + my $createdfield=0; + foreach my $subfield ($field->subfields()){ + if ($createdfield){ + if (($newField->tag eq '100')) { + substr($subfield->[1],26,4,"5050") if ($targetcharset eq "utf8"); + } + map {$decoder->convert($_)} @$subfield; + $newField->add_subfields($subfield->[0]=>$subfield->[1]); + } else { + map {$decoder->convert($_)} @$subfield; + $newField=MARC::Field->new($field->tag(),$field->indicator(1),$field->indicator(2),$subfield->[0]=>$subfield->[1]); + $createdfield=1; + } + } + $newRecord->insert_grouped_field($newField); + } + } + }elsif ($encoding=~/5426/){ + foreach my $field ($record->fields()){ + if ($field->tag()<'010'){ + $newRecord->insert_grouped_field($field); + } else { + my $newField; + my $createdfield=0; + foreach my $subfield ($field->subfields()){ +# my $utf8=eval{MARC::Charset::marc8_to_utf8($subfield->[1])}; +# if ($@) {warn "z3950 character conversion error $@ ";$utf8=$subfield->[1]}; + my $utf8=char_decode5426($subfield->[1]); + if (($field->tag eq '100')) { + substr($utf8,26,4,"5050"); + } + if ($createdfield){ + $newField->add_subfields($subfield->[0]=>$utf8); + } else { + $newField=MARC::Field->new($field->tag(),$field->indicator(1),$field->indicator(2),$subfield->[0]=>$utf8); + $createdfield=1; + } + } + $newRecord->insert_grouped_field($newField); + } + } + } +# warn $newRecord->as_formatted(); + return $newRecord; + } + return $record; + } + return $record; +} + + +sub char_decode5426 { + my ( $string) = @_; + my $result; +my %chars; +$chars{0xb0}=0x0101;#3/0ayn[ain] +$chars{0xb1}=0x0623;#3/1alif/hamzah[alefwithhamzaabove] +#$chars{0xb2}=0x00e0;#'à'; +$chars{0xb2}=0x00e0;#3/2leftlowsinglequotationmark +#$chars{0xb3}=0x00e7;#'ç'; +$chars{0xb3}=0x00e7;#3/2leftlowsinglequotationmark +# $chars{0xb4}='è'; +$chars{0xb4}=0x00e8; +# $chars{0xb5}='é'; +$chars{0xb5}=0x00e9; +$chars{0x97}=0x003c;#3/2leftlowsinglequotationmark +$chars{0x98}=0x003e;#3/2leftlowsinglequotationmark +$chars{0xfa}=0x0153;#oe +$chars{0x81d1}=0x00b0; + +#### +## combined characters iso5426 + +$chars{0xc041}=0x1ea2; # capital a with hook above +$chars{0xc045}=0x1eba; # capital e with hook above +$chars{0xc049}=0x1ec8; # capital i with hook above +$chars{0xc04f}=0x1ece; # capital o with hook above +$chars{0xc055}=0x1ee6; # capital u with hook above +$chars{0xc059}=0x1ef6; # capital y with hook above +$chars{0xc061}=0x1ea3; # small a with hook above +$chars{0xc065}=0x1ebb; # small e with hook above +$chars{0xc069}=0x1ec9; # small i with hook above +$chars{0xc06f}=0x1ecf; # small o with hook above +$chars{0xc075}=0x1ee7; # small u with hook above +$chars{0xc079}=0x1ef7; # small y with hook above + + # 4/1 grave accent +$chars{0xc141}=0x00c0; # capital a with grave accent +$chars{0xc145}=0x00c8; # capital e with grave accent +$chars{0xc149}=0x00cc; # capital i with grave accent +$chars{0xc14f}=0x00d2; # capital o with grave accent +$chars{0xc155}=0x00d9; # capital u with grave accent +$chars{0xc157}=0x1e80; # capital w with grave +$chars{0xc159}=0x1ef2; # capital y with grave +$chars{0xc161}=0x00e0; # small a with grave accent +$chars{0xc165}=0x00e8; # small e with grave accent +$chars{0xc169}=0x00ec; # small i with grave accent +$chars{0xc16f}=0x00f2; # small o with grave accent +$chars{0xc175}=0x00f9; # small u with grave accent +$chars{0xc177}=0x1e81; # small w with grave +$chars{0xc179}=0x1ef3; # small y with grave + # 4/2 acute accent +$chars{0xc241}=0x00c1; # capital a with acute accent +$chars{0xc243}=0x0106; # capital c with acute accent +$chars{0xc245}=0x00c9; # capital e with acute accent +$chars{0xc247}=0x01f4; # capital g with acute +$chars{0xc249}=0x00cd; # capital i with acute accent +$chars{0xc24b}=0x1e30; # capital k with acute +$chars{0xc24c}=0x0139; # capital l with acute accent +$chars{0xc24d}=0x1e3e; # capital m with acute +$chars{0xc24e}=0x0143; # capital n with acute accent +$chars{0xc24f}=0x00d3; # capital o with acute accent +$chars{0xc250}=0x1e54; # capital p with acute +$chars{0xc252}=0x0154; # capital r with acute accent +$chars{0xc253}=0x015a; # capital s with acute accent +$chars{0xc255}=0x00da; # capital u with acute accent +$chars{0xc257}=0x1e82; # capital w with acute +$chars{0xc259}=0x00dd; # capital y with acute accent +$chars{0xc25a}=0x0179; # capital z with acute accent +$chars{0xc261}=0x00e1; # small a with acute accent +$chars{0xc263}=0x0107; # small c with acute accent +$chars{0xc265}=0x00e9; # small e with acute accent +$chars{0xc267}=0x01f5; # small g with acute +$chars{0xc269}=0x00ed; # small i with acute accent +$chars{0xc26b}=0x1e31; # small k with acute +$chars{0xc26c}=0x013a; # small l with acute accent +$chars{0xc26d}=0x1e3f; # small m with acute +$chars{0xc26e}=0x0144; # small n with acute accent +$chars{0xc26f}=0x00f3; # small o with acute accent +$chars{0xc270}=0x1e55; # small p with acute +$chars{0xc272}=0x0155; # small r with acute accent +$chars{0xc273}=0x015b; # small s with acute accent +$chars{0xc275}=0x00fa; # small u with acute accent +$chars{0xc277}=0x1e83; # small w with acute +$chars{0xc279}=0x00fd; # small y with acute accent +$chars{0xc27a}=0x017a; # small z with acute accent +$chars{0xc2e1}=0x01fc; # capital ae with acute +$chars{0xc2f1}=0x01fd; # small ae with acute + # 4/3 circumflex accent +$chars{0xc341}=0x00c2; # capital a with circumflex accent +$chars{0xc343}=0x0108; # capital c with circumflex +$chars{0xc345}=0x00ca; # capital e with circumflex accent +$chars{0xc347}=0x011c; # capital g with circumflex +$chars{0xc348}=0x0124; # capital h with circumflex +$chars{0xc349}=0x00ce; # capital i with circumflex accent +$chars{0xc34a}=0x0134; # capital j with circumflex +$chars{0xc34f}=0x00d4; # capital o with circumflex accent +$chars{0xc353}=0x015c; # capital s with circumflex +$chars{0xc355}=0x00db; # capital u with circumflex +$chars{0xc357}=0x0174; # capital w with circumflex +$chars{0xc359}=0x0176; # capital y with circumflex +$chars{0xc35a}=0x1e90; # capital z with circumflex +$chars{0xc361}=0x00e2; # small a with circumflex accent +$chars{0xc363}=0x0109; # small c with circumflex +$chars{0xc365}=0x00ea; # small e with circumflex accent +$chars{0xc367}=0x011d; # small g with circumflex +$chars{0xc368}=0x0125; # small h with circumflex +$chars{0xc369}=0x00ee; # small i with circumflex accent +$chars{0xc36a}=0x0135; # small j with circumflex +$chars{0xc36e}=0x00f1; # small n with tilde +$chars{0xc36f}=0x00f4; # small o with circumflex accent +$chars{0xc373}=0x015d; # small s with circumflex +$chars{0xc375}=0x00fb; # small u with circumflex +$chars{0xc377}=0x0175; # small w with circumflex +$chars{0xc379}=0x0177; # small y with circumflex +$chars{0xc37a}=0x1e91; # small z with circumflex + # 4/4 tilde +$chars{0xc441}=0x00c3; # capital a with tilde +$chars{0xc445}=0x1ebc; # capital e with tilde +$chars{0xc449}=0x0128; # capital i with tilde +$chars{0xc44e}=0x00d1; # capital n with tilde +$chars{0xc44f}=0x00d5; # capital o with tilde +$chars{0xc455}=0x0168; # capital u with tilde +$chars{0xc456}=0x1e7c; # capital v with tilde +$chars{0xc459}=0x1ef8; # capital y with tilde +$chars{0xc461}=0x00e3; # small a with tilde +$chars{0xc465}=0x1ebd; # small e with tilde +$chars{0xc469}=0x0129; # small i with tilde +$chars{0xc46e}=0x00f1; # small n with tilde +$chars{0xc46f}=0x00f5; # small o with tilde +$chars{0xc475}=0x0169; # small u with tilde +$chars{0xc476}=0x1e7d; # small v with tilde +$chars{0xc479}=0x1ef9; # small y with tilde + # 4/5 macron +$chars{0xc541}=0x0100; # capital a with macron +$chars{0xc545}=0x0112; # capital e with macron +$chars{0xc547}=0x1e20; # capital g with macron +$chars{0xc549}=0x012a; # capital i with macron +$chars{0xc54f}=0x014c; # capital o with macron +$chars{0xc555}=0x016a; # capital u with macron +$chars{0xc561}=0x0101; # small a with macron +$chars{0xc565}=0x0113; # small e with macron +$chars{0xc567}=0x1e21; # small g with macron +$chars{0xc569}=0x012b; # small i with macron +$chars{0xc56f}=0x014d; # small o with macron +$chars{0xc575}=0x016b; # small u with macron +$chars{0xc572}=0x0159; # small r with macron +$chars{0xc5e1}=0x01e2; # capital ae with macron +$chars{0xc5f1}=0x01e3; # small ae with macron + # 4/6 breve +$chars{0xc641}=0x0102; # capital a with breve +$chars{0xc645}=0x0114; # capital e with breve +$chars{0xc647}=0x011e; # capital g with breve +$chars{0xc649}=0x012c; # capital i with breve +$chars{0xc64f}=0x014e; # capital o with breve +$chars{0xc655}=0x016c; # capital u with breve +$chars{0xc661}=0x0103; # small a with breve +$chars{0xc665}=0x0115; # small e with breve +$chars{0xc667}=0x011f; # small g with breve +$chars{0xc669}=0x012d; # small i with breve +$chars{0xc66f}=0x014f; # small o with breve +$chars{0xc675}=0x016d; # small u with breve + # 4/7 dot above +$chars{0xc7b0}=0x01e1; # Ain with dot above +$chars{0xc742}=0x1e02; # capital b with dot above +$chars{0xc743}=0x010a; # capital c with dot above +$chars{0xc744}=0x1e0a; # capital d with dot above +$chars{0xc745}=0x0116; # capital e with dot above +$chars{0xc746}=0x1e1e; # capital f with dot above +$chars{0xc747}=0x0120; # capital g with dot above +$chars{0xc748}=0x1e22; # capital h with dot above +$chars{0xc749}=0x0130; # capital i with dot above +$chars{0xc74d}=0x1e40; # capital m with dot above +$chars{0xc74e}=0x1e44; # capital n with dot above +$chars{0xc750}=0x1e56; # capital p with dot above +$chars{0xc752}=0x1e58; # capital r with dot above +$chars{0xc753}=0x1e60; # capital s with dot above +$chars{0xc754}=0x1e6a; # capital t with dot above +$chars{0xc757}=0x1e86; # capital w with dot above +$chars{0xc758}=0x1e8a; # capital x with dot above +$chars{0xc759}=0x1e8e; # capital y with dot above +$chars{0xc75a}=0x017b; # capital z with dot above +$chars{0xc761}=0x0227; # small b with dot above +$chars{0xc762}=0x1e03; # small b with dot above +$chars{0xc763}=0x010b; # small c with dot above +$chars{0xc764}=0x1e0b; # small d with dot above +$chars{0xc765}=0x0117; # small e with dot above +$chars{0xc766}=0x1e1f; # small f with dot above +$chars{0xc767}=0x0121; # small g with dot above +$chars{0xc768}=0x1e23; # small h with dot above +$chars{0xc76d}=0x1e41; # small m with dot above +$chars{0xc76e}=0x1e45; # small n with dot above +$chars{0xc770}=0x1e57; # small p with dot above +$chars{0xc772}=0x1e59; # small r with dot above +$chars{0xc773}=0x1e61; # small s with dot above +$chars{0xc774}=0x1e6b; # small t with dot above +$chars{0xc777}=0x1e87; # small w with dot above +$chars{0xc778}=0x1e8b; # small x with dot above +$chars{0xc779}=0x1e8f; # small y with dot above +$chars{0xc77a}=0x017c; # small z with dot above + # 4/8 trema, diaresis +$chars{0xc820}=0x00a8; # diaeresis +$chars{0xc841}=0x00c4; # capital a with diaeresis +$chars{0xc845}=0x00cb; # capital e with diaeresis +$chars{0xc848}=0x1e26; # capital h with diaeresis +$chars{0xc849}=0x00cf; # capital i with diaeresis +$chars{0xc84f}=0x00d6; # capital o with diaeresis +$chars{0xc855}=0x00dc; # capital u with diaeresis +$chars{0xc857}=0x1e84; # capital w with diaeresis +$chars{0xc858}=0x1e8c; # capital x with diaeresis +$chars{0xc859}=0x0178; # capital y with diaeresis +$chars{0xc861}=0x00e4; # small a with diaeresis +$chars{0xc865}=0x00eb; # small e with diaeresis +$chars{0xc868}=0x1e27; # small h with diaeresis +$chars{0xc869}=0x00ef; # small i with diaeresis +$chars{0xc86f}=0x00f6; # small o with diaeresis +$chars{0xc874}=0x1e97; # small t with diaeresis +$chars{0xc875}=0x00fc; # small u with diaeresis +$chars{0xc877}=0x1e85; # small w with diaeresis +$chars{0xc878}=0x1e8d; # small x with diaeresis +$chars{0xc879}=0x00ff; # small y with diaeresis + # 4/9 umlaut +$chars{0xc920}=0x00a8; # [diaeresis] +$chars{0xc961}=0x00e4; # a with umlaut +$chars{0xc965}=0x00eb; # e with umlaut +$chars{0xc969}=0x00ef; # i with umlaut +$chars{0xc96f}=0x00f6; # o with umlaut +$chars{0xc975}=0x00fc; # u with umlaut + # 4/10 circle above +$chars{0xca41}=0x00c5; # capital a with ring above +$chars{0xcaad}=0x016e; # capital u with ring above +$chars{0xca61}=0x00e5; # small a with ring above +$chars{0xca75}=0x016f; # small u with ring above +$chars{0xca77}=0x1e98; # small w with ring above +$chars{0xca79}=0x1e99; # small y with ring above + # 4/11 high comma off centre + # 4/12 inverted high comma centred + # 4/13 double acute accent +$chars{0xcd4f}=0x0150; # capital o with double acute +$chars{0xcd55}=0x0170; # capital u with double acute +$chars{0xcd6f}=0x0151; # small o with double acute +$chars{0xcd75}=0x0171; # small u with double acute + # 4/14 horn +$chars{0xce54}=0x01a0; # latin capital letter o with horn +$chars{0xce55}=0x01af; # latin capital letter u with horn +$chars{0xce74}=0x01a1; # latin small letter o with horn +$chars{0xce75}=0x01b0; # latin small letter u with horn + # 4/15 caron (hacek +$chars{0xcf41}=0x01cd; # capital a with caron +$chars{0xcf43}=0x010c; # capital c with caron +$chars{0xcf44}=0x010e; # capital d with caron +$chars{0xcf45}=0x011a; # capital e with caron +$chars{0xcf47}=0x01e6; # capital g with caron +$chars{0xcf49}=0x01cf; # capital i with caron +$chars{0xcf4b}=0x01e8; # capital k with caron +$chars{0xcf4c}=0x013d; # capital l with caron +$chars{0xcf4e}=0x0147; # capital n with caron +$chars{0xcf4f}=0x01d1; # capital o with caron +$chars{0xcf52}=0x0158; # capital r with caron +$chars{0xcf53}=0x0160; # capital s with caron +$chars{0xcf54}=0x0164; # capital t with caron +$chars{0xcf55}=0x01d3; # capital u with caron +$chars{0xcf5a}=0x017d; # capital z with caron +$chars{0xcf61}=0x01ce; # small a with caron +$chars{0xcf63}=0x010d; # small c with caron +$chars{0xcf64}=0x010f; # small d with caron +$chars{0xcf65}=0x011b; # small e with caron +$chars{0xcf67}=0x01e7; # small g with caron +$chars{0xcf69}=0x01d0; # small i with caron +$chars{0xcf6a}=0x01f0; # small j with caron +$chars{0xcf6b}=0x01e9; # small k with caron +$chars{0xcf6c}=0x013e; # small l with caron +$chars{0xcf6e}=0x0148; # small n with caron +$chars{0xcf6f}=0x01d2; # small o with caron +$chars{0xcf72}=0x0159; # small r with caron +$chars{0xcf73}=0x0161; # small s with caron +$chars{0xcf74}=0x0165; # small t with caron +$chars{0xcf75}=0x01d4; # small u with caron +$chars{0xcf7a}=0x017e; # small z with caron + # 5/0 cedilla +$chars{0xd020}=0x00b8; # cedilla +$chars{0xd043}=0x00c7; # capital c with cedilla +$chars{0xd044}=0x1e10; # capital d with cedilla +$chars{0xd047}=0x0122; # capital g with cedilla +$chars{0xd048}=0x1e28; # capital h with cedilla +$chars{0xd04b}=0x0136; # capital k with cedilla +$chars{0xd04c}=0x013b; # capital l with cedilla +$chars{0xd04e}=0x0145; # capital n with cedilla +$chars{0xd052}=0x0156; # capital r with cedilla +$chars{0xd053}=0x015e; # capital s with cedilla +$chars{0xd054}=0x0162; # capital t with cedilla +$chars{0xd063}=0x00e7; # small c with cedilla +$chars{0xd064}=0x1e11; # small d with cedilla +$chars{0xd065}=0x0119; # small e with cedilla +$chars{0xd067}=0x0123; # small g with cedilla +$chars{0xd068}=0x1e29; # small h with cedilla +$chars{0xd06b}=0x0137; # small k with cedilla +$chars{0xd06c}=0x013c; # small l with cedilla +$chars{0xd06e}=0x0146; # small n with cedilla +$chars{0xd072}=0x0157; # small r with cedilla +$chars{0xd073}=0x015f; # small s with cedilla +$chars{0xd074}=0x0163; # small t with cedilla + # 5/1 rude + # 5/2 hook to left + # 5/3 ogonek (hook to right +$chars{0xd320}=0x02db; # ogonek +$chars{0xd341}=0x0104; # capital a with ogonek +$chars{0xd345}=0x0118; # capital e with ogonek +$chars{0xd349}=0x012e; # capital i with ogonek +$chars{0xd34f}=0x01ea; # capital o with ogonek +$chars{0xd355}=0x0172; # capital u with ogonek +$chars{0xd361}=0x0105; # small a with ogonek +$chars{0xd365}=0x0119; # small e with ogonek +$chars{0xd369}=0x012f; # small i with ogonek +$chars{0xd36f}=0x01eb; # small o with ogonek +$chars{0xd375}=0x0173; # small u with ogonek + # 5/4 circle below +$chars{0xd441}=0x1e00; # capital a with ring below +$chars{0xd461}=0x1e01; # small a with ring below + # 5/5 half circle below +$chars{0xf948}=0x1e2a; # capital h with breve below +$chars{0xf968}=0x1e2b; # small h with breve below + # 5/6 dot below +$chars{0xd641}=0x1ea0; # capital a with dot below +$chars{0xd642}=0x1e04; # capital b with dot below +$chars{0xd644}=0x1e0c; # capital d with dot below +$chars{0xd645}=0x1eb8; # capital e with dot below +$chars{0xd648}=0x1e24; # capital h with dot below +$chars{0xd649}=0x1eca; # capital i with dot below +$chars{0xd64b}=0x1e32; # capital k with dot below +$chars{0xd64c}=0x1e36; # capital l with dot below +$chars{0xd64d}=0x1e42; # capital m with dot below +$chars{0xd64e}=0x1e46; # capital n with dot below +$chars{0xd64f}=0x1ecc; # capital o with dot below +$chars{0xd652}=0x1e5a; # capital r with dot below +$chars{0xd653}=0x1e62; # capital s with dot below +$chars{0xd654}=0x1e6c; # capital t with dot below +$chars{0xd655}=0x1ee4; # capital u with dot below +$chars{0xd656}=0x1e7e; # capital v with dot below +$chars{0xd657}=0x1e88; # capital w with dot below +$chars{0xd659}=0x1ef4; # capital y with dot below +$chars{0xd65a}=0x1e92; # capital z with dot below +$chars{0xd661}=0x1ea1; # small a with dot below +$chars{0xd662}=0x1e05; # small b with dot below +$chars{0xd664}=0x1e0d; # small d with dot below +$chars{0xd665}=0x1eb9; # small e with dot below +$chars{0xd668}=0x1e25; # small h with dot below +$chars{0xd669}=0x1ecb; # small i with dot below +$chars{0xd66b}=0x1e33; # small k with dot below +$chars{0xd66c}=0x1e37; # small l with dot below +$chars{0xd66d}=0x1e43; # small m with dot below +$chars{0xd66e}=0x1e47; # small n with dot below +$chars{0xd66f}=0x1ecd; # small o with dot below +$chars{0xd672}=0x1e5b; # small r with dot below +$chars{0xd673}=0x1e63; # small s with dot below +$chars{0xd674}=0x1e6d; # small t with dot below +$chars{0xd675}=0x1ee5; # small u with dot below +$chars{0xd676}=0x1e7f; # small v with dot below +$chars{0xd677}=0x1e89; # small w with dot below +$chars{0xd679}=0x1ef5; # small y with dot below +$chars{0xd67a}=0x1e93; # small z with dot below + # 5/7 double dot below +$chars{0xd755}=0x1e72; # capital u with diaeresis below +$chars{0xd775}=0x1e73; # small u with diaeresis below + # 5/8 underline +$chars{0xd820}=0x005f; # underline + # 5/9 double underline +$chars{0xd920}=0x2017; # double underline + # 5/10 small low vertical bar +$chars{0xda20}=0x02cc; # + # 5/11 circumflex below + # 5/12 (this position shall not be used) + # 5/13 left half of ligature sign and of double tilde + # 5/14 right half of ligature sign + # 5/15 right half of double tilde +# map {printf "%x :%x\n",$_,$chars{$_};}keys %chars; + my @data = unpack("C*", $string); + my @characters; + my $length=scalar(@data); + for (my $i = 0; $i < scalar(@data); $i++) { + my $char= $data[$i]; + if ($char >= 0x00 && $char <= 0x7F){ + #IsAscii + + push @characters,$char unless ($char<0x02 ||$char== 0x0F); + }elsif (($char >= 0xC0 && $char <= 0xDF)) { + #Combined Char + my $convchar ; + if ($chars{$char*256+$data[$i+1]}) { + $convchar= $chars{$char * 256 + $data[$i+1]}; + $i++; +# printf "char %x $char, char to convert %x , converted %x\n",$char,$char * 256 + $data[$i - 1],$convchar; + } elsif ($chars{$char}) { + $convchar= $chars{$char}; +# printf "0xC char %x, converted %x\n",$char,$chars{$char}; + }else { + $convchar=$char; + } + push @characters,$convchar; + } else { + my $convchar; + if ($chars{$char}) { + $convchar= $chars{$char}; +# printf "char %x, converted %x\n",$char,$chars{$char}; + }else { +# printf "char %x $char\n",$char; + $convchar=$char; + } + push @characters,$convchar; + } + } + $result=pack "U*",@characters; +# $result=~s/\x01//; +# $result=~s/\x00//; + $result=~s/\x0f//; + $result=~s/\x1b.//; + $result=~s/\x0e//; + $result=~s/\x1b\x5b//; +# map{printf "%x",$_} @characters; +# printf "\n"; + return $result; +} + 1; __END__ diff --git a/admin/z3950servers.pl b/admin/z3950servers.pl index a02714b824..7174f9a931 100755 --- a/admin/z3950servers.pl +++ b/admin/z3950servers.pl @@ -31,7 +31,7 @@ sub StringSearch { $searchstring=~ s/\'/\\\'/g; my @data=split(' ',$searchstring); my $count=@data; - my $sth=$dbh->prepare("Select host,port,db,userid,password,name,id,checked,rank,syntax from z3950servers where (name like ?) order by rank,name"); + my $sth=$dbh->prepare("Select host,port,db,userid,password,name,id,checked,rank,syntax,encoding from z3950servers where (name like ?) order by rank,name"); $sth->execute("$data[0]\%"); my @results; while (my $data=$sth->fetchrow_hashref) { @@ -73,7 +73,7 @@ if ($op eq 'add_form') { my $data; if ($searchfield) { my $dbh = C4::Context->dbh; - my $sth=$dbh->prepare("select host,port,db,userid,password,name,id,checked,rank,syntax from z3950servers where (name = ?) order by rank,name"); + my $sth=$dbh->prepare("select host,port,db,userid,password,name,id,checked,rank,syntax,encoding from z3950servers where (name = ?) order by rank,name"); $sth->execute($searchfield); $data=$sth->fetchrow_hashref; $sth->finish; @@ -85,7 +85,10 @@ if ($op eq 'add_form') { userid => $data->{'userid'}, password => $data->{'password'}, checked => $data->{'checked'}, - rank => $data->{'rank'}); + rank => $data->{'rank'}, + syntax => $data->{'syntax'}, + encoding => $data->{'encoding'}, + ); # END $OP eq ADD_FORM ################## ADD_VALIDATE ################################## # called by add_form, used to insert/modify data in DB @@ -95,7 +98,7 @@ if ($op eq 'add_form') { my $sth=$dbh->prepare("select * from z3950servers where name=?"); $sth->execute($input->param('searchfield')); if ($sth->rows) { - $sth=$dbh->prepare("update z3950servers set host=?, port=?, db=?, userid=?, password=?, name=?, checked=?, rank=?,syntax=? where name=?"); + $sth=$dbh->prepare("update z3950servers set host=?, port=?, db=?, userid=?, password=?, name=?, checked=?, rank=?,syntax=?,encoding=? where name=?"); $sth->execute($input->param('host'), $input->param('port'), $input->param('db'), @@ -105,6 +108,7 @@ if ($op eq 'add_form') { $input->param('checked'), $input->param('rank'), $input->param('syntax'), + $input->param('encoding'), $input->param('searchfield'), ); } else { @@ -118,6 +122,7 @@ if ($op eq 'add_form') { $input->param('checked'), $input->param('rank'), $input->param('syntax'), + $input->param('encoding'), ); } $sth->finish; @@ -128,7 +133,7 @@ if ($op eq 'add_form') { $template->param(delete_confirm => 1); my $dbh = C4::Context->dbh; - my $sth2=$dbh->prepare("select host,port,db,userid,password,name,id,checked,rank,syntax from z3950servers where (name = ?) order by rank,name"); + my $sth2=$dbh->prepare("select host,port,db,userid,password,name,id,checked,rank,syntax,encoding from z3950servers where (name = ?) order by rank,name"); $sth2->execute($searchfield); my $data=$sth2->fetchrow_hashref; $sth2->finish; @@ -139,7 +144,9 @@ if ($op eq 'add_form') { userid => $data->{'userid'}, password => $data->{'password'}, checked => $data->{'checked'}, - rank => $data->{'rank'}); + rank => $data->{'rank'}, + syntax => $data->{'syntax'}, + encoding => $data->{'encoding'} ); # END $OP eq DELETE_CONFIRM ################## DELETE_CONFIRMED ################################## @@ -170,6 +177,7 @@ if ($op eq 'add_form') { checked => $results->[$i]{'checked'}, rank => $results->[$i]{'rank'}, syntax => $results->[$i]{'syntax'}, + encoding => $results->[$i]{'encoding'}, toggle => $toggle); push @loop, \%row; diff --git a/installer/data/mysql/kohastructure.sql b/installer/data/mysql/kohastructure.sql index 26a327295e..569414ba35 100644 --- a/installer/data/mysql/kohastructure.sql +++ b/installer/data/mysql/kohastructure.sql @@ -1810,6 +1810,7 @@ CREATE TABLE `z3950servers` ( `icon` text, `position` enum('primary','secondary','') NOT NULL default 'primary', `type` enum('zed','opensearch') NOT NULL default 'zed', + `encoding` text NULL default 'utf8', `description` text NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/z3950servers.tmpl b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/z3950servers.tmpl index fc499783b1..1cbe7c7708 100644 --- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/z3950servers.tmpl +++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/z3950servers.tmpl @@ -130,7 +130,13 @@ - +
  • + +
  • Cancel
    @@ -156,6 +162,8 @@
  • Password:
  • Checked:
  • Rank:
  • +
  • Syntax:
  • +
  • Encoding:
  • " method="post">" />
    " method="post">
    @@ -193,10 +201,10 @@ You searched for - + - +
    Target:Hostname/Port:Database:Userid:Password:Checked:Rank: Syntax: 
    Target:Hostname/Port:Database:Userid:Password:Checked:Rank: Syntax:Encoding: 
    : ?op=add_form&searchfield=">Edit?op=delete_confirm&searchfield=">Delete
    : ?op=add_form&searchfield=">Edit?op=delete_confirm&searchfield=">Delete
    -- 2.20.1