use strict;
use MARC::Charset qw/marc8_to_utf8/;
use Text::Iconv;
+use C4::Debug;
+use Unicode::Normalize;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
@EXPORT = qw(
IsStringUTF8ish
MarcToUTF8Record
+ SetUTF8Flag
SetMarcUnicodeFlag
StripNonXmlChars
);
return utf8::decode($str);
}
+=head2 SetUTF8Flag
+
+=over 4
+
+my $marc_record = SetUTF8Flag($marc_record);
+
+=back
+
+This function sets the Perl UTF8 flag on the subfield data of a MARC record.
+It is required when using new_from_usmarc
+since MARC::File::USMARC does not handle the Perl UTF8 flag.
+Without it, editing the fields and subfields of a Unicode MARC record
+would end up double-encoding the data.
+
+Only fields whose tag is 010 or above are processed; every subfield value of
+such a field is passed through NormalizeString. The record is modified in
+place and is also returned, so both calling styles work:
+
+    SetUTF8Flag($marc_record);
+    my $marc_record = SetUTF8Flag($marc_record);
+
+Nothing is returned when no record is given or when the record has no fields.
+
+FIXME
+In my opinion, this function belongs to MARC::Record and not
+to this package.
+But since it handles both charsets and MARC::Record, it found its way into
+this package.
+
+=cut
+
+sub SetUTF8Flag{
+    my ($record)=@_;
+    return unless ($record && $record->fields());
+    foreach my $field ($record->fields()){
+        # Fields below 010 carry no subfields, so there is nothing to rebuild.
+        next unless $field->tag()>=10;
+        my @subfields;
+        foreach my $subfield ($field->subfields()){
+            my ($code,$value)=@{$subfield};
+            push @subfields,($code,NormalizeString($value));
+        }
+        # A data field cannot be rebuilt from an empty subfield list;
+        # leave such a (malformed) field untouched instead of failing on it.
+        next unless @subfields;
+        my $newfield=MARC::Field->new(
+            $field->tag(),
+            $field->indicator(1),
+            $field->indicator(2),
+            @subfields
+        );
+        # replace_with() updates the existing field object in place, so the
+        # field keeps its position inside the record.
+        $field->replace_with($newfield);
+    }
+    # Return the record, as the documented synopsis promises.
+    return $record;
+}
+
+=head2 NormalizeString
+
+=over 4
+
+    my $normalized_string=NormalizeString($string,$nfd,$transform);
+
+=back
+
+    Given
+        a string
+        nfd : true if you want NFD normalization instead of the default NFC
+        transform : true if you expect punctuation signs to be removed
+    Sets the Perl UTF8 flag on your initial data if need be
+    and applies cleaning if required
+
+    Returns a utf8 NFC normalized string (NFD when nfd is true)
+
+    Sample code :
+        my $string=NormalizeString ("l'ornithoptère",0,1);
+        #results into "ornithoptère" in NFC form and sets UTF8 Flag
+=cut
+
+sub NormalizeString{
+    # The second argument selects NFD; when it is false the string is
+    # normalized to NFC. (It used to be called $nfc, which was the opposite
+    # of what the code does; the behaviour itself is unchanged.)
+    my ($string,$nfd,$transform)=@_;
+    # Decode UTF-8 octets into characters unless the string is already flagged.
+    utf8::decode($string) unless (utf8::is_utf8($string));
+    if ($nfd){
+        $string=NFD($string);
+    }
+    else {
+        $string=NFC($string);
+    }
+    if ($transform){
+        # Punctuation signs are replaced by a space.
+        $string=~s/\<|\>|\^|\;|\.|\?|,|\-|\(|\)|\[|\]|\{|\}|\$|\%|\!|\*|\:|\\|\/|\&|\"|\'/ /g;
+        #removing one letter words "d'" "l'" was changed into "d " "l "
+        $string=~s/\b\S\b//g;
+        # Dropping a leading one-letter word leaves a space behind it:
+        # trim both ends, not only the trailing one.
+        $string=~s/^\s+//;
+        $string=~s/\s+$//g;
+    }
+    return $string;
+}
+
=head2 MarcToUTF8Record
=over 4
use C4::Debug;
use YAML;
use URI::Escape;
+use C4::Charset;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
# not an index scan
else {
$record = $results[ $i - 1 ]->record($j)->raw();
+ # Debugging aid only: rendering every retrieved record into the error log
+ # is noisy and slow, so it is emitted only when $DEBUG is set.
+ warn $results[$i-1]->record($j)->render() if $DEBUG;
# warn "RECORD $j:".$record;
$results_hash->{'RECORDS'}[$j] = $record;
# loop through all of the records we've retrieved
for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
+ SetUTF8Flag($marcrecord);
my $biblionumber;
if(not $scan){
}
# XSLT processing of some stuff
- # FIXME : This needs some work in order to be more flexible : Can not use a result list for intranet different from OPAC
if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
$oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
$oldbiblio->{biblionumber}, $marcrecord, C4::Context->preference("XSLTResultsDisplay") );
my $record = transformMARCXML4XSLT($biblionumber, $orig_record);
#return $record->as_formatted();
my $itemsxml = buildKohaItemsNamespace($biblionumber);
- my $xmlrecord = $record->as_xml();
- $xmlrecord =~ s/\<\/record\>/$itemsxml\<\/record\>/;
+ my $xmlrecord = $record->as_xml(C4::Context->preference('marcflavour'));
+ # Expose a few system preferences to the stylesheet as a <sysprefs>
+ # element injected, together with the items XML, just before </record>.
+ my $sysxml = "";
+ foreach my $syspref ( qw/OPACURLOpenInNewWindow DisplayOPACiconsXSLT URLLinkText/ ) {
+     # Fetch each preference once; unset or false preferences are left out.
+     my $value = C4::Context->preference( $syspref );
+     next unless $value;
+     # Preference values are free text: escape XML metacharacters so they
+     # cannot break the document that is parsed afterwards.
+     $value =~ s/&/&amp;/g;
+     $value =~ s/</&lt;/g;
+     $value =~ s/>/&gt;/g;
+     $sysxml .= "<syspref name=\"$syspref\">" .
+                $value .
+                "</syspref>\n";
+ }
+ $sysxml = "<sysprefs>\n".$sysxml."</sysprefs>\n" if length($sysxml);
+ $xmlrecord =~ s/\<\/record\>/$itemsxml$sysxml\<\/record\>/;
+ # Turn every bare ampersand followed by a space into a proper entity
+ # ("&amp; "), not only the first one.
+ $xmlrecord =~ s/\& /\&amp\; /g;
+
my $parser = XML::LibXML->new();
# don't die when you find &, >, etc
- $parser->recover_silently(1);
+ $parser->recover_silently(0);
my $source = $parser->parse_string($xmlrecord);
unless ( $stylesheet->{$xslfilename} ) {
my $xslt = XML::LibXSLT->new();