use warnings;
use strict;
+use utf8;
use XML::Simple;
use Data::Dump qw(dump);
-use utf8;
my $odt_file = 'contributions.odt';
#foreach my $file ( glob 'contributions/*.xml' ) {
my $xml;
-open(my $fh, '<', 'contributions/all.xml');
+open(my $fh, '<:encoding(utf-8)', 'contributions/all.xml'); # read through a UTF-8 decoding layer; NOTE(review): the result of open is not checked
{
local $/ = undef; # slurp mode: the read below returns the whole file in one string
$xml = <$fh>;
close($fh);
}
+warn "---xml---", dump($xml), "---/xml---\n"; # debug: dump the slurped text to STDERR
+
foreach my $xml ( split(/\Q<?xml\E/, $xml) ) {
next unless $xml;
my $abstract;
-#print $xml;
-
$abstract = $xs->XMLin( $xml, ForceArray => qw(PrimaryAuthor) );
warn "# abstract = ", dump($abstract);
#warn "# stat = ",dump($stat);
+use Encode;
+sub enc { # enc($string): clears the internal UTF-8 flag on a copy of $string and returns that copy
+ my $b = shift; # NOTE(review): this lexical $b shadows the comparator variable used by sort()
+ warn "# enc in ",Encode::is_utf8($b), dump($b); # debug: UTF-8 flag state and value on entry
+
+# $b = Encode::decode('iso-8859-1',$b); # XXX doesn't work!
+
+# Encode::from_to($b, 'iso-8859-1','utf-8');
+# Encode::_utf8_on($b);
+
+ Encode::_utf8_off($b); # only the flag is switched off, nothing is transcoded; NOTE(review): Encode documents _utf8_off as internal-use
+
+ warn "# enc out ",Encode::is_utf8($b), dump($b); # debug: UTF-8 flag state and value on exit
+ return $b;
+}
+sub l1 { # l1($value): returns $value, passed through enc() when it contains a character in \x{80}-\x{ff}
+ my $l = shift;
+ return 'FIXME ref ' . dump($l) if ref $l; # references are not converted: a placeholder string holding their dump is returned
+ return enc($l) if $l =~ m/([\x{80}-\x{ff}])/; # at least one character in the \x{80}-\x{ff} range
+ return $l; # no character in that range: returned unchanged
+}
+
sub abstract2doc {
my $id = shift;
foreach my $a ( @{ $abstract->{PrimaryAuthor} } ) {
$doc->appendParagraph(text => join(' ',
- map { $a->{$_}->[0] } qw( FirstName FamilyName )
+# map { s/([\x{80}-\x{ff}])/enc($1)/eg && warn "# PrimaryAuthor Id: $id ",Encode::is_utf8($_), dump($_),"\n"; $_ }
+ map { l1($a->{$_}->[0]) } qw( FirstName FamilyName )
), style => 'Heading 2');
$doc->appendParagraph(text => join(' ',
- map { $a->{$_}->[0] } qw( Affiliation )
+ map { l1($a->{$_}->[0]) } qw( Affiliation )
), style => 'Heading 3');
push @{ $stat->{_Affiliation}->{ $a->{Affiliation}->[0] } }, $id;
}