1 package WebPAC::Normalize::XML;
6 use base qw/WebPAC::Common WebPAC::Normalize/;
10 use YAML qw/Dump LoadFile/;
14 WebPAC::Normalize::XML - apply XML or YAML normalisation rules
22 our $VERSION = '0.03';
26 This module uses C<conf/normalize/*.xml> files to perform normalisation
35 Read normalisation rules defined using XML from C<conf/normalize/*.xml> and
38 my $n = new WebPAC::Normalize::XML;
41 xml_file => '/path/to/conf/normalize/isis.xml',
44 C<tag> defines tag to use within C<xml_file>
46 C<xml_file> defines the path to the normalisation XML file
48 C<tags> defines additional tags that can be forced (and can be an array).
# Validate the call arguments, remember which XML rules file is in use and
# build the list of tag names that XMLin must always return as arrays.
#
# Arguments (read from $arg):
#   tag      - mandatory, tag to use within the XML file
#   xml_file - mandatory, path to the normalisation XML file
#   tags     - optional, extra tag name or array reference of tag names to
#              force into arrays (also honoured when preset in $self->{'tags'})
my $log = $self->_get_logger();

# Both arguments are mandatory; logconfess aborts with a stack trace.
foreach my $req (qw/tag xml_file/) {
	$log->logconfess("need argument $req") unless $arg->{$req};
}

$self->{'tag'} = $arg->{'tag'};
my $xml_file = $arg->{'xml_file'};

$log->info("using $xml_file tag <",$self->{'tag'},">");

$log->logdie("normalisation xml file '$xml_file' doesn't exist!") if (! -e $xml_file);

$self->{'import_xml_file'} = $xml_file;

# @force_array is later flattened into the XMLin option list
# (ForceArray => @force_array), so it must hold exactly ONE element: a
# reference to the list of tag names.  Pushing extra tags onto the outer
# array would inject stray elements into the option list, so they are
# appended to the inner list instead.
my @force_array = [ $self->{'tag'}, 'config', 'format' ];

# Extra tags may be preset on the object or passed with this call, and may
# be a single tag name or an array reference of names.
my $extra_tags = $self->{'tags'} || $arg->{'tags'};
if ($extra_tags) {
	push @{ $force_array[0] },
		ref($extra_tags) eq 'ARRAY' ? @{ $extra_tags } : $extra_tags;
}
# Parse the rules file with XMLin and keep the result in $self->{'import_xml'}.
# @force_array is flattened into the option list directly after the
# ForceArray key.
# NOTE(review): the remainder of this XMLin(...) call is not visible here --
# confirm the remaining options and the closing parenthesis against the full file.
75 $self->{'import_xml'} = XMLin($xml_file,
76 ForceArray => @force_array,
# Debug output: the dump is wrapped in an anonymous sub and handed to the
# logger (presumably so Dumper only runs when debug logging is enabled -- verify).
80 $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) }, $self->{lookup} ? " using lookups" : "lookups disabled");
# Disabled debugging aid (YAML dump of the parsed rules to STDERR).
82 #print STDERR Dump($self->{import_xml});
89 Read normalisation rules defined in YAML file located usually at
90 C<conf/normalize/*.yml> and parse it.
92 my $n = new WebPAC::Normalize::XML;
95 path => '/path/to/conf/normalize/isis.yml',
# Validate the call arguments and load the normalisation rules from a YAML
# file into $self->{'import_xml'} (the same slot the XML loader fills).
#
# Arguments (read from $arg):
#   tag  - mandatory, stored in $self->{tag}
#   path - mandatory, path to the YAML rules file
my $log = $self->_get_logger();

# Both arguments are mandatory; logconfess aborts with a stack trace.
foreach my $req (qw/tag path/) {
	$log->logconfess("need argument $req") unless $arg->{$req};
}

my $path = $arg->{path};
$self->{tag} = $arg->{tag};

$log->logdie("normalisation yaml file '$path' doesn't exist!") if (! -e $path);

$log->info("using $path normalization YAML");

# logdie (not die): the logger object has no plain die method, and every
# other fatal path in this module reports through logdie.
$self->{'import_xml'} = LoadFile( $path ) || $log->logdie("can't load $path: $!");

$log->debug("import yaml is ",sub { Dumper($self->{'import_xml'}) }, $self->{lookup} ? " using lookups" : "lookups disabled");

# Tell _x to return strings unchanged for rules loaded from YAML.
$self->{_skip_x} = 1;
129 Convert string from XML UTF-8 encoding to code page defined in C<xml_file>.
131 my $text = $n->_x('utf8 text');
133 Default application code page is C<ISO-8859-2>. You will probably want to
134 change that when creating new instance of object based on this one.
# Convert one UTF-8 string to the configured code page.
#
# Returns nothing for undef or the empty string, the input unchanged when
# $self->{_skip_x} is set, otherwise the converted text.  If the conversion
# yields nothing, a warning is logged and the logger's return value is
# passed back, as before.
my $utf8 = shift;

# Test definedness and length rather than truth, so that the legitimate
# value '0' is converted instead of being silently dropped.
return unless (defined $utf8 && length $utf8);
return $utf8 if ($self->{_skip_x});

# create UTF-8 converter for import_xml files (built once, then cached)
$self->{'utf2cp'} ||= Text::Iconv->new('UTF-8' ,$self->{'code_page'} || 'ISO-8859-2');

my $converted = $self->{'utf2cp'}->convert($utf8);
return $converted if (defined $converted && length $converted);

return $self->_get_logger()->logwarn("can't convert '$utf8'");
153 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
155 =head1 COPYRIGHT & LICENSE
157 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
159 This program is free software; you can redistribute it and/or modify it
160 under the same terms as Perl itself.
164 1; # End of WebPAC::Normalize::XML