1 package WebPAC::Normalize::XML;
6 use base qw/WebPAC::Common WebPAC::Normalize/;
13 WebPAC::Normalize::XML - apply XML normalisation rules
21 our $VERSION = '0.02';
25 This module uses C<conf/normalize/*.xml> files to perform normalisation
34 Read normalisation rules defined using XML from C<conf/normalize/*.xml> and
37 my $n = new WebPAC::Normalize::XML;
40 xml_file => '/path/to/conf/normalize/isis.xml',
43 C<tag> defines tag to use within C<xml_file>
45 C<xml_file> defines the path to the normalisation XML file.
47 C<tags> defines additional tags that can be forced (and can be an array).
# Fragment of the routine that loads the normalisation XML file. The enclosing
# `sub` header, closing braces and the tail of the XMLin() call are not part
# of this listing.
# Steps visible here: validate arguments, remember the tag and file path,
# parse the file and keep the parsed structure on the object.
56 my $log = $self->_get_logger();
# `tag` and `xml_file` are mandatory; a missing or false value (including
# "0" or "") is reported through logconfess.
# NOTE(review): $arg is not defined in the visible lines -- presumably a hash
# ref built from @_; confirm.
58 foreach my $req (qw/tag xml_file/) {
59 $log->logconfess("need argument $req") unless $arg->{$req};
62 $self->{'tag'} = $arg->{'tag'};
63 my $xml_file = $arg->{'xml_file'};
65 $log->info("using $xml_file tag <",$self->{'tag'},">");
# Abort through logdie when the path does not exist (-e test).
67 $log->logdie("normalisation xml file '$xml_file' doesn't exist!") if (! -e $xml_file);
# Remember which file the rules were read from.
69 $self->{'import_xml_file'} = $xml_file;
# NOTE(review): the square brackets make @force_array hold ONE element, an
# array ref of the three names. `ForceArray => @force_array` below is only a
# well-formed key/value pair while the array has exactly that one element.
71 my @force_array = [ $self->{'tag'}, 'config', 'format' ];
# NOTE(review): if this push runs, @force_array has two elements and flattens
# into an extra positional argument in the XMLin() call, shifting every
# following option. Also, $self->{'tags'} is not assigned anywhere in the
# visible lines -- confirm whether it should be copied from $arg->{'tags'}.
72 push @force_array, $self->{'tags'} if ($self->{'tags'});
# Parse the file and keep the result on the object. XMLin is presumably
# XML::Simple's -- TODO confirm; remaining options are not visible here.
74 $self->{'import_xml'} = XMLin($xml_file,
75 ForceArray => @force_array,
# The dump is wrapped in a sub -- presumably so Dumper only runs when debug
# logging is enabled (Log4perl-style lazy argument); confirm against the
# logger in use.
79 $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });
87 Convert string from XML UTF-8 encoding to code page defined in C<xml_file>.
89 my $text = $n->_x('utf8 text');
91 Default application code page is C<ISO-8859-2>. You will probably want to
92 change that when creating a new instance of an object based on this one.
# Fragment of _x: convert a UTF-8 string to the configured code page.
# The enclosing `sub` header and closing brace are not part of this listing.
# Takes the next argument from @_ and returns immediately when it is false
# (undef, "" or "0").
# NOTE(review): assumes $self was already shifted off @_ on an elided line --
# confirm.
98 my $utf8 = shift || return;
100 # create UTF-8 converter for import_xml files
# Built once and cached in $self->{'utf2cp'}; the target encoding is
# $self->{'code_page'}, falling back to 'ISO-8859-2' when that is false.
101 $self->{'utf2cp'} ||= Text::Iconv->new('UTF-8' ,$self->{'code_page'} || 'ISO-8859-2');
# Return the converted text. When convert() yields a false value (presumably
# undef on a failed conversion -- confirm against Text::Iconv), log a warning
# instead; the value returned is then whatever logwarn returns.
103 return $self->{'utf2cp'}->convert($utf8) ||
104 $self->_get_logger()->logwarn("can't convert '$utf8'");
110 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
112 =head1 COPYRIGHT & LICENSE
114 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
116 This program is free software; you can redistribute it and/or modify it
117 under the same terms as Perl itself.
121 1; # End of WebPAC::Normalize::XML