improvements to WebPAC::Normalize::XML
[webpac2] / lib / WebPAC / Lookup.pm
1 package WebPAC::Lookup;
2
3 use warnings;
4 use strict;
5
6 use WebPAC::Common;
7
8 use base qw/WebPAC::Common/;
9 use File::Slurp;
10 use Data::Dumper;
11
12 =head1 NAME
13
14 WebPAC::Lookup - simple normalisation plugin to produce lookup
15
16 =head1 VERSION
17
18 Version 0.01
19
20 =cut
21
22 our $VERSION = '0.01';
23
24 =head1 SYNOPSIS
25
26 This module will produce in-memory lookups for easy resolution of lookups
27 to different records in source files. This will enable you to resolve
28 relational data in source format.
29
30 It can also be use with C<WebPAC::Tree> to produce tree hierarchies.
31
32 Lookups are defined in C<conf/lookup/isis.pm>.
33
34 C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
35 C<val>. Optional parametar C<eval> is perl code to evaluate before storing
36 value in lookup.
37
38  @lookup = [
39   { 'key' => 'd:v900', 'val' => 'v250^a' },
40   { 'eval' => '"v901^a" eq "Podruèje"',
41     'key' => 'pa:v561^4:v562^4:v461^1',
42     'val' => 'v900' },
43  ];
44
45
46 =head1 FUNCTIONS
47
48 =head2 new
49
50 Create new lookup object.
51
52   my $lookup = new WebPAC::Lookup(
53         lookup_file => '/path/to/conf/lookup/lookup.pm',
54         is_lookup_regex => 'lookup{[^\{\}]+}';
55         save_lookup_regex => 'lookup{([^\{\}]+)}';
56   );
57
58 =cut
59
60 sub new {
61         my $class = shift;
62         my $self = {@_};
63         bless($self, $class);
64
65         my $log = $self->_get_logger();
66
67         my $lookup_file = $self->{'lookup_file'} || $log->logconfess("need path to lookup file in lookup_file parametar");
68
69         my $lookup_code = read_file($lookup_file) || $log->logconfess("can't read lookup file $lookup_file: $!");
70
71         {
72                 no strict 'vars';
73                 do $lookup_file or $log->logdie("Failed to read configuration parameters '$lookup_file' $! $@");
74                 $self->{'lookup_def'} = \@lookup || $log->logdie("lookup config $lookup_file doesn't produce \@lookup array");
75         }
76
77         $log->logconfess("lookup config file isn't ARRAY", sub { Dumper( $self->{'lookup_def'} ) }) if ($self->{'lookup_def'} !~ /ARRAY/o);
78
79         $self->{'is_lookup_regex'} ||= 'lookup{[^\{\}]+}';
80         $self->{'save_lookup_regex'} ||= 'lookup{([^\{\}]+)}';
81
82
83         $self->{'LOOKUP_REGEX'} = qr/$self->{'is_lookup_regex'}/;
84         $self->{'LOOKUP_REGEX_SAVE'} = qr/$self->{'save_lookup_regex'}/;
85
86         $log->debug("regexps: ", $self->{'LOOKUP_REGEX'}, " ", $self->{'LOOKUP_REGEX_SAVE'});
87
88         $self ? return $self : return undef;
89 }
90
91 =head2 add
92
93 Create lookup from record using lookup definition.
94
95  $self->add($rec);
96
97 Returns true if this record produced lookup.
98
99 =cut
100
101 sub add($) {
102         my $self = shift;
103
104         my $log = $self->_get_logger();
105
106         my $rec = shift || $log->logconfess("need record to create lookup");
107         $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
108         $log->logconfess("need HASH as first argument!") if ($self->{'lookup_def'} !~ /ARRAY/o);
109
110         my $n = 0;
111
112         foreach my $i (@{ $self->{'lookup_def'} }) {
113                 $log->logconfess("need key") unless defined($i->{'key'});
114                 $log->logconfess("need val") unless defined($i->{'val'});
115
116                 $n++;
117
118                 if (defined($i->{'eval'})) {
119                         # eval first, so we can skip fill_in for key and val
120                         my $eval = $self->fill_in($rec,$i->{'eval'}) || next;
121                         if ($self->_eval($eval)) {
122                                 my $key = $self->fill_in($rec,$i->{'key'}) || next;
123                                 my @val = $self->fill_in($rec,$i->{'val'}) || next;
124                                 $log->debug("stored $key = ",sub { join(" | ",@val) });
125                                 push @{$self->{'lookup'}->{$key}}, @val;
126                         }
127                 } else {
128                         my $key = $self->fill_in($rec,$i->{'key'}) || next;
129                         my @val = $self->fill_in($rec,$i->{'val'}) || next;
130                         $log->debug("stored $key = ",sub { join(" | ",@val) });
131                         push @{$self->{'lookup'}->{$key}}, @val;
132                 }
133         }
134
135         return $n;
136 }
137
138 =head2 lookup
139
140 Perform lookups on format supplied to it.
141
142  my $text = $lookup->lookup('[v900]');
143
144 Lookups can be nested (like C<[d:[a:[v900]]]>).
145
146 =cut
147
148 sub lookup {
149         my $self = shift;
150
151         my $log = $self->_get_logger();
152
153         my $tmp = shift || $log->logconfess("need format");
154
155         if ($tmp =~ $self->{'LOOKUP_REGEX'}) {
156                 my @in = ( $tmp );
157
158                 $log->debug("lookup for: ",$tmp);
159
160                 my @out;
161                 while (my $f = shift @in) {
162                         if ($f =~ $self->{'LOOKUP_REGEX_SAVE'}) {
163                                 my $k = $1;
164                                 if ($self->{'lookup'}->{$k}) {
165                                         foreach my $nv (@{$self->{'lookup'}->{$k}}) {
166                                                 my $tmp2 = $f;
167                                                 $tmp2 =~ s/lookup{$k}/$nv/g;
168                                                 push @in, $tmp2;
169                                         }
170                                 } else {
171                                         undef $f;
172                                 }
173                         } elsif ($f) {
174                                 push @out, $f;
175                         }
176                 }
177                 $log->logconfess("return is array and it's not expected!") unless wantarray;
178                 return @out;
179         } else {
180                 return $tmp;
181         }
182 }
183
184 =head2 regex
185
186 Returns precompiled regex for lookup format.
187
188  if ($foo =~ $lookup->reges) { ... }
189
190 =cut
191
192 sub regex {
193         my $self = shift;
194
195         return $self->{'LOOKUP_REGEX'};
196 }
197
198 =head1 AUTHOR
199
200 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
201
202 =head1 COPYRIGHT & LICENSE
203
204 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
205
206 This program is free software; you can redistribute it and/or modify it
207 under the same terms as Perl itself.
208
209 =cut
210
211 1; # End of WebPAC::Lookup