r245@athlon: dpavlin | 2005-12-06 20:45:49 +0100
[webpac2] / lib / WebPAC / Lookup.pm
1 package WebPAC::Lookup;
2
3 use warnings;
4 use strict;
5
6 use base qw/WebPAC::Common WebPAC::Normalize/;
7 use File::Slurp;
8 use YAML qw/LoadFile/;
9 use Data::Dumper;
10
11 =head1 NAME
12
13 WebPAC::Lookup - simple normalisation plugin to produce lookup
14
15 =head1 VERSION
16
17 Version 0.02
18
19 =cut
20
21 our $VERSION = '0.02';
22
23 =head1 SYNOPSIS
24
25 This module will produce in-memory lookups for easy resolution of lookups
26 to different records in source files. This will enable you to resolve
27 relational data in source format.
28
29 It can also be use with C<WebPAC::Tree> to produce tree hierarchies.
30
31 Lookups are defined in C<conf/lookup/isis.pm>.
32
33 C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
34 C<val>. Optional parametar C<eval> is perl code to evaluate before storing
35 value in lookup.
36
37  @lookup = [
38   { 'key' => 'd:v900', 'val' => 'v250^a' },
39   { 'eval' => '"v901^a" eq "Podruèje"',
40     'key' => 'pa:v561^4:v562^4:v461^1',
41     'val' => 'v900' },
42  ];
43
44 Just for a reference, lookup data is internally stored in
45 C<< $self->{'_lookup_data'} >>.
46
47 =head1 FUNCTIONS
48
49 =head2 new
50
51 Create new lookup object.
52
53   my $lookup = new WebPAC::Lookup(
54         lookup_file => '/path/to/conf/lookup/lookup.pm',
55         is_lookup_regex => 'lookup{[^\{\}]+}';
56         save_lookup_regex => 'lookup{([^\{\}]+)}';
57   );
58
59 =cut
60
61 sub new {
62         my $class = shift;
63         my $self = {@_};
64         bless($self, $class);
65
66         my $log = $self->_get_logger();
67
68         my $lookup_file = $self->{'lookup_file'} || $log->logconfess("need path to lookup file in lookup_file parametar");
69
70         my $lookup_code = read_file($lookup_file) || $log->logconfess("can't read lookup file $lookup_file: $!");
71
72         if ($lookup_file =~ m#\.pm$#) {
73                 no strict 'vars';
74                 do $lookup_file or $log->logdie("Failed to read configuration parameters '$lookup_file' $! $@");
75                 $self->{'lookup_def'} = \@lookup || $log->logdie("lookup config $lookup_file doesn't produce \@lookup array");
76         } elsif ($lookup_file =~ m#\.(:?yml|yaml)$#) {
77                 my $yaml = LoadFile( $lookup_file ) || $log->logdie("lookup YAML file $lookup_file error: $!");
78                 $self->{'lookup_def'} = $yaml->{lookup} || $log->logdie("lookup YAML file $lookup_file should begin with 'lookup:'");
79         } else {
80                 $log->logide("unsupported lookup file $lookup_file");
81         }
82
83         $log->logconfess("lookup config file isn't ARRAY but ", sub { Dumper( $self->{'lookup_def'} ) }) if ($self->{'lookup_def'} !~ /ARRAY/o);
84
85         $self->{'is_lookup_regex'} ||= 'lookup{[^\{\}]+}';
86         $self->{'save_lookup_regex'} ||= 'lookup{([^\{\}]+)}';
87
88
89         $self->{'LOOKUP_REGEX'} = qr/$self->{'is_lookup_regex'}/;
90         $self->{'LOOKUP_REGEX_SAVE'} = qr/$self->{'save_lookup_regex'}/;
91
92         $log->debug("regexps: ", $self->{'LOOKUP_REGEX'}, " ", $self->{'LOOKUP_REGEX_SAVE'});
93
94         $self ? return $self : return undef;
95 }
96
97 =head2 add
98
99 Create lookup from record using lookup definition.
100
101  $self->add($rec);
102
103 Returns true if this record produced lookup.
104
105 =cut
106
107 sub add($) {
108         my $self = shift;
109
110         my $log = $self->_get_logger();
111
112         my $rec = shift || $log->logconfess("need record to create lookup");
113         $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
114         $log->logconfess("need HASH as first argument!") if ($self->{'lookup_def'} !~ /ARRAY/o);
115
116         my $n = 0;
117
118         foreach my $i (@{ $self->{'lookup_def'} }) {
119                 $log->logconfess("need key") unless defined($i->{'key'});
120                 $log->logconfess("need val") unless defined($i->{'val'});
121
122                 $n++;
123
124                 if (defined($i->{'eval'})) {
125                         # eval first, so we can skip fill_in for key and val
126                         my $eval = $self->fill_in($rec,$i->{'eval'}) || next;
127                         if ($self->_eval($eval)) {
128                                 my $key = $self->fill_in($rec,$i->{'key'}) || next;
129                                 my @val = $self->fill_in($rec,$i->{'val'}) || next;
130                                 $log->debug("stored $key = ",sub { join(" | ",@val) });
131                                 push @{$self->{'_lookup_data'}->{$key}}, @val;
132                         }
133                 } else {
134                         my $key = $self->fill_in($rec,$i->{'key'}) || next;
135                         my @val = $self->fill_in($rec,$i->{'val'}) || next;
136                         $log->debug("stored $key = ",sub { join(" | ",@val) });
137                         push @{$self->{'_lookup_data'}->{$key}}, @val;
138                 }
139         }
140
141         return $n;
142 }
143
144 =head2 lookup
145
146 Perform lookups on format supplied to it.
147
148  my $text = $lookup->lookup('[v900]');
149
150 Lookups can be nested (like C<[d:[a:[v900]]]>).
151
152 =cut
153
154 sub lookup {
155         my $self = shift;
156
157         my $log = $self->_get_logger();
158
159         my $tmp = shift || $log->logconfess("need format");
160
161         if ($tmp =~ $self->{'LOOKUP_REGEX'}) {
162                 my @in = ( $tmp );
163
164                 $log->debug("lookup for: ",$tmp);
165
166                 my @out;
167                 while (my $f = shift @in) {
168                         if ($f =~ $self->{'LOOKUP_REGEX_SAVE'}) {
169                                 my $k = $1;
170                                 if ($self->{'_lookup_data'}->{$k}) {
171                                         foreach my $nv (@{$self->{'_lookup_data'}->{$k}}) {
172                                                 my $tmp2 = $f;
173                                                 $tmp2 =~ s/lookup{$k}/$nv/g;
174                                                 push @in, $tmp2;
175                                         }
176                                 } else {
177                                         undef $f;
178                                 }
179                         } elsif ($f) {
180                                 push @out, $f;
181                         }
182                 }
183                 $log->logconfess("return is array and it's not expected!") unless wantarray;
184                 return @out;
185         } else {
186                 return $tmp;
187         }
188 }
189
190 =head2 regex
191
192 Returns precompiled regex for lookup format.
193
194  if ($foo =~ $lookup->reges) { ... }
195
196 =cut
197
198 sub regex {
199         my $self = shift;
200
201         return $self->{'LOOKUP_REGEX'};
202 }
203
204 =head1 AUTHOR
205
206 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
207
208 =head1 COPYRIGHT & LICENSE
209
210 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
211
212 This program is free software; you can redistribute it and/or modify it
213 under the same terms as Perl itself.
214
215 =cut
216
217 1; # End of WebPAC::Lookup