+ support multiple inputs to single database [2.00_1]
+ lookups now work [2.00_3]
+ create links to other databases [2.00_4]
++ add regexp filter [2.00_5]
+- add more input formats (MARC::Fast and others)
- delete unused files in database directories
- scoring for various fields in input/*.xml
- write pure perl Search::HyperEstraier
>
<!--
- <isis type="swish|display|index">_pre_000x_sep_000x_sep_000x_post_</isis>
- <config type="swish|display">name of var from config file</config>
+ <isis type="search|display|index">_pre_000x_sep_000x_sep_000x_post_</isis>
+ <config type="search|display">name of var from config file</config>
-->
</MFN>
<ISBN>
- <isis type="display">filter{isn_swish}v10</isis>
+ <isis type="display">v10</isis>
</ISBN>
<ISSN>
</ISSN>
<IdentificationNumbers name="ISN">
- <isis type="swish">filter{isn_swish}v10 v11</isis>
+ <isis type="search">v10 v11</isis>
</IdentificationNumbers>
<Language>
</Language>
<TitleProper>
- <isis>v200^a</isis>
+ <isis>filter{regex(s/&lt;[^>]*>//)}v200^a</isis>
</TitleProper>
<Subtitle>
</UncontrolledTerms>
<UDC_All>
- <isis type="swish">v675</isis>
+ <isis type="search">v675</isis>
</UDC_All>
<UDC>
<type name="Vrsta graðe: " order="100">
<!-- added via xml tag in .conf -->
<config type="display">materialtype</config>
- <config type="swish">material_code</config>
- <isis type="swish">v200^e</isis>
+ <config type="search">material_code</config>
+ <isis type="search">v200^e</isis>
</type>
<form name="Format: " order="101">
<!-- added via xml tag in .conf -->
- <config type="swish">material_form</config>
+ <config type="search">material_form</config>
</form>
<library name="Knji¾nica: " order="110">
<delimiter>, </delimiter>
<value>library_url</value>
</config>
- <config type="swish">library_code</config>
+ <config type="search">library_code</config>
</library>
</indexer>
=cut
-our $VERSION = '2.00_4';
+our $VERSION = '2.00_5';
=head1 SYNOPSIS
=head1 VERSION
-Version 0.05
+Version 0.06
=cut
-our $VERSION = '0.05';
+our $VERSION = '0.06';
=head1 SYNOPSIS
code defined as code ref on format after field substitution to producing
output
+There is one built-in filter called C<regex> which can be used like this:
+
+ filter{regex(s/foo/bar/)}
+
=item *
optional C<lookup{...}> will be then performed. See C<WebPAC::Lookups>.
$log->debug("using lookup regex: ", $self->{lookup_regex}) if ($r && $l);
+	# Install the built-in "regex" filter unless the caller supplied one.
+	# This has to happen even when no filter hash was passed to the
+	# constructor; otherwise the built-in filter would never be available.
+	if (! $self->{filter} || ! $self->{filter}->{regex}) {
+		$log->debug("adding built-in filter regex");
+		$self->{filter}->{regex} = sub {
+			# $val   - value to transform
+			# $regex - Perl substitution expression, e.g. s/foo/bar/
+			my ($val, $regex) = @_;
+			# nothing to apply when the filter is used without a parameter
+			return $val unless (defined($regex));
+			# NOTE(review): string eval runs whatever expression the format
+			# contains, so formats must come from trusted configuration only.
+			eval "\$val =~ $regex";
+			# report broken expressions instead of silently returning the
+			# value unchanged
+			$log->warn("built-in filter regex failed for '$regex': $@") if ($@);
+			return $val;
+		};
+	}
+
$self ? return $self : return undef;
}
my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
+Filters are implemented here. The simple form of a filter looks like this:
+
+ filter{name_of_filter}
+
+but filters can also take a variable number of comma-separated parameters, like this:
+
+ filter{name_of_filter(param,param,param)}
+
=cut
sub parse {
return if (! $self->_eval($eval));
}
- if ($filter_name && $self->{'filter'}->{$filter_name}) {
- $log->debug("about to filter{$filter_name} format: $out");
- $out = $self->{'filter'}->{$filter_name}->($out);
- return unless(defined($out));
- $log->debug("filter result: $out");
+ if ($filter_name) {
+ my @filter_args;
+ if ($filter_name =~ s/(\w+)\((.*)\)/$1/) {
+ @filter_args = split(/,/, $2);
+ }
+ if ($self->{'filter'}->{$filter_name}) {
+ $log->debug("about to filter{$filter_name} format: $out with arguments: ", join(",", @filter_args));
+ unshift @filter_args, $out;
+ $out = $self->{'filter'}->{$filter_name}->(@filter_args);
+ return unless(defined($out));
+ $log->debug("filter result: $out");
+ } else {
+ $log->warn("trying to use undefined filter $filter_name");
+ }
}
return $out;
throws_ok { new WebPAC::Normalize::XML( lookup_regex => 'foo' ) } qr/pair/, "lookup_regex without lookup";
throws_ok { new WebPAC::Normalize::XML( lookup => 'bar' ) } qr/pair/, "lookup without lookup_regex";
-ok(my $n = new WebPAC::Normalize::XML( debug => 0 ), "new");
+ok(my $n = new WebPAC::Normalize::XML(
+ debug => 1,
+ filter => {
+ regex => sub {
+ my ($val, $regex) = @_;
+ eval "\$val =~ $regex";
+ return $val;
+ },
+ },
+), "new");
throws_ok { $n->open() } qr/tag/, "open without tag";
throws_ok { $n->open( tag => 'isis' ) } qr/xml_file/, "open without xml_file";
ok(my $ds = $n->data_structure( $rec ), "data_structure");
-diag Dumper($rec, $ds);
+#diag Dumper($rec, $ds);
<?xml version="1.0" encoding="ISO-8859-2"?>
<config>
<indexer>
- <headline>
- <isis type="display">v200^a : v200^e / v200^f, v210^d.</isis>
- <config >
- <delimiter> (</delimiter>
- <value>materialtype</value>
- </config>
- <config >
- <delimiter>) ---> Lokacija: </delimiter>
- <value>library</value>
- </config>
- </headline>
-
<filename>
<isis type="filename">out/v000.html</isis>
</filename>
</NamePublisher>
<DatePublication>
- <isis>v210^d</isis>
+ <isis>filter{regex(s/cop.\s+//)}v210^d</isis>
</DatePublication>
<PhysicalDescription>