Added type="swish_exact" to save data into swish index with boundaries
[webpac] / all2xml.pl
index 20bc592..de6fe0f 100755 (executable)
@@ -96,6 +96,7 @@ sub data2xml {
                $field_usage{$field}++;
 
                my $swish_data = "";
+               my $swish_exact_data = "";
                my $display_data = "";
                my $line_delimiter;
 
@@ -109,9 +110,10 @@ sub data2xml {
 
                        my $repeat_off = 0;             # repeatable offset
 
-                       my ($s,$d,$i) = (1,1,0);        # swish, display default
+                       my ($s,$se,$d,$i) = (1,0,1,0);  # swish, display default
                        $s = 0 if (lc($x->{type}) eq "display");
                        $d = 0 if (lc($x->{type}) eq "swish");
+                       $se = 1 if (lc($x->{type}) eq "swish_exact");
                        ($s,$d,$i) = (0,0,1) if (lc($x->{type}) eq "index");
 
                        # what will separate last line from this one?
@@ -128,6 +130,32 @@ sub data2xml {
                        my @index_data;
                        my $index_filter;
 
+                       sub mkformat {
+                               # Format $data with the <format> named by $x->{format_name} (sprintf).
+                               # Returns $data unchanged when no format_name is set or the field
+                               # count does not match; dies when the named format is not defined.
+                               my $x = shift || die "mkformat needs tag reference";
+                               my $data = shift;
+                               # nothing to format; "0" is real data and must not be dropped
+                               return unless (defined($data) && length($data));
+                               my $format_name = x($x->{format_name}) || return $data;
+                               my $fmt = x($config->{format}->{$format_name}->{content}) || die "<format name=\"$format_name\"> is not defined!";
+                               my $format_delimiter = x($x->{format_delimiter});
+                               # without a delimiter the whole value is the single field
+                               my @data = ($data);
+                               # NOTE(review): delimiter is interpolated as a regex -- confirm intended
+                               @data = split(/$format_delimiter/,$data) if ($format_delimiter);
+
+                               # count %s placeholders without touching $fmt (gives 0, not "", when none)
+                               my $nr = () = $fmt =~ /%s/g;
+                               if (scalar(@data) == $nr) {
+                                       return sprintf($fmt,@data);
+                               }
+                               # field count mismatch: report and fall back to unformatted data
+                               print STDERR "mkformat: [$data] can't be split on [$format_delimiter] to $nr fields!\n";
+                               return $data;
+                       }
+
                        # while because of repeatable fields
                        while ($swish || $display) {
                                ($swish,$display) = parse_format($type, $format,$row,$repeat_off++,$import2cp);
@@ -143,12 +171,16 @@ sub data2xml {
                                        require "filter/".$filter.".pm";
                                }
                                # type="swish" ; field for swish
-                               if ($s && $swish) {
-                                       if ($filter) {
+                               if ($swish) {
+                                       if ($filter && ($s || $se)) {
                                                no strict 'refs';
-                                               $swish_data .= join(" ",&$filter($swish));
+                                               my $tmp = join(" ",&$filter($swish)) if ($s || $se);
+                                               $swish_data .= $tmp if ($s);
+                                               $swish_exact_data .= $tmp if ($se);
+
                                        } else {
-                                               $swish_data .= $swish;
+                                               $swish_data .= $swish if ($s);
+                                               $swish_exact_data .= $swish if ($se);
                                        }
                                }
 
@@ -161,15 +193,15 @@ sub data2xml {
                                        if ($filter) {
                                                no strict 'refs';
                                                if ($display_data) {
-                                                       $display_data .= $delimiter.&$filter($display);
+                                                       $display_data .= $delimiter.join($delimiter,mkformat($x,&$filter($display)));
                                                } else {
-                                                       $display_data = &$filter($display);
+                                                       $display_data = join($delimiter,mkformat($x,&$filter($display)));
                                                }
                                        } else {
                                                if ($display_data) {
-                                                       $display_data .= $delimiter.$display;
+                                                       $display_data .= $delimiter.mkformat($x,$display);
                                                } else {
-                                                       $display_data = $display;
+                                                       $display_data = mkformat($x,$display);
                                                }
                                        }
                                }
@@ -205,6 +237,8 @@ sub data2xml {
                        my ($s,$d,$i) = (1,1,0);        # swish, display default
                        $s = 0 if (lc($x->{type}) eq "display");
                        $d = 0 if (lc($x->{type}) eq "swish");
+                       # no support for swish exact in config.
+                       # IMHO, it's useless
                        ($s,$d,$i) = (0,0,1) if (lc($x->{type}) eq "index");
 
                        if ($val) {
@@ -247,6 +281,15 @@ sub data2xml {
                        $xml .= xmlify($field."_swish", unac_string($codepage,$swish_data));
                }
 
+               if ($swish_exact_data) {
+                       $swish_exact_data =~ s/ +/ /g;
+                       $swish_exact_data =~ s/ +$//g;
+
+                       # add delimiters before and after word.
+                       # That is required to produce exact match
+                       $xml .= xmlify($field."_swish_exact", unac_string($codepage,'xxbxx '.$swish_exact_data.' xxexx'));
+               }
+
 
        }
 
@@ -293,7 +336,7 @@ print STDERR "reading ./import_xml/$type.xml\n";
        my $type_base = $type;
        $type_base =~ s/_.+$//g;
 
-       $config=XMLin("./import_xml/$type.xml", forcearray => [ $type2tag{$type_base}, 'config' ], forcecontent => 1);
+       $config=XMLin("./import_xml/$type.xml", forcearray => [ $type2tag{$type_base}, 'config', 'format' ], forcecontent => 1);
 
        # output current progress indicator
        my $last_p = 0;