($s,$d,$i) = (0,0,1) if (lc($x->{type}) eq "index");
#print STDERR "## s: $s d: $d i: $i ## $format ##\n";
+ # FIX: this is an ugly, UGLY kludge: the string is returned
+ # in UTF-8 encoding, but as if the source charset
+ # were ISO8859-1 and not some other. This breaks other
+ # character encodings, so we convert it first
+ # back to ISO8859-1
$format = $cludge_codepage->convert($format);
+
my ($swish,$display) = parse_format($format,$row);
#print STDERR "s: $swish\nd: $display\n" if ($swish);
#print STDERR "swish: $swish<-- display: $display<--\n";
- # FIX: this is ugly, UGLY, cludge: OpenIsis return
- # UTF8 encoding of strings, but as if source charset
- # is ISO8859-1 and not some other. This breaks our
- # isis character encoding, so we convert it first
- # back to ISO8859-1 (which can actually be different
- # encoding in isis)
- $swish_data .= $swish if ($s && $swish);
- $display_data .= $display if ($d && $display);
+ # filter="name" ; filter this field through
+ # filter/[name].pm
+ my $filter;
+ if ($x->{filter}) {
+ $filter = "filter/".$x->{filter}.".pm";
+ require $filter;
+ }
+ # type="swish" ; field for swish
+ if ($s && $swish) {
+ # append the raw value, or the space-joined output of filter()
+ # when a filter module was loaded for this field
+ $swish_data .= $filter ? join(" ",&filter($swish)) : $swish;
+ }
- # insert into index
+ # type="display" ; field for display
+ if ($d && $display) {
+ # with a filter loaded, pass the value through filter() and join
+ # the returned pieces with a single space
+ if ($filter) {
+ $display_data .= join(" ",&filter($display));
+ } else {
+ # the enclosing test already guarantees $d and $display; this
+ # must not depend on $s, or fields with display enabled and
+ # swish disabled would never be appended
+ $display_data .= $display;
+ }
+ }
+
+ # type="index" ; insert into index
if ($i && $display) {
my $index_data = $index_codepage->convert($display) || $display;
- $index->insert($field, $index_data, $db_dir);
+ if ($filter) {
+ foreach my $d (&filter($index_data)) {
+ $index->insert($field, $d, $db_dir);
+ }
+ } else {
+ $index->insert($field, $index_data, $db_dir);
+ }
}
}
$field_name = $config->{indexer}->{$field}->{name_singular}."#-#";
} elsif ($config->{indexer}->{$field}->{name_plural}) {
$field_name = $config->{indexer}->{$field}->{name_plural}."#-#";
- } else {
+ } elsif ($config->{indexer}->{$field}->{name}) {
$field_name = $config->{indexer}->{$field}->{name}."#-#";
+ } else {
+ print STDERR "WARNING: field '$field' doesn't have 'name' attribute!";
}
if ($field_name) {
$html .= $xml_codepage->convert($field_name);
}
if ($swish_data) {
my $i = Text::Iconv->new($config->{isis_codepage},'ISO8859-2');
+ # remove extra spaces
+ $swish_data =~ s/ +/ /g;
+ $swish_data =~ s/ +$//g;
+
$swish_data = $i->convert($swish_data);
$xml .= xmlify($field."_swish",unac_string('ISO8859-2',$swish_data));
#$swish_data = $isis_codepage->convert($swish_data)."##" || $swish_data;
--- /dev/null
+
+
+<isis type="swish|display|index"
+ append="1"
+ format="one of formatting functions below"
+ filter="name"
+>_pre_000x_sep_000x_sep_000x_post_</isis>
+
+format:
+ upper [from_char[,to_char]]
+
+ makes field UPPERCASE
+ default: whole field
+
+
+ upper_w [word_nr[,...]]
+
+ make words in field UPPERCASE
+ default: first word
+
+ substr from[,len]
+
+ returns a substring starting at character 'from' and 'len' characters long
+ default: len - rest of the string
+
+ skip2nr
+ skip all alphanumeric characters and return just
+ the numbers after them, e.g. to output "1992." from "cop. 1992."
+
+ initial [word_nr[,...]]
+
+ make word(s) into initials (upper case first char and
+ append dot after it)
+ default: first word
+
+
+filter:
+ name of the filter, which is stored in filter/[name].pm