From e5cdddbbf7cb4d9f302806670684af253fd1bd76 Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Sat, 22 Nov 2003 22:04:05 +0000 Subject: [PATCH] implemented filter which can replace (or be used together with) unac_string from Text::Unaccent git-svn-id: file:///home/dpavlin/private/svn/webpac/trunk@164 13eb9ef6-21d5-0310-b721-a9d68796d827 --- WebPac.pm | 4 ++++ all2xml.pl | 5 +++++ filter/unac_string_croatian.pm | 14 ++++++++++++++ 3 files changed, 23 insertions(+) create mode 100644 filter/unac_string_croatian.pm diff --git a/WebPac.pm b/WebPac.pm index 3ed8a33..6cb4875 100644 --- a/WebPac.pm +++ b/WebPac.pm @@ -28,7 +28,11 @@ my $MAX_HITS = $cfg_global->val('webpac', 'max_hits') || 0; my $ON_PAGE =$cfg_global->val('webpac', 'on_page') || 10; my $MIN_WILDCARD =$cfg_global->val('webpac', 'min_wildcard') || 1; my $TEMPLATE =$cfg_global->val('webpac', 'template'); +my $UNAC_FILTER =$cfg_global->val('global', 'unac_filter'); +if ($UNAC_FILTER) { + require $UNAC_FILTER; +} Text::Iconv->raise_error(0); # Conversion errors raise exceptions diff --git a/all2xml.pl b/all2xml.pl index de6fe0f..76009de 100755 --- a/all2xml.pl +++ b/all2xml.pl @@ -325,6 +325,11 @@ $index = new index_DBI( my $show_progress = $cfg_global->val('global', 'show_progress'); +my $unac_filter = $cfg_global->val('global', 'unac_filter'); +if ($unac_filter) { + require $unac_filter; +} + foreach my $database ($cfg->Sections) { my $type = lc($cfg -> val($database, 'type')) || die "$database doesn't have 'type' defined"; diff --git a/filter/unac_string_croatian.pm b/filter/unac_string_croatian.pm new file mode 100644 index 0000000..ad0c7f4 --- /dev/null +++ b/filter/unac_string_croatian.pm @@ -0,0 +1,14 @@ +# Alternative implementation for unac_string which supports characters in +# Croatian language which aren't really accented (ð) but need to be converted +# to an unaccented equivalent (d) + +sub unac_string { + my $charset = shift || return; + my $string = shift || return; +# $string = 
Text::Unaccent::unac_string($charset,$string); +# $string =~ tr/ðÐ/dD/; + $string =~ tr/èæ¾¹ðÈÆ®©Ð/cczsdCCZSD/; + return $string; +} + +1; -- 2.20.1