implemented filter which can replace (or be used together with) unac_string
author Dobrica Pavlinusic <dpavlin@rot13.org>
Sat, 22 Nov 2003 22:04:05 +0000 (22:04 +0000)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Sat, 22 Nov 2003 22:04:05 +0000 (22:04 +0000)
from Text::Unaccent

git-svn-id: file:///home/dpavlin/private/svn/webpac/trunk@164 13eb9ef6-21d5-0310-b721-a9d68796d827

WebPac.pm
all2xml.pl
filter/unac_string_croatian.pm [new file with mode: 0644]

index 3ed8a33..6cb4875 100644 (file)
--- a/WebPac.pm
+++ b/WebPac.pm
@@ -28,7 +28,11 @@ my $MAX_HITS = $cfg_global->val('webpac', 'max_hits') || 0;
 my $ON_PAGE =$cfg_global->val('webpac', 'on_page') || 10;
 my $MIN_WILDCARD =$cfg_global->val('webpac', 'min_wildcard') || 1;
 my $TEMPLATE =$cfg_global->val('webpac', 'template');
+my $UNAC_FILTER =$cfg_global->val('global', 'unac_filter');
 
+if ($UNAC_FILTER) {
+       require $UNAC_FILTER;
+}
 
 Text::Iconv->raise_error(0);     # Conversion errors raise exceptions
 
index de6fe0f..76009de 100755 (executable)
@@ -325,6 +325,11 @@ $index = new index_DBI(
 
 my $show_progress = $cfg_global->val('global', 'show_progress');
 
+my $unac_filter = $cfg_global->val('global', 'unac_filter');
+if ($unac_filter) {
+       require $unac_filter;
+}
+
 foreach my $database ($cfg->Sections) {
 
        my $type = lc($cfg -> val($database, 'type')) || die "$database doesn't have 'type' defined";
diff --git a/filter/unac_string_croatian.pm b/filter/unac_string_croatian.pm
new file mode 100644 (file)
index 0000000..ad0c7f4
--- /dev/null
@@ -0,0 +1,14 @@
+# Alternative implementation of unac_string which supports characters in the
+# Croatian language which aren't really accented (đ) but need to be converted
+# to an unaccented equivalent (d)
+
+sub unac_string {
+       my $charset = shift || return;
+       my $string = shift || return;
+#      $string = Text::Unaccent::unac_string($charset,$string);
+#      $string =~ tr/ðÐ/dD/;
+       $string =~ tr/èæ¾¹ðÈÆ®©Ð/cczsdCCZSD/;
+       return $string;
+}
+
+1;