Modifications to C4/Matcher for colon and ending punctuation
author J. David Bavousett <David.Bavousett@liblime.com>
Fri, 26 Dec 2008 21:38:00 +0000 (15:38 -0600)
committer Galen Charlton <galen.charlton@liblime.com>
Sat, 4 Apr 2009 00:10:49 +0000 (19:10 -0500)
As recommended by Michele Maenpaa, this adds colon handling and end-punctuation
stripping to subroutine _normalize, and fixes the length test in subroutine _get_match_keys

Signed-off-by: Galen Charlton <galen.charlton@liblime.com>
C4/Matcher.pm

index 645b2ad..294c481 100644 (file)
@@ -786,6 +786,11 @@ sub _get_match_keys {
                     }
                 }
                 $key = _normalize($key);
+                if ($component->{'length'}){
+                   if (length($key) > $component->{'length'}){
+                     $key = _normalize(substr($key,$component->{'offset'},$component{'length'}));
+                   }
+                }
             }
             if ($i == 0) {
                 push @keys, $key if $key;
@@ -815,10 +820,12 @@ sub _parse_match_component {
 # FIXME - default normalizer
 sub _normalize {
     my $value = uc shift;
+    $value =~ s/.;:,\]\[\)\(\/'"//g;
     $value =~ s/^\s+//;
-    $value =~ s/^\s+$//;
+    #$value =~ s/^\s+$//;
+    $value =~ s/\s+$//;
     $value =~ s/\s+/ /g;
-    $value =~ s/[.;,\]\[\)\(\/"']//g;
+    #$value =~ s/[.;,\]\[\)\(\/"']//g;
     return $value;
 }