$stat->{error}->{new_exact}->{$nr}++;
my @candidates;
+ my $limit_sim = 0.9;
foreach my $key ( keys %{ $keys->{ $key_id } } ) {
- my $s = similarity $num, $key, 0.4;
+ my $s = similarity $num, $key, $limit_sim;
#warn "# $num $key $s\n";
- if ($s > 0.8 ) {
+ if ($s > $limit_sim ) {
push @candidates, { key => $key, s => $s };
}
}
- if ( $#candidates == 0 ) {
- $stat->{similarity}->{$nr}++;
- my $new_num = $candidates[0]->{key};
- $id = join('-',uc $new_num, $row->[1], $row->[2]);
- print "SIMILARITY $nr $num -> $new_num\n";
- push @{ $keys->{ $key_id }->{ $id } }, uc $id;
- } elsif ( $#candidates > 1 ) {
- warn "# similarity IGNORED $num = ",dump( @candidates ),$/;
- $stat->{similarity_multiple}->{$nr}++;
+ if ( @candidates ) {
+ my $multi = $#candidates > 0 ? 'multi' : '';
+ $stat->{ 'similarity' . $multi }->{$nr}++;
+ foreach my $i ( 0 .. $#candidates ) {
+ my $new_num = $candidates[$i]->{key};
+ $id = join('-',uc $new_num, $row->[1], $row->[2]);
+ print "SIMILARITY$multi $nr $num -> $new_num\n";
+ push @{ $keys->{ $key_id }->{ $id } }, uc $id;
+ }
} else {
$stat->{similarity_none}->{$nr}++;
push @{ $keys->{ $key_id }->{ $num } }, uc $row->[0];