);
my $num = uc $row->[0];
+ # Rows whose upper-cased first column ($num) is shorter than 3 characters
+ # are reported, counted under $stat->{ignore} by row number, and skipped.
+ if ( length($num) < 3 ) {
+ # Array-ref slice of the first six columns. The former $row->[ 0 .. 5 ]
+ # was a single-element subscript, so the range was evaluated in scalar
+ # context (flip-flop operator) instead of producing a list of indexes.
+ print "IGNORE $nr ",dump(@{$row}[ 0 .. 5 ]),$/;
+ $stat->{ignore}->{$nr}++;
+ next;
+ }
my $key_id = $row->[1] . '-' . $row->[2];
- if ( $nr == 1 ) {
- push @{ $keys->{ $key_id }->{ $num } }, uc $row->[0];
- }
- if ( length($num) > 3 && $nr > 1 && ! exists $stat->{_}->{ $id } ) {
- $stat->{error}->{new_exact}->{$nr}++;
+ $keys->{ $key_id }->{ $num }++;
+
+ if ( $nr > 1 && ! exists $stat->{_}->{ $id } ) {
+ $stat->{new_exact}->{$nr}++;
my @candidates;
my $limit_sim = 0.9;
foreach my $key ( keys %{ $keys->{ $key_id } } ) {
+ next if $key eq $num;
my $s = similarity $num, $key, $limit_sim;
#warn "# $num $key $s\n";
if ($s > $limit_sim ) {
my $new_num = $candidates[$i]->{key};
$id = join('-',uc $new_num, $row->[1], $row->[2]);
print "SIMILARITY$multi $nr $num -> $new_num\n";
- push @{ $keys->{ $key_id }->{ $id } }, uc $id;
+ $keys->{ $key_id }->{ $id }++;
}
} else {
$stat->{similarity_none}->{$nr}++;
- push @{ $keys->{ $key_id }->{ $num } }, uc $row->[0];
+ $stat->{unique_id}->{$id}++;
}
-
-
}
$stat->{exists}->{$nr}++ if exists $stat->{_}->{ $id };