sub candidates {
my ( $num, $key_id, $limit_sim ) = @_;
+ $limit_sim //= $ENV{LIMIT};
$limit_sim //= 0.9;
my @candidates;
#print "XXX use $num $key ",dump($use);
my $duplicate = grep { $use->{$_} > 1 } keys %$use;
if ( $duplicate ) {
- print "XXX suggest duplicate $num $key SKIP duplicate ",dump($use), $/;
+ print "XXX $limit_sim suggest duplicate $num $key SKIP duplicate ",dump($use), $/;
$stat->{suggest}->{duplicate}++;
} else {
push @candidates, { key => $key, s => $s };
}
} else {
- print "XXX candidates $key missing\n";
+ print "XXX $limit_sim candidates $key missing\n";
}
}
}
if ( $#u_v < 3 ) { # single, double
my ( $num, $key_id ) = split(/-/,$id,2);
- my @candidates = candidates $num => $key_id, 0.7; # XXX 0.9 too high, 0.8 better, 0.7 too lax
+ my @candidates = candidates $num => $key_id; #, 0.7; # XXX 0.9 too high, 0.8 better, 0.7 too lax
if ( @candidates ) {
print "MERGE $num $key_id ", dump( @candidates ), ' val=', dump( \@val ), $/;
my @keys = map { $_->{key} } @candidates;