try all limits from 0.9 in descending order

author Dobrica Pavlinusic <dpavlin@rot13.org>

Thu, 16 Nov 2023 10:39:20 +0000 (11:39 +0100)

committer Dobrica Pavlinusic <dpavlin@rot13.org>

Thu, 16 Nov 2023 10:39:20 +0000 (11:39 +0100)
author Dobrica Pavlinusic <dpavlin@rot13.org>
Thu, 16 Nov 2023 10:39:20 +0000 (11:39 +0100)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Thu, 16 Nov 2023 10:39:20 +0000 (11:39 +0100)
diff --git a/upari.pl b/upari.pl

index 0ff27a0..df05e11 100755 (executable)
--- a/upari.pl
+++ b/upari.pl
@@ -106,7 +106,9 @@ foreach my $nr ( 1 .. 4 ) {
  
  
  
-
+foreach my $l ( 0 .. 9 ) { # 0.9 - 0.7 -- 0.6 is too lax
+       my $limit = 0.9 - "0.$l";
+warn "XXX limit $limit\n";
  
  print "# total = ",scalar keys %{ $stat->{_} }, $/;
  foreach my $id ( sort keys %{ $stat->{_} } ) {
@@ -130,7 +132,7 @@ foreach my $id ( sort keys %{ $stat->{_} } ) {
  
         if ( $#u_v < 3 ) { # single, double
                 my ( $num, $key_id ) = split(/-/,$id,2);
-               my @candidates = candidates $num => $key_id; #, 0.7; # XXX 0.9 too high, 0.8 better, 0.7 too lax
+               my @candidates = candidates $num => $key_id, $limit; #, 0.7; # XXX 0.9 too high, 0.8 better, 0.7 too lax
                 if ( @candidates ) {
                         print "MERGE $num $key_id ", dump( @candidates ), ' val=', dump( \@val ), $/;
                         my @keys = map { $_->{key} } @candidates;
@@ -165,7 +167,7 @@ foreach my $id ( sort keys %{ $stat->{_} } ) {
         }
  }
  
-print "# total after merge = ",scalar keys %{ $stat->{_} }, $/;
+print "# total after merge $limit = ",scalar keys %{ $stat->{_} }, $/;
  
  foreach my $id ( sort keys %{ $stat->{_} } ) {
         my @val; # = @{ $stat->{_}->{$id} };
@@ -176,18 +178,20 @@ foreach my $id ( sort keys %{ $stat->{_} } ) {
                 print "SKIP[$id]";
                 next;
         }
-       $stat->{B_count}->{ scalar @val }++;
-       $stat->{B_count_total}++;
+       $stat->{"B${limit}_count"}->{ scalar @val }++;
+       $stat->{"B${limit}_count_total"}++;
  
-       #$stat->{B_count_val_dup}->{ join(' ', @val) }++; # with duplicates
+       #$stat->{"B${limit}_count_val_dup"}->{ join(' ', @val) }++; # with duplicates
  
         my $u;
         $u->{$_}++ foreach @val;
         my @u_v = sort keys %$u;
-       $stat->{B_count_val}->{ join(' ', @u_v ) }++; # without duplicates
+       $stat->{"B${limit}_count_val"}->{ join(' ', @u_v ) }++; # without duplicates
  }
  
  
+} # for $limit
+
  print "# stat = ",dump( $stat );
  #print "# keys = ",dump( $keys );
author	Dobrica Pavlinusic <dpavlin@rot13.org>
	Thu, 16 Nov 2023 10:39:20 +0000 (11:39 +0100)
committer	Dobrica Pavlinusic <dpavlin@rot13.org>
	Thu, 16 Nov 2023 10:39:20 +0000 (11:39 +0100)