similarity, forward progress only
author Dobrica Pavlinusic <dpavlin@rot13.org>
Tue, 14 Nov 2023 09:00:04 +0000 (10:00 +0100)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Tue, 14 Nov 2023 09:00:04 +0000 (10:00 +0100)
upari.pl

index cf47bcd..4b09a69 100755 (executable)
--- a/upari.pl
+++ b/upari.pl
@@ -1,5 +1,7 @@
 #!/usr/bin/perl
 
+# TODO: skola <-> razred swap?
+
 use warnings;
 use strict;
 use autodie;
@@ -29,9 +31,7 @@ foreach my $nr ( 1 .. 4 ) {
                        $row->[2],
                );
 
-               my $num = $row->[0]; #$num =~ s/\D//g;
-
-               #$stat->{num}->{len}->{ length($num) }++;
+               my $num = uc $row->[0];
 
                my $key_id = $row->[1] . '-' . $row->[2]; # . ',' . int( $row->[4] );
 
@@ -51,28 +51,19 @@ foreach my $nr ( 1 .. 4 ) {
                        }
                        if ( $#candidates == 0 ) {
                                $stat->{similarity}->{$nr}++;
-                               warn "# similarity $num = ",dump( @candidates ),$/;
-                               $id = $candidates[0];
+                               $id = join('-',uc $candidates[0]->{key}, $row->[1], $row->[2]);
+                               warn "# similarity $nr $num -> $id\n";
+                               push @{ $keys->{ $key_id }->{ $id } }, uc $id;
                        } elsif ( $#candidates > 1 ) {
                                warn "# similarity IGNORED $num = ",dump( @candidates ),$/;
+                               $stat->{similarity_multiple}->{$nr}++;
+                       } else {
+                               $stat->{similarity_none}->{$nr}++;
+                               push @{ $keys->{ $key_id }->{ $num } }, uc $row->[0];
                        }
 
 
 
-                       if ( exists $keys->{ $key_id }->{ $num } ) {
-                               my @found = @{ $keys->{ $key_id }->{ $num } };
-                               if ( $#found == 0 ) {
-                                       $id = $found[0];
-                                       print "MAPPED $nr $row->[0] to $id\n";
-                                       $stat->{mapped}->{$nr}++;
-                               } elsif ( $#found > 0 ) {
-                                       print "ALIAS $nr $num into ",dump( @found ),$/;
-                                       $stat->{alias}->{$nr}++;
-                               }
-                       } else {
-                               $stat->{error}->{new_num}->{$nr}++;
-                               #print "ERROR $id new in $file\n";
-                       }
                }
 
                $stat->{exists}->{$nr}++ if exists $stat->{_}->{ $id };
@@ -87,9 +78,7 @@ print "# total = ",scalar keys %{ $stat->{_} }, $/;
 foreach my $id ( keys %{ $stat->{_} } ) {
        $stat->{count}->{ scalar @{ $stat->{_}->{$id} } }++;
        $stat->{count_total}++;
-       foreach my $val ( @{  $stat->{_}->{$id} } ) {
-               #$stat->{val}->{$val}++ if $val > 1;
-       }
+       $stat->{count_val}->{ join(' ', @{ $stat->{_}->{$id} }) }++;
 }
 print "# stat = ",dump( $stat );
 #print "# keys = ",dump( $keys );