From 137061be78bd97d8c775906fe372e8f136880c0f Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Tue, 14 Nov 2023 10:00:04 +0100 Subject: [PATCH] similarity, forward progress only --- upari.pl | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/upari.pl b/upari.pl index cf47bcd..4b09a69 100755 --- a/upari.pl +++ b/upari.pl @@ -1,5 +1,7 @@ #!/usr/bin/perl +# TODO: skola <-> razred swap? + use warnings; use strict; use autodie; @@ -29,9 +31,7 @@ foreach my $nr ( 1 .. 4 ) { $row->[2], ); - my $num = $row->[0]; #$num =~ s/\D//g; - - #$stat->{num}->{len}->{ length($num) }++; + my $num = uc $row->[0]; my $key_id = $row->[1] . '-' . $row->[2]; # . ',' . int( $row->[4] ); @@ -51,28 +51,19 @@ foreach my $nr ( 1 .. 4 ) { } if ( $#candidates == 0 ) { $stat->{similarity}->{$nr}++; - warn "# similarity $num = ",dump( @candidates ),$/; - $id = $candidates[0]; + $id = join('-',uc $candidates[0]->{key}, $row->[1], $row->[2]); + warn "# similarity $nr $num -> $id\n"; + push @{ $keys->{ $key_id }->{ $id } }, uc $id; } elsif ( $#candidates > 1 ) { warn "# similarity IGNORED $num = ",dump( @candidates ),$/; + $stat->{similarity_multiple}->{$nr}++; + } else { + $stat->{similarity_none}->{$nr}++; + push @{ $keys->{ $key_id }->{ $num } }, uc $row->[0]; } - if ( exists $keys->{ $key_id }->{ $num } ) { - my @found = @{ $keys->{ $key_id }->{ $num } }; - if ( $#found == 0 ) { - $id = $found[0]; - print "MAPPED $nr $row->[0] to $id\n"; - $stat->{mapped}->{$nr}++; - } elsif ( $#found > 0 ) { - print "ALIAS $nr $num into ",dump( @found ),$/; - $stat->{alias}->{$nr}++; - } - } else { - $stat->{error}->{new_num}->{$nr}++; - #print "ERROR $id new in $file\n"; - } } $stat->{exists}->{$nr}++ if exists $stat->{_}->{ $id }; @@ -87,9 +78,7 @@ print "# total = ",scalar keys %{ $stat->{_} }, $/; foreach my $id ( keys %{ $stat->{_} } ) { $stat->{count}->{ scalar @{ $stat->{_}->{$id} } }++; $stat->{count_total}++; - foreach my $val ( @{ $stat->{_}->{$id} } ) { - #$stat->{val}->{$val}++ if $val > 1; - } + $stat->{count_val}->{ join(' ', @{ $stat->{_}->{$id} }) }++; } print "# stat = ",dump( $stat ); #print "# keys = ",dump( $keys ); -- 2.20.1