first cut
author Dobrica Pavlinusic <dpavlin@rot13.org>
Mon, 13 Nov 2023 21:48:21 +0000 (22:48 +0100)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Mon, 13 Nov 2023 21:48:21 +0000 (22:48 +0100)
upari.pl [new file with mode: 0755]

diff --git a/upari.pl b/upari.pl
new file mode 100755 (executable)
index 0000000..fc5e0ba
--- /dev/null
+++ b/upari.pl
@@ -0,0 +1,100 @@
+#!/usr/bin/perl
+
+# Pair up records from the numbered CSV files (1.csv .. 4.csv) and print
+# statistics about which rows of the later files match rows of 1.csv.
+#
+# Every row gets an exact id built from columns 0-2 (column 0 upper-cased).
+# Rows of 1.csv are additionally indexed by a looser key (columns 1, 2 and
+# int(column 4)) plus the digits of column 0, so that a row of a later file
+# whose exact id is unknown can still be mapped to a row of 1.csv.
+
+use warnings;
+use strict;
+use autodie;
+
+# apt install libtext-csv-perl
+use Text::CSV;
+use Data::Dump qw(dump);
+
+# rows are decoded from UTF-8 on input, so encode them again when printing
+binmode STDOUT, ':encoding(UTF-8)';
+
+my @files = qw( 1.csv 2.csv 3.csv 4.csv );
+
+# $stat->{_}->{ $id } = [ number of file for every row with that $id ], all
+# other keys of $stat are counters which are dumped at the end
+my $stat;
+# $keys->{ $key_id }->{ $num } = [ upper-cased column 0 of rows in 1.csv ]
+my $keys;
+
+foreach my $nr ( 1 .. scalar @files ) {
+       my $file = $files[ $nr - 1 ];
+       warn "# $file\n";
+
+       my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 });
+       # no "or die" needed, autodie reports failure to open file
+       open my $fh, "<:encoding(utf8)", $file;
+       while (my $row = $csv->getline ($fh)) {
+               $stat->{lines}->{$nr}++;
+               $stat->{ $file }->{lines}++;
+
+               # exact identity of row
+               my $id = join('-',
+                       uc $row->[0],
+                       $row->[1],
+                       $row->[2],
+               );
+
+               # just digits from column 0
+               my $num = $row->[0]; $num =~ s/\D//g;
+
+               $stat->{num}->{len}->{ length($num) }++;
+
+               # looser key used to find candidates from 1.csv
+               my $key_id = $row->[1] . '-' . $row->[2] . 's' . int( $row->[4] );
+
+               if ( $nr == 1 ) {
+                       # first file is reference to which later files are mapped
+                       push @{ $keys->{ $key_id }->{ $num } }, uc $row->[0];
+               }
+               # row from later file with digits in column 0 whose exact id wasn't
+               # seen yet (note: $num of "" or "0" is false and skips this block)
+               if ( $num && $nr > 1 && ! exists $stat->{_}->{ $id } ) {
+                       $stat->{error}->{new_exact}->{$nr}++;
+                       if ( exists $keys->{ $key_id }->{ $num } ) {
+                               my @found = @{ $keys->{ $key_id }->{ $num } };
+                               if ( $#found == 0 ) {
+                                       print "MAPPED $nr $row->[0] to $found[0]\n";
+                                       # @found holds just column 0, so rebuild full id
+                                       # in format used for keys of $stat->{_}, else
+                                       # mapped row never meets its row from 1.csv
+                                       $id = join('-', $found[0], $row->[1], $row->[2]);
+                                       $stat->{mapped}->{$nr}++;
+                               } elsif ( $#found > 0 ) {
+                                       # more than one candidate, report all of them
+                                       print "ALIAS $nr $num into ",dump( @found ),$/;
+                                       $stat->{alias}->{$nr}++;
+                               }
+                       } else {
+                               $stat->{error}->{new_num}->{$nr}++;
+                               #print "ERROR $id new in $file\n";
+                       }
+               }
+
+               $stat->{exists}->{$nr}++ if exists $stat->{_}->{ $id };
+
+               push @{ $stat->{_}->{ $id } }, $nr;
+       }
+       close $fh;
+
+}
+
+print "# total = ",scalar keys %{ $stat->{_} }, $/;
+# histogram: how many ids were seen in one row, two rows, ...
+foreach my $id ( keys %{ $stat->{_} } ) {
+       $stat->{count}->{ scalar @{ $stat->{_}->{$id} } }++;
+       $stat->{count_total}++;
+}
+print "# stat = ",dump( $stat ), $/;
+#print "# keys = ",dump( $keys );
+