From: Dobrica Pavlinusic
Date: Sun, 26 Nov 2023 17:35:34 +0000 (+0100)
Subject: dump more info about duplicate input rows
X-Git-Url: http://git.rot13.org/?a=commitdiff_plain;h=5f4f6cc3233393e9bcf07feaf05430a1a5b978b6;p=csv-join-similarity

dump more info about duplicate input rows
---

diff --git a/upari.pl b/upari.pl
index 0a5ba15..4b89c35 100755
--- a/upari.pl
+++ b/upari.pl
@@ -129,6 +129,20 @@ foreach my $val ( 1 .. 4 ) {
 
 		push @{ $stat->{_}->{ $id } }, $val;
 
+		if ( exists $data->{$key_id}->{$num}->{$val} ) { # a row is already stored under this key: report it before it is overwritten below
+			$stat->{file}->{$file}->{duplicate_keyid_num}->{$val}++; # count duplicates per input file and per $val
+			my $old = $data->{$key_id}->{$num}->{$val}; # previously stored row
+			print "DUPLICATE $key_id $num $val old elements=", $#$old, " new ", $#$row, $/; # $#... is the last index of each row
+			my $diff; # stays undef when no compared column differs
+			foreach my $col ( 0 .. $#$row ) { # only the columns present in the new row are compared
+				if ( $old->[$col] ne $row->[$col] ) {
+					$diff->[$col] = [ $old->[$col], $row->[$col] ]; # [ old, new ] stored at the differing column index
+				}
+			}
+			print "diff = ", dump($diff), $/ if $diff; # end the record with $/ like the other debug prints
+			#print "old=", dump( $data->{$key_id}->{$num}->{$val} ), $/;
+			#print "new=", dump( $row ), $/;
+		}
 		$data->{$key_id}->{$num}->{$val} = $row;
 	}
 	close $fh;