check if old duplicate is longer and keep it
author Dobrica Pavlinusic <dpavlin@rot13.org>
Fri, 8 Dec 2023 10:42:39 +0000 (11:42 +0100)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Fri, 8 Dec 2023 10:46:23 +0000 (11:46 +0100)
not found in this dataset

upari.pl

index 60eb772..1e7e1bd 100755 (executable)
--- a/upari.pl
+++ b/upari.pl
@@ -135,12 +135,23 @@ foreach my $val ( 1 .. 4 ) {
 
                if ( exists $data->{$key_id}->{$num}->{$val} ) {
                        $stat->{file}->{$file}->{duplicate_keyid_num}->{$val}++;
-                       print "DUPLICATE $file $key_id $num $val\n";
-                       
+
                        my $old = $data->{$key_id}->{$num}->{$val};
                        print $duplicate_fh join(',', $file, @$old), "\n";
                        print $duplicate_fh join(',', $file, @$row), "\n";
                        print $duplicate_fh "\n";
+
+                       # select row by longer length;
+                       my $l_old = length dump $old;
+                       my $l_row = length dump $row;
+                       
+                       print "DUPLICATE $file $key_id $num $val len: $l_old < $l_row\n";
+                       
+                       if ( $l_old > $l_row ) {
+                               print "DUPLICATE KEEP old longer $l_old row (new only $l_row)\n";
+                               next;
+                       }
+
 =for diff
                        my $diff;
                        foreach ( 0 .. $#$row ) {