From 286a7880e23ecc453d524ac176b49b3193eaf2b0 Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Wed, 22 Nov 2023 10:21:19 +0100 Subject: [PATCH] cleanup output, maintain merged $data --- upari.pl | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/upari.pl b/upari.pl index 1755230..9a5c3c4 100755 --- a/upari.pl +++ b/upari.pl @@ -82,6 +82,8 @@ if ( -e $keys_file ) { print "LOAD $keys_file", scalar keys %$keys, "\n"; } +our $data; + foreach my $val ( 1 .. 4 ) { my $file = "$val.csv"; warn "# $file\n"; @@ -89,10 +91,18 @@ foreach my $val ( 1 .. 4 ) { my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 }); open my $fh, "<:encoding(utf8)", $file or die "$file: $!"; while (my $row = $csv->getline ($fh)) { - $stat->{lines}->{$val}++; - $stat->{ $file }->{lines}++; - $row->[1] =~ s/\D+//g && $stat->{corrupt}->{skola}->{$val}++ && print 'c1'; - $row->[2] =~ s/\D+//g && $stat->{corrupt}->{razred}->{$val}++ && print 'c2'; + $stat->{file}->{$file}->{lines}++; + $stat->{file}->{$file}->{columns}->{ $#$row }++; + + if ( ! exists $stat->{file}->{$file}->{header} ) { + $stat->{file}->{$file}->{header} = $row; + next; + } + + my $c_s = $row->[1]; + my $c_r = $row->[2]; + $row->[1] =~ s/\D+//g && $stat->{file}->{$file}->{corrupt_s}->{$c_s}++ && print 'c1'; + $row->[2] =~ s/\D+//g && $stat->{file}->{$file}->{corrupt_r}->{$c_r}++ && print 'c2'; my $id = join('-', uc $row->[0], $row->[1], @@ -116,18 +126,18 @@ foreach my $val ( 1 .. 4 ) { push @{ $stat->{_}->{ $id } }, $val; - $data->{$key_id}->{$val}-> + $data->{$key_id}->{$val}->{$id} = $row; } close $fh; } - +my $merge_ids; my $first = 1; # 0.9 - 0.7 -- 0.6 is too lax -foreach my $limit ( 0.7, 0.6 ) { +foreach my $limit ( 0.7 ) { #, 0.6 ) { warn "XXX limit $limit\n"; print "# total = ",scalar keys %{ $stat->{_} }, $/; @@ -180,6 +190,16 @@ foreach my $id ( sort keys %{ $stat->{_} } ) { push @{ $stat->{_}->{ $m_id } }, $val; print "++ $m_id $val "; $stat->{merge_val}->{$val}++; + + my ( $id_s, $s, $r ) = split('-', $id); + my $key_s = "$s-$r"; + + die "ERROR merge: $val $id $m_id exists",dump( $merge_ids->{$val}->{$key_s}->{$id_s} ) if exists $merge_ids->{$val}->{$key_s}->{$id_s}; + my $m_id_s = (split('-',$m_id,3))[0]; + $merge_ids->{$val}->{$key_s}->{$id_s} = $m_id_s; + + $data->{$key_s}->{$val}->{$id_s} = delete $data->{$key_s}->{$val}->{$m_id_s}; + } print "result val=",dump( $stat->{_}->{ $m_id } ), " result_elements=", scalar @{ $stat->{_}->{ $m_id } }, $/; } @@ -217,6 +237,12 @@ $first++; print "# stat = ",dump( $stat ); #print "# keys = ",dump( $keys ); - store $keys, $keys_file; +my $merge_file = 'merge.storable'; +store $merge_ids, $merge_file; + +__END__ +open(my $out_fh, '>', 'merged.csv'); +foreach my $val ( 1 .. 4 ) { + foreach my $key_s -- 2.20.1