print "LOAD $keys_file", scalar keys %$keys, "\n";
}
+our $data;
+
foreach my $val ( 1 .. 4 ) {
my $file = "$val.csv";
warn "# $file\n";
my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 });
open my $fh, "<:encoding(utf8)", $file or die "$file: $!";
while (my $row = $csv->getline ($fh)) {
- $stat->{lines}->{$val}++;
- $stat->{ $file }->{lines}++;
- $row->[1] =~ s/\D+//g && $stat->{corrupt}->{skola}->{$val}++ && print 'c1';
- $row->[2] =~ s/\D+//g && $stat->{corrupt}->{razred}->{$val}++ && print 'c2';
+ $stat->{file}->{$file}->{lines}++;
+ $stat->{file}->{$file}->{columns}->{ $#$row }++;
+
+ if ( ! exists $stat->{file}->{$file}->{header} ) {
+ $stat->{file}->{$file}->{header} = $row;
+ next;
+ }
+
+ # Remember the raw column values before they are cleaned up below, so the
+ # per-file corruption counters are keyed by what was actually in the file.
+ my $c_s = $row->[1];
+ my $c_r = $row->[2];
+ # Strip every non-digit from columns 1 and 2. When something was stripped,
+ # bump the counter for the original value and emit a short marker.
+ # Pre-increment is required here: post-increment returns the previous
+ # count (undef/0 on first sight), which short-circuits the && chain and
+ # silently skips the marker for the first occurrence of each value.
+ $row->[1] =~ s/\D+//g && ++$stat->{file}->{$file}->{corrupt_s}->{$c_s} && print 'c1';
+ $row->[2] =~ s/\D+//g && ++$stat->{file}->{$file}->{corrupt_r}->{$c_r} && print 'c2';
my $id = join('-',
uc $row->[0],
$row->[1],
push @{ $stat->{_}->{ $id } }, $val;
- $data->{$key_id}->{$val}->
+ $data->{$key_id}->{$val}->{$id} = $row;
}
close $fh;
}
-
+# Map of merged ids, filled in as $merge_ids->{$val}->{$key_s}->{$id_s} = $m_id_s
+# and written out with Storable's store() at the end of the script.
+# Start from an empty hash ref rather than undef: store() croaks with
+# "not a reference" when handed a non-reference, which is what would happen
+# if the merge loop never assigns into this structure.
+my $merge_ids = {};
my $first = 1;
# 0.9 - 0.7 -- 0.6 is too lax
-foreach my $limit ( 0.7, 0.6 ) {
+foreach my $limit ( 0.7 ) { #, 0.6 ) {
warn "XXX limit $limit\n";
print "# total = ",scalar keys %{ $stat->{_} }, $/;
push @{ $stat->{_}->{ $m_id } }, $val;
print "++ $m_id $val ";
$stat->{merge_val}->{$val}++;
+
+ my ( $id_s, $s, $r ) = split('-', $id);
+ my $key_s = "$s-$r";
+
+ die "ERROR merge: $val $id $m_id exists",dump( $merge_ids->{$val}->{$key_s}->{$id_s} ) if exists $merge_ids->{$val}->{$key_s}->{$id_s};
+ my $m_id_s = (split('-',$m_id,3))[0];
+ $merge_ids->{$val}->{$key_s}->{$id_s} = $m_id_s;
+
+ $data->{$key_s}->{$val}->{$id_s} = delete $data->{$key_s}->{$val}->{$m_id_s};
+
}
print "result val=",dump( $stat->{_}->{ $m_id } ), " result_elements=", scalar @{ $stat->{_}->{ $m_id } }, $/;
}
print "# stat = ",dump( $stat );
#print "# keys = ",dump( $keys );
-
store $keys, $keys_file;
+my $merge_file = 'merge.storable';
+store $merge_ids, $merge_file;
+
+__END__
+open(my $out_fh, '>', 'merged.csv');
+foreach my $val ( 1 .. 4 ) {
+ foreach my $key_s