5 use Data::Dump qw(dump);
7 use POSIX qw(strftime);
9 #my $filename = 'Intervali_Openface_novi_rad_Boris_export.txt';
10 my $filename = 'originals_2023-07-09/Intervali_Openface_novi_rad_Boris_export.txt'; # tab-separated input export
12 my $debug = $ENV{DEBUG} || 0; # DEBUG environment variable turns on extra warn() tracing
14 # timestamp used in the output file names; the format contains no ':' so the names stay scp-friendly
15 my $today = strftime("%Y-%m-%d_%H%M%S", localtime(time()));
16 warn "# today $today";
18 open(my $err_fh, '>', "out.$today.log") or die "can't open out.$today.log: $!"; # fail loudly instead of carrying on with an unopened log handle
19 local $SIG{__WARN__} = sub {
29 $out->{$_} = $i++ foreach @c;
30 #warn "## col2nr ", dump( $out );
34 open(my $fh, '<', $filename) or die "can't open $filename: $!"; # stop here rather than read from an unopened handle
36 my $h = <$fh>; die "no header line in $filename" unless defined $h; chomp $h; # first line holds the column names; an empty file is an error
37 my @cols = split(/\t/, $h); # header is tab-separated
38 my $col2nr = col2nr @cols; # lookup of column name -> column index
48 my @glob = glob 'originals_2023-07-07/*';
51 my $un = lc( unac_string('utf-8', $_ ) );
52 $map_un_back->{ $un } = $_;
57 my $id_lc = lc( unac_string('utf-8', $id ) );
59 my @found = grep { $pos++; m/$id_lc/ } @glob_un;
62 warn "XXX patt=$id pos=$pos found=",dump(@found);
63 return $map_un_back->{$found[0]};
66 warn "ERROR: pattern $id found ", scalar(@found), " results, ignoring"; # report the real match count; $#found is the last index (count - 1, i.e. -1 when nothing matched)
75 open(my $skipped_input, '>',"out.$today.skip") or die "can't open out.$today.skip: $!"; # input rows that get skipped are copied here
76 open(my $out_fh, '>', "out.$today.csv") or die "can't open out.$today.csv: $!"; # resulting CSV output
79 print $out_fh join(",", map { m/\s/ ? qq{"$_"} : $_ } @a),"\n";
86 next if m/^\s*$/; # FIXME: corrupt data -- empty or whitespace-only lines are skipped
87 my @v = split(/\t/, $_); # fields of the current row are tab-separated
88 warn "#[ $_ ]\n" if $debug;
91 if ( ! defined $id ) {
92 warn "ERROR: ID col 0 null [$_]";
96 if ( ! $last_id || $last_id ne $v[0] ) {
97 my $patt = 'originals_2023-07-07/' . $v[0] . '*.csv'; # glob pattern for this id, also quoted in the warnings
99 $file2 ||= ( glob( $patt ) )[0]; # list slice forces list context: scalar-context glob is a stateful iterator that can return leftovers (or undef) from the previous pattern
100 $file2 ||= ( glob( 'originals_2023-07-07/' . unac_string('utf-8', $v[0] ) . '*.csv' ) )[0]; # same lookup with unac_string() applied to the id; first match wins
101 $file2 ||= glob_id $v[0]; # fall back to the glob_id lookup
102 # try to find file with same numbers
105 $file2 ||= glob_id $id_nrs;
107 if ( ! defined $file2 ) {
108 warn "SKIP ERROR: $patt glob didn't find anything for v[0]=$v[0] [$_]";
109 print $skipped_input "$_\n";
114 warn "ERROR: $patt glob file $file2 not readable: $! SKIPPING [$_]";
115 print $skipped_input "$_\n";
119 warn "# file2 $file2 [$v[0]]";
120 open($fh2, '<', $file2) or die "can't open $file2: $!"; # without the check, later reads from $fh2 would silently return nothing
127 my $i = $col2nr->{$name};
128 die "can't find $name" unless defined $i;
130 die "can't find $i : $name in ",dump( \@v ) unless defined $v;
131 warn "## col_v $name -> $i -> $v",dump( \@v ) if $debug > 1;
135 #my $start = $v[ $col2nr->{'Start (s)'} ];
136 #my $stop = $v[ $col2nr->{'Stop (s)'} ];
138 #my $start = col_v 'Start (s)';
139 #my $stop = col_v 'Stop (s)';
143 warn "# start: $start - stop: $stop\n" if $debug;
150 warn "## fh2 [ $_ ]\n" if $debug;
151 #my @v2 = map { s/^\s+//; $_ } split(/\t/,$_);
152 my @v2 = map { s/^\s+//; $_ } split(/[;,]\s*/,$_);
157 warn "# h2nr ",dump( $h2nr ) if $debug;
158 if ( ! @out_header ) {
159 push @out_header, @cols;
160 push @out_header, map { $h2[$_] }
161 ( $h2nr->{'AU01_r'} .. $h2nr->{'AU45_r'} );
162 push @out_header, map { $h2[$_] }
163 ( $h2nr->{'AU01_c'} .. $h2nr->{'AU45_c'} );
164 warn "# out_header ",dump( \@out_header );
172 my $i = $h2nr->{$name};
173 die "can't find $name in ",dump( $h2nr ) unless defined $i;
175 die "can't find $i : $name in ",dump( \@v2 ) unless defined $v;
176 return "### col_v2 $name $i = $v";
181 #my $timestamp = $v2[ $h2nr->{'timestamp'} ] || die;
182 #my $timestamp = col_v2 'timestamp';
183 my $timestamp = $v2[2];
185 #$timestamp /= 1000 if $timestamp =~ m/^\d+$/; # FIXME fix corrupted input data
186 #warn "XXX filter $start - $stop from ",dump( $timestamp ); #$h2nr, \@v2 );
188 # timestamps such as "1.324.400" carry an extra dot and trigger: Argument "1.324.400" isn't numeric
189 $timestamp =~ s/^(\d+)\.(\d\d\d)\.(\d\d\d)$/$1$2.$3/;
191 if ( $timestamp !~ m/^\d+(\.\d+)$/ ) {
192 warn "timestamp [$timestamp] from $file2 not numeric [$_]";
195 if ( $timestamp < $start ) {
196 #warn "## $start > @v2";
197 } elsif ( $timestamp < $stop ) {
199 foreach my $n ( $h2nr->{'AU01_r'} .. $h2nr->{'AU45_c'} ) {
201 #warn "sum $n $sum[$n] $v2[$n]\n";
204 #warn "XXX $count sum $h2nr->{'AU01_r'} .. $h2nr->{'AU45_c'} = ",dump( \@sum );
206 warn "# seek $tell2" if $debug;
207 seek $fh2, $tell2, 0;
209 die "count is 0" if $count == 0; # guards the division below
211 #warn "XXX $file2 count=$count XXX ", $h2nr->{'AU01_r'},' - ', $h2nr->{'AU45_r'} , " YYY ", $h2nr->{'AU01_c'}, ' - ', $h2nr->{'AU45_c'}, ' sum=', dump( \@sum );
213 push @add_cols, map { $sum[$_] / $count } ( $h2nr->{'AU01_r'} .. $h2nr->{'AU45_r'} ); # AU01_r .. AU45_r columns: accumulated sum divided by $count
214 push @add_cols, map { $sum[$_] } ( $h2nr->{'AU01_c'} .. $h2nr->{'AU45_c'} ); # AU01_c .. AU45_c columns: the accumulated sum itself, not divided
215 warn "# add_cols = ",dump( \@add_cols ) if $debug;
216 out_csv @v, @add_cols; # output row = fields of the input row followed by the added columns
221 warn "# last" if $debug;
232 # according to the file name from $v[0]