use strict;
use autodie;
use Data::Dump qw(dump);
+use Text::Unaccent;
+use POSIX qw(strftime);
+
+#my $filename = 'Intervali_Openface_novi_rad_Boris_export.txt';
+my $filename = 'originals_2023-07-09/Intervali_Openface_novi_rad_Boris_export.txt';
+
+my $debug = $ENV{DEBUG} || 0;
+
+# better time format for scp (don't have :)
+my $today = strftime("%Y-%m-%d_%H%M%S", localtime(time()));
+warn "# today $today";
+
+open(my $err_fh, '>', "out.$today.log");
+local $SIG{__WARN__} = sub {
+ print STDERR @_;
+ print $err_fh @_;
+};
-my $filename = 'Intervali_Openface_novi_rad_Boris_export.txt';
sub col2nr {
my @c = @_;
my $i = 0;
my $out;
$out->{$_} = $i++ foreach @c;
- warn "# col2nr ", dump( $out );
+ #warn "## col2nr ", dump( $out );
return $out;
}
my $h2nr;
my $tell2;
+my @glob = glob 'originals_2023-07-07/*';
+my $map_un_back;
+my @glob_un = map {
+ my $un = lc( unac_string('utf-8', $_ ) );
+ $map_un_back->{ $un } = $_;
+ $un;
+} @glob;
+sub glob_id {
+ my $id = shift;
+ my $id_lc = lc( unac_string('utf-8', $id ) );
+ my $pos = -1;
+ my @found = grep { $pos++; m/$id_lc/ } @glob_un;
+ if ( @found ) {
+ if ( $#found == 0 ) {
+ warn "XXX patt=$id pos=$pos found=",dump(@found);
+ return $map_un_back->{$found[0]};
+ #return $glob[$pos];
+ } else {
+ warn "ERROR: pattern $id found $#found results, ignoring";
+ return;
+ }
+ } else {
+ warn "not found $id";
+ return;
+ }
+}
+
+open(my $skipped_input, '>',"out.$today.skip");
+open(my $out_fh, '>', "out.$today.csv");
+sub out_csv {
+ my @a = @_;
+ print $out_fh join(",", map { m/\s/ ? qq{"$_"} : $_ } @a),"\n";
+}
+
+my @out_header;
while(<$fh>) {
chomp;
+ next if m/^\s*$/; # FIXME corrput data
my @v = split(/\t/, $_);
- #warn "#[ $_ ]\n";
+ warn "#[ $_ ]\n" if $debug;
+
+ my $id = $v[0];
+ if ( ! defined $id ) {
+ warn "ERROR: ID col 0 null [$_]";
+ next;
+ }
if ( ! $last_id || $last_id ne $v[0] ) {
- $file2 = glob $v[0] . '*.txt';
+ my $patt = 'originals_2023-07-07/' . $v[0] . '*.csv';
+ $file2 = glob $patt;
+ $file2 ||= glob 'originals_2023-07-07/' . $v[0] . '*.csv';
+ $file2 ||= glob 'originals_2023-07-07/' . unac_string('utf-8', $v[0] ) . '*.csv';
+ $file2 ||= glob_id $v[0];
+ # try to find file with same numbers
+ my $id_nrs = $v[0];
+ $id_nrs =~ s/\D//g;
+ $file2 ||= glob_id $id_nrs;
+
+ if ( ! defined $file2 ) {
+ warn "SKIP ERROR: $patt glob didn't find anything for v[0]=$v[0] [$_]";
+ print $skipped_input "$_\n";
+ next;
+ }
- next unless $file2; # FIXME
+ if ( ! -r $file2 ) {
+ warn "ERROR: $patt glob file $file2 not readable: $! SKIPPING [$_]";
+ print $skipped_input "$_\n";
+ next;
+ }
warn "# file2 $file2 [$v[0]]";
open($fh2, '<', $file2);
$last_id = $v[0];
+ @h2 = ();
}
sub col_v {
die "can't find $name" unless defined $i;
my $v = $v[$i];
die "can't find $i : $name in ",dump( \@v ) unless defined $v;
+ warn "## col_v $name -> $i -> $v",dump( \@v ) if $debug > 1;
return $v;
}
#my $start = $v[ $col2nr->{'Start (s)'} ];
#my $stop = $v[ $col2nr->{'Stop (s)'} ];
- my $start = col_v 'Start (s)';
- my $stop = col_v 'Stop (s)';
+ #my $start = col_v 'Start (s)';
+ #my $stop = col_v 'Stop (s)';
+
+ my $start = $v[10];
+ my $stop = $v[11];
+ warn "# start: $start - stop: $stop\n" if $debug;
my @sum;
my $count = 0;
while(<$fh2>) {
- chomp;
- warn "##[ $_ ]\n";
- my @v2 = map { s/^\s+//; $_ } split(/\t/,$_);
+ chomp; s/\r+$//;
+ warn "## fh2 [ $_ ]\n" if $debug;
+ #my @v2 = map { s/^\s+//; $_ } split(/\t/,$_);
+ my @v2 = map { s/^\s+//; $_ } split(/[;,]\s*/,$_);
if ( ! @h2 ) {
@h2 = @v2;
$h2nr = col2nr @h2;
$tell2 = tell($fh2);
- warn "# h2nr ",dump( $h2nr );
+ warn "# h2nr ",dump( $h2nr ) if $debug;
+ if ( ! @out_header ) {
+ push @out_header, @cols;
+ push @out_header, map { $h2[$_] }
+ ( $h2nr->{'AU01_r'} .. $h2nr->{'AU45_r'} );
+ push @out_header, map { $h2[$_] }
+ ( $h2nr->{'AU01_c'} .. $h2nr->{'AU45_c'} );
+ warn "# out_header ",dump( \@out_header );
+ out_csv @out_header;
+ }
next;
}
#my $timestamp = $v2[ $h2nr->{'timestamp'} ] || die;
#my $timestamp = col_v2 'timestamp';
my $timestamp = $v2[2];
-warn "XXX ",dump( $timestamp ); #$h2nr, \@v2 );
+
+ #$timestamp /= 1000 if $timestamp =~ m/^\d+$/; # FIXME fix corrputed input data
+ #warn "XXX filter $start - $stop from ",dump( $timestamp ); #$h2nr, \@v2 );
+
+ # Argument "1.324.400" isn't numeric
+ $timestamp =~ s/^(\d+)\.(\d\d\d)\.(\d\d\d)$/$1$2.$3/;
+
+ if ( $timestamp !~ m/^\d+(\.\d+)$/ ) {
+ warn "timestamp [$timestamp] from $file2 not numeric [$_]";
+ }
if ( $timestamp < $start ) {
- warn "# $start > @v2";
+ #warn "## $start > @v2";
} elsif ( $timestamp < $stop ) {
$tell2 = tell($fh2);
foreach my $n ( $h2nr->{'AU01_r'} .. $h2nr->{'AU45_c'} ) {
- $sum[$n] += $v[$n];
+ $sum[$n] += $v2[$n];
+ #warn "sum $n $sum[$n] $v2[$n]\n";
}
$count++;
+ #warn "XXX $count sum $h2nr->{'AU01_r'} .. $h2nr->{'AU45_c'} = ",dump( \@sum );
} else {
- warn "# seek $tell2";
+ warn "# seek $tell2" if $debug;
seek $fh2, $tell2, 0;
- print join("\t", @v,
- map { $sum[$_] / $count } ( $h2nr->{'AU01_r'} .. $h2nr->{'AU45_r'} ),
- map { $sum[$_] } ( $h2nr->{'AU01_c'} .. $h2nr->{'AU45_c'} ),
- );
+ die "count is 0" if $count == 0;
+
+ #warn "XXX $file2 count=$count XXX ", $h2nr->{'AU01_r'},' - ', $h2nr->{'AU45_r'} , " YYY ", $h2nr->{'AU01_c'}, ' - ', $h2nr->{'AU45_c'}, ' sum=', dump( \@sum );
+ my @add_cols;
+ push @add_cols, map { $sum[$_] / $count } ( $h2nr->{'AU01_r'} .. $h2nr->{'AU45_r'} );
+ push @add_cols, map { $sum[$_] } ( $h2nr->{'AU01_c'} .. $h2nr->{'AU45_c'} );
+ warn "# add_cols = ",dump( \@add_cols ) if $debug;
+ out_csv @v, @add_cols;
+
+ @sum = ();
+ $count = 0;
+
+ warn "# last" if $debug;
+ last;
}
}
}