git.rot13.org Git - openface-maja/blob - p.pl

   1 #!/usr/bin/perl
   2 use warnings;
   3 use strict;
   4 use autodie;
   5 use Data::Dump qw(dump);
   6 use Text::Unaccent;
   7 use POSIX qw(strftime);
   8
   # Tab-separated interval export that drives the whole run.
   my $filename = 'Intervali_Openface_novi_rad_Boris_export.txt';

   # Debug level is taken from the environment and defaults to 0 (quiet).
   my $debug = $ENV{DEBUG} ? $ENV{DEBUG} : 0;

   # Start time of this run; it is embedded in the log file name below.
   my $today = strftime( '%Y-%m-%dT%H:%M:%S', localtime );
   warn "# today $today";

   # From this point on every warning is written to STDERR and to the log file.
   open( my $err_fh, '>', "out.$today.log" );
   local $SIG{__WARN__} = sub {
       my @message = @_;
       print STDERR @message;
       print {$err_fh} @message;
   };
  21
  22
  # Build a lookup from column name to its zero-based position.
  #
  # Parameters: the column names, in order.
  # Returns:    a hash reference mapping each name to its index.  When a name
  #             occurs more than once the last position wins; with no names
  #             at all the result is undef.
  sub col2nr {
      my @names = @_;
      my $index_of;
      for my $pos ( 0 .. $#names ) {
          $index_of->{ $names[$pos] } = $pos;
      }
      return $index_of;
  }
  31
  # Open the interval export and index its header columns by name.
  open( my $fh, '<', $filename );

  chomp( my $header_line = <$fh> );
  my @cols   = split /\t/, $header_line;
  my $col2nr = col2nr(@cols);

  # Path, handle and ID of the per-ID data file currently being read.
  my ( $file2, $fh2, $last_id );

  # Header fields of that data file, their name => index map, and a read
  # offset recorded with tell().
  my @h2;
  my ( $h2nr, $tell2 );

  # Every candidate data file, plus a lower-cased, accent-stripped copy of each
  # path; $map_un_back translates such a copy back to the real path.
  my @glob = glob 'originals_2023-07-07/*';
  my $map_un_back;
  my @glob_un;
  for my $path (@glob) {
      my $plain = lc( unac_string( 'utf-8', $path ) );
      $map_un_back->{$plain} = $path;
      push @glob_un, $plain;
  }
  # Find the single data file whose accent-stripped, lower-cased path contains
  # the given ID.
  #
  # Parameters: $id - text to look for.  It is normalised the same way as the
  #                   entries of @glob_un and compared literally (it is data,
  #                   not a regular expression).
  # Returns:    the original path from $map_un_back when exactly one file
  #             matches; nothing (undef in scalar context) when the ID is
  #             empty, when no file matches, or when several files match.
  sub glob_id {
      my $id = shift;

      # An empty needle is contained in every path, so it can never identify
      # a single file; report it the same way as any other miss.
      if ( !defined $id || $id eq '' ) {
          warn "not found ", ( defined $id ? $id : '' );
          return;
      }

      my $id_lc = lc( unac_string( 'utf-8', $id ) );

      # index() instead of m/$id_lc/: characters such as ( ) [ ] . + in an ID
      # must not be interpreted as regex syntax.
      my @found = grep { index( $_, $id_lc ) >= 0 } @glob_un;

      if ( @found == 1 ) {
          warn "XXX patt=$id found=", dump(@found);
          return $map_un_back->{ $found[0] };
      }

      if (@found) {
          warn "ERROR: pattern $id found ", scalar(@found), " results, ignoring";
          return;
      }

      warn "not found $id";
      return;
  }
  72
  # Input rows that cannot be processed are copied verbatim to the skip file;
  # the computed result rows go to the CSV file.
  open(my $skipped_input, '>',"out.$today.skip");
  open(my $out_fh, '>', "out.$today.csv");

  # Write one CSV record to $out_fh.
  #
  # Parameters: the field values, in output order.
  # A field is wrapped in double quotes when it contains whitespace, a comma
  # or a double quote, and embedded double quotes are doubled, so that such
  # values cannot shift or split columns for the reader of the file.
  sub out_csv {
      my @fields = @_;
      my @cells = map {
          my $cell = $_;
          if ( $cell =~ m/[\s,"]/ ) {
              $cell =~ s/"/""/g;
              $cell = qq{"$cell"};
          }
          $cell;
      } @fields;
      print {$out_fh} join( ',', @cells ), "\n";
  }
  79
  my @out_header;

  # Current row of the interval export (@v) and of the per-ID data file (@v2).
  # They are declared once at file scope so that the named helper subs below
  # always see the row being processed; a named sub that refers to a "my"
  # variable declared inside a loop body only ever sees its first instance.
  my @v;
  my @v2;

  # Return the value of the named column of the current interval row.
  # Dies when the column name is unknown or the row has no value for it.
  sub col_v {
      my $name = shift;
      my $i = $col2nr->{$name};
      die "can't find $name" unless defined $i;
      my $v = $v[$i];
      die "can't find $i : $name in ",dump( \@v ) unless defined $v;
      warn "## col_v $name -> $i -> $v",dump( \@v ) if $debug > 1;
      return $v;
  }

  # Return the value of the named column of the current data-file row.
  # Dies when the column name is unknown or the row has no value for it.
  sub col_v2 {
      my $name = shift;
      my $i = $h2nr->{$name};
      die "can't find $name in ",dump( $h2nr ) unless defined $i;
      my $v = $v2[$i];
      die "can't find $i : $name in ",dump( \@v2 ) unless defined $v;
      return $v;
  }

  # Write one result record: the interval row followed by the mean of every
  # column from AU01_r to AU45_r and the sum of every column from AU01_c to
  # AU45_c.
  #
  # Parameters: $row   - array ref, fields of the interval row
  #             $sum   - array ref, per-column sums indexed like the data file
  #             $count - number of data rows that were summed (must be > 0)
  sub _emit_interval {
      my ( $row, $sum, $count ) = @_;
      my @add_cols;
      push @add_cols, map { $sum->[$_] / $count } ( $h2nr->{'AU01_r'} .. $h2nr->{'AU45_r'} );
      push @add_cols, map { $sum->[$_] }          ( $h2nr->{'AU01_c'} .. $h2nr->{'AU45_c'} );
      warn "# add_cols = ",dump( \@add_cols ) if $debug;
      out_csv( @$row, @add_cols );
      return;
  }

  # One iteration per interval row.  Rows with the same ID are expected to be
  # adjacent: the data file is opened when the ID changes and is then read
  # forward, interval by interval.
  while ( my $line = <$fh> ) {
      chomp $line;
      next if $line =~ m/^\s*$/;       # FIXME corrupt data
      @v = split(/\t/, $line);
      warn "#[ $line ]\n" if $debug;

      my $id = $v[0];
      if ( ! defined $id ) {
          warn "ERROR: ID col 0 null [$line]";
          next;
      }

      if ( ! defined $last_id || $last_id ne $id ) {
          my $patt = 'originals_2023-07-07/' . $id . '*.csv';

          # glob() is called in list context on purpose.  In scalar context it
          # is an iterator that keeps its state between calls and returns undef
          # once the previous expansion is used up, so every other lookup
          # would fail.
          # NOTE(review): glob() splits its argument on whitespace, so an ID
          # that contains spaces is expanded as several patterns - confirm
          # that IDs never contain spaces.
          my ($candidate) = glob($patt);
          ($candidate) = glob( 'originals_2023-07-07/' . unac_string('utf-8', $id ) . '*.csv' )
              unless defined $candidate;
          $candidate ||= glob_id($id);

          # try to find file with same numbers
          my $id_nrs = $id;
          $id_nrs =~ s/\D//g;
          $candidate ||= glob_id($id_nrs) if length $id_nrs;

          if ( ! defined $candidate ) {
              warn "SKIP ERROR: $patt glob didn't find anything for v[0]=$id [$line]";
              print $skipped_input "$line\n";
              next;
          }

          if ( ! -r $candidate ) {
              warn "ERROR: $patt glob file $candidate not readable: $! SKIPPING [$line]";
              print $skipped_input "$line\n";
              next;
          }

          # Only a usable file replaces the one that is currently open.
          $file2 = $candidate;
          warn "# file2 $file2 [$id]";
          open($fh2, '<', $file2);
          $last_id = $id;
          @h2 = ();
      }

      # Interval bounds are read by position; the name-based lookup through
      # col_v() ('Start (s)' / 'Stop (s)') is not used here.
      my $start = $v[10];
      my $stop  = $v[11];
      warn "# start: $start - stop: $stop\n" if $debug;

      my @sum;
      my $count   = 0;
      my $emitted = 0;

      while ( my $line2 = <$fh2> ) {
          chomp $line2;
          $line2 =~ s/\r+$//;
          warn "## fh2 [ $line2 ]\n" if $debug;
          @v2 = map { s/^\s+//; $_ } split(/[;,]\s*/, $line2);

          # First non-empty row of a freshly opened data file is its header.
          if ( ! @h2 ) {
              @h2 = @v2;
              $h2nr = col2nr @h2;
              $tell2 = tell($fh2);
              warn "# h2nr ",dump( $h2nr ) if $debug;
              if ( ! @out_header ) {
                  # The output header is written once, based on the first
                  # data file that is opened.
                  push @out_header, @cols;
                  push @out_header, map { $h2[$_] }
                      ( $h2nr->{'AU01_r'} .. $h2nr->{'AU45_r'} );
                  push @out_header, map { $h2[$_] }
                      ( $h2nr->{'AU01_c'} .. $h2nr->{'AU45_c'} );
                  warn "# out_header ",dump( \@out_header );
                  out_csv @out_header;
              }
              next;
          }

          # Timestamp is read by position, not through col_v2('timestamp').
          my $timestamp = $v2[2];

          # Argument "1.324.400" isn't numeric
          $timestamp =~ s/^(\d+)\.(\d\d\d)\.(\d\d\d)$/$1$2.$3/;

          # Whole-number timestamps are valid too, so the fraction is optional.
          if ( $timestamp !~ m/^\d+(?:\.\d+)?$/ ) {
              warn "timestamp [$timestamp] from $file2 not numeric [$line2]";
          }

          if ( $timestamp < $start ) {
              # before the interval: nothing to accumulate
          } elsif ( $timestamp < $stop ) {
              # Remember where the interval's data ends so far; the next
              # interval resumes reading from this offset.
              $tell2 = tell($fh2);
              foreach my $n ( $h2nr->{'AU01_r'} .. $h2nr->{'AU45_c'} ) {
                  $sum[$n] += $v2[$n];
              }
              $count++;
          } else {
              # First row at or past $stop: rewind so that this row is read
              # again for the next interval, then write the result.
              warn "# seek $tell2" if $debug;
              seek $fh2, $tell2, 0;

              die "count is 0" if $count == 0;

              _emit_interval( \@v, \@sum, $count );
              $emitted = 1;

              warn "# last" if $debug;
              last;
          }
      }

      # The data file ended before a row at or past $stop was seen.  Whatever
      # was accumulated still belongs to this interval and must be written;
      # an interval without any data is reported instead of vanishing.
      if ( ! $emitted ) {
          if ( $count > 0 ) {
              _emit_interval( \@v, \@sum, $count );
          } else {
              warn "SKIP ERROR: no data rows for this interval in $file2 [$line]";
              print $skipped_input "$line\n";
          }
      }
  }
 224
 225 __END__
 226
 227
 228 my $col2nr;
 229
 230 # look up by the filename taken from $v[0]
 231 # Start (s)       Stop (s)
 232
 233 my $lookup_file = '
 234
 235 # col timestamp
 236 #