#!/usr/bin/perl
use warnings;
use strict;

# csv2tsv.pl - convert a ',' or ';' delimited export to tab-separated values,
# re-inserting delimiters that belong to the data of the text columns.
#
# Provenance:
#   From:    Dobrica Pavlinusic
#   Date:    Fri, 13 Sep 2013 10:05:12 +0000 (+0200)
#   Subject: re-insert commas in data
#   X-Git-Url: http://git.rot13.org/?p=crolist2marc;a=commitdiff_plain;h=9da83a6114ad6f08db6e1644324228a51b7ecd17;ds=sidebyside
#
#   This requires a little bit of background knowledge about database schema
#   (TEXTKEY is 8 chars, TEXTREST is rest of content)
#
# Usage:
#   ls *.csv | xargs -i sh -cx "./csv2tsv.pl {} > tsv/{} 2>>/tmp/log"
#
# Set DEBUG in the environment to suppress the TSV output and only see the
# diagnostics on STDERR.
#
# Data::Dump is used only for the repair trace on STDERR; it is loaded on
# first use so the conversion helpers below can be loaded without it.

use constant {
    KEY_COLUMN   => 6,    # index of the TEXTKEY field within a row
    KEY_WIDTH    => 8,    # TEXTKEY is a fixed 8-character field
    TAIL_COLUMNS => 2,    # fields that follow TEXTREST and are kept verbatim
};

# parse_header($header) -> ( $delimiter, $count )
#
# Detects the delimiter from the (already chomped) header line: the last ','
# or ';' found wins.  $count is the number of occurrences of that delimiter,
# i.e. the highest valid field index of a well-formed row.
# Returns ( undef, 0 ) when the header contains neither character.
sub parse_header {
    my ($header) = @_;

    my ($delimiter) = $header =~ /([,;])[^,;]*\z/;
    return ( undef, 0 ) unless defined $delimiter;

    # Count only the delimiter that will actually be used for splitting.
    my $count = () = $header =~ /\Q$delimiter\E/g;
    return ( $delimiter, $count );
}

# rebuild_text_columns($delimiter, @pieces) -> ( $key, $rest )
#
# @pieces are the fragments produced by splitting the TEXTKEY and TEXTREST
# columns on delimiters that were really part of the data.  Leading fragments
# are glued back (with the delimiter) for as long as the result still fits
# into KEY_WIDTH characters; the key is then space-padded to KEY_WIDTH and
# the remaining fragments are re-joined into the rest column.
#
# This is a heuristic: a short key followed by short fragments of the rest
# column cannot be told apart from a longer key containing delimiters.
#
# Dies if the first fragment alone is already longer than KEY_WIDTH.
sub rebuild_text_columns {
    my ( $delimiter, @pieces ) = @_;

    my $key = shift @pieces;

    # "< KEY_WIDTH" leaves room for the one delimiter re-inserted per fragment.
    while ( @pieces && ( length($key) + length( $pieces[0] ) ) < KEY_WIDTH() ) {

        # Every fragment boundary is exactly one removed delimiter, so it is
        # always put back -- also when the key so far is "0" or empty.
        $key .= $delimiter . shift(@pieces);
    }

    die "ASSERT key > 8 [$key]" if length($key) > KEY_WIDTH();
    $key .= ' ' x ( KEY_WIDTH() - length($key) );    # pad key to 8 spaces

    return ( $key, join( $delimiter, @pieces ) );
}

# convert_row($line, $delimiter, $cols, $trace) -> @fields
#
# Splits one chomped input line into its fields.
#   $delimiter  delimiter from parse_header(); undef means "no delimiter",
#               in which case the line is returned as a single field
#   $cols       highest field index of a well-formed row (from parse_header)
#   $trace      optional code ref called as $trace->( $label, @data ) before
#               and after a row is repaired
#
# Rows with more fields than the header are repaired by folding the surplus
# fields back into the TEXTKEY / TEXTREST columns (see rebuild_text_columns).
sub convert_row {
    my ( $line, $delimiter, $cols, $trace ) = @_;

    return ($line) unless defined $delimiter;

    # Limit -1 keeps trailing empty columns for either delimiter.
    my @fields = split /\Q$delimiter\E/, $line, -1;
    return @fields unless $#fields > $cols;

    # The repair needs a key fragment at KEY_COLUMN plus the trailing columns.
    if ( @fields < KEY_COLUMN() + 1 + TAIL_COLUMNS() ) {
        warn "# row has surplus fields but is too short to repair: $line\n";
        return @fields;
    }

    my @tail   = splice( @fields, -TAIL_COLUMNS() );    # keep last two
    my @pieces = splice( @fields, KEY_COLUMN() );
    $trace->( 'XXX before ', @pieces ) if $trace;

    my ( $key, $rest ) = rebuild_text_columns( $delimiter, @pieces );
    $trace->( 'YYY after ', $key, $rest ) if $trace;

    return ( @fields, $key, $rest, @tail );
}

# main() - read rows from the files named in @ARGV (or STDIN), write TSV to
# STDOUT.  The first line is the header and determines delimiter and width.
sub main {
    my ( $delimiter, $cols );
    my $header_seen = 0;

    my $trace = sub {
        my ( $label, @data ) = @_;
        require Data::Dump;
        warn $label, Data::Dump::dump(@data);
    };

    while ( my $line = <> ) {
        chomp $line;

        if ( !$header_seen ) {
            ( $delimiter, $cols ) = parse_header($line);
            $header_seen = 1;
            if ( defined $delimiter ) {
                warn "# delimiter [$delimiter] columns=$cols\n";
            }
            else {
                warn "# no delimiter in header, lines are passed through\n";
            }
        }

        my @fields = convert_row( $line, $delimiter, $cols, $trace );
        print join( "\t", @fields ), "\n" unless $ENV{DEBUG};
    }

    return;
}

main() unless caller;