#!/usr/bin/perl

# tsv2xp-xml.pl
#
# Reads a tab-separated export, fills the placeholders of the template
# ep-xml.xml once per data row and prints the result on STDOUT.
#
# Provenance (from the patch header this script was carried in):
#   From:    Dobrica Pavlinusic
#   Date:    Mon, 18 Oct 2010 19:38:43 +0000 (+0200)
#   Subject: eprints-dev: /home/dpavlin/tsv2xp-xml.pl [commit]
#   X-Git-Url: http://git.rot13.org/?p=eprints3-migration.git;a=commitdiff_plain;h=979ecbc738f62e9b0baada96453c0635bec65300
#   new file tsv2xp-xml.pl, mode 100755, index 0000000..2991632
#
# Command noted by the author alongside the script:
# sudo -u eprints /usr/share/eprints3/bin/import --verbose --migration ffzg eprint XML dipl.xml

use warnings;
use strict;

use File::Slurp;
use Data::Dump qw(dump);

my $tsv_file = 'items.mentor_ime.mentor_perzime.IME_FILE_a.PREZIME.IME.NAZIV_RADA.MENTOR.GOD_OBR63a8814e199c9b969dbe8251fdef0fa2';

# Value stored in the synthetic 'eprintsid' column; incremented once per row.
our $eprintsid = 700;

my $xml   = read_file 'ep-xml.xml';
my @files = read_file "files.txt";

# Map: lower-cased file name without its last extension -> path as listed
# in files.txt.
my $file2path;
foreach my $full ( @files ) {
    chomp $full;

    # Last path component.  The original used "my $file = $1 if ...", whose
    # behaviour is undefined when the condition is false (in practice the
    # previous iteration's name was silently reused for a path without '/').
    my ($file) = $full =~ m{([^/]+)$};
    next if not defined $file;    # blank line or path ending in '/'

    $file =~ s/\.\w+$//;
    $file2path->{ lc $file } = $full;
}
#warn "# file2path ",dump($file2path);

# NOTE(review): this literal is blank in the text under review.  The command
# above imports XML, so an opening XML wrapper may have been lost when the
# patch was extracted -- confirm against the repository.
print qq{

};

my @header;
my $header2col;    # column name -> index into @v
my $col = 0;       # number of columns named by the header line

# Fields of the row currently being processed, plus the synthetic columns
# appended by the main loop.
our @v;

# interpolate($name)
#
# Returns the value of column $name for the current row (@v).  A trailing
# '?' on $name marks the column as optional.
#
#   - unknown column, not optional: dies, listing the known columns
#   - unknown column, optional:     returns ''
#   - known column without a value: returns ''
sub interpolate {
    my $f = shift;

    # s/// returns true when the trailing '?' was present (and removes it).
    my $optional = ( $f =~ s{\?$}{} ) ? 1 : 0;

    my $i = $header2col->{$f};
    if ( not defined $i ) {
        die "no $f in ", dump( $header2col ) if not $optional;

        # The original fell through to $v[undef], i.e. it returned the
        # value of column 0 for a missing optional column.
        return '';
    }

    my $v = $v[$i];
    $v = '' if not defined $v;
    warn "# $f $i = $v\n";
    return $v;
}


open(my $tsv, '<', $tsv_file) or die "$tsv_file: $!";
while ( my $line = <$tsv> ) {
    chomp $line;

    # Header line: '#' followed by tab-separated column names.  Anchored so
    # that a data row which merely contains '#' is not taken for a header.
    if ( $line =~ m/^#(.+)/ ) {
        @header = split(/\t/, $1);
        warn "# header ",dump( @header );

        # Start numbering from scratch for every header line seen.
        $col        = 0;
        $header2col = {};
        $header2col->{$_} = $col++ foreach @header;
        warn "# header2col ",dump( $header2col );
        next;
    }

    # Split the row and drop every literal \N from each field.
    @v = map { s/\\N//g; $_ } split(/\t/, $line);
    warn "# v = ", dump(@v);

    # Look the row's file name up (spaces removed, case-insensitively) in
    # the map built from files.txt.
    my $file = interpolate('IME FILE-a');
    $file =~ s/ //g;
    my $full_path;
    if ( my $full = $file2path->{ lc $file } ) {
        $full_path = $full;
        warn "# file $file -> $full_path\n";
    }

    # Append three synthetic columns after the ones named by the header so
    # the template can refer to them by name.
    my $c = $col;
    $header2col->{'eprintsid'} = $c; $v[$c++] = $eprintsid++;
    $header2col->{'file'}      = $c; $v[$c++] = $file;
    $header2col->{'full_path'} = $c; $v[$c++] = $full_path;

    my $eprints = $xml;

    # FIXME(review): the match pattern below is empty in the text under
    # review, and so is the capture group that should feed $1.  An empty
    # pattern makes Perl re-use the last successfully matched regex, so this
    # cannot be the intended placeholder syntax.  Restore the placeholder
    # pattern from the repository before running; it is reproduced
    # unchanged here rather than guessed at.
    while ( $eprints =~ s//interpolate($1)/seg ) {
        warn "# replaced $1\n";
    }

    # NOTE(review): as written this blanks the whole record when no file
    # was found for the row.  The pattern may originally have named only a
    # part of the template -- confirm against the repository.
    $eprints =~ s{.+}{}s if ! $full_path;

    print $eprints;
}
close($tsv) or warn "$tsv_file: $!";

# NOTE(review): blank in the text under review, like the opening literal;
# possibly the matching closing wrapper -- confirm.
print qq{

};