3 # ./tsv2xp-xml.pl 2>/dev/null > ep-diplomski.xml
4 # sudo -u eprints /usr/share/eprints3/bin/import --verbose --migration ffzg eprint XML ep-diplomski.xml
10 use Data::Dump qw(dump);
# Input files, all resolved relative to the current working directory.
# The TSV input is the first name glob returns for 'items.*'.
# NOTE(review): if nothing matches, $tsv_file is undef and nothing on these
# lines reports it — confirm that a later check or the open() catches it.
12 my $tsv_file = ( glob 'items.*' )[0];
# read_file in scalar context: presumably returns the whole template as one
# string — TODO confirm which module provides read_file.
16 my $xml = read_file 'ep-xml.xml';
# Same helper in list context: presumably one element per line, with line
# terminators still attached — TODO confirm.
17 my @files = read_file "files.txt";
# Build a lookup from lower-cased file name to the full path it came from.
20 foreach my $full ( @files ) {
# Capture the last path component in list context: $file is undef when the
# entry contains no '/', exactly as before, but the declaration no longer
# uses the "my ... if COND" form, whose behaviour perlsyn documents as
# undefined.
# NOTE(review): [^/]+ also matches a trailing newline, so if $full has not
# been chomped by this point the newline ends up in the key — confirm.
22 my ( $file ) = $full =~ m{/([^/]+)$};
# Keys are lower-cased so later lookups can be case-insensitive.
24 $file2path->{ lc $file } = $full;
26 #warn "# file2path ",dump($file2path);
# Start of the XML output, printed to the currently selected output handle.
# The qq{...} literal opened here continues on lines not shown in this
# excerpt.
28 print qq{<?xml version="1.0" encoding="utf-8" ?>
# A trailing '?' on the field name marks the column as optional: strip it
# and remember the marker ('?' when present, undef otherwise).  Written as a
# ternary so the declaration is unconditional — "my $x = ... if COND" has
# undefined behaviour per perlsyn.
39 my $optional = ( $f =~ s{(\?)$}{} ) ? $1 : undef;
# Column index recorded for this header name, if any.
40 my $i = $header2col->{$f};
# A missing column is fatal unless the name was marked optional.
41 die "no $f in ", dump( $header2col ) if not defined $i and not $optional;
# NOTE(review): for an optional column that is absent, $i is undef here
# (unless an elided line returns first), so this debug line would emit an
# "uninitialized value" warning — confirm that is acceptable.
43 warn "# $f $i = $v\n";
# Open the TSV input for reading and abort with the OS error on failure.
# The parenthesised argument list keeps '||' bound to open()'s result rather
# than to $tsv_file.
48 open(my $tsv, '<', $tsv_file) || die "$tsv_file: $!";
# Column names come from $1 — a capture made by a match on a line not shown
# here — split on tabs.
52 @header = split(/\t/, $1);
53 warn "# header ",dump( @header );
# Map each header name to its position.
# NOTE(review): assumes $col starts at 0 — its initialisation is not visible.
55 $header2col->{$_} = $col++ foreach @header;
56 warn "# header2col ",dump( $header2col );
# Split the current record ($_) on tabs and delete every literal "\N"
# sequence from each field (presumably a database NULL marker — TODO
# confirm).
# NOTE(review): split without a limit drops trailing empty fields, so @v can
# be shorter than @header — confirm the lookup code copes with that.
60 @v = map { s/\\N//g; $_ } split(/\t/, $_);
61 warn "# v = ", dump(@v);
# File name for this record, obtained by passing the field name
# 'IME FILE-a' to interpolate().
63 my $file = interpolate 'IME FILE-a';
# Look the name up case-insensitively in $file2path; the branch is taken
# only when a true (non-empty) path is stored for it.
66 if ( my $full = $file2path->{ lc $file } ) {
# NOTE(review): $full_path is assigned on a line not shown — presumably
# from $full; confirm.
68 warn "# file $file -> $full_path\n";
# Append the extension of the resolved path (a final ".word") to the name.
69 $file .= $1 if $full_path =~ m/(\.\w+)$/;
# Derive a subject code from a name of the form <digits><word characters>.
# NOTE(review): if $file had an extension appended earlier, its '.' stops
# this fully anchored \d+\w+ pattern from matching — confirm that is
# intended.
73 if ( $file =~ m/^(\d+)(\w+)$/ ) {
# Classify by the suffix captured in $2; the first test that matches wins.
# A failed match leaves $2 untouched, so each later test still sees the
# suffix, and the final die can still report it.
75 $2 =~ m/bib/i ? 'IZBIB' :
# NOTE(review): [info]{4} is a character class — any four consecutive
# characters drawn from i, n, f, o — not the literal word "info"; confirm
# this looseness is deliberate.
76 $2 =~ m/[info]{4}/i ? 'IZDHI' :
77 $2 =~ m/muz/i ? 'IZMUZ' :
78 $2 =~ m/arh/i ? 'IZARH' :
79 die "unknown subject: $2";
# Append four synthetic values to @v at consecutive indexes starting at $c,
# registering each index under its name in $header2col — the same table the
# header names are stored in.  $eprintsid is post-incremented, so the value
# stored is the id before the increment.
# NOTE(review): $c is initialised on a line not shown — presumably the
# first free index of @v; confirm.
83 $header2col->{'eprintsid'} = $c; $v[$c++] = $eprintsid++;
84 $header2col->{'file'} = $c; $v[$c++] = $file;
85 $header2col->{'full_path'} = $c; $v[$c++] = $full_path;
86 $header2col->{'subject'} = $c; $v[$c++] = $subject;
# Replace every <!-- "NAME" --> placeholder in $eprints with the result of
# interpolate(NAME).  The /g flag already handles all placeholders in one
# pass; the surrounding while repeats the pass until a pass replaces
# nothing, so placeholders introduced by a replacement value are expanded
# too.
89 while ( $eprints =~ s/<!-- "(.+?)" -->/interpolate($1)/seg ) {
# $1 is left over from the substitution above, so at most one placeholder
# name is logged per pass.
90 warn "# replaced $1\n";
# When no file path / subject was resolved, collapse the corresponding
# element into a comment.  The greedy .+ with /s spans from the first
# opening tag to the last closing tag.
93 $eprints =~ s{<documents>.+</documents>}{<!-- no documents -->}s if ! $full_path;
94 $eprints =~ s{<subjects>.+</subjects>}{<!-- no subjects -->}s if ! $subject;