eprints-dev: /home/dpavlin/tsv2xp-xml.pl [commit]
author Dobrica Pavlinusic <dpavlin@rot13.org>
Mon, 18 Oct 2010 22:10:11 +0000 (00:10 +0200)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Mon, 18 Oct 2010 22:10:11 +0000 (00:10 +0200)
tsv2xp-xml.pl

index ccae475..446d32e 100755 (executable)
@@ -69,10 +69,21 @@ while(<$tsv>) {
                $file .= $1 if $full_path =~ m/(\.\w+)$/;
        }
 
+       my $subject;
+       if ( $file =~ m/^(\d+)(\w+)$/ ) {
+               $subject =
+                       $2 =~ m/bib/i       ? 'IZBIB' :
+                       $2 =~ m/[info]{4}/i ? 'IZDHI' :
+                       $2 =~ m/muz/i       ? 'IZMUZ' :
+                       $2 =~ m/arh/i       ? 'IZARH' :
+                       die "unknown subject: $2";
+       }
+
        my $c = $col;
        $header2col->{'eprintsid'} = $c; $v[$c++] = $eprintsid++;
        $header2col->{'file'} = $c; $v[$c++] = $file;
        $header2col->{'full_path'} = $c; $v[$c++] = $full_path;
+       $header2col->{'subject'} = $c; $v[$c++] = $subject;
 
        my $eprints = $xml;
        while ( $eprints =~ s/<!-- "(.+?)" -->/interpolate($1)/seg ) {
@@ -80,6 +91,7 @@ while(<$tsv>) {
        }
 
        $eprints =~ s{<documents>.+</documents>}{<!-- no documents -->}s if ! $full_path;
+       $eprints =~ s{<subjects>.+</subjects>}{<!-- no subjects -->}s if ! $subject;
 
        print $eprints;
 }