use Encode;
use Data::Dump qw(dump);
use Storable;
-
-=for eprints-api
+use LWP::Simple;
use EPrints;
-my $institution = 'Grafički fakultet';
my $ep = EPrints->new();
-my $repo = $ep->repository( 'grf' );
-$repo->{config}->{enable_file_imports} = 1;
-$repo->{config}->{enable_web_imports} = 1;
-
+my $repo = $ep->repository( 'snz' );
+#$repo->{config}->{enable_file_imports} = 1;
+#$repo->{config}->{enable_web_imports} = 1;
my $dataset = $repo->dataset( 'eprint' );
my $list = $dataset->search;
my $count = $list->count;
warn "# found [$count] eprints\n";
-my $eprint = $dataset->dataobj( 21 );
-warn dump( $eprint->get_value('institution'), $institution );
+warn ref( $list );
+
+#warn "# ids = ",dump( $list->ids );
+
+my $info = { count => 0 };
+$list->map( sub {
+ my( $session, $dataset, $eprint, $info ) = @_;
+
+ my $biblionumber = $eprint->get_value('biblionumber');
+
+ $info->{biblionumber}->{$biblionumber}++;
+ $info->{count}++;
+
+}, $info );
+warn dump( $info );
+
+=for update
+
+#my $eprint = $dataset->dataobj( 21 );
+#warn dump( $eprint->get_value('institution'), $institution );
warn dump( $eprint );
store $files, "$koha_path.biblionumber.file";
my $stat;
-my $this_id = '';
-my $item;
-
-binmode STDOUT, ":utf8";
-
-sub dump_item {
- my $item = shift || return;
-
- my $f200 = $item->{200}->[0] || die "no 200 in ",dump($item);
-
- if ( $f200 =~ s/\s*;\s*([^;]+?)$//i ) {
- $item->{mentor} = $1;
- $item->{mentor} =~ s/^\s*voditelji?\s*(?:rada)\s*//i;
- } else {
- warn "MISSING ; voditelj [$f200]\n";
- }
-
- if ( $f200 =~ s{\s*/\s*([^/]+?)$}{} ) {
- $item->{autor} = $1;
- } else {
- warn "MISSING / autor [$f200]\n";
- }
- if ( $f200 =~ s{\s*:\s*([^:]+?)$}{} ) {
- $item->{tip} = lc($1);
- } else {
- warn "MISSING : tip [$f200]\n";
- }
+# Open the TSV index, the MARC dump and the import output. Die right away
+# with the path and OS error if any of them can't be opened; a failed open
+# would otherwise only surface later as I/O on an unopened handle.
+open(my $tsv_fh, '<:encoding(UTF-8)', "$koha_path.tsv") or die "can't open $koha_path.tsv: $!";
+open(my $marc_fh, '<', "$koha_path.marc") or die "can't open $koha_path.marc: $!";
+open(my $import_fh, '>', "$koha_path.import.marc") or die "can't open $koha_path.import.marc: $!";
- $item->{title} = $f200;
+my $last_offset = 0;
+my @cols;
- if ( exists $item->{991} ) {
- my $file_id = $item->{991}->[0];
- if ( exists $files->{ $file_id } ) {
- $item->{full_path} = delete $files->{ $file_id };
- } elsif ( $file_id =~ s/(\w)0*(\d)/$1$2/ ) {
- if ( exists $files->{ $file_id } ) {
- $item->{full_path} = delete $files->{ $file_id };
- }
- }
- }
+while(<$tsv_fh>) {
+ my $line = $_;
+ $line =~ s/[\n\r]+$//;
- if ( ! exists $item->{full_path} ) {
- my $file_id = ucfirst( $item->{300}->[0] . ' ' . $item->{700}->[0] );
- $file_id =~ s/[\.\,]//g;
- if ( exists $files->{ $file_id } ) {
- $item->{full_path} = delete $files->{ $file_id };
- }
+ if ( ! @cols && $line =~ m/#(.+)/ ) {
+ @cols = split(/\t/, $1);
+ next;
}
- warn "MISSING file for $eprintid\n" unless exists $item->{full_path};
-
- warn "# item ",dump($item);
-
- my $eprint = {
- eprintid => $eprintid++
- };
+ # Map the tab-separated values onto the column names from the header.
+ # Split the already-chomped $line rather than $_: with a field limit the
+ # last column keeps the rest of the string, which for $_ still ends in
+ # the line terminator.
+ my @v = split(/\t/, $line, scalar @cols);
+ my %row;
+ @row{@cols} = @v;
+#warn "## row = ",dump( \%row );
- $eprint->{filename} = $1 if $item->{full_path} =~ m{/([^/]+)$};
- $eprint->{full_path} = $item->{full_path};
+ my $offset = $row{offset} // die "no offset";
+ my $biblionumber = $row{biblionumber} || die "no biblionumber";
- $eprint->{date} = $1 if $item->{210}->[0] =~ m/\$d(\d+)/;
- $eprint->{pages} = $1 if $item->{215}->[0] =~ m/^(\d+)/;
+ warn "# ", join(' ', map { $row{$_} } qw(offset biblionumber title)), "\n";
- ( $eprint->{creators_family}, $eprint->{creators_given} ) = split(/,\s*/, $item->{700}->[0] );
-
- $eprint->{title} = $item->{title};
-
- $eprint->{keywords} = join(", ", @{ $item->{610} }) if exists $item->{610};
+ exit if $ENV{LAST} && $eprintid >= $ENV{LAST};
- if ( exists $item->{700}->[1] ) {
- ( $eprint->{thesis_mentor_family}, $eprint->{thesis_mentor_given} ) = split(/,\s*/, $item->{700}->[1] );
- } elsif ( $item->{mentor} ) {
- ( $eprint->{thesis_mentor_given}, $eprint->{thesis_mentor_family} ) = split(/\s+/, $item->{mentor} );
- }
+ if ( delete $files->{$biblionumber} ) {
+ $stat->{file}++;
- $eprint->{thesis_mentor_family} =~ s/(\S+)\s*-\s*(\S+)/$1-$2/; # fix spaces between dash in double surname
+ if ( $info->{biblionumber}->{$biblionumber} ) {
+ $stat->{existing}++;
+ warn "EXISTING $biblionumber found in eprints\n";
+ } else {
- $eprint->{thesis_callnumber} = $item->{990}->[0];
- $eprint->{thesis_invnumber} = $item->{991}->[0]; # FIXME?
+ $stat->{new}++;
- # fallback za radove bez datuma na godinu
- if ( ! $eprint->{thesis_date} && $item->{990}->[0] =~ m{/(\d\d\d\d)/} ) {
- $eprint->{thesis_date} = $1;
- }
+ # Copy the bytes between the previous and the current offset from the
+ # MARC file to the import file. Check seek/read so an I/O failure is
+ # reported instead of silently writing an empty or truncated record.
+ my $length = $offset - $last_offset;
+ seek( $marc_fh, $last_offset, 0 ) or die "can't seek to $last_offset: $!";
+ my $got = read( $marc_fh, my $marc, $length );
+ die "can't read marc for $biblionumber: $!" unless defined $got;
+ warn "SHORT READ $biblionumber got $got of $length bytes\n" if $got != $length;
+ print $import_fh $marc;
+ warn "# marc $biblionumber\n";
+ }
- if ( ! $eprint->{date} ) {
- $eprint->{date} = $eprint->{thesis_date};
+ } else {
+ $stat->{missing}++;
}
- warn "# eprint ",dump($eprint);
-
- print qq|
-
- <eprint>
- <eprintid>$eprint->{eprintid}</eprintid>
-
- |;
-
- if ( $eprint->{full_path} ) {
- print qq|
-
- <documents>
- <document>
-
- <files>
- <file>
- <datasetid>document</datasetid>
- <filename>$eprint->{filename}</filename>
- <mime_type>application/pdf</mime_type>
- <url>file://$eprint->{full_path}</url>
- </file>
- </files>
- <mime_type>application/pdf</mime_type>
- <format>application/pdf</format>
- <language>hr</language>
- <security>validuser</security>
- <main>$eprint->{filename}</main>
- </document>
- </documents>
-
- |;
- }
- print qq|
-
- <eprint_status>archive</eprint_status>
- <type>$eprint->{type}</type>
- <metadata_visibility>show</metadata_visibility>
- <creators>
- <item>
- <name>
- <family>$eprint->{creators_family}</family>
- <given>$eprint->{creators_given}</given>
- </name>
- </item>
- </creators>
- <title>$eprint->{title}</title>
- <ispublished>unpub</ispublished>
- <subjects>
- <item>2.06</item>
- </subjects>
- <full_text_status>restricted</full_text_status>
- <keywords>$eprint->{keywords}</keywords>
- <date>$eprint->{date}</date>
- <date_type>completed</date_type>
- <pages>$eprint->{pages}</pages>
- <institution>Grafički fakultet</institution>
-<!--
- <department>strojevi</department>
--->
- <thesis_date>$eprint->{thesis_date}</thesis_date>
- <thesis_callnumber>$eprint->{thesis_callnumber}</thesis_callnumber>
- <thesis_invnumber>$eprint->{thesis_invnumber}</thesis_invnumber>
- <thesis_mentor>
- <name>
- <family>$eprint->{thesis_mentor_family}</family>
- <given>$eprint->{thesis_mentor_given}</given>
- </name>
- </thesis_mentor>
- </eprint>
-
- |;
+ $last_offset = $offset;
}
-print qq{<?xml version="1.0" encoding="utf-8" ?>
-<eprints>
-};
-
-
-open(my $tsv_fh, '<:encoding(UTF-8)', "$koha_path.tsv");
-open(my $tsv_marc, '<:encoding(UTF-8)', "$koha_path.marc");
-
-while(<$tsv_fh>) {
- my $line = $_;
- $line =~ s/[\n\r]+$//;
-
- my ($offset, $biblionumber, $title) = split(/\t/,$_,3);
-
- warn "# $offset $biblionumber $title\n";
+warn "# files left ", dump($files);
- exit if $ENV{LAST} && $eprintid >= $ENV{LAST};
+foreach my $biblionumber ( keys %$files ) {
- my $item;
+ if ( $info->{biblionumber}->{$biblionumber} ) {
+ $stat->{existing}++;
+ warn "EXISTING $biblionumber found in eprints\n";
+ next;
+ }
- if ( $item->{full_path} = $files->{$biblionumber} ) {
- $stat->{file}++;
+ if ( my $marc = get("https://koha.ffzg.hr/cgi-bin/koha/opac-export.pl?op=export&bib=$biblionumber&format=utf8") ) {
+ print $import_fh $marc;
+ warn "## marc $biblionumber from koha!";
} else {
- $stat->{missing}++;
+ warn "ERROR: can't fetch $biblionumber from koha";
}
-
}
-print qq{
-</eprints>
-};
-
-
-warn "# files left ", dump($files);
-
warn "# stat ", dump($stat);