--- /dev/null
+package EPrints::Plugin::Import::MARC;
+
+=head1 NAME
+
+EPrints::Plugin::Import::MARC -- import MARC records into EPrints
+
+=head1 DESCRIPTION
+
+This plugin allows you to import MARC and MARC XML records into GNU EPrints.
+
+=head1 CONFIGURATION
+
+The configuration can be changed in cfg.d/marc.pl. The web server must be restarted after any configuration change.
+
+=head1 COPYRIGHT AND LICENSE
+
+(C) 2008 Jose Miguel Parrella Romero <bureado@cpan.org>
+(C) 2013 Dobrica Pavlinušić <dpavlin@rot13.org>
+This module is free software under the same terms as Perl.
+
+=cut
+
+use Data::Dump qw(dump);
+
+use Encode;
+use strict;
+
+our @ISA = qw/EPrints::Plugin::Import/;
+
+# Constructor: builds the plugin via the parent class, then sets its name,
+# visibility and produced type. If either MARC module cannot be loaded the
+# plugin is hidden (visible = "") and an error message is recorded.
+sub new
+{
+    my( $class, %params ) = @_;
+
+    my $self = $class->SUPER::new( %params );
+
+    $self->{name} = "MARC";
+    $self->{visible} = "all";
+    $self->{produce} = [ 'list/eprint' ];
+
+    # '&&' rather than 'and': 'and' binds looser than '=', so with 'and' only
+    # the result of the first require would be stored in $rc and a missing
+    # MARC::File::USMARC would go unnoticed.
+    my $rc = EPrints::Utils::require_if_exists("MARC::Record")
+        && EPrints::Utils::require_if_exists("MARC::File::USMARC");
+    unless( $rc )
+    {
+        $self->{visible} = "";
+        $self->{error} = "Failed to load required modules.";
+    }
+
+    return $self;
+}
+
+# Import MARC records from the filehandle in $opts{fh}.
+#
+# Each record is passed through convert_input(); records for which it
+# returns undef are skipped. The remaining epdata is turned into a data
+# object in $opts{dataset} via epdata_to_dataobj().
+#
+# Returns an EPrints::List of the ids of the created objects, or undef
+# (after reporting an error) when no MARC reader could be created.
+sub input_fh
+{
+    my( $plugin, %opts ) = @_;
+
+    my $file = MARC::File::USMARC->in( $opts{fh} );
+    unless( defined $file )
+    {
+        # in() yields undef on failure; calling ->next on it would die
+        $plugin->error("Could not read MARC data from filehandle");
+
+        return undef;
+    }
+
+    my @ids;
+
+    while ( my $marc = $file->next() ) {
+        my $epdata = $plugin->convert_input( $marc );
+        next unless( defined $epdata );
+
+        my $dataobj = $plugin->epdata_to_dataobj( $opts{dataset}, $epdata );
+        if( defined $dataobj )
+        {
+            push @ids, $dataobj->get_id;
+        }
+    }
+
+    return EPrints::List->new(
+        dataset => $opts{dataset},
+        session => $plugin->{session},
+        ids=>\@ids );
+}
+
+# Import MARC records from the file named in $opts{filename}.
+#
+# Input from STDIN ('-') is rejected. Each record is passed through
+# convert_input() and turned into a data object in $opts{dataset}; if the
+# "marc" repository configuration defines dataobj_callback, it is called
+# with every created data object.
+#
+# Returns an EPrints::List of the ids of the created objects, or undef
+# (after reporting an error) for STDIN or when the file cannot be opened.
+sub input_file
+{
+    my( $plugin, %opts ) = @_;
+
+    if( $opts{filename} eq '-' )
+    {
+        $plugin->error("Does not support input from STDIN");
+
+        return undef;
+    }
+
+    my $file = MARC::File::USMARC->in( $opts{filename} );
+    unless( defined $file )
+    {
+        # in() yields undef on failure; calling ->next on it would die
+        my $reason = $MARC::File::ERROR || "unknown error";
+        $plugin->error("Could not open MARC file $opts{filename}: $reason");
+
+        return undef;
+    }
+
+    # Looked up once; the configuration does not change between records.
+    my $conf = $plugin->{session}->get_repository->get_conf( "marc" ) || {};
+    my $code = $conf->{dataobj_callback};
+
+    my @ids;
+
+    while ( my $marc = $file->next() ) {
+        my $epdata = $plugin->convert_input( $marc );
+        next unless( defined $epdata );
+
+        my $dataobj = $plugin->epdata_to_dataobj( $opts{dataset}, $epdata );
+        if( defined $dataobj )
+        {
+            # Callback; its return value is not used
+            $code->($dataobj) if $code;
+
+            push @ids, $dataobj->get_id;
+        }
+    }
+
+    return EPrints::List->new(
+        dataset => $opts{dataset},
+        session => $plugin->{session},
+        ids=>\@ids );
+}
+
+# Remembers which multiple-valued target fields convert_input() has already
+# reported, so that each one is dumped to STDERR only once.
+our $debug;
+
+# Convert one MARC record into an epdata hash reference.
+#
+# Parameters:
+#   $marc - record object providing fields() and field()
+#
+# Uses the "marc" repository configuration: marc2ep maps tag+subfield keys
+# (e.g. "245a") to target field names, importSubjects enables copying
+# 650$a into subjects, and epdata_callback (if set) post-processes the
+# result.
+#
+# Returns the epdata hash reference (undef when nothing was mapped), or
+# whatever epdata_callback returns.
+sub convert_input
+{
+
+    my ( $plugin, $marc ) = @_;
+    my $epdata; # to be returned
+
+    my $conf = $plugin->{session}->get_repository->get_conf( "marc" );
+
+    # Taken from cfg.d/marc.pl
+    my %mappings = %{ $conf->{marc2ep} };
+
+    my $dataset = $plugin->{session}->get_dataset('archive');
+
+    foreach my $marc_field ( $marc->fields() ) { # each field of the record
+        my $t = $marc_field->tag();
+        my @list = grep ( /^\Q$t\E/, keys %mappings ); # lookup for mappings
+        foreach my $i ( sort @list ) {
+            ( my $s ) = $i =~ /\Q$t\E(.)/; # mapped subfield
+            my $ts = defined $s ? $t . $s : $t; # complete tag+subfield
+            my $value = $marc_field->as_string($s);
+
+            my $ep_field = $mappings{$ts};
+            unless( $ep_field )
+            {
+                # e.g. a mapping key longer than tag + one subfield code
+                $plugin->error("no mapping for $ts");
+                next;
+            }
+
+            my $metafield = $dataset->get_field($ep_field);
+            next unless defined $metafield; # target field not in dataset
+
+            if ($metafield->get_property('multiple')) {
+                warn "# multiple $ep_field ",dump( $metafield ) if ! $debug->{$ep_field}++;
+                $epdata->{$ep_field} = [ { name => $value } ];
+            } else {
+                $epdata->{$ep_field} = $value; # bye!
+            }
+        }
+    }
+
+    # Authors
+    my $author_field = $marc->field('100');
+    if ( defined $author_field ) {
+        foreach my $i ( $author_field->subfield('a') ) {
+            next unless defined $i;
+            # "Family, Given": drop the whitespace around the comma so the
+            # given name does not start with a space
+            my ( $family, $given ) = split ( /\s*,\s*/, $i );
+            next unless defined $family && length $family;
+            push @{ $epdata->{creators_name} }, { family => $family, given => $given };
+        }
+    }
+
+    # Subjects
+    if ( $conf->{importSubjects} ) {
+
+        # field() in list context returns every 650 field; in scalar
+        # context only the first one would be seen
+        foreach my $subject_field ( $marc->field('650') ) {
+            foreach my $i ( $subject_field->subfield('a') ) {
+                push @{ $epdata->{subjects} }, $i;
+            }
+        }
+
+    }
+
+    # Callback
+    if ( my $code = $conf->{epdata_callback} ) {
+        $epdata = $code->($epdata);
+    }
+
+    return $epdata;
+
+}
+
+1;
--- /dev/null
+#!/usr/bin/perl -I /usr/share/eprints3/perl_lib
+use warnings;
+use strict;
+use autodie;
+use utf8;
+
+# export single record to get structure:
+# sudo -u eprints /usr/share/eprints3/bin/export snz archive XMLFiles 20 > /tmp/20.xml
+
+# Import procedure:
+
+# 3. import with:
+# sudo -u eprints /usr/share/eprints3/bin/import --verbose --migration --enable-file-imports --update --enable-import-fields grf eprint XML /tmp/xml
+#
+# 4. re-run view generation
+# sudo -u eprints /usr/share/eprints3/bin/generate_views grf --verbose
+
+use Encode;
+use Data::Dump qw(dump);
+use Storable;
+
+=for eprints-api
+
+use EPrints;
+my $institution = 'Grafički fakultet';
+
+my $ep = EPrints->new();
+my $repo = $ep->repository( 'grf' );
+$repo->{config}->{enable_file_imports} = 1;
+$repo->{config}->{enable_web_imports} = 1;
+
+my $dataset = $repo->dataset( 'eprint' );
+my $list = $dataset->search;
+my $count = $list->count;
+warn "# found [$count] eprints\n";
+
+my $eprint = $dataset->dataobj( 21 );
+warn dump( $eprint->get_value('institution'), $institution );
+
+warn dump( $eprint );
+
+if ( $eprint->get_value( 'institution' ) ne $institution ) {
+ $eprint->set_value( 'institution' => $institution );
+ $eprint->save_revision();
+ $eprint->commit();
+}
+
+$repo->terminate();
+
+=cut
+
+# Id given to the next <eprint> element that dump_item() prints.
+our $eprintid = 1;
+
+# file id => full path of the PDF; always a hash ref, even if nothing is found
+my $files = {};
+
+my $mkp_path = "/mnt/share/MKP/ELEKTRONIČKI DOKUMENTI/EL.DOKUMENTI PO BIBLIOBROJU/do18052015/";
+my $koha_path = "/tmp/koha_ffzg";
+
+# List form of the pipe open: find is run directly, so the spaces and
+# non-ASCII characters in the path need no shell quoting.
+open(my $fh, '-|:encoding(UTF-8)', 'find', $mkp_path, '-iname', '*.pdf');
+while(my $full_path = <$fh>) {
+    chomp $full_path;
+
+    # Base name without the .pdf extension. Declared separately from the
+    # match because "my ... if ..." has undefined behaviour.
+    my $file;
+    if ( $full_path =~ m{/([^/]+)\.pdf\z}i ) {
+        $file = $1;
+    } else {
+        warn "# SKIP unexpected path [$full_path]\n";
+        next;
+    }
+
+    # Leading digits of the name are the file id; names without leading
+    # digits are indexed under the whole base name.
+    my $file_id;
+    if ( $file =~ m/^(\d+)/ ) {
+        $file_id = $1;
+    } else {
+        $file_id = $file;
+    }
+
+    warn "# $file_id\t$full_path\n";
+    $files->{ $file_id } = $full_path;
+
+}
+close($fh);
+
+warn "# got ", scalar keys %$files, " files\n";
+
+store $files, "$koha_path.biblionumber.file";
+
+my $stat;
+my $this_id = '';
+my $item;
+
+binmode STDOUT, ":utf8";
+
+# dump_item( $item )
+#
+# Print one <eprint> XML element for a catalogue record to STDOUT.
+#
+# $item is a hash reference keyed by field tag, each value an array
+# reference of strings. The first 200 value is taken apart from the right
+# into mentor (after the last ';'), autor (after the last '/'), tip (after
+# the last ':') and the remaining title. A matching PDF is looked up in the
+# file-level $files index (by the first 991 value, then by "300 700") and
+# removed from it when found.
+#
+# Returns nothing useful. Does nothing when $item is false and dies when the
+# record has no 200 value. Uses and increments the file-level $eprintid.
+sub dump_item {
+    my $item = shift || return;
+
+    my $f200 = $item->{200}->[0] || die "no 200 in ",dump($item);
+
+    # First value stored under a tag, or '' when the tag is absent; avoids
+    # autovivifying $item entries and uninitialized-value warnings.
+    my $first = sub {
+        my ($tag) = @_;
+        my $values = $item->{$tag};
+        return '' unless ref $values eq 'ARRAY' && defined $values->[0];
+        return $values->[0];
+    };
+
+    # Escape a value for use as XML character data (undef becomes '').
+    my $esc = sub {
+        my ($text) = @_;
+        return '' unless defined $text;
+        $text =~ s/&/&amp;/g;
+        $text =~ s/</&lt;/g;
+        $text =~ s/>/&gt;/g;
+        $text =~ s/"/&quot;/g;
+        return $text;
+    };
+
+    if ( $f200 =~ s/\s*;\s*([^;]+?)$//i ) {
+        $item->{mentor} = $1;
+        # NOTE(review): "(?:rada)" has no quantifier, so the prefix is only
+        # stripped when "rada" is present; "(?:rada)?" may have been meant.
+        $item->{mentor} =~ s/^\s*voditelji?\s*(?:rada)\s*//i;
+    } else {
+        warn "MISSING ; voditelj [$f200]\n";
+    }
+
+    if ( $f200 =~ s{\s*/\s*([^/]+?)$}{} ) {
+        $item->{autor} = $1;
+    } else {
+        warn "MISSING / autor [$f200]\n";
+    }
+
+    if ( $f200 =~ s{\s*:\s*([^:]+?)$}{} ) {
+        $item->{tip} = lc($1);
+    } else {
+        warn "MISSING : tip [$f200]\n";
+    }
+
+    $item->{title} = $f200;
+
+    # Locate the PDF: first by the 991 value, exactly or with the zeros
+    # between a word character and a digit removed ...
+    if ( exists $item->{991} ) {
+        my $file_id = $first->(991);
+        if ( exists $files->{ $file_id } ) {
+            $item->{full_path} = delete $files->{ $file_id };
+        } elsif ( $file_id =~ s/(\w)0*(\d)/$1$2/ ) {
+            if ( exists $files->{ $file_id } ) {
+                $item->{full_path} = delete $files->{ $file_id };
+            }
+        }
+    }
+
+    # ... then by "<300> <700>" with dots and commas removed.
+    if ( ! exists $item->{full_path} ) {
+        my $file_id = ucfirst( $first->(300) . ' ' . $first->(700) );
+        $file_id =~ s/[\.\,]//g;
+        if ( exists $files->{ $file_id } ) {
+            $item->{full_path} = delete $files->{ $file_id };
+        }
+    }
+
+    warn "MISSING file for $eprintid\n" unless exists $item->{full_path};
+
+    warn "# item ",dump($item);
+
+    my $eprint = {
+        eprintid => $eprintid++
+    };
+
+    if ( defined $item->{full_path} ) {
+        $eprint->{filename} = $1 if $item->{full_path} =~ m{/([^/]+)$};
+        $eprint->{full_path} = $item->{full_path};
+    }
+
+    $eprint->{date} = $1 if $first->(210) =~ m/\$d(\d+)/;
+    $eprint->{pages} = $1 if $first->(215) =~ m/^(\d+)/;
+
+    ( $eprint->{creators_family}, $eprint->{creators_given} ) = split(/,\s*/, $first->(700) );
+
+    $eprint->{title} = $item->{title};
+
+    $eprint->{keywords} = join(", ", @{ $item->{610} }) if exists $item->{610};
+
+    # Mentor: the second 700 value ("Family, Given") wins over the name
+    # parsed from the 200 value ("Given Family").
+    my $names = $item->{700} || [];
+    if ( defined $names->[1] ) {
+        ( $eprint->{thesis_mentor_family}, $eprint->{thesis_mentor_given} ) = split(/,\s*/, $names->[1] );
+    } elsif ( $item->{mentor} ) {
+        ( $eprint->{thesis_mentor_given}, $eprint->{thesis_mentor_family} ) = split(/\s+/, $item->{mentor} );
+    }
+
+    # fix spaces between dash in double surname
+    $eprint->{thesis_mentor_family} =~ s/(\S+)\s*-\s*(\S+)/$1-$2/
+        if defined $eprint->{thesis_mentor_family};
+
+    $eprint->{thesis_callnumber} = $first->(990);
+    $eprint->{thesis_invnumber} = $first->(991); # FIXME?
+
+    # for records without a date, fall back to the year in the call number
+    if ( ! $eprint->{thesis_date} && $first->(990) =~ m{/(\d\d\d\d)/} ) {
+        $eprint->{thesis_date} = $1;
+    }
+
+    if ( ! $eprint->{date} ) {
+        $eprint->{date} = $eprint->{thesis_date};
+    }
+
+    warn "# eprint ",dump($eprint);
+
+    # XML-escaped copies of everything interpolated below, so that '&' or
+    # '<' in a title, name or path cannot produce malformed XML.
+    # NOTE(review): "type" is never set in this sub ($item->{tip} is parsed
+    # but not mapped), so <type> is emitted empty.
+    my %x = map { $_ => $esc->( $eprint->{$_} ) } qw(
+        eprintid filename full_path type
+        creators_family creators_given title keywords date pages
+        thesis_date thesis_callnumber thesis_invnumber
+        thesis_mentor_family thesis_mentor_given
+    );
+
+    print qq|
+
+ <eprint>
+ <eprintid>$x{eprintid}</eprintid>
+
+ |;
+
+    if ( $eprint->{full_path} ) {
+        print qq|
+
+ <documents>
+ <document>
+
+ <files>
+ <file>
+ <datasetid>document</datasetid>
+ <filename>$x{filename}</filename>
+ <mime_type>application/pdf</mime_type>
+ <url>file://$x{full_path}</url>
+ </file>
+ </files>
+ <mime_type>application/pdf</mime_type>
+ <format>application/pdf</format>
+ <language>hr</language>
+ <security>validuser</security>
+ <main>$x{filename}</main>
+ </document>
+ </documents>
+
+ |;
+    }
+    print qq|
+
+ <eprint_status>archive</eprint_status>
+ <type>$x{type}</type>
+ <metadata_visibility>show</metadata_visibility>
+ <creators>
+ <item>
+ <name>
+ <family>$x{creators_family}</family>
+ <given>$x{creators_given}</given>
+ </name>
+ </item>
+ </creators>
+ <title>$x{title}</title>
+ <ispublished>unpub</ispublished>
+ <subjects>
+ <item>2.06</item>
+ </subjects>
+ <full_text_status>restricted</full_text_status>
+ <keywords>$x{keywords}</keywords>
+ <date>$x{date}</date>
+ <date_type>completed</date_type>
+ <pages>$x{pages}</pages>
+ <institution>Grafički fakultet</institution>
+<!--
+ <department>strojevi</department>
+-->
+ <thesis_date>$x{thesis_date}</thesis_date>
+ <thesis_callnumber>$x{thesis_callnumber}</thesis_callnumber>
+ <thesis_invnumber>$x{thesis_invnumber}</thesis_invnumber>
+ <thesis_mentor>
+ <name>
+ <family>$x{thesis_mentor_family}</family>
+ <given>$x{thesis_mentor_given}</given>
+ </name>
+ </thesis_mentor>
+ </eprint>
+
+ |;
+
+}
+
+# Emit the XML document wrapper and walk the Koha TSV export
+# (offset, biblionumber, title per line), counting how many records have a
+# PDF in the $files index.
+print qq{<?xml version="1.0" encoding="utf-8" ?>
+<eprints>
+};
+
+
+open(my $tsv_fh, '<:encoding(UTF-8)', "$koha_path.tsv");
+open(my $tsv_marc, '<:encoding(UTF-8)', "$koha_path.marc");
+
+while(my $line = <$tsv_fh>) {
+    $line =~ s/[\n\r]+$//;
+
+    # Split the cleaned line, not the raw one, so $title carries no
+    # trailing line ending.
+    my ($offset, $biblionumber, $title) = split(/\t/, $line, 3);
+    $offset = '' unless defined $offset;
+    $biblionumber = '' unless defined $biblionumber;
+    $title = '' unless defined $title;
+
+    warn "# $offset $biblionumber $title\n";
+
+    # 'last' instead of 'exit': the closing </eprints> below must still be
+    # written when the LAST limit is reached, otherwise the XML is truncated.
+    last if $ENV{LAST} && $eprintid >= $ENV{LAST};
+
+    my $item;
+
+    if ( $item->{full_path} = $files->{$biblionumber} ) {
+        $stat->{file}++;
+    } else {
+        $stat->{missing}++;
+    }
+
+}
+
+close($tsv_fh);
+close($tsv_marc);
+
+print qq{
+</eprints>
+};
+
+
+warn "# files left ", dump($files);
+
+warn "# stat ", dump($stat);