1 #!/usr/bin/perl -I /usr/share/eprints3/perl_lib
7 # export single record to get structure:
8 # sudo -u eprints /usr/share/eprints3/bin/export snz archive XMLFiles 20 > /tmp/20.xml
13 # sudo -u eprints /usr/share/eprints3/bin/import --verbose --migration --enable-file-imports --update --enable-import-fields grf eprint XML /tmp/xml
15 # 4. re-run view generation
16 # sudo -u eprints /usr/share/eprints3/bin/generate_views grf --verbose
19 use Data::Dump qw(dump);
# Institution name the sample eprint below is compared against.
25 my $institution = 'Grafički fakultet';
# Open the 'grf' repository through the EPrints API.
27 my $ep = EPrints->new();
28 my $repo = $ep->repository( 'grf' );
# Switch on the file-import and web-import flags in the in-memory repository config.
29 $repo->{config}->{enable_file_imports} = 1;
30 $repo->{config}->{enable_web_imports} = 1;
# Run a search on the 'eprint' dataset (called without arguments) and report the hit count on STDERR.
32 my $dataset = $repo->dataset( 'eprint' );
33 my $list = $dataset->search;
34 my $count = $list->count;
35 warn "# found [$count] eprints\n";
# Load the eprint with id 21 and dump its current institution next to the expected one.
37 my $eprint = $dataset->dataobj( 21 );
38 warn dump( $eprint->get_value('institution'), $institution );
# Touch the record only when the stored institution differs from the expected value.
# NOTE(review): `ne` warns under `use warnings` if get_value() returns undef — confirm the field is always set.
# NOTE(review): confirm that save_revision() alone persists the change; no commit() call is visible here.
42 if ( $eprint->get_value( 'institution' ) ne $institution ) {
43 $eprint->set_value( 'institution' => $institution );
44 $eprint->save_revision();
# Directory tree searched for PDFs, and the common path prefix of the Koha-derived data files.
56 my $mkp_path = "/mnt/share/MKP/ELEKTRONIČKI DOKUMENTI/EL.DOKUMENTI PO BIBLIOBROJU/";
57 my $koha_path = "/tmp/koha_ffzg";
# Run find(1) as a piped command and read its output decoded as UTF-8, one path per line.
# NOTE(review): the return value of open() is not checked, and $mkp_path is interpolated
# into a shell command line inside double quotes.
59 open(my $fh, '-|:encoding(UTF-8)', 'find "' . $mkp_path . '" -iname "*.pdf"');
60 while(my $full_path = <$fh>) {
# Capture the slash-free text that precedes ".pdf" (case-insensitive).
# NOTE(review): `my $x = ... if COND` has undefined behaviour when COND is false (perlsyn, "Statement Modifiers").
63 my $file = $1 if $full_path =~ m{/([^/]+)\.pdf}i;
# Only names that contain a run of digits are considered.
66 if ( $file =~ m/(\d+)/ ) {
# Log and remember the full path under $file_id.
# NOTE(review): $file_id is assigned on lines not visible here.
72 warn "# $file_id\t$full_path\n";
73 $files->{ $file_id } = $full_path;
# Report how many ids were collected.
77 warn "# got ", scalar keys %$files, " files\n";
# Persist the id => path map next to the other Koha files.
# NOTE(review): store() is presumably Storable's; its import is not visible here.
79 store $files, "$koha_path.biblionumber.file";
# Put the :utf8 layer on STDOUT.
85 binmode STDOUT, ":utf8";
# Take the record from the argument list; return when it is missing or false.
88 my $item = shift || return;
# First value of field 200; a record without it is fatal.
90 my $f200 = $item->{200}->[0] || die "no 200 in ",dump($item);
# Cut the trailing "; ..." segment off field 200.
# NOTE(review): where the captured text is stored (presumably $item->{mentor}) is not visible here.
92 if ( $f200 =~ s/\s*;\s*([^;]+?)$//i ) {
# Drop a leading "voditelj"/"voditelji" + "rada" label from the mentor string.
# NOTE(review): "(?:rada)" is mandatory, so a label without "rada" is left in place — confirm intent.
94 $item->{mentor} =~ s/^\s*voditelji?\s*(?:rada)\s*//i;
96 warn "MISSING ; voditelj [$f200]\n";
# Cut the trailing "/ ..." segment; a warning mentioning "autor" is issued for records without one.
99 if ( $f200 =~ s{\s*/\s*([^/]+?)$}{} ) {
102 warn "MISSING / autor [$f200]\n";
# Cut the trailing ": ..." segment and keep it, lower-cased, in $item->{tip}.
105 if ( $f200 =~ s{\s*:\s*([^:]+?)$}{} ) {
106 $item->{tip} = lc($1);
108 warn "MISSING : tip [$f200]\n";
# Whatever remains of field 200 is stored as the title.
111 $item->{title} = $f200;
# First attempt: look the PDF up by the first value of field 991.
113 if ( exists $item->{991} ) {
114 my $file_id = $item->{991}->[0];
# delete() returns the stored path and removes the entry from $files.
115 if ( exists $files->{ $file_id } ) {
116 $item->{full_path} = delete $files->{ $file_id };
# Retry after removing the zeros between a word character and the digit that follows (first occurrence only).
117 } elsif ( $file_id =~ s/(\w)0*(\d)/$1$2/ ) {
118 if ( exists $files->{ $file_id } ) {
119 $item->{full_path} = delete $files->{ $file_id };
# Second attempt: build an id from the first values of fields 300 and 700,
# joined by a space, first letter upper-cased, dots and commas removed.
124 if ( ! exists $item->{full_path} ) {
125 my $file_id = ucfirst( $item->{300}->[0] . ' ' . $item->{700}->[0] );
126 $file_id =~ s/[\.\,]//g;
127 if ( exists $files->{ $file_id } ) {
128 $item->{full_path} = delete $files->{ $file_id };
# Report records that still have no file, then dump the parsed record for debugging.
132 warn "MISSING file for $eprintid\n" unless exists $item->{full_path};
134 warn "# item ",dump($item);
# Post-increment: this record gets the current counter value and the counter then advances.
137 eprintid => $eprintid++
# File name is the part of the path after the last slash; the full path is kept as well.
140 $eprint->{filename} = $1 if $item->{full_path} =~ m{/([^/]+)$};
141 $eprint->{full_path} = $item->{full_path};
# Date: digits following "$d" in the first 210 value. Pages: leading digits of the first 215 value.
143 $eprint->{date} = $1 if $item->{210}->[0] =~ m/\$d(\d+)/;
144 $eprint->{pages} = $1 if $item->{215}->[0] =~ m/^(\d+)/;
# The first 700 value is split on the comma into family and given name of the creator.
146 ( $eprint->{creators_family}, $eprint->{creators_given} ) = split(/,\s*/, $item->{700}->[0] );
148 $eprint->{title} = $item->{title};
# All 610 values joined with ", " become the keywords.
150 $eprint->{keywords} = join(", ", @{ $item->{610} }) if exists $item->{610};
# Mentor: a second 700 value ("Family, Given") takes precedence; otherwise the
# free-text mentor is split on whitespace as "Given Family".
# NOTE(review): splitting into two variables drops any name parts beyond the second.
152 if ( exists $item->{700}->[1] ) {
153 ( $eprint->{thesis_mentor_family}, $eprint->{thesis_mentor_given} ) = split(/,\s*/, $item->{700}->[1] );
154 } elsif ( $item->{mentor} ) {
155 ( $eprint->{thesis_mentor_given}, $eprint->{thesis_mentor_family} ) = split(/\s+/, $item->{mentor} );
158 $eprint->{thesis_mentor_family} =~ s/(\S+)\s*-\s*(\S+)/$1-$2/; # fix spaces between dash in double surname
# Call number and inventory number are copied from the first values of fields 990 and 991.
160 $eprint->{thesis_callnumber} = $item->{990}->[0];
161 $eprint->{thesis_invnumber} = $item->{991}->[0]; # FIXME?
163 # fallback to the year for theses without a date
# The year is the four digits found between slashes in the first 990 value.
# NOTE(review): no earlier assignment to $eprint->{thesis_date} is visible here.
164 if ( ! $eprint->{thesis_date} && $item->{990}->[0] =~ m{/(\d\d\d\d)/} ) {
165 $eprint->{thesis_date} = $1;
# With no date from field 210, reuse the thesis date.
168 if ( ! $eprint->{date} ) {
169 $eprint->{date} = $eprint->{thesis_date};
# NOTE(review): the values dumped here are interpolated into the XML lines below as-is;
# no XML escaping of characters such as & or < is visible in this view — confirm.
172 warn "# eprint ",dump($eprint);
177 <eprintid>$eprint->{eprintid}</eprintid>
181 if ( $eprint->{full_path} ) { # presumably guards the document markup below: taken only when a file path is set
189 <datasetid>document</datasetid>
190 <filename>$eprint->{filename}</filename>
191 <mime_type>application/pdf</mime_type>
192 <url>file://$eprint->{full_path}</url>
195 <mime_type>application/pdf</mime_type>
196 <format>application/pdf</format>
197 <language>hr</language>
198 <security>validuser</security>
199 <main>$eprint->{filename}</main>
207 <eprint_status>archive</eprint_status>
208 <type>$eprint->{type}</type>
209 <metadata_visibility>show</metadata_visibility>
213 <family>$eprint->{creators_family}</family>
214 <given>$eprint->{creators_given}</given>
218 <title>$eprint->{title}</title>
219 <ispublished>unpub</ispublished>
223 <full_text_status>restricted</full_text_status>
224 <keywords>$eprint->{keywords}</keywords>
225 <date>$eprint->{date}</date>
226 <date_type>completed</date_type>
227 <pages>$eprint->{pages}</pages>
228 <institution>Grafički fakultet</institution>
230 <department>strojevi</department>
232 <thesis_date>$eprint->{thesis_date}</thesis_date>
233 <thesis_callnumber>$eprint->{thesis_callnumber}</thesis_callnumber>
234 <thesis_invnumber>$eprint->{thesis_invnumber}</thesis_invnumber>
237 <family>$eprint->{thesis_mentor_family}</family>
238 <given>$eprint->{thesis_mentor_given}</given>
# Start the XML output with the XML declaration.
247 print qq{<?xml version="1.0" encoding="utf-8" ?>
252 open(my $tsv_fh, '<:encoding(UTF-8)', "$koha_path.tsv"); # tab-separated index, decoded as UTF-8
253 open(my $marc_fh, '<', "$koha_path.marc"); # MARC data, read by offset further down
254 open(my $import_fh, '>', "$koha_path.import.marc"); # output file for the copied MARC data
# NOTE(review): none of the three open() calls above checks for failure.
# Strip trailing CR/LF characters from the current line.
260 $line =~ s/[\n\r]+$//;
# Split into at most three tab-separated columns.
# NOTE(review): this splits $_ while the line above cleans $line — confirm both hold the same text.
262 my ($offset, $biblionumber, $title) = split(/\t/,$_,3);
264 warn "# $offset $biblionumber $title\n";
# Optional early stop: exit once the eprint id counter reaches the LAST environment variable.
266 exit if $ENV{LAST} && $eprintid >= $ENV{LAST};
# Assignment used as the condition: true only when a PDF path is known for this biblionumber.
270 if ( $item->{full_path} = $files->{$biblionumber} ) {
# Copy the bytes between the previous offset and the current one from the MARC file to the import file.
273 seek $marc_fh, $last_offset, 0;
274 read $marc_fh, my $marc, $offset - $last_offset;
275 print $import_fh $marc;
276 warn "# marc $biblionumber $title\n";
# Remember where this record's span ended for the next iteration.
282 $last_offset = $offset;
# Final diagnostics: dump the current contents of $files and of $stat.
291 warn "# files left ", dump($files);
293 warn "# stat ", dump($stat);