X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=CROSBI.pm;h=24bee4b955fd932e235633a502f2093ec0d8eb33;hb=a2fd8fa6864ecf44978c84aa06a6682f823f1977;hp=b549f8b61205fe000da9bfbb77c725f630f8fb28;hpb=197f7bffb39d4c44087ada8faedf6700446b66af;p=Biblio-Z3950.git diff --git a/CROSBI.pm b/CROSBI.pm index b549f8b..24bee4b 100644 --- a/CROSBI.pm +++ b/CROSBI.pm @@ -8,6 +8,7 @@ use Data::Dump qw/dump/; use DBI; use utf8; +use Scraper; use base 'Scraper'; my $debug = $ENV{DEBUG} || 0; @@ -47,6 +48,9 @@ sub usemap {{ =cut my $dbname = 'bibliografija'; +my $pg_user = $ENV{PGUSER} || ''; +my $pg_passwd = $ENV{PGPASSWD} || ''; +my $pg_host = $ENV{PGHOST} || ''; my @and; my @exec; @@ -56,7 +60,10 @@ sub search { utf8::decode( $query ); warn "QUERY",dump( $query ); - die "need query" unless defined $query; + die "ERROR need query" unless defined $query; + + $query =~ s/^\s+//; + $query =~ s/\s+$//; my $table = lc $self->{database}; $table =~ s/^crosbi-//g; @@ -65,14 +72,15 @@ sub search { my $sql = qq{ -select distinct * +select + $table.* + ,ARRAY( select napomena from rad_napomena where rad_napomena.id = $table.id ) as rad_napomena + ,ARRAY( select projekt from rad_projekt where rad_projekt.id = $table.id ) as rad_projekt + ,ARRAY( select datum from rad_godina where rad_godina.id = $table.id ) as rad_godina + ,ARRAY( select sifra from rad_podrucje where rad_podrucje.id = $table.id ) as rad_podrucje + ,ARRAY( select url from url where url.id = $table.id ) as url from $table -inner join rad_ustanova using (id) -left outer join rad_napomena using (id) -left outer join rad_projekt using (id) -left outer join rad_godina using (id) -left outer join rad_podrucje using (id) -left outer join url using (id) +inner join rad_ustanova using (id) -- sifra }; @and = ( qq{ rad_ustanova.sifra = ? } ); @@ -82,7 +90,7 @@ left outer join url using (id) my $query = shift; warn "## parse_fti [$query]"; my $fti; - if ( $query =~ s/^(fti_.+):// ) { + if ( $query =~ s/^(fti_.+):\s*// ) { $fti = $1; } else { warn "INVALID QUERY no fti_xxx: [$query]"; @@ -92,7 +100,7 @@ left outer join url using (id) my @or; foreach my $f ( split(/,/,$fti) ) { - push @or, "$f @@ to_tsquery(?)"; + push @or, "$f @@ to_tsquery('english',?)"; push @exec, $tsquery; }; push @and, "( " . join(" or ", @or) . ")"; @@ -106,7 +114,7 @@ left outer join url using (id) parse_fti $query; } else { # no " AND " in query my $tsquery = join(' & ', split(/\s+/,$query) ); - push @and, "( fti_au @@ to_tsquery(?) or fti_pr @@ to_tsquery(?) )"; + push @and, "( fti_au @@ to_tsquery('english',?) or fti_pr @@ to_tsquery('english',?) )"; push @exec, $tsquery, $tsquery; } @@ -115,7 +123,7 @@ left outer join url using (id) warn "XXX SQL = ",$sql, dump( @exec ); - my $dbh = DBI->connect_cached("dbi:Pg:dbname=$dbname", '', '', {AutoCommit => 0}); + my $dbh = DBI->connect_cached("dbi:Pg:dbname=$dbname;host=$pg_host", $pg_user, $pg_passwd, {AutoCommit => 0}); my $sth = $dbh->prepare( $sql ); @@ -173,20 +181,14 @@ sub next_marc { $format ||= 'marc'; - my $sth = $self->{_sth} || die "no _sth"; - + my $sth = $self->{_sth} || die "ERROR no _sth"; my $row = $sth->fetchrow_hashref; - while ( $self->{_deduplicate}->{ $row->{id} } ) { - warn "DUPLICATE $row->{id}, skipping\n"; - $row = $sth->fetchrow_hashref; - return unless $row; - } - $self->{_deduplicate}->{ $row->{id} }++; + warn "## row = ",dump($row) if $ENV{DEBUG}; - die "no row" unless $row; + warn "ERROR: no row" unless $row; - my $id = $row->{id} || die "no id"; + my $id = $row->{id} || die "ERROR no id"; my $marc = MARC::Record->new; $marc->encoding('UTF-8'); @@ -199,12 +201,12 @@ sub next_marc { ## LDR 06 - a - language material ## LDR 07 - a - monographic component part - $leader =~ s/^(....)...(.+)/$1naa$2/; + $leader =~ s/^(.....)...(.+)/$1naa$2/; ## LDR 17 - Encoding level ; 7 - minimal level, u - unknown ## LDR 18 - i = isbd ; u = unknown - $leader =~ s/^(.{17})..(.+)/$1uu$2/; + $leader =~ s/^(.{17})..(.+)/$1ui$2/; $marc->leader( $leader ); warn "# leader [$leader]"; @@ -222,7 +224,7 @@ sub next_marc { ## 008 07-10 - Date 1 - $f008 .= substr($row->{datum},0,4); + $f008 .= substr( $row->{rad_godina}->[0] ,0,4); ## 008 11-14 - Date 2 @@ -232,6 +234,10 @@ sub next_marc { ## 008 15-17 - Place of publication, production, or execution - ako nema 102, popunjava se s | $f008 .= 'xx '; +## 008 29 - Conference publication + $f008 .= ' ' x ( 29 - length($f008) ); + $f008 .= $self->{_table} eq 'zbornik' ? '1' : '0'; + ## 008 35-37 - Language $f008 .= ' ' x ( 35 - length($f008) ); # pad to 35 position if ( my $lng = $langrecode008->{ $row->{jezik} } ) { @@ -349,13 +355,15 @@ sub next_marc { a => $row->{volumen}, b => $row->{broj}, i => $row->{godina}, - ); + ) if $row->{volumen}; # /data/FF/crosbi/2016-12-12/casopis-rad_napomena.sql - $marc->add_fields(500,' ',' ', - a => substr($row->{napomena}, 0, 9999), # XXX marc limit for one subfield is 4 digits in dictionary - ); + foreach my $napomena ( @{ $row->{rad_napomena} } ) { + $marc->add_fields(500,' ',' ', + a => substr($napomena, 0, 9999), # XXX marc limit for one subfield is 4 digits in dictionary + ); + } $marc->add_fields(520,' ',' ', a => substr($row->{sazetak}, 0, 9999) @@ -373,9 +381,11 @@ sub next_marc { a => $row->{jezik} ); - $marc->add_fields(690,' ',' ', - a => $row->{sifra} - ); + foreach my $v ( @{ $row->{rad_podrucje} } ) { + $marc->add_fields(690,' ',' ', + a => $v, + ); + } $marc->add_fields(693,' ',' ', @@ -398,43 +408,133 @@ sub next_marc { ) foreach @a; } + sub combine { + my $out = ''; + my $last_delimiter = ''; + while(@_) { + my $value = shift @_; + my $delimiter = shift @_; + my ( $before,$after ) = ( '', '' ); + ( $before, $value, $after ) = @$value if ( ref $value eq 'ARRAY' ); + $out .= $last_delimiter . $before . $value . $after if $value; + $last_delimiter = $delimiter || last; + } + warn "### [$out]"; + return $out; + } + + + if ( $self->{_table} =~ m/(casopis|preprint)/ ) { + $marc->add_fields(773,'0',' ', t => $row->{casopis}, x => $row->{issn}, - g => "$row->{volumen} ($row->{godina}), $row->{broj} ;" . page_range(' str. ',$row->{stranica_prva}, $row->{stranica_zadnja}), +# g => "$row->{volumen} ($row->{godina}), $row->{broj} ;" . page_range(' str. ',$row->{stranica_prva}, $row->{stranica_zadnja}), + g => combine( $row->{volumen}, ' ', [ '(', $row->{godina}, ')' ], ', ', $row->{broj}, ' ;', page_range(' str. ',$row->{stranica_prva}, $row->{stranica_zadnja}) ), + ); + + } elsif ( $self->{_table} =~ m/rknjiga/ ) { + + # rknjiga-dbi2marc.pl + $marc->add_fields(773,'0',' ', + t => $row->{knjiga}, +# d => "$row->{grad} : $row->{nakladnik}, $row->{godina}", + d => combine( $row->{grad}, ' : ', $row->{nakladnik}, ', ', $row->{godina} ), + k => $row->{serija}, + h => $row->{ukupno_stranica}, + n => $row->{uredink}, + z => $row->{isbn}, + g => page_range('str. ',$row->{stranica_prva}, $row->{stranica_zadnja}), + ); + + } elsif ( $self->{_table} =~ m/zbornik/ ) { + + # zbornik-dbi2marc.pl + $marc->add_fields(773,'0',' ', + t => $row->{skup}, +# d => "$row->{grad} : $row->{nakladnik}, $row->{godina}", + d => combine( $row->{grad}, ' : ', $row->{nakladnik}, ', ', $row->{godina} ), + k => $row->{serija}, + h => $row->{ukupno_stranica}, + n => $row->{uredink}, + z => $row->{isbn}, + g => page_range('str. ',$row->{stranica_prva}, $row->{stranica_zadnja}), ); + } else { + die "ERROR: 773 undefined in row ",dump($row); + } + + if ( my $file = $row->{datoteka} ) { $marc->add_fields(856,' ',' ', u => "http://bib.irb.hr/datoteka/$file", ); }; - foreach my $name (qw( openurl url )) { - next if ! $row->{$name}; + + $marc->add_fields(856,' ',' ', + u => $row->{openurl}, + ) if $row->{openurl}; + + foreach my $url ( @{ $row->{url} } ) { $marc->add_fields(856,' ',' ', - u => $row->{$name}, + u => $url, ); } + my $f942c = { + casopis => 'CLA', + preprint => 'PRE', + rknjiga => 'POG', + zbornik => 'RZB', + }; + my @f942 = ( - c => $self->{_table} eq 'casopis' ? 'CLA' : - $self->{_table} eq 'preprint' ? 'PRE' : - 'FIXME', + c => $f942c->{ $self->{_table} } || die "ERROR no table $self->{_table} in ".dump($f942c), ); + if ( $row->{status_rada} ) { push @f942, ( f => 1, g => $row->{status_rada} ); } - push @f942, t => '1.01' if $row->{kategorija} =~ m/Znanstveni/; - push @f942, t => '1.04' if $row->{kategorija} =~ m/Strucni/; + + if ( $self->{_table} =~ m/(casopis|preprint)/ ) { + + if ( $row->{kategorija} =~ m/Znanstveni/ ) { + push @f942, t => '1.01' + } elsif ( $row->{kategorija} =~ m/Strucni/ ) { + push @f942, t => '1.04'; + } else { + warn "ERROR kategorija $row->{kategorija}"; + } + + } elsif ( $self->{_table} =~ m/rknjiga/ ) { + + if ( $row->{kategorija} =~ m/Znanstveni/ ) { + push @f942, t => '1.16.1'; + } elsif ( $row->{kategorija} =~ m/Pregledni/ ) { + push @f942, t => '1.16.2'; + } elsif ( $row->{kategorija} =~ m/Strucni/ ) { + push @f942, t => '1.17'; + } else { + warn "ERROR kategorija $row->{kategorija}"; + } + + } elsif ( $self->{_table} =~ m/zbornik/ ) { + + push @f942, v => $row->{vrst_recenzije}; + + } else { + die "ERROR _table $self->{_table}"; + } $marc->add_fields(942,' ',' ', @f942, u => '1', - z => join(' - ', $row->{kategorija}, $row->{vrsta_rada}), + z => join(' - ', grep { defined $_ } ($row->{kategorija}, $row->{vrst_sudjelovanja}, $row->{vrsta_rada})), ); =for later