X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=CROSBI.pm;h=94b90b3a11c46e087d81aeb2d1a0481c76dbbef8;hb=cc912f26e43a081efd258aa9e3888fe496cea726;hp=f65425a4db9aa08da1d439f83ce7d7d64be08402;hpb=96519ff9d7d2e5f21b6d07d3768539cf776933da;p=Biblio-Z3950.git diff --git a/CROSBI.pm b/CROSBI.pm index f65425a..94b90b3 100644 --- a/CROSBI.pm +++ b/CROSBI.pm @@ -6,6 +6,7 @@ use strict; use MARC::Record; use Data::Dump qw/dump/; use DBI; +use utf8; use base 'Scraper'; @@ -30,15 +31,15 @@ sub diag { # @attr 1=1016 any sub usemap {{ - 4 => '', - 7 => '', - 8 => '', - 1003 => '', -# 16 => '', - 21 => '', - 12 => '', -# 1007 => '', - 1016 => '', + 4 => 'fti_pr:', + 7 => 'fti_pr:', + 8 => 'fti_pr:', + 1003 => 'fti_au:', + 16 => 'fti_pr:', + 21 => 'fti_pr:', + 12 => 'fti_pr:', + 1007 => 'fti_pr:', + 1016 => 'fti_au,fti_pr:', }}; =for sql @@ -46,50 +47,89 @@ sub usemap {{ =cut my $dbname = 'bibliografija'; - -my $dbh = DBI->connect("dbi:Pg:dbname=$dbname", '', '', {AutoCommit => 0}); +my @and; +my @exec; sub search { my ( $self, $query ) = @_; + utf8::decode( $query ); + warn "QUERY",dump( $query ); + die "need query" unless defined $query; - my $tsquery = join(' & ', split(/\s+/,$query) ); + $query =~ s/^\s+//; + $query =~ s/\s+$//; - my $sql = qq{ + my $table = lc $self->{database}; + $table =~ s/^crosbi-//g; -select * -from casopis -inner join rad_ustanova using (id) -left outer join rad_napomena using (id) -left outer join rad_projekt using (id) -left outer join rad_godina using (id) -left outer join rad_podrucje using (id) -left outer join url using (id) -where rad_ustanova.sifra = ? and ( - fti_au @@ to_tsquery(?) - or fti_pr @@ to_tsquery(?) -) + $self->{_table} = $table; + + my $sql = qq{ +select + $table.* + ,ARRAY( select napomena from rad_napomena where rad_napomena.id = $table.id ) as rad_napomena + ,ARRAY( select projekt from rad_projekt where rad_projekt.id = $table.id ) as rad_projekt + --,ARRAY( select datum from rad_godina where rad_godina.id = $table.id ) as rad_godina + ,ARRAY( select sifra from rad_podrucje where rad_podrucje.id = $table.id ) as rad_podrucje + ,ARRAY( select url from url where url.id = $table.id ) as url +from $table +inner join rad_ustanova using (id) -- sifra }; - my $sth = $dbh->prepare( $sql ); + @and = ( qq{ rad_ustanova.sifra = ? } ); + @exec = ( 130 ); # FIXME ustanova + + sub parse_fti { + my $query = shift; + warn "## parse_fti [$query]"; + my $fti; + if ( $query =~ s/^(fti_.+):// ) { + $fti = $1; + } else { + warn "INVALID QUERY no fti_xxx: [$query]"; + } + + my $tsquery = join(' & ', split(/\s+/,$query) ); + + my @or; + foreach my $f ( split(/,/,$fti) ) { + push @or, "$f @@ to_tsquery(?)"; + push @exec, $tsquery; + }; + push @and, "( " . join(" or ", @or) . ")"; + } + + if ( $query =~ / AND / ) { + foreach my $and ( split(/ AND /, $query) ) { + parse_fti $and; + } + } elsif ( $query =~ m/fti_.+:/ ) { + parse_fti $query; + } else { # no " AND " in query + my $tsquery = join(' & ', split(/\s+/,$query) ); + push @and, "( fti_au @@ to_tsquery(?) or fti_pr @@ to_tsquery(?) )"; + push @exec, $tsquery, $tsquery; + } -warn "XXX SQL = ",$sql; -#-- and naslov like ? + $sql .= "where " . join(" and ", @and); - $sth->execute( - 130, # FIXME ustanova - $tsquery, - $tsquery, -# , '%' . $query . '%' - ); +warn "XXX SQL = ",$sql, dump( @exec ); + + my $dbh = DBI->connect_cached("dbi:Pg:dbname=$dbname", '', '', {AutoCommit => 0}); + + my $sth = $dbh->prepare( $sql ); + + $sth->execute( @exec ); - $self->{_sth} = $sth; my $hits = $sth->rows; - warn "# [$tsquery] $hits hits\n"; + $self->{_sth} = $sth; + + warn "# [$query] $hits hits\n"; return $self->{hits} = $hits; } @@ -137,16 +177,16 @@ sub next_marc { $format ||= 'marc'; - my $sth = $self->{_sth} || die "no _sth"; + my $row = $self->{_sth}->fetchrow_hashref; - my $row = $sth->fetchrow_hashref; + warn "## row = ",dump($row) if $ENV{DEBUG}; die "no row" unless $row; my $id = $row->{id} || die "no id"; my $marc = MARC::Record->new; - $marc->encoding('utf-8'); + $marc->encoding('UTF-8'); my $leader = $marc->leader; @@ -156,12 +196,12 @@ sub next_marc { ## LDR 06 - a - language material ## LDR 07 - a - monographic component part - $leader =~ s/^(....).../$1naa/; + $leader =~ s/^(.....)...(.+)/$1naa$2/; ## LDR 17 - Encoding level ; 7 - minimal level, u - unknown ## LDR 18 - i = isbd ; u = unknown - $leader =~ s/^(.{17})/$1uu/; + $leader =~ s/^(.{17})..(.+)/$1ui$2/; $marc->leader( $leader ); warn "# leader [$leader]"; @@ -282,8 +322,23 @@ sub next_marc { 'a' => $row->{title} ); + sub page_range { + my ( $prefix, $from, $to ) = @_; + my $out; + if ( $from ) { + $out = $prefix . $from; + $out .= '-' . $to if $to; + } + return $out; + } + + # fake date for Koha import + $marc->add_fields(260,' ',' ', + c => $row->{godina}, + ); + $marc->add_fields(300,' ',' ', - a => join(' ', $row->{stranica_prva}, $row->{stranica_zadnja}), + a => page_range('',$row->{stranica_prva},$row->{stranica_zadnja}), f => 'str.' ); @@ -295,9 +350,11 @@ sub next_marc { # /data/FF/crosbi/2016-12-12/casopis-rad_napomena.sql - $marc->add_fields(500,' ',' ', - a => substr($row->{napomena}, 0, 9999), # XXX marc limit for one subfield is 4 digits in dictionary - ); + foreach my $napomena ( @{ $row->{rad_napomena} } ) { + $marc->add_fields(500,' ',' ', + a => substr($napomena, 0, 9999), # XXX marc limit for one subfield is 4 digits in dictionary + ); + } $marc->add_fields(520,' ',' ', a => substr($row->{sazetak}, 0, 9999) @@ -315,9 +372,11 @@ sub next_marc { a => $row->{jezik} ); - $marc->add_fields(690,' ',' ', - a => $row->{sifra} - ); + foreach my $v ( @{ $row->{rad_podrucje} } ) { + $marc->add_fields(690,' ',' ', + a => $v, + ); + } $marc->add_fields(693,' ',' ', @@ -340,11 +399,10 @@ sub next_marc { ) foreach @a; } - $marc->add_fields(773,'0',' ', t => $row->{casopis}, x => $row->{issn}, - g => "$row->{volumen}, ($row->{godina}), str. $row->{stranica_prva}-$row->{stranica_zadnja}", + g => "$row->{volumen} ($row->{godina}), $row->{broj} ;" . page_range(' str. ',$row->{stranica_prva}, $row->{stranica_zadnja}), ); if ( my $file = $row->{datoteka} ) { @@ -353,24 +411,33 @@ sub next_marc { ); }; - foreach my $name (qw( openurl url )) { - next if ! $row->{$name}; + + $marc->add_fields(856,' ',' ', + u => $row->{openurl}, + ) if $row->{openurl}; + + foreach my $url ( @{ $row->{url} } ) { $marc->add_fields(856,' ',' ', - u => $row->{$name}, + u => $url, ); } - $marc->add_fields(942,' ',' ', - c => 'CLA', - $row->{status_rada} ? ( + my @f942 = ( + c => $self->{_table} eq 'casopis' ? 'CLA' : + $self->{_table} eq 'preprint' ? 'PRE' : + 'FIXME', + ); + if ( $row->{status_rada} ) { + push @f942, ( f => 1, g => $row->{status_rada} - ) : (), - $row->{kategorija} =~ m/Znanstveni/ ? ( - t => '1.01' - ) : $row->{kategorija} =~ m/Strucni/ ? ( - t => '1.04' - ) : (), + ); + } + push @f942, t => '1.01' if $row->{kategorija} =~ m/Znanstveni/; + push @f942, t => '1.04' if $row->{kategorija} =~ m/Strucni/; + + $marc->add_fields(942,' ',' ', + @f942, u => '1', z => join(' - ', $row->{kategorija}, $row->{vrsta_rada}), ); @@ -383,7 +450,7 @@ sub next_marc { =cut # diag "# hash ",dump($hash); - diag "# marc\n", $marc->as_formatted; + diag "# marc\n", $marc->as_formatted if $ENV{DEBUG}; $self->save_marc( "$id.marc", $marc->as_usmarc );