use DBI;
use utf8;
+use Scraper;
use base 'Scraper';
my $debug = $ENV{DEBUG} || 0;
=cut
my $dbname = 'bibliografija';
+my $pg_user = $ENV{PGUSER} || '';
+my $pg_passwd = $ENV{PGPASSWD} || '';
+my $pg_host = $ENV{PGHOST} || '';
my @and;
my @exec;
utf8::decode( $query );
warn "QUERY",dump( $query );
- die "need query" unless defined $query;
+ die "ERROR need query" unless defined $query;
+
+ $query =~ s/^\s+//;
+ $query =~ s/\s+$//;
my $table = lc $self->{database};
$table =~ s/^crosbi-//g;
my $sql = qq{
-select distinct *
+select
+ $table.*
+ ,ARRAY( select napomena from rad_napomena where rad_napomena.id = $table.id ) as rad_napomena
+ ,ARRAY( select projekt from rad_projekt where rad_projekt.id = $table.id ) as rad_projekt
+ ,ARRAY( select datum from rad_godina where rad_godina.id = $table.id ) as rad_godina
+ ,ARRAY( select sifra from rad_podrucje where rad_podrucje.id = $table.id ) as rad_podrucje
+ ,ARRAY( select url from url where url.id = $table.id ) as url
from $table
-inner join rad_ustanova using (id)
-left outer join rad_napomena using (id)
-left outer join rad_projekt using (id)
-left outer join rad_godina using (id)
-left outer join rad_podrucje using (id)
-left outer join url using (id)
+inner join rad_ustanova using (id) -- sifra
};
@and = ( qq{ rad_ustanova.sifra = ? } );
my $query = shift;
warn "## parse_fti [$query]";
my $fti;
- if ( $query =~ s/^(fti_.+):// ) {
+ if ( $query =~ s/^(fti_.+):\s*// ) {
$fti = $1;
} else {
warn "INVALID QUERY no fti_xxx: [$query]";
my @or;
foreach my $f ( split(/,/,$fti) ) {
- push @or, "$f @@ to_tsquery(?)";
+ push @or, "$f @@ to_tsquery('english',?)";
push @exec, $tsquery;
};
push @and, "( " . join(" or ", @or) . ")";
parse_fti $query;
} else { # no " AND " in query
my $tsquery = join(' & ', split(/\s+/,$query) );
- push @and, "( fti_au @@ to_tsquery(?) or fti_pr @@ to_tsquery(?) )";
+ push @and, "( fti_au @@ to_tsquery('english',?) or fti_pr @@ to_tsquery('english',?) )";
push @exec, $tsquery, $tsquery;
}
warn "XXX SQL = ",$sql, dump( @exec );
- my $dbh = DBI->connect_cached("dbi:Pg:dbname=$dbname", '', '', {AutoCommit => 0});
+ my $dbh = DBI->connect_cached("dbi:Pg:dbname=$dbname;host=$pg_host", $pg_user, $pg_passwd, {AutoCommit => 0});
my $sth = $dbh->prepare( $sql );
$format ||= 'marc';
- my $sth = $self->{_sth} || die "no _sth";
-
+ my $sth = $self->{_sth} || die "ERROR no _sth";
my $row = $sth->fetchrow_hashref;
- while ( $self->{_deduplicate}->{ $row->{id} } ) {
- warn "DUPLICATE $row->{id}, skipping\n";
- $row = $sth->fetchrow_hashref;
- return unless $row;
- }
- $self->{_deduplicate}->{ $row->{id} }++;
+ warn "## row = ",dump($row) if $ENV{DEBUG};
- die "no row" unless $row;
+ warn "ERROR: no row" unless $row;
- my $id = $row->{id} || die "no id";
+ my $id = $row->{id} || die "ERROR no id";
my $marc = MARC::Record->new;
$marc->encoding('UTF-8');
## LDR 06 - a - language material
## LDR 07 - a - monographic component part
- $leader =~ s/^(....)...(.+)/$1naa$2/;
+ $leader =~ s/^(.....)...(.+)/$1naa$2/;
## LDR 17 - Encoding level ; 7 - minimal level, u - unknown
## LDR 18 - i = isbd ; u = unknown
- $leader =~ s/^(.{17})..(.+)/$1uu$2/;
+ $leader =~ s/^(.{17})..(.+)/$1ui$2/;
$marc->leader( $leader );
warn "# leader [$leader]";
## 008 07-10 - Date 1
- $f008 .= substr($row->{datum},0,4);
+ $f008 .= substr( $row->{rad_godina}->[0] ,0,4);
## 008 11-14 - Date 2
## 008 15-17 - Place of publication, production, or execution - ako nema 102, popunjava se s |
$f008 .= 'xx ';
+## 008 29 - Conference publication
+ $f008 .= ' ' x ( 29 - length($f008) );
+ $f008 .= $self->{_table} eq 'zbornik' ? '1' : '0';
+
## 008 35-37 - Language
$f008 .= ' ' x ( 35 - length($f008) ); # pad to 35 position
if ( my $lng = $langrecode008->{ $row->{jezik} } ) {
a => $row->{volumen},
b => $row->{broj},
i => $row->{godina},
- );
+ ) if $row->{volumen};
# /data/FF/crosbi/2016-12-12/casopis-rad_napomena.sql
- $marc->add_fields(500,' ',' ',
- a => substr($row->{napomena}, 0, 9999), # XXX marc limit for one subfield is 4 digits in dictionary
- );
+ foreach my $napomena ( @{ $row->{rad_napomena} } ) {
+ $marc->add_fields(500,' ',' ',
+ a => substr($napomena, 0, 9999), # XXX marc limit for one subfield is 4 digits in dictionary
+ );
+ }
$marc->add_fields(520,' ',' ',
a => substr($row->{sazetak}, 0, 9999)
a => $row->{jezik}
);
- $marc->add_fields(690,' ',' ',
- a => $row->{sifra}
- );
+ foreach my $v ( @{ $row->{rad_podrucje} } ) {
+ $marc->add_fields(690,' ',' ',
+ a => $v,
+ );
+ }
$marc->add_fields(693,' ',' ',
) foreach @a;
}
+ # Join alternating (value, delimiter) arguments into one display string,
+ # dropping every value that is false (undef, '' or 0).
+ #
+ # Arguments: value1, delim1, value2, delim2, ... valueN [, delimN]
+ #   - a value may be an array ref [ before, value, after ]; before/after
+ #     are emitted only when the inner value itself is kept
+ #   - the delimiter placed in front of a kept value is the one that followed
+ #     the most recently consumed item, even if that item was dropped
+ # Returns the combined string ('' when every value is dropped).
+ #
+ # A delimiter is never emitted before the first kept value, so a missing
+ # leading element does not leave stray punctuation at the start.
+ sub combine {
+     my $out = '';
+     my $last_delimiter = '';
+     while (@_) {
+         my $value     = shift @_;
+         my $delimiter = shift @_;
+         my ( $before, $after ) = ( '', '' );
+         ( $before, $value, $after ) = @$value if ref $value eq 'ARRAY';
+         if ($value) {
+             # separate only from text that is already in the output
+             $out .= $last_delimiter if length $out;
+             $out .= $before . $value . $after;
+         }
+         # an undefined delimiter means the argument list is exhausted;
+         # '' and '0' are legitimate delimiters and must not stop the loop
+         last unless defined $delimiter;
+         $last_delimiter = $delimiter;
+     }
+     warn "### [$out]";
+     return $out;
+ }
+
+
+ if ( $self->{_table} =~ m/(casopis|preprint)/ ) {
+
$marc->add_fields(773,'0',' ',
t => $row->{casopis},
x => $row->{issn},
- g => "$row->{volumen} ($row->{godina}), $row->{broj} ;" . page_range(' str. ',$row->{stranica_prva}, $row->{stranica_zadnja}),
+# g => "$row->{volumen} ($row->{godina}), $row->{broj} ;" . page_range(' str. ',$row->{stranica_prva}, $row->{stranica_zadnja}),
+ g => combine( $row->{volumen}, ' ', [ '(', $row->{godina}, ')' ], ', ', $row->{broj}, ' ;', page_range(' str. ',$row->{stranica_prva}, $row->{stranica_zadnja}) ),
+ );
+
+ } elsif ( $self->{_table} =~ m/rknjiga/ ) {
+
+ # rknjiga-dbi2marc.pl
+ $marc->add_fields(773,'0',' ',
+ t => $row->{knjiga},
+# d => "$row->{grad} : $row->{nakladnik}, $row->{godina}",
+ d => combine( $row->{grad}, ' : ', $row->{nakladnik}, ', ', $row->{godina} ),
+ k => $row->{serija},
+ h => $row->{ukupno_stranica},
+ n => $row->{uredink},
+ z => $row->{isbn},
+ g => page_range('str. ',$row->{stranica_prva}, $row->{stranica_zadnja}),
+ );
+
+ } elsif ( $self->{_table} =~ m/zbornik/ ) {
+
+ # zbornik-dbi2marc.pl
+ $marc->add_fields(773,'0',' ',
+ t => $row->{skup},
+# d => "$row->{grad} : $row->{nakladnik}, $row->{godina}",
+ d => combine( $row->{grad}, ' : ', $row->{nakladnik}, ', ', $row->{godina} ),
+ k => $row->{serija},
+ h => $row->{ukupno_stranica},
+ n => $row->{uredink},
+ z => $row->{isbn},
+ g => page_range('str. ',$row->{stranica_prva}, $row->{stranica_zadnja}),
);
+ } else {
+ die "ERROR: 773 undefined in row ",dump($row);
+ }
+
+
if ( my $file = $row->{datoteka} ) {
$marc->add_fields(856,' ',' ',
u => "http://bib.irb.hr/datoteka/$file",
);
};
- foreach my $name (qw( openurl url )) {
- next if ! $row->{$name};
+
+ $marc->add_fields(856,' ',' ',
+ u => $row->{openurl},
+ ) if $row->{openurl};
+
+ foreach my $url ( @{ $row->{url} } ) {
$marc->add_fields(856,' ',' ',
- u => $row->{$name},
+ u => $url,
);
}
+ my $f942c = {
+ casopis => 'CLA',
+ preprint => 'PRE',
+ rknjiga => 'POG',
+ zbornik => 'RZB',
+ };
+
my @f942 = (
- c => $self->{_table} eq 'casopis' ? 'CLA' :
- $self->{_table} eq 'preprint' ? 'PRE' :
- 'FIXME',
+ c => $f942c->{ $self->{_table} } || die "ERROR no table $self->{_table} in ".dump($f942c),
);
+
if ( $row->{status_rada} ) {
push @f942, (
f => 1,
g => $row->{status_rada}
);
}
- push @f942, t => '1.01' if $row->{kategorija} =~ m/Znanstveni/;
- push @f942, t => '1.04' if $row->{kategorija} =~ m/Strucni/;
+
+ if ( $self->{_table} =~ m/(casopis|preprint)/ ) {
+
+ if ( $row->{kategorija} =~ m/Znanstveni/ ) {
+ push @f942, t => '1.01'
+ } elsif ( $row->{kategorija} =~ m/Strucni/ ) {
+ push @f942, t => '1.04';
+ } else {
+ warn "ERROR kategorija $row->{kategorija}";
+ }
+
+ } elsif ( $self->{_table} =~ m/rknjiga/ ) {
+
+ if ( $row->{kategorija} =~ m/Znanstveni/ ) {
+ push @f942, t => '1.16.1';
+ } elsif ( $row->{kategorija} =~ m/Pregledni/ ) {
+ push @f942, t => '1.16.2';
+ } elsif ( $row->{kategorija} =~ m/Strucni/ ) {
+ push @f942, t => '1.17';
+ } else {
+ warn "ERROR kategorija $row->{kategorija}";
+ }
+
+ } elsif ( $self->{_table} =~ m/zbornik/ ) {
+
+ push @f942, v => $row->{vrst_recenzije};
+
+ } else {
+ die "ERROR _table $self->{_table}";
+ }
$marc->add_fields(942,' ',' ',
@f942,
u => '1',
- z => join(' - ', $row->{kategorija}, $row->{vrsta_rada}),
+ z => join(' - ', grep { defined $_ } ($row->{kategorija}, $row->{vrst_sudjelovanja}, $row->{vrsta_rada})),
);
=for later