From 22852a835a2c99028075ba77d51f3a06a906dad0 Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Wed, 31 Oct 2007 18:07:33 +0000 Subject: [PATCH] cleanup versions git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@944 07558da8-63fa-0310-ba24-9fe276d99e06 --- Makefile.PL | 10 +-- conf/mjesec.yml | 88 +++++++++++++++----- conf/modify/common.pl | 14 ++-- conf/normalize/ff-libri.pl | 30 +++---- conf/normalize/ff-peri.pl | 166 ++++++++++++++++++++++++++----------- conf/normalize/marc-nsk.pl | 2 +- conf/normalize/minimal.pl | 1 - lib/WebPAC/Output/MARC.pm | 2 +- 8 files changed, 212 insertions(+), 101 deletions(-) diff --git a/Makefile.PL b/Makefile.PL index 00ff13d..453cf0f 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -12,7 +12,7 @@ WriteMakefile( 'Test::More' => 0, 'YAML' => 0, 'File::Slurp' => 0, - 'Log::Log4perl' => 1.02, + 'Log::Log4perl' => '1.02', 'Cwd' => 0, 'Storable' => 0, 'DBM::Deep' => 0, @@ -23,14 +23,14 @@ WriteMakefile( 'Encode' => 0, 'LWP' => 0, 'File::Path' => 0, - 'Biblio::Isis' => 0.24, - 'MARC::Fast' => 0.08, + 'Biblio::Isis' => '0.24', + 'MARC::Fast' => '0.08', 'Search::Estraier' => 0.06, 'List::Util' => 0, 'Spreadsheet::ParseExcel', => 0, - 'MARC::Record' => 2.0, + 'MARC::Record' => '2.0', 'Data::Dump' => 0, - 'MARC::Lint' => 0, + 'MARC::Lint' => '1.43', 'Proc::Queue' => 0, 'PPI' => 0, 'XML::LibXML' => 0, diff --git a/conf/mjesec.yml b/conf/mjesec.yml index b42100b..c2a38a6 100644 --- a/conf/mjesec.yml +++ b/conf/mjesec.yml @@ -75,6 +75,7 @@ webpac: isis: 'WebPAC::Input::ISIS' marc: 'WebPAC::Input::MARC' excel: 'WebPAC::Input::Excel' + dbf: 'WebPAC::Input::DBF' isi: 'WebPAC::Input::ISI' # define delimiters for validation delimiters: @@ -341,6 +342,20 @@ databases: modify_file: 'conf/modify/common.pl' normalize: path: 'conf/normalize/ff-peri.pl' + - name: marcp + type: marc + path: '/data/webpac2/out/marc/ffiz-peri.marc' + encoding: 'UTF-8' + modify_file: 'conf/modify/common.pl' + normalize: + path: 'conf/normalize/marc.pl' + - name: marcl + type: marc + path: '/data/webpac2/out/marc/ffiz-libri.marc' + encoding: 'UTF-8' + modify_file: 'conf/modify/common.pl' + normalize: + path: 'conf/normalize/marc.pl' - name: libri type: isis path: '/backup/isis_backup/A129-2/isisdata/latest/LIBRI/LIBRI' @@ -350,6 +365,7 @@ databases: path: 'conf/normalize/ff-libri.pl' + # ffkk: # name: 'Komparativna knji¾evnost, Filozofski fakultet u Zagrebu' # input: @@ -386,16 +402,16 @@ databases: # normalize: # path: 'conf/normalize/ff-peri.pl' # -# ffpo: -# name: 'Povijest, Filozofski fakultet u Zagrebu' -# input: -# - name: peri -# type: isis -# path: '/backup/isis_backup/A-201-1/isisdata/latest/PERI/PERI' -# encoding: 'cp852' -# modify_file: 'conf/modify/common.pl' -# normalize: -# path: 'conf/normalize/ff-peri.pl' + ffpo: + name: 'Povijest, Filozofski fakultet u Zagrebu' + input: + - name: peri + type: isis + path: '/backup/isis_backup/A-201-1/isisdata/latest/PERI/PERI' + encoding: 'cp852' + modify_file: 'conf/modify/common.pl' + normalize: + path: 'conf/normalize/ff-peri.pl' # # ffpu: # name: 'Povijest umjetnosti, Filozofski fakultet u Zagrebu' @@ -435,19 +451,45 @@ databases: # normalize: # path: 'conf/normalize/ff-peri.pl' # -# ffsf: -# name: 'Slavenska filologija, Filozofski fakulteti u Zagrebu' -# input: -# - name: peri -# type: isis -# path: '/backup/isis_backup/A125-2/ISISDATA/latest/PERI/PERI' -# encoding: 'cp852' -# #limit: 10 -# modify_file: 'conf/modify/common.pl' -# normalize: -# # path: 'conf/normalize/ff-peri.pl' -# path: 'conf/normalize/ff-peri.pl' - + ffsf: + name: 'Slavenska filologija, Filozofski fakulteti u Zagrebu' + input: + - name: peri + type: isis + path: '/backup/isis_backup/A125-2/ISISDATA/latest/PERI/PERI' + encoding: 'cp852' + #limit: 10 + modify_file: 'conf/modify/common.pl' + normalize: + #path: 'conf/normalize/ff-peri.pl' + path: 'conf/normalize/ff-peri.pl' + - name: marcp + type: marc + path: '/data/webpac2/out/marc/ffsf-peri.marc' + encoding: 'UTF-8' + #modify_file: 'conf/modify/common.pl' + normalize: + path: 'conf/normalize/marc.pl' + - name: xls + type: excel + path: '/data/isis_data/sfd/ffsf-peri.xls' + encoding: 'ISO-8859-2' + #modify_file: 'conf/modify/common.pl' + normalize: + path: 'conf/normalize/excel.pl' + + proba: + name: FF + input: + - name: proba + type: isis + path: '/backup/isis_backup/A-201-1/isisdata/latest/PERI/PERI' + encoding: 'cp852' + #limit: 10 + modify_file: 'conf/modify/common.pl' + normalize: + path: 'conf/normalize/minimal.pl' + # ffso: # name: 'Sociologija, Filozofski fakultet u Zagrebu' # input: diff --git a/conf/modify/common.pl b/conf/modify/common.pl index 5ff5ba9..29ed525 100644 --- a/conf/modify/common.pl +++ b/conf/modify/common.pl @@ -206,10 +206,10 @@ # '^a' => '^g' -326 - '*' - 'g1' => 'a' - 'g6' => 'b' - 'g4' => 'q' - 'g' => 'a' - 'g2' => 'f' +#326 +# '*' +# 'g1' => 'a' +# 'g6' => 'b' +# 'g4' => 'q' +# 'g' => 'a' +# 'g2' => 'f' diff --git a/conf/normalize/ff-libri.pl b/conf/normalize/ff-libri.pl index 4574aa2..f2fa054 100644 --- a/conf/normalize/ff-libri.pl +++ b/conf/normalize/ff-libri.pl @@ -321,37 +321,39 @@ if ( ( rec('200','a') =~ m/^An /) && ( rec(101) =~ m/ENG/ ) ) { marc_compose('245', 'a', suffix( - ( ! ( rec('200','d') || rec('200','e') || rec('200','k') ) ) && ( rec('200','f') ) ? ' / ' : - ( rec('200','d') ) ? ' = ' : - ( rec('200','e') ) ? ' : ' : - ( rec('200','k') ) ? ' ; ' : + ( ! ( rec('200','d') || rec('200','e') || rec('200','k') ) ) && ( rec('200','f') ) ? ' /' : + ( rec('200','d') ) ? ' =' : + ( rec('200','e') ) ? ' :' : + ( rec('200','k') ) ? ' ;' : '', rec('200','a'), ), 'b', suffix( - ( rec('200','d') && rec('200','f') ) ? ' / ' : - ( rec('200','d') && rec('200','c') ) ? '. ' : + ( rec('200','d') && rec('200','f') ) ? ' /' : + ( rec('200','d') && rec('200','c') ) ? '.' : '', rec('200','d'), ), 'b', suffix( - ( rec('200','e') && rec('200','f') ) ? ' / ' : - ( rec('200','e') && rec('200','c') ) ? '. ' : + ( rec('200','e') && rec('200','f') ) ? ' /' : + ( rec('200','e') && rec('200','c') ) ? '.' : '', rec('200','e'), ), 'b', suffix( - ( rec('200','k') && rec('200','f') ) ? ' / ' : - ( rec('200','k') && rec('200','c') ) ? '. ' : + ( rec('200','k') && rec('200','f') ) ? ' /' : + ( rec('200','k') && rec('200','c') ) ? '.' : '', rec('200','k'), ), 'c', suffix( - ( rec('200','f') && rec('200','c') ) ? '. ' : + ( rec('200','f') && rec('200','c') ) ? '.' : '', rec('200','f'), ), - '+', rec('200','c'), + '+', suffix('.', + rec('200','c'), + ), ); #marc_compose('245', @@ -847,8 +849,8 @@ if ($series) { } else { $series = join_with('', - config(), - 'LS', + uc( config() ), + ' LS', rec('000') ); diff --git a/conf/normalize/ff-peri.pl b/conf/normalize/ff-peri.pl index 2bfc995..523c7cb 100644 --- a/conf/normalize/ff-peri.pl +++ b/conf/normalize/ff-peri.pl @@ -1,11 +1,13 @@ if ( rec('200','a') ) { ### LEADER -# raspraviti 17,18 +## raspraviti 17,18 marc_leader('05','n'); # Record status marc_leader('06','a'); # Type of record +## izvuci podatke iz tablice (za po i sf) + if ( rec('225','a') ) { marc_leader('07','d'); } else { @@ -14,6 +16,9 @@ if ( rec('225','a') ) { marc_leader('18','i'); # Descriptive cataloging form +# ako postoji neko od polja 76x-78x - LDR 19 je r, ako ne postoji onda je prazno +# tematski brojevi imaju vezu u + if ( rec('225','a') ) { marc_leader('19','r'); # Linked record requirement } @@ -23,7 +28,6 @@ if ( rec('225','a') ) { ### 007 marc_fixed('007',00,'ta'); - ### 008 - All materials # ¹to zapisati u polje kad datum nije poznat? @@ -33,39 +37,47 @@ marc_fixed('008','00', '070707'); # 00-05 - Date entered on file ## c - Continuing resource currently published ## d - Continuing resource ceased publication +my $year = rec('210','c'); + if ( rec('210','c') ) { marc_fixed('008','06','s'); # 06 - Type of date/Publication status, s = single know date + +# warn "## year = $year"; - my $year = rec('210','c'); - warn "## year = $year"; if ( $year =~ m/^(\d{4})/ ) { -# warn "## $1 ##"; +# warn "## $1 ##\n"; marc_fixed('008','07',$1); # 07-10 - Date 1 } - + + if ( $year =~ m/^\d{4}-(\d{4})/ ) { +# warn "## $1 ##"; + marc_fixed('008','11',$1); # 11-14 - Date 2 + marc_fixed('008','06','d'); + } + } else { marc_fixed('008','06','n'); # 06 - n = unknown date } - -# marc_fixed('008','11', ' '); # 11-14 - Date 2 -marc_fixed('008','15', +marc_fixed('008','15', # 15-17 - Place of publication, production, or execution lc ( rec('102') ) -); # 15-17 - Place of publication, production, or execution +); + + + marc_fixed('008','35', first( lc(rec('101')) ) # 35-37 - Language ); -marc_fixed('008','38', '|'); # 38 - Modified record -marc_fixed('008','39','d'); # 39 - Cataloging source - + # 38 - Modified record + # 39 - Cataloging source ### 008 - Continuing resources if ( ! rec('225','a') ) { -marc_fixed('008','18', '|'); # 18 - Frequency - ovo treba popuniti iz polja 326 - # 19 - Regularity +marc_fixed('008','18', 'u'); # 18 - Frequency - ovo treba popuniti iz polja 326 +marc_fixed('008','19', 'u'); # 19 - Regularity # 20 - Undefined, ontains a blank (#) or fill character (|) # za polje 008 21 treba lookup u tablicu: @@ -92,16 +104,18 @@ marc_fixed('008','21', 'p'); # 21 - Type of continuing resource ### 022 -if ( ! rec('225','a') ) { +#if ( ! rec('225','a') ) { marc('022','a', rec('11') ); -} +#} ### 035$6 # privemeno koristimo 0356 umjesto 0359, zato ¹to marclint 9 prijavljuje kao gre¹ku -marc('035','6', +my $id; + +$id = ( join_with('', uc( config() ), ' P', @@ -109,6 +123,7 @@ marc('035','6', ) ); +marc('035','6', $id); ### 040 # za sve je isti @@ -152,12 +167,10 @@ marc('080','a', ); ### 210 indikatori - # marc_indicators('210', '1',' '); -### 210 -# pronaæi priladan izvor podataka (nsk baza, issn baza) - +### 210 - ABBREVIATED TITLE +# pronaæi prikladan izvor podataka (nsk baza, issn baza) ## iz nsk #if ( rec('11') ) { @@ -198,50 +211,70 @@ marc('222','a', ### 245 indikatori # generiraju se prema èlanu i jeziku. potrebna naknadna kontrola. +marc_indicators('245', 0, 0); + if ( ( rec('200','a') =~ m/^Die /) || ( rec('200','a') =~ m/^Das /) || + ( rec('200','a') =~ m/^Der /) || + ( rec('200','a') =~ m/^Les /) || ( rec('200','a') =~ m/^The /) ) { marc_indicators('245', 0, 4); -} else { - marc_indicators('245', 0, 0); -} + +} + +if ( ( rec('200','a') =~ m/^Un /) || + ( rec('200','a') =~ m/^An /) || + ( rec('200','a') =~ m/^La /) || + ( rec('200','a') =~ m/^Le /) || + ( rec('200','a') =~ m/^Il /) ) { + marc_indicators('245', 0, 3); +} + + +if ( ( rec('200','a') =~ m/^A /) || + ( rec('200','a') =~ m/^L'/) ) { + marc_indicators('245', 0, 2); +} + ### 245 marc_compose('245', 'a', suffix( - ( ! ( rec('200','d') || rec('200','e') || rec('200','k') ) ) && ( rec('200','f') ) ? ' / ' : - ( rec('200','d') ) ? ' = ' : - ( rec('200','e') ) ? ' : ' : - ( rec('200','k') ) ? ' ; ' : + ( ! ( rec('200','d') || rec('200','e') || rec('200','k') ) ) && ( rec('200','f') ) ? ' /' : + ( rec('200','d') ) ? ' =' : + ( rec('200','e') ) ? ' :' : + ( rec('200','k') ) ? ' ;' : '', rec('200','a'), ), 'b', suffix( - ( rec('200','d') && rec('200','f') ) ? ' / ' : - ( rec('200','d') && rec('200','c') ) ? '. ' : + ( rec('200','d') && rec('200','f') ) ? ' /' : + ( rec('200','d') && rec('200','c') ) ? '.' : '', rec('200','d'), ), 'b', suffix( - ( rec('200','e') && rec('200','f') ) ? ' / ' : - ( rec('200','e') && rec('200','c') ) ? '. ' : + ( rec('200','e') && rec('200','f') ) ? ' /' : + ( rec('200','e') && rec('200','c') ) ? '.' : '', rec('200','e'), ), 'b', suffix( - ( rec('200','k') && rec('200','f') ) ? ' / ' : - ( rec('200','k') && rec('200','c') ) ? '. ' : + ( rec('200','k') && rec('200','f') ) ? ' /' : + ( rec('200','k') && rec('200','c') ) ? '.' : '', rec('200','k'), ), 'c', suffix( - ( rec('200','f') && rec('200','c') ) ? '. ' : + ( rec('200','f') && rec('200','c') ) ? '.' : '', rec('200','f'), ), - '+', rec('200','c'), + '+', suffix('.', + rec('200','c'), + ) ); ### 246 inikatori @@ -345,7 +378,7 @@ marc('300','e', ); ### 310 - current stated publication frequency -# potrebno ujednaèiti, polje 008? +# potrebno ujednaèiti, koristi se i u polju 008 marc('310','a', rec('326') @@ -414,17 +447,26 @@ marc('490','x', rec('225','x') ); -#if ( rec('225','a') && rec('11') ) { -# marc('999','a', -# lookup( -# sub { rec('110','a'), }, -# 'nsk','bbaza', -# sub { rec('011','a') }, -# sub { rec('11') } -# ) -# ); -#} +if ( rec('11') ) { + marc('999','a', + lookup( + sub { rec('110','a'), }, + 'nsk','bbaza', + sub { rec('011','a') }, + sub { rec('11') } + ) + ); +} + +#marc('998','a', +# lookup( +# sub { rec('H') }, +# 'ffsf','xls', +# sub { rec('A') }, +# sub { $id }, +# ), +#); ### 5xx @@ -549,11 +591,37 @@ marc('772','t', rec('422','a'), ); +### 773 indikatori + +if ( rec('225','a') ) { + +marc_indicators('773', 0, ' '); + +### 773 +## da li je bitan redoslijed potpolja? + +marc('773','w', + lookup( + sub { rec('000') }, + 'ffsf','peri', + sub { rec('200','a') }, + sub { + regex('s/ ; //g', + rec('225','a') + ) + }, + ) +); + +} + + ### 777 indikatori marc_indicators('777', 0, ' '); ### 777 + marc('777','t', rec('423','a'), ); @@ -645,4 +713,4 @@ marc('992','a', # ) # ); -} # 200a +}# 200a diff --git a/conf/normalize/marc-nsk.pl b/conf/normalize/marc-nsk.pl index 2210ea0..84f2332 100644 --- a/conf/normalize/marc-nsk.pl +++ b/conf/normalize/marc-nsk.pl @@ -31,4 +31,4 @@ marc('500','a', rec('300','a') ); -} + diff --git a/conf/normalize/minimal.pl b/conf/normalize/minimal.pl index 536c4e2..9eda39c 100644 --- a/conf/normalize/minimal.pl +++ b/conf/normalize/minimal.pl @@ -11,5 +11,4 @@ marc('999','a', rec('110','a') ); - } diff --git a/lib/WebPAC/Output/MARC.pm b/lib/WebPAC/Output/MARC.pm index f8c17c4..d1e817b 100644 --- a/lib/WebPAC/Output/MARC.pm +++ b/lib/WebPAC/Output/MARC.pm @@ -5,7 +5,7 @@ use strict; use base qw/WebPAC::Common/; -use MARC::Record 2.0; # need 2.0 for utf-8 encoding see marcpm.sf.net +use MARC::Record; use MARC::Lint; use Data::Dump qw/dump/; use Encode qw/from_to decode/; -- 2.20.1