git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1103 07558da8-63fa-0310...
author Dobrica Pavlinusic <dpavlin@rot13.org>
Mon, 4 Aug 2008 19:35:18 +0000 (19:35 +0000)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Mon, 4 Aug 2008 19:35:18 +0000 (19:35 +0000)
conf/mjesec.yml
conf/modify/common.pl
conf/normalize/ff-libri.pl

index f43a516..703f455 100644 (file)
@@ -148,121 +148,163 @@ databases:
         credit: 10000
 
 
+#  hazu:
+#    name: 'HAZU'
+#    input:
+#      - name: bib
+#        type: isis
+#        path: '/data/unizg/hazu/BIB'
+#        encoding: 'cp852'
+#        modify_file: 'conf/modify/common.pl'
+#        normalize:
+#          path: 'conf/normalize/minimal.pl'
+#
+#  sand:
+#    name: 'SAND'
+#    input:
+#      name: casopisi
+#      type: dbf
+#      path: '/data/unizg/casopisi/sys/cas2000.dbf'
+#      normalize:
+#        path: 'conf/normalize/common.pl'
+##      mapping_path: 'conf/input/dbf/cas2000.yml'
+#  
+#  hidra:
+#    name: 'HIDRA'
+#    input:
+#      - name: bib
+#        type: isis
+#        path: '/data/hidra/test/BIB'
+#        encoding: 'cp852'
+#        modify_file: 'conf/modify/common.pl'
+#        normalize:
+#          path: 'conf/normalize/minimal.pl'
+#
+#  efzg:
+#    name: 'EFZG'
+#    input:
+#      - name: crolist
+#        type: marc
+#        path: '/data/unizg/drustvene/efzg/unimarc.iso'
+#        encoding: 'ISO-8859-2'
+#        normalize:
+#          path: 'conf/normalize/minimal.pl'
+#  
+#  ipu:
+#    name: 'Institut za povijest umjetnosti'
+#    input:
+#      - name: bibl
+#        type: isis
+#        path: '/data/unizg/humanistika/ipu/20071115/bibl/BIBL'
+#        encoding: 'cp852'
+#        # modify_file: 'conf/modify/common.pl'
+#        normalize:
+#          path: 'conf/normalize/minimal.pl'
+#
+#  zvonar:
+#    name: 'HAZU Odsjek za povijesne znanosti'
+#    input:
+#      - name: libri
+#        type: isis
+#        path: '/data/unizg/humanistika/hazu-p/LIBRI'
+#        normalize:
+#          path: 'conf/normalize/minimal.pl'
+#      - name: peri
+#        type: isis
+#        path: '/data/unizg/humanistika/hazu-p/PERI'
+#        normalize:
+#          path: 'conf/normalize/minimal.pl'
+#
+#  stross:
+#    name: 'Strossmayerova galerija'
+#    input:
+#      - name: bib
+#        type: isis
+#        path: '/data/unizg/stross/BIB'
+#        normalize:
+#          path: 'conf/normalize/minimal.pl'
+#      
+#  koncar:
+#    name: 'Konèar'
+#    input:
+#      - name: unimar
+#        type: isis
+#        path: '/data/tehnika/koncar/UNIMAR'
+#        normalize:
+#          path: 'conf/normalize/minimal.pl'
+#      
+  datumi:
+    name: 'Nevaljali datumi'
+    input:
+      - name: tablica
+        type: excel
+        path: '/data/isis_data/greske/datum_unosa.xls'
+        encoding: 'windows-1250'
+        normalize:
+          - path: 'conf/normalize/common.pl'
+  
+#  ffps:
+#    name: 'Psihologija, Filozofski fakultet u Zagrebu'
+#    code: 'ps'
+#    url: 'http://www.knjiznice.ffzg.hr/psihologija'
+#    input:
+#      - name: libri
+#        type: isis
+#        path: '/backup/isis_backup/sunce2/isisdata/latest/LIBRI/'
+#        encoding: 'cp852'
+#        modify_file: 'conf/modify/common.pl'
+#        normalize:
+#          - path: 'conf/normalize/report-sorted-txt.pl'
+#    output:
+##      - module: 'Excel'
+##        path: '/data/isis_data/ps/reports/libri-nema-jezik.xls'
+#      - module: 'Sorted'
+#        path: 'out/report/mfn/ps/'
+##      - module: 'Excel'
+##        path: '/data/isis_data/ps/reports/autori-udk.xls'
 
-#  'ffps':
-#    name: 'Knji¾nica Odsjeka za psihologiju'
-#    links:
-#      - to: ffps
-#        credit: 10000
-
-#  'drustvene':
-#    name: 'Katalog knji¾nica dru¹tvenih znanosti'
-#    links:
-#      - to: efzg
-#        credit: 10000
-#      - to: eizg
-#        credit: 10000
-#      - to: efos
-#        credit: 10000
-#      - to: irmo
-#        credit: 10000
-#      - to: hika
-#        credit: 10000
-#      - to: iztzg
-#        credit: 10000
-#      - to: jzav
-#        credit: 10000
-#      - to: ijf 
-#        credit: 10000
-   
-#  'ecasopisi':
-#    name: 'Katalog elektronièkih èasopisa'
-#    links:
-#      - to: emerald
-#        credit: 10000
 
-  hazu:
-    name: 'HAZU'
+  stross:
+    name: 'Strossmayerova galerija'
     input:
       - name: bib
         type: isis
-        path: '/data/unizg/hazu/BIB'
-        encoding: 'cp852'
-        modify_file: 'conf/modify/common.pl'
+        path: '/data/unizg/stross/BIB'
         normalize:
-          path: 'conf/normalize/minimal.pl'
+          - path: 'conf/normalize/report-sorted-txt.pl'
+    output:
+#      - module: 'Excel'
+#        path: '/data/isis_data/iz/reports/libri-nema-jezik.xls'
+      - module: 'Sorted'
+        path: 'out/report/mfn/iz/'
+#      - module: 'Excel'
+#        path: '/data/isis_data/iz/reports/autori-udk.xls'
 
-  sand:
-    name: 'SAND'
-    input:
-      name: casopisi
-      type: dbf
-      path: '/data/unizg/casopisi/sys/cas2000.dbf'
-      normalize:
-        path: 'conf/normalize/common.pl'
-#      mapping_path: 'conf/input/dbf/cas2000.yml'
-  
-  hidra:
-    name: 'HIDRA'
+
+  casopisi:
+    name: 'Èasopisi u Knji¾nicama FF-a'
     input:
-      - name: bib
-        type: isis
-        path: '/data/hidra/test/BIB'
-        encoding: 'cp852'
-        modify_file: 'conf/modify/common.pl'
+      - name: ff
+        type: excel
+        path: '/data/isis_data/CASOPISI/svi-casopisi-FF.xls'
+        encoding: 'utf-8'
         normalize:
-          path: 'conf/normalize/minimal.pl'
+          - path: 'conf/normalize/ff-casopisi-xls.pl'
+#          - path: 'conf/normalize/ff-casopisi-marc.pl'
+    output:
+      - module: 'Excel'
+        path: 'out/excel/casopisi-FF-dupli-knjiznice.xls'
 
-  efzg:
-    name: 'EFZG'
+  fflibri:
+    name: 'Filozofski fakultet u Zagrebu'
     input:
-      - name: crolist
+      - name: marc
         type: marc
-        path: '/data/unizg/drustvene/efzg/unimarc.iso'
+        path: 'out/marc/fflibri.marc'
         encoding: 'ISO-8859-2'
         normalize:
-          path: 'conf/normalize/minimal.pl'
-  
-  ipu:
-    name: 'Institut za povijest umjetnosti'
-    input:
-      - name: bibl
-        type: isis
-        path: '/data/unizg/humanistika/ipu/20071115/bibl/BIBL'
-        encoding: 'cp852'
-        # modify_file: 'conf/modify/common.pl'
-        normalize:
-          path: 'conf/normalize/minimal.pl'
+          - path: 'conf/normalize/ff-libri-dupli.pl'
+    output:
+      - module: 'Excel'
+        path: 'out/excel/knjige-duplo.xls'
 
-  zvonar:
-    name: 'HAZU Odsjek za povijesne znanosti'
-    input:
-      - name: libri
-        type: isis
-        path: '/data/unizg/humanistika/hazu-p/LIBRI'
-        normalize:
-          path: 'conf/normalize/minimal.pl'
-      - name: peri
-        type: isis
-        path: '/data/unizg/humanistika/hazu-p/PERI'
-        normalize:
-          path: 'conf/normalize/minimal.pl'
-
-  stross:
-    name: 'Strossmayerova galerija'
-    input:
-      - name: bib
-        type: isis
-        path: '/data/unizg/stross/BIB'
-        normalize:
-          path: 'conf/normalize/minimal.pl'
-       
-  koncar:
-    name: 'Konèar'
-    input:
-      - name: unimar
-        type: isis
-        path: '/data/tehnika/koncar/UNIMAR'
-        normalize:
-          path: 'conf/normalize/minimal.pl'
-       
index 102b871..34e3d36 100644 (file)
   '*'
     'regex:^' => '^a'
     'regex:\(' => ' ('
-    '. - ' => '^a'
+    ' | ' => '^a'
     ' ; ' => '^a'
-    '; ' => '^a'
-    ';' => '^a'
+    '. - ' => '^a'
+    'ISBN ' => ''
     'ISBN ' => ''
 #    '-' => ''
 
index 818622a..aa920a8 100644 (file)
@@ -27,7 +27,7 @@ if ( rec('999','a') && rec('999','a') =~ m/(pregledan|P)/i) {
 
        marc_leader('07','m');
 
-## LDR 17
+## LDR 17 - 3
        marc_leader('17','7');
 
 ## LDR 18 
@@ -40,16 +40,9 @@ if ( rec('230') | rec('231') | rec('232') | rec('233') ) {
        marc_leader('19','a');
 }
 
-### 001 - Voyager ID
-
-### 003 - razjasniti
-
-### 007 - razjasniti 
-
-marc_fixed('007',00,'ta');
-
 ### 008 - All materials
 ## dodati sve moguce slucajeve za datum, popuniti ono sto nedostaje
+## ostaviti prazno tamo gdje nema ništa
 
 if ( rec('994','c') =~ m/\d{8}/ ) {
        marc_fixed('008','00',
@@ -59,7 +52,9 @@ if ( rec('994','c') =~ m/\d{8}/ ) {
        );
 } elsif ( rec('994','c') =~ m/\d{6}/ ) {
        marc_fixed('008', 00,
-               '000000'
+               regex('s/^\d\d//',
+                       rec('994','c')
+               ),
        );
        marc_fixed('008', '04',
                '01'
@@ -85,23 +80,38 @@ if ( rec('994','c') =~ m/\d{8}/ ) {
 ## 008 06 - Type of date/Publication status
 ## dodati i ostale mogućnosti
 ## 008 07-10 - Date 1 - iz 210d koji kroz modify postaje 210c
-
-my $year = rec('210','c');
+## srediti sve moguće upitne godine
 
 if ( rec('210','c') ) {
-       marc_fixed('008','06','s');             # 06 - Type of date/Publication status, s = single know date
-       
-#      warn "## year = $year";
+       my $d1;
+       my $d2;
+       if ( rec('210','c') =~ m/\?/ ) {
+               marc_fixed('008','06','q');
+       } else {
+               marc_fixed('008','06','s');
+       }
+       if ( rec('210','c') =~ m/(\d{4})/ ) {
+               $d1 = $1;
+       } else {
+               $d1 = '';
+       }
+       if ( rec('210','c') =~ m/^.*-(\d{4})/ ) {
+               $d2 = $1;
+       } else {
+               $d2 = '',
+       }
+       if ( rec('210','c') =~ m/(\d{2})--/ ) {
+               $d1 = $1.'uu';
+               marc_fixed('008','06','u');
+       }
 
-       if ( $year =~ m/^(\d{4})/ ) {
-#      warn "## $1 ##\n";
-               marc_fixed('008','07',$1);              # 07-10 - Date 1
+       marc_fixed('008','07',$d1);             # 07-10 - Date 1
+       marc_fixed('008','11',$d2);             # 07-10 - Date 1
 
 } else {
        marc_fixed('008','06','n');             # 06 - n = unknown date
        marc_fixed('008','07','uuuu');
 }
-}
 
 
 ## 008 11-14 - Date 2 
@@ -109,40 +119,26 @@ if ( rec('210','c') ) {
 ## 008 15-17 - Place of publication, production, or execution - što ako nema 102?
 ## raspraviti
 
+marc_fixed('008','15','xx');
+
 #marc_fixed('008','15', 
 #      lc ( rec('102') ) 
 #);
 
-my $zemlja = 
-       lookup(
-               sub { rec('B') },
-               'kodovi','zemlje',
-               sub { rec('A') },
-               sub { 
-                       regex('s/[\s;:]/g',
-                               rec('210','a') 
-                       )
-               },
-       );
-
-warn (
-       dump ($zemlja)
-);
-
-if ( $year =~ m/^(\d{4})/ ) {
-       if ( $1 >= 1990 ) {
-               marc_fixed('008','15',
-                       lc ( $zemlja )
-
-               );
-#      } elsif ( dump( $zemlja ) eq 'hr' ) {
-#              marc_fixed('008','15','yu');
-       }
-}
+#my $zemlja = 
+#      lookup(
+#              sub { rec('B') },
+#              'kodovi','zemlje',
+#              sub { rec('A') },
+#              sub { 
+#                      regex('s/[\s;:]/g',
+#                              rec('210','a') 
+#                      )
+#              },
+#      );
 
 
 ## 008 35-37 - Language
-
 marc_fixed('008','35',
        # first( lc(rec('101')) )               
        lc( frec('101'))                
@@ -152,7 +148,6 @@ marc_fixed('008','35',
 marc_fixed('008','38','|');            
 
 ## 008 39 - Cataloging source - d (other)
-
 marc_fixed('008','39','d');            
 
 ### 008 - Books - raspraviti upotrebu ovih polja
@@ -192,14 +187,27 @@ marc_fixed('008','34','|');
 ## modify za polje 10 -> drugi ISBN počinje prefixom "ISBN" koji se miče (pr. u sfb)
 
 if ( frec('10') ne ( frec('290') ) ) {
-       marc('020','a', 
-               # isbn_13(
+       if ( rec('10','a') !~ /pogre/ ) {
+               marc('020','a', 
+                       # isbn_13(
+                               regex('s/\s\s/ /g',
+                               # regex('s/\(\d\)\(/$1 \(//g',
+                                       rec('10','a')
+                               )
+                       # )
+               ); 
+       }
+       if ( rec('10','a') =~ /pogre/ ) {
+               marc('020','z',
                        regex('s/\s\s/ /g',
-                       # regex('s/\(\d\)\(/$1 \(//g',
                                rec('10','a')
                        )
-               # )
-       ); 
+               );
+       }
+       marc('020','z',
+               rec('10','z')
+       );
+                       
 }
 
 #warn( 
@@ -217,8 +225,9 @@ marc('035','a',
 join_with('',
        # config('input normalize path'),
        # config('name'),
+       'HR-ZaFF ',
        config(),
-       ' L',
+       'L-',
        # config('input name'),
        # id(),
        # rec('994','a'),
@@ -669,30 +678,15 @@ marc_template(
                'a.|i ;|w',
                 # greške
                'a ;|v ;|w',
-               #mozda greska:
-               'a ;|v,|x ;|w',
-               'a ;|v,|x.|p',
-               'a,|x ;|v',
-               'a,|x.|p ;|w',
-       ],
-         from => "{ a => 1, i => 1, w => 1, \"x\" => 1 }",
-           to => "{ a => 1, p => 1, v => 1, \"x\" => 1 }",
+               'a.|h',
+               'a.|h ;|w',
+               'a.|h ;|v',
+               'h,|i',
+               'h ;|v',
+               'i ;|w',
+               'v',    
+               'w',
 
-       marc_template => [
-               'a',
-               'a ;|v',
-               'a,|x',
-               'a.|n',         # mozda greska 
-               'a.|n,|p',
-               'a.|p',
-               'a ;|v.|n',
-               'a ;|v.|n,|p',  # mozda greska
-               'n,|p',
-               'n ;|v',        # mozda greska
-               'p ;|v',
-               'v',
-               'x',
-               'x ;|v'         # greska
        ],
 );