added header_first to WebPAC::Input::CSV
[webpac2] / conf / llin.yml
index 3705e36..b5bfeea 100644 (file)
@@ -233,20 +233,6 @@ databases:
         tag: 'isis'
         path: 'conf/normalize/common.pl'
   
-  # excel
-  excel:
-    name: 'Excel'
-  
-    input:
-      name: doaj
-      type: excel
-      path: '/data/isis_data/doaj2csv.xls'
-      #encoding: 'cp852'
-      #limit: 100
-      normalize:
-        #tag: 'isis'
-        path: 'conf/normalize/excel.pl'
-
   ffsfb:
     name: 'Slavenska filologija (B), Filozofski fakulteti u Zagrebu'
     input:
@@ -325,11 +311,120 @@ databases:
         path: 'conf/normalize/common.pl'
 
   sand:
-    name: 'Project Gutenberg archive'
+    name: 'SAND'  # DBF input; the mapping_path override at the end of this entry is commented out
     input:
       name: casopisi
       type: dbf
       path: 't/data/cas2000.dbf'
       normalize:
         path: 'conf/normalize/common.pl'
-      mapping_path: 'conf/input/dbf/cas2000.yml'
+#      mapping_path: 'conf/input/dbf/cas2000.yml'
+
+  exhibit:  # MARC input normalized by json.pl and written through the JSON output module
+    name: 'SMILE Exhibit based output for WebPAC'
+    input:  # single input mapping selected by 'type'; later entries use a list with 'module'
+      name: 'FFZG - Psihologija'
+      type: marc
+      path: 'out/marc/ffsf-peri.marc'
+      encoding: 'cp852'
+      normalize:
+        path: 'conf/normalize/json.pl'
+    output:  # single mapping here, a list in later entries - presumably both accepted, TODO confirm
+      module: 'JSON'
+      path: 'out/exhibit/ps.js'
+
+  webpacus:  # one MARC input fanned out to three output modules
+    name: 'Webpacus'
+    input:
+      name: 'foobar'  # NOTE(review): looks like a placeholder name - confirm
+      type: marc
+#      path: 'out/marc/ffsf-peri.marc'
+#      path: 'out/marc/ffkk-libri.marc'
+      path: 'out/marc/ffiz-libri.marc'  # active source; the two paths commented out above are alternatives
+      encoding: 'cp852'
+      normalize:
+        path: 'conf/normalize/webpacus.pl'  # the webpacus-unimarc.pl variant below is disabled
+#        path: 'conf/normalize/webpacus-unimarc.pl'
+    output:  # same module/path pairs as the hidra and perlmonks entries - presumably shared, verify
+      - module: 'KinoSearch'
+        path: 'var/kinosearch'
+      - module: 'Sorted'
+        path: 'var/sorted'
+      - module: 'Webpacus'
+        path: '/data/Webpacus2'
+
+  hidra:  # two ISIS inputs (bib, dir) that share one normalize script
+    name: 'HIDRA'
+    input:
+      - name: bib
+        module: 'WebPAC::Input::ISIS'
+        path: '/data/hidra/BIB'
+        encoding: 'cp852'
+        #modify_file: 'conf/modify/common.pl'
+        normalize:
+          path: 'conf/normalize/webpacus-hidra.pl'
+      - name: dir
+        module: 'WebPAC::Input::ISIS'
+        path: '/data/hidra/DIR'
+        encoding: 'cp852'
+        normalize:
+          path: 'conf/normalize/webpacus-hidra.pl'
+    output:  # same module/path pairs as the webpacus and perlmonks entries - presumably shared, verify
+      - module: 'KinoSearch'
+        path: 'var/kinosearch'
+      - module: 'Sorted'
+        path: 'var/sorted'
+      - module: 'Webpacus'
+        path: '/data/Webpacus2'
+
+  perlmonks:  # XML dump input written to the same three outputs as the webpacus and hidra entries
+    name: 'PerlMonks'
+    input:
+      - name: xml
+        module: 'WebPAC::Input::XML'
+        path: '/home/dpavlin/monk-search/xml-dump'  # NOTE(review): absolute path under a user home directory
+        mungle: 'conf/mungle/perlmonks-xml.pl'  # presumably a pre-processing hook for the XML input - TODO confirm
+        normalize:
+          path: 'conf/normalize/perlmonks.pl'
+    output:
+      - module: 'KinoSearch'
+        path: 'var/kinosearch'
+      - module: 'Sorted'
+        path: 'var/sorted'
+      - module: 'Webpacus'
+        path: '/data/Webpacus2'
+
+  ufo:  # PDF input written through the Jifty output module
+    name: 'UFO Reports 1998-2006 in the UK'
+    input:
+      - name: pdf
+        module: 'WebPAC::Input::PDF'
+        path: '/home/dpavlin/x/ufo/UFOReport1998.pdf'  # only the 1998 report is active; list form below is disabled
+#          - '/home/dpavlin/x/ufo/UFOReport1998.pdf'
+#          - '/home/dpavlin/x/ufo/UFOReport1999.pdf'
+#          - '/home/dpavlin/x/ufo/UFOReport2000.pdf'
+#          - '/home/dpavlin/x/ufo/UFOReport2001.pdf'
+#          - '/home/dpavlin/x/ufo/UFOReports2002WholeoftheUK.pdf'
+#          - '/home/dpavlin/x/ufo/UFOReports2003WholeoftheUK.pdf'
+#          - '/home/dpavlin/x/ufo/UFOReports2004WholeoftheUK.pdf'
+#          - '/home/dpavlin/x/ufo/UFOReports2005WholeoftheUK.pdf'
+#          - '/home/dpavlin/x/ufo/UFOReports2006WholeoftheUK.pdf'
+        normalize:
+          path: 'conf/normalize/ufo.pl'
+    output:
+      - module: 'Jifty'
+        path: '/data/Webpacus2'
+        model: 'Webpacus::Model::UFO'  # extra key no other output entry here uses; presumably the target model - TODO confirm
+
+  csv:  # NOTE(review): despite the key and name, input module is ISIS and output is Excel - confirm naming
+    name: 'CSV Excel export'
+    input:
+      - name: peri
+        module: 'WebPAC::Input::ISIS'
+        path: '/data/isis_data/ffps-peri/PERI'
+        encoding: 'cp852'
+        normalize:
+          path: 'conf/normalize/csv_ff-peri.pl'
+    output:
+      - module: 'Excel'
+        path: 'out/peri.xls'  # relative output path, unlike the absolute input path above