diploma work
author Marijana Glavica <mglavica@ffzg.hr>
Tue, 11 Aug 2009 12:33:26 +0000 (12:33 +0000)
committer Marijana Glavica <mglavica@ffzg.hr>
Tue, 11 Aug 2009 12:33:26 +0000 (12:33 +0000)
git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1270 07558da8-63fa-0310-ba24-9fe276d99e06

conf/isi/authors.sh [new file with mode: 0755]
conf/isi/dump-sqlite3.sh [new file with mode: 0644]
conf/isi/dump.sql [new file with mode: 0755]
conf/isi/isi.yml
conf/isi/parovi.sql [new file with mode: 0644]
conf/isi/schema.sql [new file with mode: 0644]
conf/isi/tables-cropsy.pl [new file with mode: 0644]
conf/isi/tables.pl [new file with mode: 0644]

diff --git a/conf/isi/authors.sh b/conf/isi/authors.sh
new file mode 100755 (executable)
index 0000000..3652ea0
--- /dev/null
@@ -0,0 +1,5 @@
+( # subshell groups the query so that its output can be redirected as one unit
+
+sqlite3 -header -column -echo var/webpac2.sqlite 'select * from authors'
+
+) 2>&1 | vi -R - # redirect on the subshell itself so sqlite3's stderr also reaches the read-only vi buffer (a bare "2>&1" line inside the group redirected nothing)
diff --git a/conf/isi/dump-sqlite3.sh b/conf/isi/dump-sqlite3.sh
new file mode 100644 (file)
index 0000000..28bdfe0
--- /dev/null
@@ -0,0 +1 @@
+sqlite3 -column var/webpac2.sqlite 'select * from cited' | vi -R - # show every row of the cited table, column-aligned, in a read-only vi buffer (stderr is not piped)
diff --git a/conf/isi/dump.sql b/conf/isi/dump.sql
new file mode 100755 (executable)
index 0000000..44bcbab
--- /dev/null
@@ -0,0 +1 @@
+select cited_au,cited_full,count(*) from cited where cited_au = 'SVERKO B' group by cited_full, cited_au order by cited_full -- NOTE(review): conf/isi/schema.sql and conf/isi/tables.pl in this commit name the cited columns au / cr_auth / cr_full; cited_au and cited_full do not appear there -- confirm which database schema this query targets
index 7998002..61e07db 100644 (file)
@@ -139,30 +139,33 @@ databases:
         # - path: 'conf/normalize/isi/isi2xls.pl'
         # - path: 'conf/isi/isi2sorted.pl'
           - path: 'conf/isi/tables.pl'
-#      - name: buskov
-#        type: isi
-#        path: '/data/FF/citati/citing/buskov.txt'
-#        encoding: 'ISO-8859-1'
-#        normalize:
-#        # - path: 'conf/normalize/isi/isi2xls.pl'
-#        # - path: 'conf/isi/isi2sorted.pl'
-#          - path: 'conf/isi/tables.pl'
-#      - name: butkovica.txt
-#        type: isi
-#        path: '/data/FF/citati/citing/butkovica.txt'
-#        encoding: 'ISO-8859-1'
-#        normalize:
-#        # - path: 'conf/normalize/isi/isi2xls.pl'
-#        # - path: 'conf/isi/isi2sorted.pl'
-#          - path: 'conf/isi/tables.pl'
-#      - name: corkalobiruskid.txt
-#        type: isi
-#        path: '/data/FF/citati/citing/corkalobiruskid.txt'
-#        encoding: 'ISO-8859-1'
-#        normalize:
-#        # - path: 'conf/normalize/isi/isi2xls.pl'
-#        # - path: 'conf/isi/isi2sorted.pl'
-#          - path: 'conf/isi/tables.pl'
+      - name: buskov
+        type: isi
+        path: '/data/FF/citati/citing/buskov.txt'
+        encoding: 'ISO-8859-1'
+        normalize:
+        # - path: 'conf/normalize/isi/isi2xls.pl'
+        # - path: 'conf/isi/isi2sorted.pl'
+          - path: 'conf/isi/tables.pl'
+        skip: 1
+      - name: butkovica.txt
+        type: isi
+        path: '/data/FF/citati/citing/butkovica.txt'
+        encoding: 'ISO-8859-1'
+        normalize:
+        # - path: 'conf/normalize/isi/isi2xls.pl'
+        # - path: 'conf/isi/isi2sorted.pl'
+          - path: 'conf/isi/tables.pl'
+        skip: 1
+      - name: corkalobiruskid.txt
+        type: isi
+        path: '/data/FF/citati/citing/corkalobiruskid.txt'
+        encoding: 'ISO-8859-1'
+        normalize:
+        # - path: 'conf/normalize/isi/isi2xls.pl'
+        # - path: 'conf/isi/isi2sorted.pl'
+          - path: 'conf/isi/tables.pl'
+        skip: 1
       - name: hrend
         type: isi
         path: '/data/FF/citati/citing/hrend.txt'
@@ -171,14 +174,15 @@ databases:
         # - path: 'conf/normalize/isi/isi2xls.pl'
         # - path: 'conf/isi/isi2sorted.pl'
           - path: 'conf/isi/tables.pl'
-#      - name: hromatkoi
-#        type: isi
-#        path: '/data/FF/citati/citing/hromatkoi.txt'
-#        encoding: 'ISO-8859-1'
-#        normalize:
-#        # - path: 'conf/normalize/isi/isi2xls.pl'
-#        # - path: 'conf/isi/isi2sorted.pl'
-#          - path: 'conf/isi/tables.pl'
+      - name: hromatkoi
+        type: isi
+        path: '/data/FF/citati/citing/hromatkoi.txt'
+        encoding: 'ISO-8859-1'
+        normalize:
+        # - path: 'conf/normalize/isi/isi2xls.pl'
+        # - path: 'conf/isi/isi2sorted.pl'
+          - path: 'conf/isi/tables.pl'
+        skip: 1
       - name: hudekknezevicj
         type: isi
         path: '/data/FF/citati/citing/hudekknezevicj.txt'
@@ -203,30 +207,33 @@ databases:
         # - path: 'conf/normalize/isi/isi2xls.pl'
         # - path: 'conf/isi/isi2sorted.pl'
           - path: 'conf/isi/tables.pl'
-#      - name: kerestesg
-#        type: isi
-#        path: '/data/FF/citati/citing/kerestesg.txt'
-#        encoding: 'ISO-8859-1'
-#        normalize:
-#        # - path: 'conf/normalize/isi/isi2xls.pl'
-#        # - path: 'conf/isi/isi2sorted.pl'
-#          - path: 'conf/isi/tables.pl'
-#      - name: knezovicz
-#        type: isi
-#        path: '/data/FF/citati/citing/knezovicz.txt'
-#        encoding: 'ISO-8859-1'
-#        normalize:
-#        # - path: 'conf/normalize/isi/isi2xls.pl'
-#        # - path: 'conf/isi/isi2sorted.pl'
-#          - path: 'conf/isi/tables.pl'
-#      - name: kolesaricv
-#        type: isi
-#        path: '/data/FF/citati/citing/kolesaricv.txt'
-#        encoding: 'ISO-8859-1'
-#        normalize:
-#        # - path: 'conf/normalize/isi/isi2xls.pl'
-#        # - path: 'conf/isi/isi2sorted.pl'
-#          - path: 'conf/isi/tables.pl'
+      - name: kerestesg
+        type: isi
+        path: '/data/FF/citati/citing/kerestesg.txt'
+        encoding: 'ISO-8859-1'
+        normalize:
+        # - path: 'conf/normalize/isi/isi2xls.pl'
+        # - path: 'conf/isi/isi2sorted.pl'
+          - path: 'conf/isi/tables.pl'
+        skip: 1
+      - name: knezovicz
+        type: isi
+        path: '/data/FF/citati/citing/knezovicz.txt'
+        encoding: 'ISO-8859-1'
+        normalize:
+        # - path: 'conf/normalize/isi/isi2xls.pl'
+        # - path: 'conf/isi/isi2sorted.pl'
+          - path: 'conf/isi/tables.pl'
+        skip: 1
+      - name: kolesaricv
+        type: isi
+        path: '/data/FF/citati/citing/kolesaricv.txt'
+        encoding: 'ISO-8859-1'
+        normalize:
+        # - path: 'conf/normalize/isi/isi2xls.pl'
+        # - path: 'conf/isi/isi2sorted.pl'
+          - path: 'conf/isi/tables.pl'
+        skip: 1
       - name: kuterovacjagodicg
         type: isi
         path: '/data/FF/citati/citing/kuterovacjagodicg.txt'
@@ -259,14 +266,15 @@ databases:
         # - path: 'conf/normalize/isi/isi2xls.pl'
         # - path: 'conf/isi/isi2sorted.pl'
           - path: 'conf/isi/tables.pl'
-#      - name: maslicsersicd
-#        type: isi
-#        path: '/data/FF/citati/citing/maslicsersicd.txt'
-#        encoding: 'ISO-8859-1'
-#        normalize:
-#        # - path: 'conf/normalize/isi/isi2xls.pl'
-#        # - path: 'conf/isi/isi2sorted.pl'
-#          - path: 'conf/isi/tables.pl'
+      - name: maslicsersicd
+        type: isi
+        path: '/data/FF/citati/citing/maslicsersicd.txt'
+        encoding: 'ISO-8859-1'
+        normalize:
+        # - path: 'conf/normalize/isi/isi2xls.pl'
+        # - path: 'conf/isi/isi2sorted.pl'
+          - path: 'conf/isi/tables.pl'
+        skip: 1
       - name: radosevicvidacekb
         type: isi
         path: '/data/FF/citati/citing/radosevicvidacekb.txt'
@@ -283,28 +291,23 @@ databases:
         # - path: 'conf/normalize/isi/isi2xls.pl'
         # - path: 'conf/isi/isi2sorted.pl'
           - path: 'conf/isi/tables.pl'
-#      - name: galicz
-#        type: isi
-#        path: '/data/FF/citati/citing/galicz.txt'
-#        encoding: 'ISO-8859-1'
-#        normalize:
-#        # - path: 'conf/normalize/isi/isi2xls.pl'
-#        # - path: 'conf/isi/isi2sorted.pl'
-#          - path: 'conf/isi/tables.pl'
-      - name: tadinacm
+      - name: galicz
         type: isi
-        path: '/data/FF/citati/citing/tadinacm.txt'
+        path: '/data/FF/citati/citing/galicz.txt'
         encoding: 'ISO-8859-1'
         normalize:
         # - path: 'conf/normalize/isi/isi2xls.pl'
         # - path: 'conf/isi/isi2sorted.pl'
           - path: 'conf/isi/tables.pl'
-      - name: cropsy
+        skip: 1
+      - name: tadinacm
         type: isi
-        path: '/data/FF/citati/croatia-psychol.txt'
-        encoding: 'ISO-8859-1'
+        path: '/data/FF/citati/citing/tadinacm.txt'
+        encoding: 'ISO-8859-1'
         normalize:
-          - path: 'conf/isi/croatia-psychol.pl'
+        # - path: 'conf/normalize/isi/isi2xls.pl'
+        # - path: 'conf/isi/isi2sorted.pl'
+          - path: 'conf/isi/tables.pl'
       - name: radovi 
         type: excel
         path: '/data/FF/citati/citirani_radovi.xls'
@@ -313,7 +316,13 @@ databases:
         normalize:
         # - path: 'conf/normalize/isi/isi2xls.pl'
         # - path: 'conf/isi/isi2sorted.pl'
-          - path: 'conf/isi/citirani_radovi.pl'
+          - path: 'conf/isi/citirani-radovi.pl'
+      - name: cropsy
+        type: isi
+        path: '/data/FF/citati/croatia-psychol.txt' # NOTE(review): the cropsy entry removed earlier in this diff declared encoding: 'ISO-8859-1'; this one declares none -- confirm that is intended
+        normalize:
+          - path: 'conf/isi/tables-cropsy.pl' # replaces the previous conf/isi/croatia-psychol.pl normalize script
+        skip: 0 # the inputs re-enabled by this commit carry skip: 1; this one is explicitly 0
     output:
 #      - module: 'Excel'
 #        path: '/data/FF/citati/data.xls'
@@ -325,4 +334,3 @@ databases:
         dsn: 'dbi:Pg:dbname=dipl'
         schema: 'conf/isi/schema.sql'
 #        table: 'citirani'
-
diff --git a/conf/isi/parovi.sql b/conf/isi/parovi.sql
new file mode 100644 (file)
index 0000000..03e6f0c
--- /dev/null
@@ -0,0 +1,8 @@
+select distinct citirani_radovi.cr, -- pairs every citirani_radovi.cr with the cited rows whose cr_full is identical
+       cited.cr_full, -- NULL when no cited row matches (left join)
+       cited.ut -- ut value stored on the matching cited row
+from citirani_radovi -- NOTE(review): conf/isi/schema.sql in this commit defines "citirani", not "citirani_radovi" -- confirm which table is meant
+left join cited on (citirani_radovi.cr=cited.cr_full) -- exact string equality on the full reference text
+order by citirani_radovi.cr -- sorted by the reference text
+;
+
diff --git a/conf/isi/schema.sql b/conf/isi/schema.sql
new file mode 100644 (file)
index 0000000..ac332d9
--- /dev/null
@@ -0,0 +1,133 @@
+drop view if exists parovi; -- views are dropped before the tables they select from are dropped and recreated below
+drop view if exists rpcou;
+drop view if exists citingu;
+drop view if exists citiraniu;
+
+drop table if exists utca;
+create table utca ( -- conf/isi/tables.pl writes one row per record: its UT field and the configured input name
+       id serial,
+       ut text,
+       ca text
+);
+
+drop table if exists cited;
+create table cited ( -- conf/isi/tables.pl writes one row per (AU value, CR entry) pair of a record
+       id serial,
+       ca text,
+       cr_auth text,
+       au text,
+       ut text,
+       cr_full text,
+       cr_year text,
+       cr_ref text, -- filled from the "reference" key of a CR entry
+       cr_doi text -- not set by conf/isi/tables.pl in this commit
+);
+
+drop table if exists authors;
+create table authors ( -- conf/isi/tables.pl writes one row per AU value of a record
+       id serial,
+       ut text,
+       au text,
+       af text, -- taken positionally from the record's AF values
+       ca text
+);
+
+
+drop table if exists citing;
+create table citing ( -- conf/isi/tables.pl writes one row per record; columns mirror the lower-cased ISI field tags
+       id serial,
+       ut text,
+       pt text,
+       au text,
+       af text, -- not set by conf/isi/tables.pl in this commit
+       ti text,
+       so text,
+       la text,
+       dt text,
+       c1 text,
+       rp text,
+       nr integer,
+       tc integer,
+       pi text,
+       py integer,
+       di text,
+       sc text
+);
+
+drop table if exists rpco;
+create table rpco ( -- conf/isi/tables.pl writes a row only for records that have an RP field
+       ut text,
+       rp text,
+       rpco text -- derived from RP by two regex() substitutions in conf/isi/tables.pl
+);
+
+
+drop table if exists citirani;
+create table citirani ( -- NOTE(review): neither tables.pl nor tables-cropsy.pl writes to this table; presumably filled by another normalize script -- confirm
+       id serial,
+       ca text,
+       cr_auth text,
+       cr_ref text,
+       cr_year text,
+       cr_vol text,
+       cr_page text,
+       ttc integer,
+       cr text, -- compared against cited.cr_full by the parovi view
+       can text
+);
+
+create view citingu as select distinct ut,pt,au,so,la,dt,nr,tc,pi,py,di,sc,rp from citing ; -- de-duplicated citing rows; id, af, ti and c1 are left out
+
+create view rpcou as select distinct * from rpco ; -- de-duplicated rpco rows
+
+create view parovi as select distinct citirani.ca, -- pairs each citirani row with the cited rows of identical reference text and the citing record behind them
+       citirani.cr,
+        cited.cr_full,
+        cited.ut,
+       citingu.pt,
+       citingu.au,
+       citingu.so,
+       citingu.la,
+       citingu.dt,
+       citingu.nr,
+       citingu.tc,
+       citingu.pi,
+       citingu.py,
+       citingu.sc,
+       citingu.rp
+from citirani
+left join cited on citirani.cr = cited.cr_full
+left join citingu on cited.ut = citingu.ut
+left join rpcou on cited.ut = rpcou.ut -- NOTE(review): no rpcou column is selected above, so under "select distinct" this join adds nothing to the result; was rpcou.rpco meant to be in the select list?
+;
+
+create view citiraniu as select distinct cr_auth,cr_ref,cr_year,cr_vol,cr_page,ttc,cr from citirani ; -- de-duplicated citirani rows; id, ca and can are left out
+
+
+drop table if exists cropsy;
+create table cropsy ( -- conf/isi/tables-cropsy.pl writes one row per AU value of a record
+       id serial,
+       ut text,
+       au text,
+       c1 text,
+       rp text,
+       tc integer,
+       py integer
+);
+
+
+-- CREATE AGGREGATE array_accum (anyelement)
+-- (
+--     sfunc = array_append,
+--     stype = anyarray,
+--     initcond = '{}'
+-- );
+
+-- select d, count(*),
+--     array_to_string(array_accum('+'::text),'') as graph
+--     from hits group by 1 order by 1 asc;
+
+
+-- create index cited_au on cited(au);
+-- create index cited_cited on cited(cited);
+
diff --git a/conf/isi/tables-cropsy.pl b/conf/isi/tables-cropsy.pl
new file mode 100644 (file)
index 0000000..36bf518
--- /dev/null
@@ -0,0 +1,14 @@
+
+my @c1 = rec_array ('C1'); # C1 values of the current record, consumed one per author below
+my @rp = rec_array ('RP'); # RP values of the current record, consumed the same way
+foreach my $au ( rec_array('AU') ) { # emit one 'cropsy' row per AU value
+       row( 'cropsy', 
+               ut => rec('UT'), # NOTE(review): rec() is called in list context inside this key/value list; presumably it yields exactly one value -- confirm
+               au => $au,
+               c1 => shift @c1, # i-th author gets the i-th C1 value; undef once @c1 is exhausted
+               rp => shift @rp, # NOTE(review): same positional pairing for RP -- confirm RP really carries one value per author
+               tc => rec('TC'),
+               py => rec('PY')
+       )
+}
+
diff --git a/conf/isi/tables.pl b/conf/isi/tables.pl
new file mode 100644 (file)
index 0000000..579eda6
--- /dev/null
@@ -0,0 +1,69 @@
+# warn dump(rec('AU'));
+# warn dump(rec_array('AU'));
+
+#di => split( /;/, rec(
+
+row( 'citing', # one 'citing' row per input record; keys are the lower-cased field tags
+       ut => rec('UT'), # NOTE(review): rec() is called in list context inside this key/value list; presumably it yields exactly one value -- confirm
+       pt => rec('PT'),
+       au => regex('s/,//', # pattern has no /g, so presumably only the first comma is removed -- confirm against regex()
+               uc ( frec('AU') )
+       ),
+       ti => rec('TI'),
+       so => rec('SO'),
+       la => rec('LA'),
+       dt => rec('DT'),
+       c1 => frec('C1'),
+       rp => uc ( rec('RP') ), # upper-cased here, while the 'rpco' row below stores RP unchanged
+       nr => rec('NR'),
+       tc => rec('TC'),
+       pi => rec('PI'),
+       py => rec('PY'),
+       di => rec('DI'),
+       sc => rec('SC')
+);
+
+if ( rec('RP') ) { # only records that have an RP field produce an 'rpco' row
+       row( 'rpco',
+               ut => rec('UT'),
+               rp => rec('RP'),
+               rpco =>         # RP reduced by the two nested regex() calls below
+                       regex('s/.*,(.*)/$1/', # outer pattern: keeps only what follows the last comma (presumably applied as a Perl substitution)
+                               regex('s/\.$//', # inner pattern, evaluated first: drops a trailing period
+                                       rec('RP')
+                               )
+                       ),
+       );
+}
+
+my @af = rec_array ('AF'); # AF values of the record, consumed one per AU value below
+foreach my $au ( rec_array ('AU') ) {
+       foreach my $cr ( rec_array('CR') ) { # every CR entry is repeated for every author, so 'cited' gets AU x CR rows per record
+               row( 'cited', 
+                       ut => rec('UT'),
+                       ca => config('input name'),
+                       cr_auth => $cr->{author}, # CR entries are used as hash references with author/full/year/reference keys
+                       au => regex('s/,//', # same upper-case + comma removal as the 'citing' and 'authors' rows
+                               uc ( $au )
+                       ),
+                       cr_full => $cr->{full},
+                       cr_year => $cr->{year},
+                       cr_ref => $cr->{reference}
+               );
+       }
+       row( 'authors', # one 'authors' row per AU value
+               ut => rec('UT'),
+               ca => config('input name'),
+               au => regex('s/,//',
+                       uc ( $au )
+               ),
+               af => shift @af, # i-th author gets the i-th AF value; undef once @af is exhausted
+       )
+}
+
+row( 'utca', # one 'utca' row per record: links its UT value to the configured input name
+       ut => rec('UT'),
+       ca => config('input name')
+);
+
+