normalize spaces
[koha-bibliografija] / html.pl
diff --git a/html.pl b/html.pl
index 26d1433..3d9cfce 100755 (executable)
--- a/html.pl
+++ b/html.pl
@@ -13,12 +13,28 @@ use Text::Unaccent;
 use Carp qw(confess);
 use utf8;
 use JSON;
+use POSIX qw(strftime);
+use Storable;
 
 use lib '/srv/koha_ffzg';
 use C4::Context;
 use XML::LibXML;
 use XML::LibXSLT;
 
+my $pid_file = '/dev/shm/bibliografija.pid'; # single-instance lock: holds the pid of the running script
+{
+       if ( -e $pid_file ) {
+               open(my $fh, '<', $pid_file);
+               my $pid = <$fh>; # may be undef (empty file) or carry stray whitespace
+               no autodie; # do not let autodie turn a failed kill (stale pid) into a fatal error
+               die "$0 already running as pid $1" if defined $pid && $pid =~ m/^(\d+)/ && kill(0, $1); # signal 0 only probes whether the pid is alive
+       }
+       open(my $fh, '>', $pid_file); # no live owner found: (re)claim the lock for this process
+       print $fh $$;
+       close($fh);
+}
+
+
 my $dbh = C4::Context->dbh;
 
 sub debug {
@@ -98,10 +114,11 @@ while( my $row = $sth_auth->fetchrow_hashref ) {
 debug 'department_in_group' => $department_in_group;
 
 foreach my $department ( keys %$department_in_sum ) {
-       $department_in_sum->{$department} = 0 unless $department =~ m/(centar|croaticum|katedra|odsjek)/i;
+#      $department_in_sum->{$department} = 0 unless $department =~ m/(centar|croaticum|katedra|odsjek)/i;
 }
 
 debug 'auth_department' => $auth_department;
+store $auth_department, '/dev/shm/auth_department.storable';
 debug 'auth_group' => $auth_group;
 debug 'department_in_sum' => $department_in_sum;
 
@@ -129,6 +146,7 @@ where
 =cut
 
 my $biblio_year;
+my $biblio_full_name;
 my $type_stats;
 
 my $parser = XML::LibXML->new();
@@ -199,6 +217,7 @@ while( my $row = $sth_select_authors->fetchrow_hashref ) {
        my $extract = {
                '008' => undef,
                '100' => '(9|a)',
+               '245' => 'a',
                '680' => 'i',
                '700' => '(9|4|a)',
                '942' => '(t|r|v)'
@@ -278,10 +297,13 @@ while( my $row = $sth_select_authors->fetchrow_hashref ) {
                        foreach my $authid ( @first_author ) {
                                push @{ $authors->{$authid}->{aut}->{ $category } }, $row->{biblionumber};
                        }
+                       $biblio_full_name->{ $row->{biblionumber} } = $data->{100}->[0]->{a};
        } else {
                $have_100 = 0;
        }
 
+       $biblio_full_name->{ $row->{biblionumber} } ||= $data->{245}->[0]->{a};
+
        my $have_edt;
 
        if ( exists $data->{700} ) {
@@ -303,23 +325,33 @@ while( my $row = $sth_select_authors->fetchrow_hashref ) {
 
                                $type_stats->{$type}++;
 
-                               if ( $type =~ m/(edt|trl|com|ctb)/ ) {
-                                       push @{ $authors->{$authid}->{sec}->{ $category } }, $row->{biblionumber};
-                                       push @{ $authors->{$authid}->{$1}->{ $category } }, $row->{biblionumber};
-                               } elsif ( $type =~ m/aut/ ) {
-                                       if ( ! $have_100 ) {
-                                               $have_edt = grep { exists $_->{4} && $_->{4} =~ m/edt/ } @{ $data->{700} } if ! defined $have_edt;
-                                               if ( $have_edt ) {
-                                                       $skip->{ have_700_edt }->{ $row->{biblionumber} }++;
+                               my @types = split(/[\s\/]+/, $type);
+
+                               foreach my $type ( @types ) {
+                                       my $type = substr($type,0,3);
+                                       $type_stats->{_count_each_type}->{$type}++;
+
+                                       if ( $type =~ m/(edt|trl|com|ctb)/ ) {
+                                               push @{ $authors->{$authid}->{__sec}->{ $category } }, $row->{biblionumber};
+                                               push @{ $authors->{$authid}->{$type}->{ $category } }, $row->{biblionumber};
+                                               $type =~ s/(com|ctb)/_ostalo/;
+                                               push @{ $authors->{$authid}->{$type}->{ $category } }, $row->{biblionumber};
+
+                                       } elsif ( $type =~ m/aut/ ) {
+                                               if ( ! $have_100 ) {
+                                                       $have_edt = grep { exists $_->{4} && $_->{4} =~ m/edt/ } @{ $data->{700} } if ! defined $have_edt;
+                                                       if ( $have_edt ) {
+                                                               $skip->{ have_700_edt }->{ $row->{biblionumber} }++;
+                                                       } else {
+                                                               push @{ $authors->{$authid}->{aut}->{ $category } }, $row->{biblionumber};
+                                                       }
                                                } else {
                                                        push @{ $authors->{$authid}->{aut}->{ $category } }, $row->{biblionumber};
                                                }
                                        } else {
-                                               push @{ $authors->{$authid}->{aut}->{ $category } }, $row->{biblionumber};
+#                                              warn "# SKIP ", $row->{biblionumber}, ' no 700$4 in ', dump($data);
+                                               $skip->{ 'no_700$4' }->{ $row->{biblionumber} }++;
                                        }
-                               } else {
-#                                      warn "# SKIP ", $row->{biblionumber}, ' no 700$4 in ', dump($data);
-                                       $skip->{ 'no_700$4' }->{ $row->{biblionumber} }++;
                                }
                        }
                        delete $data->{700};
@@ -330,9 +362,11 @@ while( my $row = $sth_select_authors->fetchrow_hashref ) {
 }
 
 debug 'authors' => $authors;
+store $authors, '/dev/shm/authors.storable';
 debug 'type_stats' => $type_stats;
 debug 'skip' => $skip;
 debug 'biblio_year' => $biblio_year;
+debug 'biblio_full_name' => $biblio_full_name;
 debug 'biblio_data' => $biblio_data;
 debug 'biblio_author_external' => $biblio_author_external;
 
@@ -354,13 +388,17 @@ sub html_title {
 <title>|, join(" ", @_), qq|</title>
 <link href="style.css" type="text/css" rel="stylesheet" />
 <script src="//code.jquery.com/jquery-1.11.2.js"></script>
+<script src="filters.js"></script>
 </head>
 <body>
 |;
 }
 
 sub html_end {
-       return qq|</body>\n</html>\n|;
+       return
+               qq|<small style="color:gray">Zadnji puta osvježeno: |,
+               strftime("%Y-%m-%d %H:%M:%S\n", localtime()),
+               qq|</body>\n</html>\n|;
 }
 
 mkdir 'html' unless -d 'html';
@@ -381,62 +419,93 @@ sub li_biblio {
                qq|</li>\n|;
 }
 
+sub unique { # returns the distinct values of @_ (as hash keys, so in no particular order)
+       my $unique;
+       $unique->{$_}++ foreach @_; # autovivifies a hash keyed by value; duplicates collapse into one key
+       return keys %$unique;
+}
+
+sub unique_biblionumber { # de-duplicates the given biblionumbers and returns them in display order
+       my @v = unique @_;
+       return sort {
+               $biblio_year->{$b} <=> $biblio_year->{$a} || # newest year first
+               $biblio_full_name->{$a} cmp $biblio_full_name->{$b} || # then by the name stored in $biblio_full_name
+               $a <=> $b # finally by biblionumber, ascending, to make the order stable
+       } @v;
+}
+
 sub author_html {
        my ( $fh, $authid, $type, $label ) = @_;
 
        return unless exists $authors->{$authid}->{$type};
 
-       print $fh qq|<h2>$label</h2>\n|;
+       print $fh qq|<a name="$type"><h2>$label</h2></a>\n|; # named anchor so "#$type" links can jump to this section
 
        foreach my $category ( sort keys %{ $authors->{$authid}->{$type} } ) {
                my $label = $category_label->{$category} || 'Bez kategorije';
-               print $fh qq|<a name="$type-$category"><h3>$label</h3></a>\n<ul>\n|;
-               foreach my $biblionumber ( sort {
-                               $biblio_year->{$b} <=> $biblio_year->{$a} || $a <=> $b
-                       } @{ $authors->{$authid}->{$type}->{$category} } ) {
+               print $fh qq|<a name="$type-$category"><h3>$label</h3></a>\n<ol>\n|; # ordered list: entries are numbered
+               foreach my $biblionumber ( unique_biblionumber @{ $authors->{$authid}->{$type}->{$category} } ) { # de-duplicated and sorted by unique_biblionumber
                        print $fh li_biblio( $biblionumber );
                }
-               print $fh qq|</ul>\n|;
+               print $fh qq|</ol>\n|;
        }
 }
 
+my @toc_type_label = ( # flat list of (type => label) pairs; an array rather than a hash so the section order is kept
+'aut' => 'Primarno autorstvo',
+'edt' => 'Uredništva',
+'trl' => 'Prijevodi',
+'_ostalo' => 'Ostalo',
+);
+
+
 sub count_author_years {
+       my $years = shift; # accumulator hashref passed in by the caller (undef on the first call) so several authors can be merged
        my ($authid) = @_;
-       my $years;
        foreach my $type ( keys %{ $authors->{$authid} } ) {
+#              next if $type =~ m/^_/; # FIXME
                foreach my $category ( keys %{ $authors->{$authid}->{$type} } ) {
-                       foreach my $biblionumber ( @{ $authors->{$authid}->{$type}->{$category} } ) {
-                               $years->{ $biblio_year->{ $biblionumber } }->{ $type . '-' . $category }++;
+                       foreach my $biblionumber ( unique_biblionumber @{ $authors->{$authid}->{$type}->{$category} } ) {
+                               $years->{ $biblio_year->{ $biblionumber } }->{ $type . '-' . $category }->{ $biblionumber }++; # keyed by biblionumber: year => "type-category" => biblionumber => hits
                        }
                }
        }
        return $years;
 }
 
-foreach my $row ( sort { $a->{full_name} cmp $b->{full_name} } @authors ) {
+sub html_year_selection {
+       my $fh = shift;
+       my @authids = unique @_;
 
-       my $first = substr( $row->{full_name}, 0, 1 );
-       if ( $first ne $first_letter ) {
-               print $index qq{</ul>\n} if $first_letter;
-               $first_letter = $first;
-               print $index qq{<h1>$first</h1>\n<ul>\n};
+       debug 'html_year_selection authids=', [ @authids ];
+
+       print $fh qq|<span id="years">Godine:\n|;
+       my $type_cat_count = {};
+       my $years;
+
+       foreach my $authid ( @authids ) {
+               $years = count_author_years( $years, $authid );
        }
-       print $index qq{<li><a href="}, $row->{authid}, qq{.html">}, $row->{full_name}, "</a></li>\n";
 
-       my $path = "html/$row->{authid}";
-       open(my $fh, '>:encoding(utf-8)', "$path.new");
-       print $fh html_title($row->{full_name}, "bibliografija");
-       print $fh qq|<h1>$row->{full_name} - bibliografija</h1>|;
+       debug 'years' => $years;
 
-       my $years = count_author_years( $row->{authid} );
-       print $fh qq|<span id="years">Godine:|;
-       my $type_cat_count = {};
        foreach my $year ( sort { $b <=> $a } keys %$years ) {
-               print $fh qq|<label><input type=checkbox onClick="toggle_year($year, this)" checked>$year</label>&nbsp;\n|;
+               print $fh qq|<label><input name="year_selection" value="$year" type=checkbox onClick="toggle_year($year, this)" checked="checked">$year</label>&nbsp;\n|;
                foreach my $type_cat ( keys %{ $years->{$year} } ) {
-                       $type_cat_count->{ $type_cat } += $years->{$year}->{$type_cat};
+                       my $count = scalar keys %{ $years->{$year}->{$type_cat} };
+                       $years->{$year}->{$type_cat} = $count; # remove biblionumbers and use count
+                       $type_cat_count->{ $type_cat } += $count;
+                       my ($type,$cat) = split(/-/, $type_cat);
+                       $type_cat_count->{_toc}->{$type}->{$cat}++;
+                       $type_cat_count->{_toc_count}->{$type} += $count;
                }
        }
+
+       print $fh qq|
+<input type=button value="all" onClick="all_years(1)">
+<input type=button value="none" onClick="all_years(0)">
+       |;
+
        print $fh qq|</span>|;
 
        print $fh q|
@@ -446,32 +515,57 @@ var years = |, encode_json($years), q|;
 
 var type_cat_count = |, encode_json($type_cat_count), q|;
 
-function toggle_year(year, el) {
-       if ( el.checked ) {
-               $('.y'+year).show();
-               console.debug('show', year, el.checked);
-               for(var type_cat in years[year]) {
-                       if ( ( type_cat_count[ type_cat ] += years[year][type_cat] ) == years[year][type_cat]) {
-                               $('a[name="'+type_cat+'"]').show();
-                               console.debug(type_cat, 'show');
-                       }
-               }
-       } else {
-               $('.y'+year).hide();
-               console.debug('hide', year, el.checked);
-               for(var type_cat in years[year]) {
-                       if ( ( type_cat_count[ type_cat ] -= years[year][type_cat] ) == 0 ) {
-                               $('a[name="'+type_cat+'"]').hide();
-                               console.debug(type_cat, 'hide');
-                       }
+</script>
+
+       |;
+
+       debug 'type_cat_count' => $type_cat_count;
+
+       # TOC
+       print $fh qq|<ul id="toc">\n|;
+       my $i = 0;
+       while ( $i < $#toc_type_label ) {
+               my $type  = $toc_type_label[$i++] || die "type";
+               my $label = $toc_type_label[$i++] || die "label";
+               next unless exists $type_cat_count->{_toc}->{$type};
+               print $fh qq| <li class="toc" id="toc-$type"><a href="#$type">$label</a> <tt id="toc-count-$type">$type_cat_count->{_toc_count}->{$type}</tt></li>\n <ul>\n|;
+               foreach my $category ( sort keys %{ $type_cat_count->{_toc}->{$type} } ) {
+                       my $label = $category_label->{$category} || 'Bez kategorije';
+                       my $count = $type_cat_count->{ $type . '-' . $category };
+                       my $cat_html = $category;
+                       $cat_html =~ s/\./-/g;
+                       print $fh qq|  <li class="toc" id="toc-$category"><a href="#$type-$category">$label</a> <tt id="toc-count-$type-$cat_html">$count</tt></li>\n|;
                }
+               print $fh qq| </ul>\n|;
        }
+       print $fh qq|</ul>\n|;
+
 }
-</script>
-       |;
 
-       author_html( $fh, $row->{authid}, 'aut' => 'Primarno autorstvo' );
-       author_html( $fh, $row->{authid}, 'sec' => 'Uredništva, prijevodi, krička izdanja' );
+
+foreach my $row ( sort { $a->{full_name} cmp $b->{full_name} } @authors ) {
+
+       my $first = substr( $row->{full_name}, 0, 1 );
+       if ( $first ne $first_letter ) {
+               print $index qq{</ul>\n} if $first_letter;
+               $first_letter = $first;
+               print $index qq{<h1>$first</h1>\n<ul>\n};
+       }
+       print $index qq{<li><a href="}, $row->{authid}, qq{.html">}, $row->{full_name}, "</a></li>\n";
+
+       my $path = "html/$row->{authid}";
+       open(my $fh, '>:encoding(utf-8)', "$path.new");
+       print $fh html_title($row->{full_name}, "bibliografija");
+       print $fh qq|<h1>$row->{full_name} - bibliografija</h1>\n|;
+
+       html_year_selection $fh => $row->{authid};
+
+       my $i = 0;
+       while ( $i < $#toc_type_label ) {
+               my $type  = $toc_type_label[$i++] || die "type";
+               my $label = $toc_type_label[$i++] || die "label";
+               author_html( $fh, $row->{authid}, $type => $label );
+       }
 
        print $fh html_end;
        close($fh);
@@ -490,7 +584,7 @@ my $department_category_author;
 foreach my $department ( sort keys %$auth_department ) {
        foreach my $authid ( sort @{ $auth_department->{$department} } ) {
                my   @categories = keys %{ $authors->{$authid}->{aut} };
-               push @categories,  keys %{ $authors->{$authid}->{sec} };
+               push @categories,  keys %{ $authors->{$authid}->{__sec} };
                foreach my $category ( sort @categories ) {
                        push @{ $department_category_author->{$department}->{$category} }, $authid;
                        push @{ $department_category_author->{'AAA_ukupno'}->{$category} }, $authid if $department_in_sum->{$department};
@@ -505,71 +599,92 @@ foreach my $department ( sort keys %$auth_department ) {
 
 debug 'department_category_author' => $department_category_author;
 
-mkdir 'html/departments' unless -d 'html/departments';
-
-sub unique_biblionumber {
-       my @v = @_;
-       my $u;
-       $u->{$_}++ foreach @v;
-       return sort { $biblio_year->{$b} <=> $biblio_year->{$a} || $a <=> $b } keys %$u;
-}
-
-open(my $dep_fh, '>:encoding(utf-8)', 'html/departments/index.new');
-print $dep_fh html_title('Odsijeci Filozofskog fakulteta u Zagrebu'), qq|<ul>\n|;
-foreach my $department ( sort keys %$department_category_author ) {
-       my $dep = $department || 'Nema odsjeka';
-       my $dep_file = unac_string('utf-8',$dep);
-       print $dep_fh qq|<li><a href="$dep_file.html">$dep</a></li>\n|;
-       open(my $fh, '>:encoding(utf-8)', "html/departments/$dep_file.new");
 
-       print $fh html_title($department . ' bibliografija');
-       print $fh qq|<h1>$department bibliografija</h1>\n|;
+sub department_html { # prints one $type section of a department page to $fh; also writes tab-separated rows to $csv_fh when given
+       my ( $fh, $department, $type, $label, $csv_fh ) = @_;
 
-       print $fh qq|<h2>Primarno autorstvo</h2>\n|;
+       print $fh qq|<a name="$type"><h2>$label</h2></a>\n|;
 
        foreach my $category ( sort keys %{ $department_category_author->{$department} } ) {
 
                my @authids = @{ $department_category_author->{$department}->{$category} };
                next unless @authids;
 
-               my @biblionumber = unique_biblionumber map { @{ $authors->{$_}->{aut}->{$category} } } grep { exists $authors->{$_}->{aut}->{$category} } @authids;
-               my $unique;
-               $unique->{$_}++ foreach @biblionumber;
+               my @biblionumber = unique_biblionumber map { @{ $authors->{$_}->{$type}->{$category} } } grep { exists $authors->{$_}->{$type}->{$category} } @authids;
 
                next unless @biblionumber;
 
-               my $label = $category_label->{$category} || 'Bez kategorije';
-               print $fh qq|<h3>$label</h3>\n<ul>\n|;
+               my $cat_label = $category_label->{$category} || 'Bez kategorije';
+               print $fh qq|<a name="$type-$category"><h3>$cat_label</h3></a>\n<ol>\n|;
 
-               print $fh li_biblio( $_ ) foreach @biblionumber;
+               foreach my $bib_num ( @biblionumber ) {
+                       my @li = li_biblio( $bib_num );
+                       my $li_html = join('', @li);
+                       $li_html =~ s{<a name="col-\d+"></a>}{}gs; # column markers are only needed for the CSV export, not in the page
+                       print $fh $li_html;
 
-               print $fh qq|</ul>|;
-       }
+                       next unless $csv_fh; # CSV export is optional
 
+                       my $year = $li[1]; # assumes li_biblio returns the year as its 2nd element - TODO confirm
+                       my @html;
+                       foreach ( split(/<a name="col-/, $li[4]) ) { # assumes $li[4] carries the col-N markers - TODO confirm
+                               if ( s{(\d+)"></a>}{} ) {
+                                       my $col = $1; # save the column index now: the next successful s/// resets $1
+                                       ( $html[$col] = $_ ) =~ s{\s+}{ }gs; # store under its column, collapsing whitespace so the row stays on one line
+                               } else {
+                                       warn "SKIPPED: Can't find col in [$_] from $li[4]" unless m/^<[^>]+>$/;
+                               }
+                       }
+                       my $html = join("\t", map { defined $_ ? $_ : '' } @html); # unused column slots become empty fields
 
-       print $fh qq|<h2>Sekundarno autorstvo</h2>\n|;
+                       $html =~ s{</?[^>]*>}{}gs; # strip remaining HTML tags
+                       $html =~ s{\s+$}{}gs;
+                       print $csv_fh "$bib_num\t$year\t$type\t$label\t$category\t$cat_label\t$html\n";
+               }
 
-       foreach my $category ( sort keys %{ $department_category_author->{$department} } ) {
+               print $fh qq|</ol>|;
+       }
 
-               my @authids = @{ $department_category_author->{$department}->{$category} };
-               next unless @authids;
+}
 
-               my @biblionumber = unique_biblionumber map { @{ $authors->{$_}->{sec}->{$category} } } grep { exists $authors->{$_}->{sec}->{$category} } @authids;
 
-               next unless @biblionumber;
+mkdir 'html/departments' unless -d 'html/departments';
 
-               my $label = $category_label->{$category} || 'Bez kategorije';
-               print $fh qq|<h3>$label</h3>\n<ul>\n|;
+open(my $dep_fh, '>:encoding(utf-8)', 'html/departments/index.new');
+print $dep_fh html_title('Odsjeci Filozofskog fakulteta u Zagrebu'), qq|<ul>\n|;
+foreach my $department ( sort keys %$department_category_author ) {
+       my $dep = $department || 'Nema odsjeka';
+       my $dep_file = unac_string('utf-8',$dep);
+       print $dep_fh qq|<li><a href="$dep_file.html">$dep</a></li>\n|;
+       open(my $fh, '>:encoding(utf-8)', "html/departments/$dep_file.new");
+
+       print $fh html_title($department . ' bibliografija');
+       print $fh qq|<h1>$department bibliografija</h1>\n|;
+
+       my @authids;
+       foreach my $category ( sort keys %{ $department_category_author->{$department} } ) {
+               push @authids, @{ $department_category_author->{$department}->{$category} };
+       }
+       html_year_selection $fh => @authids;
 
-               print $fh li_biblio( $_ ) foreach @biblionumber;
+       my $csv_fh;
+       if ( $department eq 'AAA_ukupno' ) {
+               open($csv_fh, '>:encoding(utf-8)', "html/departments/$department.csv");
+       }
 
-               print $fh qq|</ul>|;
+       my $i = 0;
+       while ( $i < $#toc_type_label ) {
+               my $type  = $toc_type_label[$i++] || die "type";
+               my $label = $toc_type_label[$i++] || die "label";
+               department_html( $fh, $department, $type, $label, $csv_fh );
        }
 
+       close($csv_fh) if $csv_fh;
 
        print $fh html_end;
        close($fh);
        rename "html/departments/$dep_file.new", "html/departments/$dep_file.html";
+
 }
 print $dep_fh qq|</ul>\n|, html_end;
 close($dep_fh);
@@ -644,12 +759,9 @@ sub table_count {
        my $label = shift @_;
        my $department = shift @_;
        my $group = shift @_;
-       my @biblionumbers = @_;
-       my $unique;
-       $unique->{$_}++ foreach @biblionumbers;
-       my @bibs = keys %$unique;
-       $table->{ffzg}->{$group}->[ $label2row->{ $label } ]->[ $department2col->{$department} ] = scalar @bibs;
-       $table->{external}->{$group}->[ $label2row->{ $label } ]->[ $department2col->{$department} ] = scalar grep { $biblio_author_external->{$_} } @bibs;
+       my @biblionumbers = unique @_;
+       $table->{ffzg}->{$group}->[ $label2row->{ $label } ]->[ $department2col->{$department} ] = scalar @biblionumbers;
+       $table->{external}->{$group}->[ $label2row->{ $label } ]->[ $department2col->{$department} ] = scalar grep { $biblio_author_external->{$_} } @biblionumbers;
 }
 
 foreach my $group ( '', keys %$azvo_group_title ) {
@@ -741,3 +853,5 @@ close($fh2);
 rename 'html/azvo.new', 'html/azvo.html';
 rename 'html/azvo2.new', 'html/azvo2.html';
 
+unlink $pid_file;
+