first version of rss sorting script for hrcak master
author Dobrica Pavlinusic <dpavlin@rot13.org>
Wed, 20 Jun 2018 21:26:37 +0000 (23:26 +0200)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Wed, 20 Jun 2018 21:26:37 +0000 (23:26 +0200)
hrcak.cgi [new file with mode: 0755]

diff --git a/hrcak.cgi b/hrcak.cgi
new file mode 100755 (executable)
index 0000000..840de1d
--- /dev/null
+++ b/hrcak.cgi
@@ -0,0 +1,74 @@
+#!/usr/bin/perl
+use warnings;
+use strict;
+use autodie;
+use LWP::Simple;
+use CGI;
+use Data::Dump qw(dump);
+
+# Feed name comes from the "feed" query parameter; default is 'polemos'.
+my $q = CGI->new;
+my $feed = $q->param('feed') || 'polemos';
+print qq|Content-type: application/rss+xml\r\n\r\n|;
+# Cache file name: every run of non-word characters is collapsed to "_".
+( my $feed_file = $feed ) =~ s/\W+/_/g;
+$feed_file = "cache/$feed_file.rss";
+# mirror() returns the HTTP status code.  A 304 leaves the cached copy as
+# it is; an error status is only logged so a stale cache is still served.
+my $rc = mirror("https://hrcak.srce.hr/en/rss/$feed", $feed_file);
+warn "Modified $feed into $feed_file\n" if is_success($rc);
+warn "mirror of $feed failed with status $rc\n" if is_error($rc);
+
+# Slurp the cached feed, then cut it into header, items and trailer.
+my ( $bottom, @parts ) = ('');
+{
+       my $rss = do { open(my $fh, '<', $feed_file); local $/; <$fh> } // '';
+       $bottom = $1 if $rss =~ s/(<\/channel>.*)$//is;
+       # Header is everything before the first <item>, or all that is left
+       # when the feed has no items (so an empty feed passes through as is).
+       my ( $top, $items ) = split( /<item>/i, $rss, 2 );
+       print $top // '';
+       @parts = split( /<item>/i, $items // '' );
+}
+
+# Lookup table from English month name to its zero-padded number
+# ("January" => "01" ... "December" => "12").  The values are strings so
+# they can be appended to a year to form a sortable key.
+my $month2nr;
+{
+       # Calendar order matters: position in this list gives the number.
+       my @names = qw(
+               January   February
+               March     April
+               May       June
+               July      August
+               September October
+               November  December
+       );
+
+       my %number_of;
+       for my $idx ( 0 .. $#names ) {
+               $number_of{ $names[$idx] } = sprintf( "%02d", $idx + 1 );
+       }
+
+       $month2nr = \%number_of;
+}
+# month_year($item): returns a sortable "YYYYMM" key taken from the item's
+# "Published: <Month> <year>." text; dies if the item has no such text.
+sub month_year {
+       my ($item) = @_;
+       $item =~ m/Published: (?<month>\w+) (?<year>\d{4})\./
+               or die "no Published: in [$item]";
+       # Unknown month names become "00" so the key stays six characters
+       # wide and sorts ahead of the known months of that year.
+       return $+{year} . ( $month2nr->{ $+{month} } // '00' );
+}
+
+# Sort oldest first; each item's key is computed once (decorate/sort/undecorate).
+my @sorted = map { $_->[1] } sort { $a->[0] cmp $b->[0] }
+       map { [ month_year($_), $_ ] } @parts;
+# Strip the title text that precedes "Vol", never matching past </title>.
+s{<title>(?:(?!</title>).)*Vol}{<title>Vol}gs for @sorted;
+print "<item>", $_ for @sorted;
+# $bottom may be unset when no </channel> was found in the feed.
+print $bottom // '';