#!/usr/bin/perl
#
# hrcak.cgi - fetch a journal RSS feed from hrcak.srce.hr and re-emit it
# with its <item> elements sorted by publication year and month.
#
# Provenance: commit 42720df9d459e4ba06d8c1a7422135c4972775b6
#   Author:  Dobrica Pavlinusic
#   Date:    Wed, 20 Jun 2018 23:26:37 +0200
#   Subject: first version of rss sorting script for hrcak
#   File:    hrcak.cgi (new file, mode 100755; patch generated by git 2.20.1)
#
# CGI parameters:
#   feed - name of the feed under https://hrcak.srce.hr/en/rss/
#          (defaults to 'polemos')
#
# Nothing is written to STDOUT until the feed has been fetched, parsed and
# sorted, so a failure never produces a truncated "200" response.

use warnings;
use strict;
use autodie;
use LWP::Simple;
use CGI;
use Data::Dump qw(dump);

my $q = CGI->new;
my $feed = $q->param('feed') || 'polemos';

# Local cache file name: every run of non-word characters becomes "_".
my $feed_file = $feed;
$feed_file =~ s/\W+/_/g;
$feed_file = "cache/$feed_file.rss";

# The feed name comes from the request, so percent-encode everything
# outside the URL "unreserved" set before placing it in the upstream path.
( my $feed_path = $feed ) =~ s/([^A-Za-z0-9_.~-])/sprintf('%%%02X', ord $1)/ge;

# mirror() returns the HTTP status code; RC_NOT_MODIFIED means the cached
# copy is still current, any other success code means it was rewritten.
my $status = mirror("https://hrcak.srce.hr/en/rss/$feed_path", $feed_file);
if ( $status == RC_NOT_MODIFIED ) {
	# cached copy is up to date, nothing to report
} elsif ( is_success($status) ) {
	warn "Modified $feed_path into $feed_file\n";
} else {
	# fall back to a previously cached copy, if there is one
	warn "Can't mirror $feed_path into $feed_file: HTTP status $status\n";
}

unless ( -e $feed_file ) {
	print qq|Status: 502 Bad Gateway\r\nContent-type: text/plain\r\n\r\n|;
	print "Can't fetch feed from hrcak.srce.hr\n";
	exit;
}

# Slurp the cached feed.
my $rss = do {
	open(my $fh, '<', $feed_file);
	local $/ = undef;
	<$fh>;
};
$rss = '' unless defined $rss;

# Split the document into three pieces:
#   $top    - everything before the first <item>
#   @parts  - one string per item (without its leading <item> tag)
#   $bottom - </channel> and everything after it
my $top    = '';
my $bottom = '';
my @parts;
if ( $rss =~ s/^(.*?)<item>//is ) {
	$top = $1;
	$bottom = $1 if $rss =~ s/(<\/channel>.*)$//is;
	@parts = split(/<item>/i, $rss);
} else {
	# no items at all: pass the document through unchanged
	$top = $rss;
}

# English month name => zero-padded month number ("01" .. "12").
my @months = qw(
	January February March April May June
	July August September October November December
);
my $month2nr = { map { $months[$_] => sprintf("%02d", $_ + 1) } 0 .. $#months };

# month_year($item)
#   Returns a "YYYYMM" sort key taken from the "Published: <Month> <YYYY>."
#   text inside $item. An unrecognised month name yields "YYYY00" and a
#   warning. Dies if $item has no "Published:" text.
sub month_year {
	my $item = shift;
	if ( $item =~ m/Published: (\w+) (\d{4})\./ ) {
		my ( $month, $year ) = ( $1, $2 );
		unless ( exists $month2nr->{$month} ) {
			warn "unknown month [$month] in Published:, sorting it first in $year\n";
			return $year . '00';
		}
		return $year . $month2nr->{$month};
	} else {
		die "no Published: in [$item]";
	}
}

# Compute each sort key once instead of on every comparison. Keys are
# fixed-width digit strings, so string comparison orders them correctly.
my @sorted =
	map  { $_->[1] }
	sort { $a->[0] cmp $b->[0] }
	map  { [ month_year($_), $_ ] } @parts;

# Drop whatever precedes "Vol" inside each item's <title>. The match is
# not allowed to cross </title>, so link and description are never touched.
foreach my $item ( @sorted ) {
	$item =~ s{<title>(?:(?!</title>).)*?(?=Vol)}{<title>}s;
}

print qq|Content-type: application/rss+xml\r\n\r\n|;
print $top;
print "<item>", $_ foreach @sorted;
print $bottom;