first version of rss sorting script for hrcak
[rss-sort] / hrcak.cgi
1 #!/usr/bin/perl
2 use warnings;
3 use strict;
4 use autodie;
5 use LWP::Simple;
6 use CGI;
7 use Data::Dump qw(dump);
8
# Read the requested feed name from the CGI query string; without a
# "feed" parameter the script falls back to the 'polemos' feed.
my $q = CGI->new;
my $feed = $q->param('feed') || 'polemos';

# The feed name comes straight from the client and is interpolated into the
# upstream URL below, so accept only plain slug characters (no '/', '.',
# '?', '#' ...) and refuse everything else before any RSS output starts.
unless ( $feed =~ m/\A[A-Za-z0-9_-]+\z/ ) {
	print qq|Status: 400 Bad Request\r\nContent-type: text/plain\r\n\r\n|;
	print "invalid feed name\n";
	exit;
}

print qq|Content-type: application/rss+xml\r\n\r\n|;

# Local cache file for this feed: non-word characters become '_'.
my $feed_file = $feed;
$feed_file =~ s/\W+/_/g;
$feed_file = "cache/$feed_file.rss";

# mirror() refreshes the cache file only when the upstream copy changed and
# returns the HTTP status code of the request.
my $rc = mirror("https://hrcak.srce.hr/en/rss/$feed", $feed_file);
if ( $rc == RC_NOT_MODIFIED ) {
	# cached copy is still current, nothing to report
} elsif ( is_success($rc) ) {
	warn "Modified $feed into $feed_file\n";
} else {
	# keep going: a previously cached copy may still be usable
	warn "mirror of $feed failed with HTTP status $rc, trying cached $feed_file\n";
}
21
# $bottom holds the closing "</channel>..." trailer of the feed,
# @parts the bodies of the individual items (text after each "<item>").
my $bottom = '';
my @parts;

# Slurp the cached feed and cut it into three pieces: the channel header
# (printed immediately), the item bodies and the closing trailer.
{
	open(my $fh, '<', $feed_file);	# autodie throws if the cache file is missing
	my $rss = do { local $/; <$fh> };	# slurp mode limited to this read
	close($fh);
	$rss = '' unless defined $rss;

	# everything before the first <item> is the channel header
	my $top;
	if ( $rss =~ s/^(.*?)<item>//is ) {
		$top = $1;
	}

	# everything from </channel> to the end is the trailer
	if ( $rss =~ s/(<\/channel>.*)$//is ) {
		$bottom = $1;
	}

	if ( defined $top ) {
		@parts = split(/<item>/i, $rss);
	} else {
		# feed without any items: pass the remaining header through
		# unchanged and leave @parts empty so nothing gets sorted
		$top = $rss;
	}

	print $top;
}
33
# Lookup table (hash reference) from English month name to its zero-padded
# two-digit number: January => '01' ... December => '12'.
my $month2nr = {};
{
	my @names = qw(
		January February March
		April   May      June
		July    August   September
		October November December
	);

	# fill all twelve keys at once with a hash slice
	@{$month2nr}{@names} = map { sprintf("%02d", $_) } 1 .. scalar(@names);
}
56
57
# Build a sortable "YYYYMM" key for one feed item.
#
# Takes the text of one item and looks for "Published: <Month> <Year>." in
# it; the month name is translated through $month2nr.
# Returns the four-digit year followed by the two-digit month number.
# Dies when the item has no "Published:" marker. A month name missing from
# $month2nr is reported with a warning and treated as month '00', so the key
# keeps its fixed width and the item sorts at the start of its year.
sub month_year {
	my $item = shift;
	if ( $item =~ m/Published: (\w+) (\d{4})\./ ) {
		my ( $month, $year ) = ( $1, $2 );
		my $nr = $month2nr->{$month};
		if ( ! defined $nr ) {
			warn "unknown month [$month] in Published:, using 00\n";
			$nr = '00';
		}
		return $year . $nr;
	} else {
		die "no Published: in [$item]";
	}
}
66
# Sort the items by publication month, oldest first. The "YYYYMM" key is
# computed once per item (decorate / sort / undecorate) instead of running
# the month_year() regex again on every comparison.
my @sorted =
	map  { $_->[1] }
	sort { $a->[0] cmp $b->[0] }
	map  { [ month_year($_), $_ ] } @parts;

foreach my $item ( @sorted ) {
	# Drop whatever precedes the last "Vol" inside the title. The match
	# is not allowed to cross </title>, so a "Vol" that appears later in
	# the item (e.g. in the description) can no longer swallow the
	# closing tag and the text after it.
	$item =~ s{<title>(?:(?!</title>).)*Vol}{<title>Vol}gs;
	print "<item>", $item;
}

# closing part of the feed, if one was found
print $bottom if defined $bottom;