skip invalid urls and crawl collection items

[ILL-Zotero-RT] / zotero.pl
diff --git a/zotero.pl b/zotero.pl

index bc240f9..e3b9051 100755 (executable)
--- a/zotero.pl
+++ b/zotero.pl
@@ -44,13 +44,16 @@ $url .= '&key=' . $key;
  my $file = $UserID . '.' . md5_hex($url) . '.atom';
  $FETCH = 1 if ! -e $file;
  
-warn "# $url -> $file\n";
+warn "# mirror $FETCH $url -> $file\n";
  if ( $FETCH && mirror( $url => $file ) == RC_NOT_MODIFIED ) {
         warn "not modified";
-#      exit 0;
  }
  
-my $feed = XMLin( $file );
+my $feed = eval { XMLin( $file ) };	# eval traps any exception thrown while parsing $file
+if ( $@ ) {	# eval reports a trapped error in $@; $! is only the last errno and may be stale
+       warn "ERROR $file $@\n";
+       goto skip_url;
+}
  #warn "# feed ",dump($feed);
  
  sub link_to_id {
@@ -75,6 +78,9 @@ foreach my $entry ( keys %{ $feed->{entry} } ) {
  
                 if ( $link->{rel} eq 'up' ) {
                         push @{ $tree->{$key} }, $id;
+               } elsif ( $link->{rel} eq 'self' && $link->{href} =~ m{/collections/} ) {
+                       warn "# get items in this collection";
+                       push @urls, "$link->{href}/items"; 
                 }
         }
  
@@ -126,6 +132,8 @@ if ( my @next = map { $_->{href} } grep { $_->{rel} eq 'next' && $_->{type} eq '
         goto restart;
  }
  
+skip_url:
+
  if ( $url = shift @urls ) {
         warn "## next url $url";
         goto restart;