From 18d2030430e8883fd279598fb68bb26bbbf5160e Mon Sep 17 00:00:00 2001
From: Dobrica Pavlinusic
Date: Mon, 30 Jul 2012 18:29:11 +0200
Subject: [PATCH] skip invalid urls and crawl collection items

---
Review notes (this section is ignored by git am):

 * The progress message now also prints the $FETCH flag.
 * XMLin() is wrapped in eval so that a failure while reading $file
   does not abort the whole run. The failure is detected through $@,
   the variable eval fills in when the block dies; $! is only the
   errno of the last failed system call and can be stale or unset
   here. On failure the error is logged and control jumps to the new
   skip_url label, which continues with the next queued URL.
 * For every entry link with rel "self" whose href contains
   /collections/, "<href>/items" is pushed onto @urls so the items
   of that collection are fetched as well.
 * The commented-out "exit 0" is dropped.

 zotero.pl | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/zotero.pl b/zotero.pl
index bc240f9..e3b9051 100755
--- a/zotero.pl
+++ b/zotero.pl
@@ -44,13 +44,16 @@ $url .= '&key=' . $key;
 
 my $file = $UserID . '.' . md5_hex($url) . '.atom';
 $FETCH = 1 if ! -e $file;
-warn "# $url -> $file\n";
+warn "# mirror $FETCH $url -> $file\n";
 if ( $FETCH && mirror( $url => $file ) == RC_NOT_MODIFIED ) {
 	warn "not modified";
-#	exit 0;
 }
 
-my $feed = XMLin( $file );
+my $feed = eval { XMLin( $file ) };
+if ( $@ ) {
+	warn "ERROR $file $@\n";
+	goto skip_url;
+}
 #warn "# feed ",dump($feed);
 
 sub link_to_id {
@@ -75,6 +78,9 @@ foreach my $entry ( keys %{ $feed->{entry} } ) {
 
 		if ( $link->{rel} eq 'up' ) {
 			push @{ $tree->{$key} }, $id;
+		} elsif ( $link->{rel} eq 'self' && $link->{href} =~ m{/collections/} ) {
+			warn "# get items in this collection";
+			push @urls, "$link->{href}/items";
 		}
 	}
 
@@ -126,6 +132,8 @@ if ( my @next = map { $_->{href} } grep { $_->{rel} eq 'next' && $_->{type} eq '
 	goto restart;
 }
 
+skip_url:
+
 if ( $url = shift @urls ) {
 	warn "## next url $url";
 	goto restart;
-- 
2.20.1