use JSON;
use Data::Dump qw(dump);
use RT::Client::REST;
+use URI::Escape;
use CouchDB;
use Digest::MD5 qw(md5_hex);
my $FETCH = $ENV{FETCH} || 0;
-my $db = CouchDB->new('10.60.0.95', 5984);
-eval { $db->put("z_$UserID") }; # create user database
+my $db = CouchDB->new('10.60.0.92', 5984);
+eval { $db->put("zotero_$UserID") }; # create user database
+# install the design document stored after __DATA__; the eval discards any failure
+eval {
+ local $/ = undef;
+ my $view = <DATA>;
+ warn "# create $view";
+ $db->put("zotero_$UserID/_design/zotero" => decode_json $view)
+};
-my $url = "https://api.zotero.org/users/$UserID/items?key=$key&format=atom&content=json&order=dateModified&sort=desc";
+eval { $db->put("rt") }; # create RT database
-my $file = "$UserID.atom";
+# feeds to fetch: collections first, then items (more URLs are queued while parsing)
+my @urls = map { "https://api.zotero.org/users/$UserID/$_?format=atom&content=json&order=dateModified&sort=desc" } qw( collections items );
+# we don't need to fetch tags since we can generate using CouchDB views
-warn "# $url\n";
+my $url = shift @urls;
+
+# state accumulated across all fetched feeds
+my $tree;
+my $ticket_items;
+my $items;
+
+restart:
+
+# Append the API key. Use '?' when the URL has no query string yet (queued
+# ".../collections/ID/items" URLs) and do not add a second key if one is present.
+$url .= ( $url =~ m{\?} ? '&' : '?' ) . 'key=' . $key unless $url =~ m{[?&]key=};
+
+# one cache file per distinct URL
+my $file = $UserID . '.' . md5_hex($url) . '.atom';
+# no cached copy yet: force a download (note: $FETCH then stays on for later URLs)
+$FETCH = 1 if ! -e $file;
+
+warn "# mirror $FETCH $url -> $file\n";
if ( $FETCH && mirror( $url => $file ) == RC_NOT_MODIFIED ) {
warn "not modified";
-# exit 0;
}
-my $feed = XMLin( $file );
+# XMLin reports failure by dying, so the error is in $@; $! may hold a stale
+# errno (e.g. from the -e test above) and must not be used to detect it.
+my $feed = eval { XMLin( $file ) };
+if ( $@ || ! $feed ) {
+ warn "ERROR $file $@\n";
+ goto skip_url;
+}
#warn "# feed ",dump($feed);
-my $tree;
-my $ticket_items;
-
-my $items;
+# Reduce a Zotero API link to a bare ID: strip everything up to and including
+# the last "/items/" or "/collections/" segment, then strip any query string.
+# Takes the link as its only argument and returns the shortened string.
+sub link_to_id {
+ my $link = shift;
+ $link =~ s{.+/(items|collections)/}{}; # leave just ID
+ $link =~ s{\?.+}{}; # drop query string
+ return $link;
+}
+# Process every entry of the current feed: record parent/child links, decode the
+# JSON payload, collect RT ticket numbers from tags and store the item in CouchDB.
foreach my $entry ( keys %{ $feed->{entry} } ) {
warn "# entry $entry ",dump($entry);
- my $id = $entry;
- $id =~ s{.+/items/}{}; # leave just ID
+ my $id = link_to_id $entry;
my $item = $feed->{entry}->{$entry};
warn "# entry $entry ",dump($item);
- foreach my $link ( @{ $item->{link} } ) {
- warn "# link $id ",dump($link);
+ foreach my $i ( 0 .. $#{ $item->{link} } ) {
+ my $link = $item->{link}->[$i];
+ warn "# link $id $i:",dump($link);
+
+ # store the bare ID next to the href so CouchDB views can use it
+ $item->{link}->[$i]->{key} = link_to_id $link->{href};
+
if ( $link->{rel} eq 'up' ) {
- my $up = $link->{href};
- $up =~ s{.+/items/}{};
- $up =~ s{\?.+}{};
- push @{ $tree->{$up} }, $id;
+ # index children by the parent ID stored above ($key is the API key, not the parent)
+ push @{ $tree->{ $link->{key} } }, $id;
+ } elsif ( $link->{rel} eq 'self' && $link->{href} =~ m{/collections/} ) {
+ warn "# get items in this collection";
+ push @urls, "$link->{href}/items";
}
}
- $item->{zapi_etag} = $item->{content}->{'zapi:etag'};
+ if ( exists $item->{content} ) {
+ # the content type is found under either "zapi:type" or plain "type"
+ my $type = ( grep { exists $item->{content}->{$_} } qw(zapi:type type) )[0];
+ warn "# content has $type";
+
+ $item->{zapi}->{etag} = $item->{content}->{'zapi:etag'} if exists $item->{content}->{'zapi:etag'};
+
+ $type = $item->{zapi}->{type} = $item->{content}->{$type};
+
+ if ( $type =~ m/json/ ) {
- if ( $item->{content}->{'zapi:type'} eq 'json' ) {
- my $json = $item->{content}->{content};
- warn "# $json\n";
- $json = $item->{content} = decode_json $json;
- warn "# json $id ", dump $json;
+ my $json = $item->{content}->{content};
+ warn "# $json\n";
+ $json = $item->{content} = decode_json $json;
+ warn "# json $id ", dump $json;
- foreach my $tag ( @{ $json->{tags} } ) {
- $tag = $tag->{tag};
- warn "# tag $id $tag\n";
- next unless $tag =~ m/#(\d+)/;
- push @{ $ticket_items->{$1} }, $id;
+ foreach my $tag ( @{ $json->{tags} } ) {
+ $tag = $tag->{tag};
+ warn "# tag $id $tag\n";
+ next unless $tag =~ m/#(\d+)/; # XXX RT number in tag
+ push @{ $ticket_items->{$1} }, $id;
+ }
+
+ } else {
+ warn "ERROR: $type not decoded!";
}
}
+ # move top-level "zapi:*" keys into the nested zapi hash
+ foreach my $zapi ( grep { m/^zapi:/ } keys %$item ) {
+ my $name = $zapi;
+ $name =~ s/^zapi://;
+ $item->{zapi}->{$name} = delete $item->{$zapi};
+ }
+
$items->{$id} = $item;
- my $old_item = $db->get( "z_$UserID/$id" );
- warn "# old_item ",dump($old_item);
- my $item_md5 = md5_hex encode_json $item;
- $item->{item_md5} = $item_md5;
- if ( $old_item->{zapi_etag} ne $item->{zapi_etag} || $item_md5 ne $old_item->{item_md5} ) {
- $item->{_rev} = $old_item->{_rev};
- $db->put( "z_$UserID/$id" => $item );
- }
+ $db->modify( "zotero_$UserID/$id" => $item );
+
+}
+
+# entries have been handled above; drop them so only feed-level data is dumped
+delete $feed->{entry};
+warn "# feed without entry ",dump( $feed );
+
+# pagination: if the feed carries an Atom rel="next" link, fetch that page next
+if ( my @next = map { $_->{href} } grep { $_->{rel} eq 'next' && $_->{type} eq 'application/atom+xml' } @{ $feed->{link} } ) {
+ warn "## next ",dump(@next);
+ $url = $next[0];
+ goto restart;
+}
+
+skip_url:
+
+# this feed is finished (or could not be parsed): continue with the next queued URL
+if ( $url = shift @urls ) {
+ warn "## next url $url";
+ goto restart;
}
warn "# tree ",dump( $tree );
+# For every RT ticket number found in item tags: fetch the ticket, merge it into
+# the "rt" database together with the linked Zotero item IDs and its attachments.
foreach my $nr ( keys %$ticket_items ) {
- my $ticket = $rt->show(type => 'ticket', id => $nr);
+ my $ticket = eval { $rt->show(type => 'ticket', id => $nr) };
warn "# ticket $nr ",dump($ticket);
+ next unless $ticket;
+
+ $ticket->{zotero_items} = $ticket_items->{$nr};
+
+ # merge ticket fields into the stored document, keeping keys it already has
+ my $modified = $db->modify( "rt/$nr" => sub {
+ my $doc = shift;
+ $doc->{$_} = $ticket->{$_} foreach keys %$ticket;
+ return $doc;
+ });
+
+ warn "# modified ",dump($modified);
+
+ # copy attachments to CouchDB (they never change, so do it just once)
+ if ( my @attachment_ids = $rt->get_attachment_ids( id => $nr ) ) {
+
+ warn "# get_attachment_ids = ",dump( @attachment_ids );
+ my $doc = $db->get("rt/$nr");
+ my @attachments;
+
+ foreach my $attachment_id ( @attachment_ids ) {
+ my $attachment = $rt->get_attachment( parent_id => $nr, id => $attachment_id );
+ # ContentEncoding may be missing; compare against '' instead of undef
+ if ( $attachment->{Filename} && ( $attachment->{ContentEncoding} || '' ) eq 'base64' ) {
+ #$attachment->{Filename} ||= $attachment_id;
+ my $content = delete $attachment->{Content};
+ if ( ! exists $doc->{_attachments}->{ $attachment->{Filename} } ) {
+ # utf8::encode works in place and returns nothing, so its result
+ # cannot be used as a success flag
+ utf8::encode($content);
+ warn "# extracted ",length( $content ), " bytes";
+ warn "## attachment ",dump( $attachment );
+ my $url = sprintf 'rt/%d/%s?rev=%s', $nr, uri_escape($attachment->{Filename}), $modified->{rev};
+# $modified = $db->request( PUT => $url, $content, $attachment->{ContentType} );
+ }
+ }
+ push @attachments, $attachment;
+ }
+
+
+ # record attachment metadata (Content was removed above for named base64 ones)
+ $db->modify( "rt/$nr" => sub {
+ my $doc = shift;
+ $doc->{attachments} = [ @attachments ];
+ warn "## attachments on $nr = ", $#attachments + 1;
+ return $doc;
+ }) if @attachments;
+
+ }
+
if ( $ticket->{Queue} !~ m/ILL/i ) {
+ # NOTE(review): "$ticket" interpolates a hash reference here; $nr is probably what was meant
warn "SKIP $ticket not in ILL queue!";
next;
# $rt->comment( ticket_id => $nr, message => dump( $items->{$id} ) );
- last; # FIXME just first
-
}
}
+
+__DATA__
+{"_id":"_design/zotero","language":"javascript","views":{"itemType":{"map":"function(doc) {\n emit(doc.zapi.itemType,1);\n}","reduce":"_count"},"updated":{"map":"function(doc) {\n emit(doc.updated,1);\n}","reduce":"_count"},"tags":{"map":"function(doc) {\n \n doc.content.tags.forEach( function(v) {\n emit(v, doc._id);\n });\n}","reduce":"_count"},"link_up":{"map":"function(doc) {\n if ( doc.link[1].rel == 'up' )\n emit( doc.link[1].key, doc._id );\n}","reduce":"_count"},"year,publisher":{"map":"function(doc) {\n if ( doc.zapi.year )\n emit([doc.zapi.year, doc.content.publisher], 1);\n}","reduce":"_count"}}}