X-Git-Url: http://git.rot13.org/?p=ILL-Zotero-RT;a=blobdiff_plain;f=zotero.pl;h=2dda7841ea0fbbfdd8b325882cd8501d20728ea1;hp=7f99401fa85f5a0fe834378625d1999a28dc1682;hb=HEAD;hpb=71ffa85cdd50cf657eee7d2b4ab1c60f65c70283 diff --git a/zotero.pl b/zotero.pl index 7f99401..2dda784 100755 --- a/zotero.pl +++ b/zotero.pl @@ -7,6 +7,7 @@ use XML::Simple; use JSON; use Data::Dump qw(dump); use RT::Client::REST; +use URI::Escape; use CouchDB; use Digest::MD5 qw(md5_hex); @@ -16,71 +17,140 @@ my $key = $ENV{key} || die "key required"; my $FETCH = $ENV{FETCH} || 0; -my $db = CouchDB->new('10.60.0.95', 5984); -eval { $db->put("z_$UserID") }; # create user database +my $db = CouchDB->new('10.60.0.92', 5984); +eval { $db->put("zotero_$UserID") }; # create user database +eval { + local $/ = undef; + my $view = ; + warn "# create $view"; + $db->put("zotero_$UserID/_design/zotero" => decode_json $view) +}; -my $url = "https://api.zotero.org/users/$UserID/items?key=$key&format=atom&content=json&order=dateModified&sort=desc"; +eval { $db->put("rt") }; # create RT database -my $file = "$UserID.atom"; +my @urls = map { "https://api.zotero.org/users/$UserID/$_?format=atom&content=json&order=dateModified&sort=desc" } qw( collections items ); +# we don't need to fetch tags since we can generate using CouchDB views -warn "# $url\n"; +my $url = shift @urls; + +my $tree; +my $ticket_items; +my $items; + +restart: + +$url .= '&key=' . $key; + +my $file = $UserID . '.' . md5_hex($url) . '.atom'; +$FETCH = 1 if ! -e $file; + +warn "# mirror $FETCH $url -> $file\n"; if ( $FETCH && mirror( $url => $file ) == RC_NOT_MODIFIED ) { warn "not modified"; -# exit 0; } -my $feed = XMLin( $file ); -#warn "# feed ",dump($feed); +my $xml = XML::Simple->new(ForceArray => [ qw( entry ) ]); +my $feed = eval { $xml->XMLin( $file ) }; +if ( $! ) { + warn "ERROR $file $!\n"; + goto skip_url; +} +warn "# feed ",dump($feed); -my $tree; -my $ticket_items; +sub link_to_id { + my $link = shift; + $link =~ s{.+/(items|collections)/}{}; # leave just ID + $link =~ s{\?.+}{}; + return $link; +} -my $items; +my @collection_items; foreach my $entry ( keys %{ $feed->{entry} } ) { warn "# entry $entry ",dump($entry); - my $id = $entry; - $id =~ s{.+/items/}{}; # leave just ID + my $id = link_to_id $entry; + + push @collection_items, $id if $url =~ m{/collections/(\w+)/items}; my $item = $feed->{entry}->{$entry}; - warn "# entry $entry ",dump($item); + warn "# item $id $entry ",dump($item),$/; + + foreach my $i ( 0 .. $#{ $item->{link} } ) { + my $link = $item->{link}->[$i]; + warn "# link $id $i:",dump($link); + + $item->{link}->[$i]->{key} = link_to_id $link->{href}; - foreach my $link ( @{ $item->{link} } ) { - warn "# link $id ",dump($link); if ( $link->{rel} eq 'up' ) { - my $up = $link->{href}; - $up =~ s{.+/items/}{}; - $up =~ s{\?.+}{}; - push @{ $tree->{$up} }, $id; + push @{ $tree->{$key} }, $id; + } elsif ( $link->{rel} eq 'self' && $link->{href} =~ m{/collections/} ) { + warn "# get items in this collection"; + push @urls, "$link->{href}/items?content=json"; } } - $item->{zapi_etag} = $item->{content}->{'zapi:etag'}; + if ( exists $item->{content} ) { + my $type = ( grep { exists $item->{content}->{$_} } qw(zapi:type type) )[0]; + warn "# content has $type"; + + $item->{zapi}->{etag} = $item->{content}->{'zapi:etag'} if exists $item->{content}->{'zapi:etag'}; + + $type = $item->{zapi}->{type} = $item->{content}->{$type}; + + if ( $type =~ m/json/ ) { + + my $json = $item->{content}->{content}; + warn "# $json\n"; + $json = $item->{content} = decode_json $json; + warn "# json $id ", dump $json; - if ( $item->{content}->{'zapi:type'} eq 'json' ) { - my $json = $item->{content}->{content}; - warn "# $json\n"; - $json = $item->{content} = decode_json $json; - warn "# json $id ", dump $json; + foreach my $tag ( @{ $json->{tags} } ) { + $tag = $tag->{tag}; + warn "# tag $id $tag\n"; + next unless $tag =~ m/#(\d+)/; # XXX RT number in tag + push @{ $ticket_items->{$1} }, $id; + } - foreach my $tag ( @{ $json->{tags} } ) { - $tag = $tag->{tag}; - warn "# tag $id $tag\n"; - next unless $tag =~ m/#(\d+)/; - push @{ $ticket_items->{$1} }, $id; + } else { + warn "ERROR: $type not decoded!"; } } + foreach my $zapi ( grep { m/^zapi:/ } keys %$item ) { + my $name = $zapi; + $name =~ s/^zapi://; + $item->{zapi}->{$name} = delete $item->{$zapi}; + } + $items->{$id} = $item; - my $old_item = $db->get( "z_$UserID/$id" ); - warn "# old_item ",dump($old_item); - my $item_md5 = md5_hex encode_json $item; - $item->{item_md5} = $item_md5; - if ( $old_item->{zapi_etag} ne $item->{zapi_etag} || $item_md5 ne $old_item->{item_md5} ) { - $item->{_rev} = $old_item->{_rev}; - $db->put( "z_$UserID/$id" => $item ); - } + $db->modify( "zotero_$UserID/$id" => $item ); + +} + +if ( @collection_items ) { + my $id = $1 if $url =~ m{/collections/(\w+)/items}; + $db->modify( "zotero_$UserID/$id" => sub { + my $doc = shift; + $doc->{x_meta}->{collection_items} = [ @collection_items ]; + return $doc; + }); +} + +delete $feed->{entry}; +warn "# feed without entry ",dump( $feed ); + +if ( my @next = map { $_->{href} } grep { $_->{rel} eq 'next' && $_->{type} eq 'application/atom+xml' } @{ $feed->{link} } ) { + warn "## next ",dump(@next); + $url = $next[0]; + goto restart; +} + +skip_url: + +if ( $url = shift @urls ) { + warn "## next url $url"; + goto restart; } warn "# tree ",dump( $tree ); @@ -97,9 +167,54 @@ $rt->login(username => $ENV{RT_USER}, password => $ENV{RT_PASSWORD}); foreach my $nr ( keys %$ticket_items ) { - my $ticket = $rt->show(type => 'ticket', id => $nr); + my $ticket = eval { $rt->show(type => 'ticket', id => $nr) }; warn "# ticket $nr ",dump($ticket); + next unless $ticket; + + $ticket->{zotero_items} = $ticket_items->{$nr}; + + my $modified = $db->modify( "rt/$nr" => sub { + my $doc = shift; + $doc->{$_} = $ticket->{$_} foreach keys %$ticket; + return $doc; + }); + + warn "# modified ",dump($modified); + + # copy attachments to CouchDB (they never change, so do it just once + if ( my @attachment_ids = $rt->get_attachment_ids( id => $nr ) ) { + + warn "# get_attachment_ids = ",dump( @attachment_ids ); + my $doc = $db->get("rt/$nr"); + my @attachments; + + foreach my $attachment_id ( @attachment_ids ) { + my $attachment = $rt->get_attachment( parent_id => $nr, id => $attachment_id ); + if ( $attachment->{Filename} && $attachment->{ContentEncoding} eq 'base64' ) { + #$attachment->{Filename} ||= $attachment_id; + my $content = delete $attachment->{Content}; + if ( ! exists $doc->{_attachments}->{ $attachment->{Filename} } ) { + utf8::encode($content) || warn "utf8::encode error!"; + warn "# extracted ",length( $content ), " bytes"; + warn "## attachment ",dump( $attachment ); + my $url = sprintf 'rt/%d/%s?rev=%s', $nr, uri_escape($attachment->{Filename}), $modified->{rev}; +# $modified = $db->request( PUT => $url, $content, $attachment->{ContentType} ); + } + } + push @attachments, $attachment; + } + + + $db->modify( "rt/$nr" => sub { + my $doc = shift; + $doc->{attachments} = [ @attachments ]; + warn "## attachments on $nr = ", $#attachments + 1; + return $doc; + }) if @attachments; + + } + if ( $ticket->{Queue} !~ m/ILL/i ) { warn "SKIP $ticket not in ILL queue!"; next; @@ -110,8 +225,9 @@ foreach my $nr ( keys %$ticket_items ) { # $rt->comment( ticket_id => $nr, message => dump( $items->{$id} ) ); - last; # FIXME just first - } } + +__DATA__ +{"_id":"_design/zotero","views":{"link_up":{"map":"function(doc) {\n if ( doc.link[1].rel == 'up' )\n emit( doc.link[1].key, doc._id );\n}","reduce":"_count"},"year,publisher":{"map":"function(doc) {\n if ( doc.zapi.year )\n emit([doc.zapi.year, doc.content.publisher], 1);\n}","reduce":"_count"},"updated":{"map":"function(doc) {\n emit(doc.updated,1);\n}","reduce":"_count"},"itemType":{"map":"function(doc) {\n emit(doc.zapi.itemType,1);\n}","reduce":"_count"},"tags":{"map":"function(doc) {\n \n doc.content.tags.forEach( function(v) {\n emit(v, doc._id);\n });\n}","reduce":"_count"},"collection_items":{"map":"function(doc) {\n if ( doc.x_meta ) {\n doc.x_meta.collection_items.forEach( function(id) {\n emit(doc.content.name, id);\n });\n }\n}","reduce":"_count"}},"language":"javascript"}