#!/usr/bin/perl
#
# reblog-published.pl - export published items from reblog
#
# Provenance (recovered from a plain-text commitdiff):
#   From:      Dobrica Pavlinusic
#   Date:      Fri, 12 Nov 2010 19:58:59 +0000 (+0100)
#   Subject:   export published items from reblog
#   X-Git-Url: http://git.rot13.org/?a=commitdiff_plain;h=e4b48f19744c93b86610c016b0936d95a9f5ee1f;hp=-c;p=NoSQL-toys.git
#   Commit:    e4b48f19744c93b86610c016b0936d95a9f5ee1f
#   File:      reblog/reblog-published.pl (new file mode 100755, index 0000000..947dd01)
#
# What it does:
#   1. Connects to the MySQL database 'reblog' on 127.0.0.1:13306.
#   2. Builds a temporary table of item ids that carry the userdata
#      label 'published' with value_numeric = 1.
#   3. Walks the published items (joined with their feed) in ascending
#      id order, and in parallel walks the 'tags' userdata rows in the
#      same order, attaching tags to the item they belong to.
#   4. Dumps every assembled item to STDERR via warn + Data::Dump.
#
# Usage:
#   reblog-published.pl            # normal run
#   reblog-published.pl anything   # any argument enables debug output

use strict;
use warnings;
use DBI;
use Data::Dump qw/dump/;

$|++;    # unbuffered STDOUT so progress lines interleave with warn output

# Any command-line argument switches debug output on.
my $debug = @ARGV ? 1 : 0;

my $database = 'reblog';

my $dbi = "DBI:mysql:database=$database";
$dbi .= ";host=127.0.0.1;port=13306"; # XXX over ssh

# RaiseError makes every failing DBI call die, so no per-call checks below.
my $dbh = DBI->connect($dbi,"","",{ RaiseError => 1 });

# Ids of all published items; both queries below join against this table.
$dbh->do(qq{
	create temporary table published_items as
	select
		item_id
	from
		items_userdata
	where
		label = 'published' and
		value_numeric = 1
});

# Published items together with the feed they came from.  Feed columns
# are aliased with a feed_ prefix so they can be split off into a nested
# hash later.
# NOTE(review): the query is capped at 1000 rows and nothing here fetches
# a following batch -- presumably tied to the $last_row FIXME below; confirm.
my $sql = qq{
	select
		i.id as item_id,
--		i.guid as _id,
		i.link as _id,
		i.*,
		f.url as feed_url,
		f.title as feed_title,
		f.link as feed_link,
		f.description as feed_description
	from items i
	join published_items p on i.id = p.item_id
	join feeds f on i.feed_id = f.id
	where i.id > ?
	order by i.id asc
	limit 1000
};

# Tag rows for published items, in the same item_id order as $sql so the
# two result sets can be merged with a single forward pass.
my $sql_tags = qq{
select
	items_userdata.item_id,
	value_long as tags,
	timestamp
from items_userdata
join published_items p
	on items_userdata.item_id = p.item_id and label='tags'
where
	items_userdata.item_id > ?
order by items_userdata.item_id asc
};

my $last_row = 0; # FIXME
$last_row = 0 if $debug;

print "Fetching items from $dbi id > $last_row\n";

my $sth = $dbh->prepare($sql);
$sth->execute( $last_row );

my @columns = @{ $sth->{NAME} };
warn dump( @columns );

# Names (prefix stripped) of every column that starts with "feed_".
# Built with map instead of grep { s/// } so @columns is left untouched.
# This also picks up the items table's own feed_id column (from i.*),
# which therefore ends up as $row->{feed}{id}.
my @feed = map { /^feed_(.*)/s ? $1 : () } @columns;

# NOTE(review): DBI only guarantees rows() for non-SELECT statements; the
# counts printed here depend on the driver buffering the result set.
print "found ",$sth->rows," items to process...\n";

my $sth_tags = $dbh->prepare($sql_tags);
$sth_tags->execute( $last_row );
print "found ",$sth_tags->rows, " tags found...\n";

my $count = 0;

# Look-ahead cursor over the tag rows; undef once the result set is exhausted.
my $row_tags = $sth_tags->fetchrow_hashref();

while (my $row = $sth->fetchrow_hashref() ) {
	# Identifier derived from the item link, reduced to word characters.
	# Falls back to a per-row counter when the link is empty.
	# NOTE(review): $_id is computed but not consumed anywhere in this
	# script -- presumably meant as the document id of a later export step.
	my $_id = $row->{_id} || "c$count";
	$_id =~ s{\W+}{_}g;
	$_id =~ s{_+$}{};

	# Move the feed_* columns into a nested hash under $row->{feed}.
	my $feed;
	$feed->{$_} = delete $row->{ "feed_$_" } foreach @feed;
	$row->{feed} = $feed;

	# Both result sets are ordered by item id.  First discard tag rows
	# that belong to items we are already past ...
	while ( $row_tags && $row_tags->{item_id} < $row->{item_id} ) {
		$row_tags = $sth_tags->fetchrow_hashref();
	}

	# ... then consume every tag row of exactly this item.  If an item
	# has several tag rows their tags are appended in fetch order.  Items
	# without tag rows get no 'tags' key at all.
	while ( $row_tags && $row_tags->{item_id} == $row->{item_id} ) {
		warn "## got tags: ",dump( $row_tags ) if $debug;
		push @{ $row->{tags} }, split(/\s+/, $row_tags->{tags} )
			if defined $row_tags->{tags};
		$row_tags = $sth_tags->fetchrow_hashref();
	}

	warn dump($row);

	$count++;
}

# The tag cursor may still have unread rows (tags of items beyond the
# item limit), so finish it explicitly before disconnecting.
$sth_tags->finish;
$sth->finish;
$dbh->disconnect;