From: Dobrica Pavlinusic Date: Thu, 28 Jan 2010 21:17:01 +0000 (+0100) Subject: cleanup inset from reblog X-Git-Url: http://git.rot13.org/?p=mongodb-experiments.git;a=commitdiff_plain;h=bb93da128eec8002b8dc6f00e1ba1887231a71a4;hp=cfe83999c0a31ff97f15dcf029f39c94805d97af cleanup inset from reblog --- diff --git a/reblog2mongodb.pl b/reblog2mongodb.pl index 8ecb90a..2276738 100755 --- a/reblog2mongodb.pl +++ b/reblog2mongodb.pl @@ -16,33 +16,14 @@ $dbi .= ";host=127.0.0.1;port=13306"; # XXX over ssh my $dbh = DBI->connect($dbi,"","",{ RaiseError => 1 }); -$dbh->do(qq{ - create temporary table published_items as - select - item_id - from - items_userdata - where - label = 'published' and - value_numeric = 1 -}); - my $sql = qq{ select - i.id as item_id, --- i.guid as _id, --- i.link as _id, - i.*, - f.url as feed_url, - f.title as feed_title, - f.link as feed_link, - f.description as feed_description - from items i - join published_items p on i.id = p.item_id - join feeds f on i.feed_id = f.id - where i.id > ? - order by i.id asc - limit 1000 + md5(link) as _id, + items.* + from items + where id > ? + order by id asc + limit 100000 }; my $sql_tags = qq{ @@ -60,50 +41,40 @@ order by items_userdata.item_id asc my $conn = MongoDB::Connection->new; my $db = $conn->get_database( $database ); +$db->drop if $debug; my $items = $db->get_collection( 'items' ); -my $last_row = 0; -$last_row = 0 if $debug; +$items->ensure_index( { id => 1 } ); -print "Fetching items from $dbi id > $last_row\n"; +# > db.items.find().sort({item_id:-1}).limit(1); +my $last = $items->query()->sort({ 'id' => -1 })->limit(1)->next; +warn dump( $last ); +my $last_item_id = $last->{id} || 0; + +print "Fetching items from $dbi id > $last_item_id\n"; my $sth = $dbh->prepare($sql); -$sth->execute( $last_row ); +$sth->execute( $last_item_id ); warn dump( $sth->{NAME} ); print "found ",$sth->rows," items to process...\n"; -my $sth_tags = $dbh->prepare($sql_tags); -$sth_tags->execute( $last_row ); -print "found ",$sth_tags->rows, " tags found...\n"; - -my $count = 0; - -my $row_tags = $sth_tags->fetchrow_hashref(); - while (my $row = $sth->fetchrow_hashref() ) { - my $_id = $row->{_id} || "c$count"; - $_id =~ s{\W+}{_}g; - $_id =~ s{_+$}{}; - my $doc = $row; -# $row->{_id} = $id; + map { $row->{$_} * 1 } grep { m/id/ && $row->{$_} =~ /^\d+$/ } keys %$row; + $items->insert( $row ); +} - while ( $row_tags && $row_tags->{item_id} < $row->{item_id} ) { - $row_tags = $sth_tags->fetchrow_hashref(); - warn "## got tags: ",dump( $row_tags ) if $debug; - } +__END__ - if ( $row_tags && $row_tags->{item_id} == $row->{item_id} ) { - $doc->{tags} = [ split(/\s+/, $row_tags->{tags} ) ]; - warn "++ ",$row->{item_id}, dump( $row->{tags} ),$/; - } +my $sth_tags = $dbh->prepare($sql_tags); +$sth_tags->execute( $last_item_id ); +print "found ",$sth_tags->rows, " tags found...\n"; - $items->insert( $doc ); +my $count = 0; - $last_row = $row->{id}; - $count++; +my $row_tags = $sth_tags->fetchrow_hashref(); -} +my @join = ( 'id' => 'item_id' );