migrate data from CouchDB to MongoDB
author Dobrica Pavlinusic <dpavlin@rot13.org>
Fri, 29 Jan 2010 14:40:29 +0000 (15:40 +0100)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Fri, 29 Jan 2010 14:40:29 +0000 (15:40 +0100)
couchdb2mongodb.pl [new file with mode: 0755]

diff --git a/couchdb2mongodb.pl b/couchdb2mongodb.pl
new file mode 100755 (executable)
index 0000000..f54f554
--- /dev/null
@@ -0,0 +1,71 @@
+#!/usr/bin/perl
+
+# http://wiki.apache.org/couchdb/HTTP_Bulk_Document_API
+
+use warnings;
+use strict;
+
+use IO::Socket::INET;
+use Storable qw();
+use JSON;
+use Data::Dump qw(dump);
+use Time::HiRes qw(time);
+use File::Path qw(make_path remove_tree);
+use MongoDB;
+
+# database name: first command-line argument; a missing (or false) argument
+# falls back to 'pxelator'
+my $name = shift(@ARGV) || 'pxelator';
+
+# MongoDB handles: a connection constructed without arguments, the database
+# called $name on it, and that database's "audit" collection
+my $conn  = MongoDB::Connection->new();
+my $db    = $conn->get_database($name);
+my $audit = $db->get_collection('audit');
+
+# Open a TCP connection to the CouchDB HTTP port and return the socket.
+#
+# Optional arguments: host (default '10.60.0.91') and port (default 5984),
+# so calling it without arguments behaves as before.
+# Dies with the connection error if the socket cannot be created.
+sub couchdb_socket {
+       my ( $host, $port ) = @_;
+       $host = '10.60.0.91' unless defined $host;
+       $port = 5984         unless defined $port;
+
+       my $sock = IO::Socket::INET->new(
+               PeerAddr => $host,
+               PeerPort => $port,
+               Proto    => 'tcp',
+       );
+
+       # IO::Socket::INET puts the constructor's error message into $@;
+       # $! is included as well because it is what this script used to report
+       die "can't connect to CouchDB at $host:$port: $@ ($!)" unless $sock;
+
+       return $sock;
+}
+
+# Read lines from $sock up to and including the first line that consists
+# only of CR/LF characters (the blank line ending an HTTP header block), or
+# until EOF. Returns everything read as one string, or undef if nothing
+# could be read.
+sub get_chunk {
+       my $sock = shift;
+       my $chunk;
+       # read into a lexical: a bare while(<$sock>) assigns to the global $_
+       # without localizing it and would clobber the caller's value
+       while ( defined( my $line = <$sock> ) ) {
+               $chunk .= $line;
+               last if $line =~ /^[\n\r]+$/;
+       }
+#      warn "# $sock\n$chunk\n";
+       return $chunk;
+}
+
+# Stream every row of the CouchDB database $name and insert each row's
+# document into the MongoDB "audit" collection, printing progress as it goes.
+my $sock = couchdb_socket;
+
+# include_docs=true is requested so each row carries its document under "doc"
+print $sock "GET /$name/_all_docs?include_docs=true HTTP/1.0\r\n\r\n";
+
+# consume the HTTP response headers and make sure the request succeeded
+# before trying to interpret the body
+my $headers = get_chunk($sock);
+die "no HTTP response from CouchDB for /$name\n" unless defined $headers;
+my ($status_line) = split /\r?\n/, $headers, 2;
+$status_line = '' unless defined $status_line;
+die "CouchDB request for /$name failed: $status_line\n"
+       unless $status_line =~ m{^HTTP/\S+\s+200\b};
+
+# the first body line is expected to carry "total_rows":N; capture just the
+# number instead of rewriting the line, so a trailing newline or an
+# unexpected line can never end up in $total
+my $first = <$sock>;
+my ($total) = defined $first ? $first =~ /total_rows\D+(\d+)/ : ();
+die "can't find total_rows in CouchDB response: "
+       . ( defined $first ? $first : "(end of stream)\n" )
+       unless defined $total;
+
+$|=1;
+print "# $name total: $total\n";
+
+my $start_t = time();
+my $count = 0;
+
+
+while ( defined( my $line = <$sock> ) ) {
+       if ( $line =~ /"id":"[^"]+"/ ) {
+
+               $line =~ s/,[\r\n]+$//; # cleanup JSON
+               my $json = from_json( $line );
+               $audit->insert( $json->{doc} );
+
+               # progress report every 1000 documents; "\r" keeps it on one line
+               if ( ++$count % 1000 == 0 ) {
+                       my $elapsed = time() - $start_t;
+                       printf " %d/%d %.2f%% %.2f/s\r", $count, $total
+                               , ( $total   ? ( $count / $total ) * 100 : 0 )
+                               , ( $elapsed ? $count / $elapsed         : 0 );
+               }
+       } elsif ( $line =~ /^\s*\]\}\s*$/ ) {
+               # closing "]}" of the rows array -- not a document, nothing to do
+               next;
+       } else {
+               warn "UNKNOWN: $line";
+       }
+}
+
+# the progress line ends in "\r" (and is never printed for fewer than 1000
+# documents), so always finish with a newline-terminated summary
+printf " %d/%d done in %.2fs\n", $count, $total, time() - $start_t;
+