added command-line options
[mongodb-experiments.git] / couchdb2mongodb.pl
1 #!/usr/bin/perl
2
3 # http://wiki.apache.org/couchdb/HTTP_Bulk_Document_API
4
5 use warnings;
6 use strict;
7
8 use IO::Socket::INET;
9 use Storable qw();
10 use JSON;
11 use Data::Dump qw(dump);
12 use Time::HiRes qw(time);
13 use File::Path qw(make_path remove_tree);
14 use MongoDB;
15
# Database name: first command-line argument, falling back to 'pxelator'
# when none (or a false value such as '' or '0') is given.
my $name = shift(@ARGV) || 'pxelator';

# MongoDB handles, built step by step: connection -> database -> collection.
# The connection is created without arguments, so whatever defaults the
# installed MongoDB driver uses for host and port apply.
my $conn  = MongoDB::Connection->new();
my $db    = $conn->get_database($name);
my $audit = $db->get_collection('audit');

21
# Open a TCP connection to the CouchDB HTTP port.
#
# Arguments (both optional, so existing argument-less calls keep working):
#   $host - peer address, defaults to '10.60.0.91'
#   $port - peer port,    defaults to 5984
#
# Returns the connected IO::Socket::INET handle.
# Dies with the target address and the reason when the connection fails.
sub couchdb_socket {
	my ( $host, $port ) = @_;
	$host = '10.60.0.91' unless defined $host;
	$port = 5984         unless defined $port;

	my $sock = IO::Socket::INET->new(
		PeerAddr => $host,
		PeerPort => $port,
		Proto    => 'tcp',
	);

	# IO::Socket::INET reports constructor failures in $@; $! is only a
	# fallback because it is not guaranteed to be set (e.g. bad hostname).
	die "can't connect to CouchDB at $host:$port: " . ( $@ || $! ) . "\n"
		unless $sock;

	return $sock;
}
29
# Read lines from a handle up to and including the first blank line.
#
# Arguments:
#   $sock - readable handle
#
# Returns everything read, including the terminating blank line, as one
# string. Returns undef when the handle is already at EOF. If EOF arrives
# before a blank line is seen, whatever was read so far is returned.
#
# Lines are read into a lexical rather than with while(<$sock>), which
# would assign to the global $_ and clobber the caller's value.
sub get_chunk {
	my $sock = shift;
	my $chunk;
	while ( defined( my $line = <$sock> ) ) {
		$chunk .= $line;
		# a line made only of CR/LF characters ends the chunk
		last if $line =~ /^[\n\r]+$/;
	}
#	warn "# $sock\n$chunk\n";
	return $chunk;
}
40
# Copy every document of the CouchDB database $name into the MongoDB
# "audit" collection, streaming the _all_docs response line by line.

my $sock = couchdb_socket;

# Request as HTTP/1.0 -- presumably so the body arrives un-chunked and can
# be read as plain lines until the server closes the connection.
print {$sock} "GET /$name/_all_docs?include_docs=true HTTP/1.0\r\n\r\n";

# The response headers end at the first blank line; their first line is the
# status line. Refuse to continue on anything but 200 so an error body is
# not mistaken for document rows.
my $headers = get_chunk($sock);
die "no HTTP response from CouchDB for /$name\n" unless defined $headers;

my ($status_line) = split /\r?\n/, $headers;
$status_line = '' unless defined $status_line;
die "CouchDB request for /$name failed: $status_line\n"
	unless $status_line =~ m{\AHTTP/\d\.\d\s+200\b};

# First body line is expected to carry total_rows, e.g.
#   {"total_rows":123,"offset":0,"rows":[
my $first_line = <$sock>;
die "CouchDB closed the connection before sending a body for /$name\n"
	unless defined $first_line;

my ($total) = $first_line =~ /total_rows\D+(\d+)/
	or die "can't find total_rows in CouchDB response: $first_line";

$| = 1;    # unbuffered STDOUT so the \r progress line updates in place
print "# $name total: $total\n";

my $start_t = time();
my $count   = 0;

while ( defined( my $line = <$sock> ) ) {
	if ( $line =~ /"id":"[^"]+"/ ) {

		# every row except the last ends in a comma, which is not valid
		# JSON for a stand-alone value
		$line =~ s/,[\r\n]+$//;
		my $row = from_json($line);
		$audit->insert( $row->{doc} );

		# progress report every 1000 documents
		next unless ++$count % 1000 == 0;
		my $elapsed = time() - $start_t;
		printf " %d/%d %.2f%% %.2f/s\r", $count, $total
			, ( $total   ? ( $count / $total ) * 100 : 0 )
			, ( $elapsed ? $count / $elapsed         : 0 );
	} elsif ( $line =~ /^\s*(?:\]\}\s*)?$/ ) {
		# blank line or the "]}" that closes the rows array -- not an error
		next;
	} else {
		warn "UNKNOWN: $line";
	}
}

# Finish the in-place progress line (if one was printed) and report the
# final numbers, which the every-1000 progress output never shows.
print "\n" if $count >= 1000;
printf "# %s imported %d of %d documents in %.2fs\n"
	, $name, $count, $total, time() - $start_t;

# Fewer rows than announced most likely means the transfer was cut short.
warn "# expected $total documents but imported $count\n"
	if $count != $total;
71