X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=dbi2mongo.pl;h=7644a4adc8729fa3f10c1ed5eff5bf4ae7040dff;hb=9362eb35a55188edd5193aff96c084ff37748e62;hp=378728ee28a7455cc2a90f342d5a8a77612b0378;hpb=c0bf687e91fcf399312f4a4805a88cd401ce5314;p=mongodb-experiments.git diff --git a/dbi2mongo.pl b/dbi2mongo.pl index 378728e..7644a4a 100755 --- a/dbi2mongo.pl +++ b/dbi2mongo.pl @@ -1,60 +1,140 @@ #!/usr/bin/perl -w sub BEGIN { -$ENV{DBI_AUTOPROXY}='dbi:Gofer:transport=stream;url=ssh:dpavlin@koha.ffzg.hr'; +#$ENV{DBI_AUTOPROXY}='dbi:Gofer:transport=stream;url=ssh:dpavlin@koha.ffzg.hr'; } use strict; use DBI; use MongoDB; use Data::Dump qw/dump/; +use Getopt::Long; -$|++; +my $debug = 0; +my $drop = 0; +my $limit = 10000; -my $debug = @ARGV ? 1 : 0; +GetOptions( + 'debug!' => \$debug, + 'drop!' => \$drop, + 'limit=i' => \$limit, +) || die $!; -our $dbi = "DBI:mysql:database=test"; -our ( $dbi, $user, $password ); -our ( $database, $collection ) = ( 'test', 'test' ); +our ( $dbi, $user, $password ) = ( "DBI:mysql:database=test" ); +our ( $database, $collection ) = ( '', '' ); +our ( $table, $pk ) = ( 'biblio' => 'biblionumber' ); +our ( $table2, $fk ) = ( 'biblioitems' => 'biblionumber' ); -our $sql = qq{ - select - id as _id, - table.* - from table - where id > ? - order by id asc - limit 100000 -}; +my $config = shift @ARGV || die "usage: $0 config.pl\n"; +require $config; -require 'config.pl'; - -warn "# $dbi $user -> $database $collection\n"; +warn "# $dbi $user -> $database $collection $table.$pk<->$table2.$fk\n"; my $conn = MongoDB::Connection->new; my $db = $conn->get_database( $database ); my $coll = $db->get_collection( $collection ); -my $dbh = DBI->connect($dbi,$user,$password,{ RaiseError => 1 }); +my $dbh = DBI->connect($dbi,$user,$password, { + RaiseError => 1, +# mysql_enable_utf8 => 1, +}); -$db->drop if $debug; +$db->drop if $drop; -# > db.items.find().sort({item_id:-1}).limit(1); +# db.items.find().sort({_id:-1}).limit(1); my $last = $coll->query()->sort({ '_id' => -1 })->limit(1)->next; warn dump( $last ); my $last_id = $last->{_id} || 0; -print "Fetching items from $dbi _id > $last_id\n"; +print "import $table.$pk > $last_id from $dbi\n"; + +our $offset = 0; +our $sth; + +sub select_table { + + $sth = $dbh->prepare(qq{ + select + $pk as _id, + $table.* + from $table + where $pk > ? + order by $pk asc + limit $limit + offset $offset + }); -my $sth = $dbh->prepare($sql); -$sth->execute( $last_id ); + print STDERR " $table:$offset "; + $sth->execute( $last_id ); +# warn "# $table columns ",dump( $sth->{NAME} ) if $offset == 0; + print STDERR " join ", $sth->rows, " rows "; +} -warn dump( $sth->{NAME} ); +our $row; +sub fetch_row { + $row = $sth->fetchrow_hashref(); + if ( ! $row && $sth->rows == $limit ) { + $offset += $limit; + select_table; + $row = $sth->fetchrow_hashref(); + } + return $row; +} -print "found ",$sth->rows," items to process...\n"; +our $join_offset = 0; +our $sth_join; + +sub join_table { + $sth_join = $dbh->prepare(qq{ + select + $table2.* + from $table2 + where $fk > ? + order by $fk asc + limit $limit + offset $join_offset + }); + print STDERR " $table2:$join_offset "; + $sth_join->execute( $last_id ); +# warn "# $table2 columns ",dump( $sth_join->{NAME} ) if $join_offset = 0; + print STDERR " join ",$sth_join->rows, " rows "; +} -while (my $row = $sth->fetchrow_hashref() ) { +our $row_join; +sub fetch_row_join { + $row_join = $sth_join->fetchrow_hashref(); + if ( ! $row_join && $sth_join->rows == $limit ) { + $join_offset += $limit; + join_table; + $row_join = $sth_join->fetchrow_hashref(); + } + return $row_join; +} +sub guess_types { + my $row = shift; map { $row->{$_} * 1 } grep { defined $row->{$_} && $row->{$_} =~ /^\d+$/ } keys %$row; - $coll->insert( $row ); + return $row; +} + +select_table; +join_table; +fetch_row_join; + +while (my $row = fetch_row() ) { + + while ( $row_join && $row_join->{$fk} < $row->{$pk} ) { + fetch_row_join; + } + + while ( $row_join && $row_join->{$fk} == $row->{$pk} ) { + push @{ $row->{ $table2 } }, guess_types($row_join); + print STDERR "j"; + fetch_row_join; + } + + $coll->insert( guess_types($row) ); + print STDERR "."; + } +print STDERR "\n";