From 6261d8b69af094e7daf914332268293995c23974 Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Sun, 9 Oct 2011 23:14:48 +0200 Subject: [PATCH] port md5 support to TokyoCabinet --- bin/debian-install.sh | 2 +- dump-tch.pl | 14 ++++++++ lib/CloudStore/Store.pm | 74 +++++++++++++++++++++++++---------------- test.sh | 2 +- 4 files changed, 62 insertions(+), 30 deletions(-) create mode 100755 dump-tch.pl diff --git a/bin/debian-install.sh b/bin/debian-install.sh index fae3dcc..687efad 100755 --- a/bin/debian-install.sh +++ b/bin/debian-install.sh @@ -1,5 +1,5 @@ #!/bin/sh -x sudo apt-get install libautodie-perl libdata-dump-perl libfile-slurp-perl libtime-hires-perl \ -libjson-xs-perl libmodule-refresh-perl libnss-extrausers libberkeleydb-perl +libjson-xs-perl libmodule-refresh-perl libnss-extrausers libtokyocabinet-perl diff --git a/dump-tch.pl b/dump-tch.pl new file mode 100755 index 0000000..e02920b --- /dev/null +++ b/dump-tch.pl @@ -0,0 +1,14 @@ +#!/usr/bin/perl +use warnings; +use strict; + +use TokyoCabinet; +use Data::Dump qw(dump); + +my $path = shift @ARGV; +die $! unless -r $path; + +my %h; +tie %h, "TokyoCabinet::HDB", $path || die $!; + +print dump( \%h ); diff --git a/lib/CloudStore/Store.pm b/lib/CloudStore/Store.pm index 8ba2ba4..3560ac2 100644 --- a/lib/CloudStore/Store.pm +++ b/lib/CloudStore/Store.pm @@ -9,7 +9,7 @@ use File::Slurp qw(); use Digest::MD5 qw(md5_base64); use Data::Dump qw(dump); use Carp qw(confess); -use BerkeleyDB; +use TokyoCabinet; use WarnColor; @@ -46,7 +46,7 @@ sub modify_file { if ( $data->{file} =~ m{^(.*/).sync/send/([^/]+)$} ) { my $from_dir = $1; - warn "SEND $2 from $from_dir"; + warn "SEND $2 from $from_dir\n"; my $sent_files; open(my $send, '<', $self->blob_path($data) ); while(<$send>) { @@ -68,9 +68,12 @@ sub modify_file { my $to_path = "$dir/received/$file"; mkbasedir $to_path; - warn "SEND $from_path -> $to_path"; + warn "SEND $from_path -> $to_path\n"; + unlink $to_path if -e $to_path; # FIXME why we need this? $sent_files->{$to} += link $from_path, $to_path; # FIXME cross-shard + my $md5 = $self->md5sum($data)->get( $from_dir . $file ) || warn "no md5 for $from_dir$file"; + $self->md5sum({login => $to})->put( "/received/$file" => $md5 ); } warn "SENT ",dump $sent_files; @@ -99,6 +102,14 @@ warn $_; warn "MV $from_path -> $to_path"; mkbasedir $to_path; rename $from_path, $to_path; + + my $md5 = $self->md5sum($data)->get( $from_dir . $from ); + die "no md5sum $from_dir $from " unless $md5; + + $self->md5sum($data)->out( $from_dir . $from ); + $self->md5sum($data)->put( $from_dir . $to => $md5 ); + + warn "$md5 moved to $from_dir $to"; } return 0; # skip dedup @@ -117,8 +128,8 @@ sub new_file { sub remove_file { my ( $self, $data ) = @_; - my $md5sum = $self->md5sum($data); - my $md5 = $md5sum->{ $data->{file} } || return; # directories don't have md5sums + my $md5 = $self->md5sum($data)->get( $data->{file} ); + return unless $md5; # directories don't have md5sums my $path = $self->{md5pool} . '/' . $md5; my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,$blksize,$blocks) @@ -180,10 +191,9 @@ sub new_link { # }); # $self->new_file($origin); warn "INFO: sent file ",dump($l,$f); - my $md5sum = $self->md5sum($data); - - my $md5 = $md5sum->{$s} || die "no md5 for $s"; - $md5sum->{$d} = $md5; # FIXME broken! + my $md5 = $self->md5sum($data)->get($s); + $self->md5sum({ login => $to })->put($d => $md5 ); + # FIXME broken! } @@ -193,28 +203,26 @@ sub new_link { } } -our $md5_login; sub md5sum { my ( $self, $data ) = @_; - if ( exists $md5_login->{$data->{login}} ) { - return $md5_login->{$data->{login}}; - } elsif ( my $login = $data->{login} ) { + my $login = $data->{login} || confess "missing login in ",dump $data; - my $md5_path = $self->{dir} || die "no dir?"; - $login =~ s/^u//; - $md5_path .= "/$login/.md5.db"; + return $self->{md5sum}->{$login} if exists $self->{md5sum}->{$login}; - my %md5; - my $db = tie %md5, 'BerkeleyDB::Hash', - -Filename => $md5_path, - -Flags => DB_CREATE, - ; + my ( undef, undef, $uid, $gid, undef, undef, $email, $dir, $shell ) = + getpwnam $login; - return $md5_login->{$login} = \%md5; - } else { - confess "can't open md5sum"; - } + my $md5_path = "$dir/.md5"; + + my $db = TokyoCabinet::HDB->new(); + $db->open($md5_path, $db->OWRITER | $db->OCREAT) + or die "can't open $md5_path: ",$db->errmsg( $db->ecode ); + + warn "open $md5_path"; + + $self->{md5sum}->{$login} = $db; + return $db; } sub rsync_log { @@ -239,9 +247,17 @@ sub rsync_log { } elsif ( $data =~ m/\[(\d+)\] sent \S+ bytes\s+received \S+ bytes/ ) { my $pid = $1; - untie $md5_login->{ $self->{$pid}->{login} } && warn "untie $pid"; + + foreach my $login ( keys %{ $self->{md5sum} } ) { + $self->{md5sum}->{$login}->close; + warn "close md5sum $login"; + } + delete $self->{md5sum}; + delete $self->{pid}->{$pid}; warn "removed $pid"; +warn dump $self; + } else { # warn "## rsync_log $data"; } @@ -275,6 +291,8 @@ sub rsync_transfer { } } elsif ( $data->{itemize} =~ m/\*deleting/ ) { $self->remove_file($data); + } else { + warn "IGNORED ",dump($data); } return $data; } @@ -297,10 +315,10 @@ sub md5pool { # FIXME fix perms? } else { link $path, "$pool/$md5"; + warn "dedup +++ $md5 $path"; } - my $md5sum = $self->md5sum($data); - $md5sum->{ $data->{file} } = $md5; + $self->md5sum($data)->put( $data->{file} => $md5 ); } my $empty_md5 = " " x 32; diff --git a/test.sh b/test.sh index 200af6b..707577f 100755 --- a/test.sh +++ b/test.sh @@ -27,7 +27,7 @@ cd $dir push . --recursive --delete -v -if false ; then # skip tests if false +if true ; then # skip tests if false ps ax > foo push foo -v -- 2.20.1