save md5 db in user dir
author Dobrica Pavlinusic <dpavlin@rot13.org>
Sat, 8 Oct 2011 12:50:00 +0000 (14:50 +0200)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Sat, 8 Oct 2011 12:50:00 +0000 (14:50 +0200)
lib/CloudStore/Store.pm
rsync-piper.pl

index 381e888..4cb8e18 100644 (file)
@@ -14,14 +14,13 @@ use BerkeleyDB;
 use WarnColor;
 
 sub new {
-       my ($class) = @_;
+       my $class = shift;
 
-       my $self = {};
+       my $self = {@_};
        bless $self, $class;
 
-       my %md5;
-       $self->{db} = tie %md5, 'BerkeleyDB::Hash', -Filename => '/tmp/md5.db', -Flags => DB_CREATE;
-       $self->{md5} = \%md5;
+       die "no dir" unless $self->{dir};
+       $self->{md5pool} = $self->{dir} . '/md5';
 
        warn "# new ",dump $self if $ENV{DEBUG};
 
@@ -69,7 +68,7 @@ sub new_link {
 
        if ( $data->{file} =~ m{^(.*/?)\.send/([^/]+)/(.+)$} ) {
                my ( $dir, $to, $name ) = ( $1, $2, $3 );
-               my $path = "users/$data->{login}/blob/" . $data->{file};
+               my $path = $self->blob_path($data);
                my $link_to = readlink $path;
                warn "$link_to";
                if ( $link_to =~ s{^\Q/rsyncd-munged/\E}{/} ) {
@@ -82,12 +81,10 @@ sub new_link {
                        }
                        $s .= $link_to;
 
-                       my $d = "users/$to/blob";
-                       if ( ! -e $d ) {
-                               warn "ERROR: no to user $to in $d";
-                               return;
-                       }
-                       $d .= "/$name";
+                       my $d = $self->blob_path({
+                               pid => $data->{pid},
+                               file => $name
+                       });
 
                        # $name can contain directories so we must create them
                        my $to_dir = $d;
@@ -109,8 +106,8 @@ sub new_link {
 #                              });
 #                              $self->new_file($origin);
                                warn "INFO: sent file ",dump($l,$f);
-                               my $md5 = $self->{md5}->{$s} || die "no md5 for $s";
-                               $self->{md5}->{$d} = $md5;
+#                              my $md5 = $self->{md5}->{$s} || die "no md5 for $s";
+#                              $self->{md5}->{$d} = $md5;
                        }
 
 
@@ -121,15 +118,51 @@ sub new_link {
 }
 
 sub rsync_log {
-       my ( $self,$data ) = @_;
-       warn "# rsync_log $data";
+       my ( $self, $data ) = @_;
+       if ( $data =~ m/\[(\d+)\] rsync \w+ (\S+) \w+ (\S+)/ ) {
+               my ( $pid, $module, $login ) = ( $1, $2, $3 );
+               $login =~ s/\@.+//;
+               my ( undef, undef, $uid, $gid, undef, undef, $email, $dir, $shell ) =
+                       getpwnam $login;
+
+               my %md5;
+               my $db = tie %md5, 'BerkeleyDB::Hash',
+                       -Filename => "$dir/.md5.db",
+                       -Flags => DB_CREATE
+               ;
+
+               $self->{pid}->{$pid} = {
+                       login => $login,
+                       uid => $uid,
+                       gid => $gid,
+                       email => $email,
+                       dir => $dir,
+                       shell => $shell,
+                       md5 => \%md5,
+               };
+
+       } elsif ( $data =~ m/\[(\d+)\] sent \d+ bytes\s+received \d+ bytes/ ) {
+               my $pid = $1;
+               untie $self->{pid}->{$pid}->{md5} && warn "md5 hash untied";
+               warn "removed ", dump
+               delete $self->{pid}->{$pid};
+       } else {
+               warn "## rsync_log $data";
+       }
 }
 
+sub blob_path {
+       my ( $self, $data ) = @_;
+       my $blob = $self->{pid}->{ $data->{pid} }->{dir} || die "no dir for $data->{pid} in ",dump( $self->{pid} );
+       $blob .= '/' . $data->{file};
+       return $blob;
+}
+
+
 sub rsync_transfer {
        my ( $self,$data ) = @_;
 
-       my $blob = "users/$data->{login}/blob";
-        my $path = "$blob/$data->{file}";
+        my $path = $self->blob_path($data);
 
        if ( $data->{itemize} =~ m/^[c>]([fdL])/ ) { # received change/create
                my $type = $1;
@@ -151,11 +184,14 @@ sub rsync_transfer {
 }
 
 sub md5pool {
-       my ( $self, $path, $md5 ) = @_;
+       my ( $self, $data ) = @_;
 
-       my $pool = 'md5'; # FIXME sharding?
+       my $pool = $self->{md5pool} || die "no md5pool in ",dump $self;
        mkdir $pool unless -e $pool;
 
+       my $md5 = $data->{md5} || die "no md5 in ",dump $data;
+       my $path = $self->blob_path($data);
+
        if ( -e "$pool/$md5" ) {
                warn "dedup hit $md5 $path\n";
                my $dedup = $path . '.dedup';
@@ -167,8 +203,8 @@ sub md5pool {
                link $path, "$pool/$md5";
        }
 
-       $self->{md5}->{$path} = $md5;
-       warn "++ $md5 $path\n";
+       my $md5hash = $self->{pid}->{ $data->{pid} }->{md5} || die "no md5 for $data->{pid} in ",dump( $self->{pid} );
+       $md5hash->{ $data->{file} } = $md5;
 }
 
 my $empty_md5 = " " x 32;
@@ -184,34 +220,33 @@ sub dedup {
                while(<$md5sum>) {
                        chomp;
                        my ( $md5, $file ) = split(/\s+/,$_,2);
-                       if ( ! -e "md5/$md5" ) {
+                       if ( ! -e "$self->{md5path}/$md5" ) {
                                warn "MISSING $md5 $file\n";
                                next;
                        }
-                       my $new = "users/$data->{login}/blob/$dir$file";
-                       if ( ! -e $new ) {
+                       my $new = {
+                               pid => $data->{pid},
+                               file => "$dir$file",
+                               md5 => $md5,
+                       };
+                       my $new_path = $self->blob_path($new);
+                       if ( ! -e $new_path ) {
                                # create path from md5sum file
                                my $only_dir = $1 if $new =~ m{^(.+)/[^/]+$};
                                make_path $only_dir unless -d $only_dir;
-                               $imported += link "md5/$md5", $new;
-                               my $fake = {
-                                       login => $data->{login},
-                                       host => $data->{host},
-                                       file => $dir . $file,
-                                       md5 => $md5,
-                                       size => -s $new,
-                               };
-                               $self->new_file($fake);
-                               warn "import from $path ",dump($fake);
+                               $imported += link "$self->{md5path}/$md5", $new_path;
+                               $self->new_file($new);
+                               warn "import from $path ",dump($new);
+                               $self->md5pool( $new );
                        } else {
-                               $self->md5pool( $new => $md5 );
+                               $self->md5pool( $new );
                        }
                }
                print "INFO imported $imported files from ",dump($data);
        }
 
        if ( $data->{md5} ne $empty_md5 ) {
-               $self->md5pool( $path => $data->{md5} );
+               $self->md5pool( $data );
        } else {
                warn "empty md5", dump $data;
        }
index b23ab25..034aaed 100755 (executable)
@@ -55,7 +55,7 @@ if ( $ENV{SQL} ) {
        exit 1;
 }
 
-my $store = CloudStore::Store->new;
+my $store = CloudStore::Store->new( dir => $dir );
 
 unlink $log_fifo if -f $log_fifo;
 mkfifo $log_fifo, 0700 unless -p $log_fifo;
@@ -82,8 +82,8 @@ pid file  = $pid_file
 # don't check secrets file permission (uid)
 strict modes = no
 
-pre-xfer exec = /srv/cloudstore/rsync-xfer-trigger.pl
-post-xfer exec = /srv/cloudstore/rsync-xfer-trigger.pl
+#pre-xfer exec = /srv/cloudstore/rsync-xfer-trigger.pl
+#post-xfer exec = /srv/cloudstore/rsync-xfer-trigger.pl
 
 };
 
@@ -197,8 +197,9 @@ while(1) {
                chomp $line;
                warn $line, $/;
 
-               if ( $line =~ /transfer-log:(.+)/ ) {
-                       my $transfer = $1;
+               if ( $line =~ /\[(\d+)\] transfer-log:(.+)/ ) {
+                       my $pid = $1;
+                       my $transfer = $2;
                        $transfer =~ s|(\d\d\d\d)/(\d\d)/(\d\d)[-\s](\d\d:\d\d:\d\d)|$1-$2-$3T$4|g;
                        my ( $yyyy,$mm,$dd,undef,$login,undef ) = split( /[\-T\|]/, $transfer, 6 );
                        my $host = $1 if $login =~ s/\+(.+)//;
@@ -218,6 +219,9 @@ if(0) {
                        my %data;
                        @data{@transfer_names} = @v ; # FIXME validate?
 
+                       $data{pid} = $pid;
+                       # overwrite pid from transfer log with consistent one for start/stop
+
                        print ">>> data ",dump( \%data ) if $ENV{DEBUG};
 
                        $store->rsync_transfer( \%data );