save md5 db in user dir
[cloudstore.git] / lib / CloudStore / Store.pm
1 package CloudStore::Store;
2 use warnings;
3 use strict;
4
5 use autodie;
6 use JSON::XS;
7 use File::Path qw(make_path);
8 use File::Slurp qw();
9 use Digest::MD5 qw(md5_base64);
10 use Data::Dump qw(dump);
11 use Carp qw(confess);
12 use BerkeleyDB;
13
14 use WarnColor;
15
16 sub new {
17         my $class = shift;
18
19         my $self = {@_};
20         bless $self, $class;
21
22         die "no dir" unless $self->{dir};
23         $self->{md5pool} = $self->{dir} . '/md5';
24
25         warn "# new ",dump $self if $ENV{DEBUG};
26
27         return $self;
28 }
29
30 sub user_set {
31         my ( $self,$data ) = @_;
32 }
33
34 sub user_get {
35         my ( $self,$data ) = @_;
36 }
37
38 sub modify_file {
39         my ( $self,$data ) = @_;
40
41 #       if ( my $old = $self->file_get( $data ) ) {
42 #               $self->usage_decr( $data );
43 #       }
44
45         $self->new_file($data);
46 }
47
48 sub new_file {
49         my ( $self,$data ) = @_;
50 #       $self->file_set($data);
51 #       $self->usage_incr($data);
52 }
53
54 sub remove_file {
55         my ( $self, $data ) = @_;
56 #       $self->usage_decr( $data );
57 }
58
59 sub make_dir {
60         my ( $self, $data ) = @_;
61
62 }
63
64 sub new_link {
65         my ( $self, $data ) = @_;
66
67         warn "# new_link ",dump $data;
68
69         if ( $data->{file} =~ m{^(.*/?)\.send/([^/]+)/(.+)$} ) {
70                 my ( $dir, $to, $name ) = ( $1, $2, $3 );
71                 my $path = $self->blob_path($data);
72                 my $link_to = readlink $path;
73                 warn "$link_to";
74                 if ( $link_to =~ s{^\Q/rsyncd-munged/\E}{/} ) {
75
76                         my $s = $path;
77                         $s =~ s{/[^/]+$}{}; # strip filename
78                         while ( $link_to =~ s{/../}{/} ) {
79                                 $s =~ s{/[^/]+$}{} || die "can't strip $s";
80                                 warn "## simplify $s $link_to\n";
81                         }
82                         $s .= $link_to;
83
84                         my $d = $self->blob_path({
85                                 pid => $data->{pid},
86                                 file => $name
87                         });
88
89                         # $name can contain directories so we must create them
90                         my $to_dir = $d;
91                         $to_dir =~ s{/[^/]+$}{};
92                         make_path $to_dir if ! -e $to_dir;
93
94                         if ( ! -e $s ) {
95                                 warn "ERROR: can't find source $s";
96                         } else {
97
98                                 warn "link $s -> $d\n";
99                                 link $s, $d;
100
101                                 my ($l,$f) = ($1,$2) if $s =~ m{users/([^/]+)/blob/(.+)};
102
103 #                               my $origin = $self->file_get({
104 #                                       login => $l,
105 #                                       file  => $f,
106 #                               });
107 #                               $self->new_file($origin);
108                                 warn "INFO: sent file ",dump($l,$f);
109 #                               my $md5 = $self->{md5}->{$s} || die "no md5 for $s";
110 #                               $self->{md5}->{$d} = $md5;
111                         }
112
113
114                 } else {
115                         warn "ERROR: can't SEND To:$to Name:$name Link:$link_to";
116                 }
117         }
118 }
119
120 sub rsync_log {
121         my ( $self, $data ) = @_;
122         if ( $data =~ m/\[(\d+)\] rsync \w+ (\S+) \w+ (\S+)/ ) {
123                 my ( $pid, $module, $login ) = ( $1, $2, $3 );
124                 $login =~ s/\@.+//;
125                 my ( undef, undef, $uid, $gid, undef, undef, $email, $dir, $shell ) =
126                         getpwnam $login;
127
128                 my %md5;
129                 my $db = tie %md5, 'BerkeleyDB::Hash',
130                         -Filename => "$dir/.md5.db",
131                         -Flags => DB_CREATE
132                 ;
133
134                 $self->{pid}->{$pid} = {
135                         login => $login,
136                         uid => $uid,
137                         gid => $gid,
138                         email => $email,
139                         dir => $dir,
140                         shell => $shell,
141                         md5 => \%md5,
142                 };
143
144         } elsif ( $data =~ m/\[(\d+)\] sent \d+ bytes\s+received \d+ bytes/ ) {
145                 my $pid = $1;
146                 untie $self->{pid}->{$pid}->{md5} && warn "md5 hash untied";
147                 warn "removed ", dump
148                 delete $self->{pid}->{$pid};
149         } else {
150                 warn "## rsync_log $data";
151         }
152 }
153
154 sub blob_path {
155         my ( $self, $data ) = @_;
156         my $blob = $self->{pid}->{ $data->{pid} }->{dir} || die "no dir for $data->{pid} in ",dump( $self->{pid} );
157         $blob .= '/' . $data->{file};
158         return $blob;
159 }
160
161
162 sub rsync_transfer {
163         my ( $self,$data ) = @_;
164
165         my $path = $self->blob_path($data);
166
167         if ( $data->{itemize} =~ m/^[c>]([fdL])/ ) { # received change/create
168                 my $type = $1;
169
170                 if ( $type eq 'f' ) {
171                         $self->modify_file( $data );
172                         $self->dedup( $data, $path );
173                 } elsif ( $type eq 'd' ) {
174                         $self->make_dir( $data );
175                 } elsif ( $type eq 'L' ) {
176                         $self->new_link( $data );
177                 } else {
178                         die "unknown type $type ", dump $data;
179                 }
180         } elsif ( $data->{itemize} =~ m/\*deleting/ ) {
181                 $self->remove_file($data);
182         }
183         return $data;
184 }
185
186 sub md5pool {
187         my ( $self, $data ) = @_;
188
189         my $pool = $self->{md5pool} || die "no md5pool in ",dump $self;
190         mkdir $pool unless -e $pool;
191
192         my $md5 = $data->{md5} || die "no md5 in ",dump $data;
193         my $path = $self->blob_path($data);
194
195         if ( -e "$pool/$md5" ) {
196                 warn "dedup hit $md5 $path\n";
197                 my $dedup = $path . '.dedup';
198                 rename $path, $dedup;
199                 link "$pool/$md5", $path;
200                 unlink $dedup;
201                 # FIXME fix perms?
202         } else {
203                 link $path, "$pool/$md5";
204         }
205
206         my $md5hash = $self->{pid}->{ $data->{pid} }->{md5} || die "no md5 for $data->{pid} in ",dump( $self->{pid} );
207         $md5hash->{ $data->{file} } = $md5;
208 }
209
210 my $empty_md5 = " " x 32;
211
212 sub dedup {
213         my ( $self, $data, $path ) = @_;
214
215         if ( $data->{file} =~ /^(.+\/)?md5sum$/ ) {
216                 my $dir = $1;
217                 my $imported = 0;
218                 warn "IMPORT ", $data->{file}, "\n";
219                 open(my $md5sum, '<', $path);
220                 while(<$md5sum>) {
221                         chomp;
222                         my ( $md5, $file ) = split(/\s+/,$_,2);
223                         if ( ! -e "$self->{md5path}/$md5" ) {
224                                 warn "MISSING $md5 $file\n";
225                                 next;
226                         }
227                         my $new = {
228                                 pid => $data->{pid},
229                                 file => "$dir$file",
230                                 md5 => $md5,
231                         };
232                         my $new_path = $self->blob_path($new);
233                         if ( ! -e $new_path ) {
234                                 # create path from md5sum file
235                                 my $only_dir = $1 if $new =~ m{^(.+)/[^/]+$};
236                                 make_path $only_dir unless -d $only_dir;
237                                 $imported += link "$self->{md5path}/$md5", $new_path;
238                                 $self->new_file($new);
239                                 warn "import from $path ",dump($new);
240                                 $self->md5pool( $new );
241                         } else {
242                                 $self->md5pool( $new );
243                         }
244                 }
245                 print "INFO imported $imported files from ",dump($data);
246         }
247
248         if ( $data->{md5} ne $empty_md5 ) {
249                 $self->md5pool( $data );
250         } else {
251                 warn "empty md5", dump $data;
252         }
253 }
254
255 1;