fixed found_in_db to prevent duplicate file insertion when restarting from
[BackupPC.git] / bin / BackupPC_updatedb
1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "__INSTALLDIR__/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use Data::Dumper;
10 use Getopt::Std;
11 use Time::HiRes qw/time/;
12 use File::Pid;
13 use POSIX qw/strftime/;
14
15 use constant BPC_FTYPE_DIR => 5;
16
17 my $debug = 0;
18 $|=1;
19
20 my $start_t = time();
21
22 my $pidfile = new File::Pid;
23
24 if (my $pid = $pidfile->running ) {
25         die "$0 already running: $pid\n";
26 } elsif ($pidfile->pid ne $$) {
27         $pidfile->remove;
28         $pidfile = new File::Pid;
29 }
30 $pidfile->write;
31 print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";
32
33 my $t_fmt = '%Y-%m-%d %H:%M:%S';
34
35 my $hosts;
36 my $bpc = BackupPC::Lib->new || die;
37 my %Conf = $bpc->Conf();
38 my $TopDir = $bpc->TopDir();
39 my $beenThere = {};
40
41 my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
42 my $user = $Conf{SearchUser} || '';
43
44 my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
45
46 my %opt;
47
48 if ( !getopts("cdm:v:", \%opt ) ) {
49         print STDERR <<EOF;
50 usage: $0 [-c|-d] [-m num] [-v|-v level]
51
52 Options:
53         -c      create database on first use
54         -d      delete database before import
55         -m num  import just num increments for one host
56         -v num  set verbosity (debug) level (default $debug)
57 EOF
58         exit 1;
59 }
60
61 ###################################create tables############################3
62
63 if ($opt{c}) {
64         sub do_index {
65                 my $index = shift || return;
66                 my ($table,$col,$unique) = split(/_/, $index);
67                 $unique ||= '';
68                 $index =~ s/,/_/g;
69                 $dbh->do(qq{ create $unique index $index on $table($col) });
70         }
71
72         print "creating tables...\n";
73       
74         $dbh->do(qq{
75                 create table hosts (
76                         ID      SERIAL          PRIMARY KEY,
77                         name    VARCHAR(30)     NOT NULL,
78                         IP      VARCHAR(15)
79                 );            
80         });
81               
82         $dbh->do(qq{
83                 create table shares (
84                         ID      SERIAL          PRIMARY KEY,
85                         hostID  INTEGER         NOT NULL references hosts(id),
86                         name    VARCHAR(30)     NOT NULL,
87                         share   VARCHAR(200)    NOT NULL,
88                         localpath VARCHAR(200)      
89                 );            
90         });
91         
92         $dbh->do(qq{
93                 create table backups (
94                         hostID  INTEGER         NOT NULL references hosts(id),
95                         num     INTEGER         NOT NULL,
96                         date    integer         NOT NULL, 
97                         type    CHAR(4)         not null,
98                         shareID integer         not null references shares(id),
99                         size    integer         not null,
100                         PRIMARY KEY(hostID, num, shareID) 
101                 );            
102         });
103
104         #do_index('backups_hostid,num_unique');
105
106         $dbh->do(qq{
107                 create table dvds (
108                         ID      SERIAL          PRIMARY KEY, 
109                         num     INTEGER         NOT NULL,
110                         name    VARCHAR(255)    NOT NULL,
111                         mjesto  VARCHAR(255)
112                 );
113         });
114
115         $dbh->do(qq{     
116                 create table files (
117                         ID      SERIAL          PRIMARY KEY,  
118                         shareID INTEGER         NOT NULL references shares(id),
119                         backupNum  INTEGER      NOT NULL,
120                         name       VARCHAR(255) NOT NULL,
121                         path       VARCHAR(255) NOT NULL,
122                         date       integer      NOT NULL,
123                         type       INTEGER      NOT NULL,
124                         size       INTEGER      NOT NULL,
125                         dvdid      INTEGER      references dvds(id)     
126                 );
127         });
128
129         print "creating indexes:";
130
131         foreach my $index (qw(
132                 hosts_name
133                 backups_hostID
134                 backups_num
135                 shares_hostID
136                 shares_name
137                 files_shareID
138                 files_path
139                 files_name
140                 files_date
141                 files_size
142         )) {
143                 print " $index";
144                 do_index($index);
145         }
146         print "...\n";
147
148         $dbh->commit;
149
150 }
151
152 if ($opt{d}) {
153         print "deleting ";
154         foreach my $table (qw(files dvds backups shares hosts)) {
155                 print "$table ";
156                 $dbh->do(qq{ DELETE FROM $table });
157         }
158         print " done...\n";
159
160         $dbh->commit;
161 }
162
163 if ($opt{v}) {
164         print "Debug level at $opt{v}\n";
165         $debug = $opt{v};
166 }
167
168 #################################INSERT VALUES#############################
169
170 # get hosts
171 $hosts = $bpc->HostInfoRead();
172 my $hostID;
173 my $shareID;
174
175 my $sth;
176
177 $sth->{insert_hosts} = $dbh->prepare(qq{
178 INSERT INTO hosts (name, IP) VALUES (?,?)
179 });
180
181 $sth->{hosts_by_name} = $dbh->prepare(qq{
182 SELECT ID FROM hosts WHERE name=?
183 });
184
185 $sth->{backups_count} = $dbh->prepare(qq{
186 SELECT COUNT(*)
187 FROM backups
188 WHERE hostID=? AND num=? AND shareid=?
189 });
190
191 $sth->{insert_backups} = $dbh->prepare(qq{
192 INSERT INTO backups (hostID, num, date, type, shareid, size)
193 VALUES (?,?,?,?,?,?)
194 });
195
196 $sth->{insert_files} = $dbh->prepare(qq{
197 INSERT INTO files
198         (shareID, backupNum, name, path, date, type, size)
199         VALUES (?,?,?,?,?,?,?)
200 });
201
202 sub fmt_time {
203         my $t = shift || return;
204         my $out = "";
205         my ($ss,$mm,$hh) = gmtime($t);
206         $out .= "${hh}h" if ($hh);
207         $out .= sprintf("%02d:%02d", $mm,$ss);
208         return $out;
209 }
210
211 foreach my $host_key (keys %{$hosts}) {
212
213         my $hostname = $hosts->{$host_key}->{'host'} || die "can't find host for $host_key";
214
215         $sth->{hosts_by_name}->execute($hosts->{$host_key}->{'host'});
216
217         unless (($hostID) = $sth->{hosts_by_name}->fetchrow_array()) {
218                 $sth->{insert_hosts}->execute(
219                         $hosts->{$host_key}->{'host'},
220                         $hosts->{$host_key}->{'ip'}
221                 );
222
223                 $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
224         }
225
226         print "host ".$hosts->{$host_key}->{'host'}.": ";
227  
228         # get backups for a host
229         my @backups = $bpc->BackupInfoRead($hostname);
230         my $incs = scalar @backups;
231         print  "$incs increments\n";
232
233         my $inc_nr = 0;
234         $beenThere = {};
235
236         foreach my $backup (@backups) {
237
238                 $inc_nr++;
239                 last if ($opt{m} && $inc_nr > $opt{m});
240
241                 my $backupNum = $backup->{'num'};
242                 my @backupShares = ();
243
244                 printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n", 
245                         $hosts->{$host_key}->{'host'},
246                         $inc_nr, $incs, $backupNum, 
247                         $backup->{type} || '?',
248                         $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
249                         strftime($t_fmt,localtime($backup->{startTime})),
250                         fmt_time($backup->{endTime} - $backup->{startTime})
251                 );
252
253                 my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
254                 foreach my $share ($files->shareList($backupNum)) {
255
256                         my $t = time();
257
258                         $shareID = getShareID($share, $hostID, $hostname);
259                 
260                         $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
261                         my ($count) = $sth->{backups_count}->fetchrow_array();
262                         # skip if allready in database!
263                         next if ($count > 0);
264
265                         # dump some log
266                         print strftime($t_fmt,localtime())," ", $share;
267
268                         my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);
269
270                         $sth->{insert_backups}->execute(
271                                 $hostID,
272                                 $backupNum,
273                                 $backup->{'endTime'},
274                                 $backup->{'type'},
275                                 $shareID,
276                                 $size,
277                         );
278
279                         print " commit";
280                         $dbh->commit();
281
282                         my $dur = (time() - $t) || 1;
283                         printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
284                                 $nf, $f, $nd, $d,
285                                 ($size / 1024 / 1024),
286                                 ( ($f+$d) / $dur ),
287                                 fmt_time($dur)
288                         );
289                 }
290
291         }
292 }
293 undef $sth;
294 $dbh->commit();
295 $dbh->disconnect();
296
297 print "total duration: ",fmt_time(time() - $start_t),"\n";
298
299 $pidfile->remove;
300
301 sub getShareID() {
302
303         my ($share, $hostID, $hostname) = @_;
304
305         $sth->{share_id} ||= $dbh->prepare(qq{
306                 SELECT ID FROM shares WHERE hostID=? AND name=?
307         });
308
309         $sth->{share_id}->execute($hostID,$share);
310
311         my ($id) = $sth->{share_id}->fetchrow_array();
312
313         return $id if (defined($id));
314
315         $sth->{insert_share} ||= $dbh->prepare(qq{
316                 INSERT INTO shares 
317                         (hostID,name,share,localpath) 
318                 VALUES (?,?,?,?)
319         });
320
321         my $drop_down = $hostname . '/' . $share;
322         $drop_down =~ s#//+#/#g;
323
324         $sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
325         return $dbh->last_insert_id(undef,undef,'shares',undef);
326 }
327
328 sub found_in_db {
329
330         my @data = @_;
331         shift @data;
332
333         my ($key, $shareID,undef,$name,$path,$date,undef,$size) = @_;
334
335         return $beenThere->{$key} if (defined($beenThere->{$key}));
336
337         $sth->{file_in_db} ||= $dbh->prepare(qq{
338                 SELECT 1 FROM files
339                 WHERE shareID = ? and
340                         path = ? and 
341                         date = ? and
342                         size = ?
343                 LIMIT 1
344         });
345
346         my @param = ($shareID,$path,$date,$size);
347         $sth->{file_in_db}->execute(@param);
348         my $rows = $sth->{file_in_db}->rows;
349         print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);
350
351         $beenThere->{$key}++;
352
353         $sth->{'insert_files'}->execute(@data) unless ($rows);
354         return $rows;
355 }
356
357 ####################################################
358 # recursing through filesystem structure and       #
359 # and returning flattened files list               #
360 ####################################################
361 sub recurseDir($$$$$$$$) {
362
363         my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;
364
365         print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);
366
367         my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);
368
369         { # scope
370                 my @stack;
371
372                 print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
373                 my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);
374
375                 # first, add all the entries in current directory
376                 foreach my $path_key (keys %{$filesInBackup}) {
377                         print STDERR "# file ",Dumper($filesInBackup->{$path_key}),"\n" if ($debug >= 3);
378                         my @data = (
379                                 $shareID,
380                                 $backupNum,
381                                 $path_key,
382                                 $filesInBackup->{$path_key}->{'relPath'},
383                                 $filesInBackup->{$path_key}->{'mtime'},
384                                 $filesInBackup->{$path_key}->{'type'},
385                                 $filesInBackup->{$path_key}->{'size'}
386                         );
387
388                         my $key = join(" ", (
389                                 $shareID,
390                                 $dir,
391                                 $path_key,
392                                 $filesInBackup->{$path_key}->{'mtime'},
393                                 $filesInBackup->{$path_key}->{'size'}
394                         ));
395
396                         my $found;
397                         if (! defined($beenThere->{$key}) && ! ($found = found_in_db($key, @data)) ) {
398                                 print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);
399
400                                 if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
401                                         $new_dirs++ unless ($found);
402                                         print STDERR " dir\n" if ($debug >= 2);
403                                 } else {
404                                         $new_files++ unless ($found);
405                                         print STDERR " file\n" if ($debug >= 2);
406                                 }
407                                 $size += $filesInBackup->{$path_key}->{'size'} || 0;
408                         }
409
410                         if ($filesInBackup->{$path_key}->{'type'} == BPC_FTYPE_DIR) {
411                                 $nr_dirs++;
412
413                                 my $full_path = $dir . '/' . $path_key;
414                                 push @stack, $full_path;
415                                 print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
416
417 #                               my ($f,$nf,$d,$nd) = recurseDir($bpc, $hostname, $backups, $backupNum, $share, $path_key, $shareID) unless ($beenThere->{$key});
418 #
419 #                               $nr_files += $f;
420 #                               $new_files += $nf;
421 #                               $nr_dirs += $d;
422 #                               $new_dirs += $nd;
423
424                         } else {
425                                 $nr_files++;
426                         }
427                 }
428
429                 print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);
430
431                 while ( my $dir = shift @stack ) {
432                         my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID);
433                         print STDERR "# $dir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
434                         $nr_files += $f;
435                         $new_files += $nf;
436                         $nr_dirs += $d;
437                         $new_dirs += $nd;
438                         $size += $s;
439                 }
440         }
441
442         return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
443 }
444