added experimental HyperEstraier index creation.
[BackupPC.git] / bin / BackupPC_updatedb
1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "__INSTALLDIR__/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use Data::Dumper;
10 use Getopt::Std;
11 use Time::HiRes qw/time/;
12 use File::Pid;
13 use POSIX qw/strftime/;
14
15 use constant BPC_FTYPE_DIR => 5;
16
# Verbosity level for diagnostics on STDERR; overridden by -v below.
my $debug = 0;
# Unbuffered STDOUT so progress output shows up as it is printed.
$| = 1;

# Start of the run (Time::HiRes::time), used for the final duration report.
my $start_t = time();

# Pid file guarding against two copies running at once.
my $pidfile = File::Pid->new;

if (my $pid = $pidfile->running) {
        die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
        # The pid file belongs to some other pid that is not reported as
        # running: discard it and start over with a fresh one.
        $pidfile->remove;
        $pidfile = File::Pid->new;
}
$pidfile->write;
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";

# strftime() format used for every timestamp in the progress output.
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
# Per-host cache of file keys already handled (see found_in_db).
my $beenThere = {};

my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';

# Errors raise exceptions; changes are committed explicitly.
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });

my %opt;

if ( !getopts("cdm:v:i", \%opt ) ) {
        print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v level] [-i]

Options:
        -c      create database on first use
        -d      delete database before import
        -m num  import just num increments for one host
        -v num  set verbosity (debug) level (default $debug)
        -i      update HyperEstraier full text index
EOF
        # Do not leave the database handle and our pid file behind when
        # bailing out on a usage error.
        $dbh->disconnect();
        $pidfile->remove;
        exit 1;
}

if ($opt{v}) {
        print "Debug level at $opt{v}\n";
        $debug = $opt{v};
}
66
67 ## update index ##
# -i: (re)build the HyperEstraier full text index from the files table,
# then exit without running the import below.
if ($opt{i}) {

        my $index_dir = '/var/tmp/casket';

        print "updating HyperEstraier index $index_dir...";

        use HyperEstraier;
        my $db = HyperEstraier::Database->new();
        $db->open($index_dir, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);

        my $sth = $dbh->prepare(qq{
                SELECT
                        files.id                        AS fid,
                        hosts.name                      AS hname,
                        shares.name                     AS sname,
                        shares.share                    AS sharename,
                        files.backupNum                 AS backupNum,
                        files.name                      AS filename,
                        files.path                      AS filepath,
                        files.date                      AS date,
                        files.type                      AS filetype,
                        files.size                      AS size,
                        files.shareid                   AS shareid
                FROM files 
                        INNER JOIN shares       ON files.shareID=shares.ID
                        INNER JOIN hosts        ON hosts.ID = shares.hostID
                        INNER JOIN backups      ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
        });

        $sth->execute();

        my $rows = $sth->rows;

        # Number of documents per progress tick.  Must never be 0: with fewer
        # than 15 rows int($rows / 15) is 0, which used to blow up in the
        # division and modulus below.
        my $dot = int($rows / 15) || 1;

        print $rows, " files ($dot/#) ";

        # Format an epoch time as an ISO-8601 style local timestamp
        # (YYYY-MM-DDTHH:MM:SS).  Returns nothing for a false/undefined time.
        # NOTE(review): this used to go through BackupPC::Lib::timeStamp($t)
        # called as a plain function; that routine appears to expect the
        # BackupPC::Lib object as its first argument, so strftime() is used
        # directly instead -- confirm against BackupPC::Lib.
        sub fmt_date {
                my $t = shift || return;
                return strftime('%Y-%m-%dT%H:%M:%S', localtime($t));
        }

        my $i = 0;
        # Countdown value printed at every progress tick.
        my $max = int($rows / $dot);

        # Column names are requested in lower case so that the lookups below
        # work regardless of how the database driver folds the SQL aliases.
        while (my $row = $sth->fetchrow_hashref('NAME_lc')) {

                # create a document object
                my $doc = HyperEstraier::Document->new;

                # add attributes to the document object
                $doc->add_attr('@uri', 'file:///' . $row->{'fid'});

                foreach my $c (qw/fid hname sname sharename backupNum filename filepath shareid/) {
                        my $value = $row->{lc($c)};
                        # 0 is a legitimate value (e.g. backup number 0), so
                        # only undefined and empty values are skipped.
                        $doc->add_attr($c, $value) if (defined($value) && $value ne '');
                }

                my $date = fmt_date($row->{'date'});
                $doc->add_attr('date', $date) if (defined($date));

                # add the body text to the document object
                my $path = $row->{'filepath'};
                $doc->add_text($path);
                # hidden text: the same path with a space after every character
                $path =~ s/(.)/$1 /g;
                $doc->add_hidden_text($path);

                print STDERR $doc->dump_draft,"\n" if ($debug > 1);

                # register the document object to the database
                $db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);

                $i++;
                if ($i % $dot == 0) {
                        print "$max ";
                        $max--;
                }

        }

        print "sync";
        $db->sync();
        print " close\n";
        $db->close();

        # Nothing was written through DBI; end the transaction explicitly and
        # release the connection and the pid file before leaving.
        $dbh->rollback();
        $dbh->disconnect();
        $pidfile->remove;

        exit;
}
153
################################### create tables ############################
155
# -c: create the schema (tables, then single-column indexes) and commit.
if ($opt{c}) {
        # Create an index from a spec of the form "table_column[_unique]".
        # The spec itself (with any commas turned into underscores) becomes
        # the index name; a third "_unique" part makes it a unique index.
        sub do_index {
                my $index = shift || return;
                my ($table,$col,$unique) = split(/_/, $index);
                $unique ||= '';
                $index =~ s/,/_/g;
                $dbh->do(qq{ create $unique index $index on $table($col) });
        }

        print "creating tables...\n";

        # Tables in dependency order: every table comes after the ones its
        # foreign keys reference.  The size columns are BIGINT because a
        # single file, and even more so the total of a share, can exceed
        # what a 32-bit INTEGER holds.
        my @ddl = (
                qq{
                create table hosts (
                        ID      SERIAL          PRIMARY KEY,
                        name    VARCHAR(30)     NOT NULL,
                        IP      VARCHAR(15)
                );
                },
                qq{
                create table shares (
                        ID      SERIAL          PRIMARY KEY,
                        hostID  INTEGER         NOT NULL references hosts(id),
                        name    VARCHAR(30)     NOT NULL,
                        share   VARCHAR(200)    NOT NULL,
                        localpath VARCHAR(200)
                );
                },
                qq{
                create table backups (
                        hostID  INTEGER         NOT NULL references hosts(id),
                        num     INTEGER         NOT NULL,
                        date    integer         NOT NULL,
                        type    CHAR(4)         not null,
                        shareID integer         not null references shares(id),
                        size    BIGINT          not null,
                        PRIMARY KEY(hostID, num, shareID)
                );
                },
                qq{
                create table dvds (
                        ID      SERIAL          PRIMARY KEY,
                        num     INTEGER         NOT NULL,
                        name    VARCHAR(255)    NOT NULL,
                        mjesto  VARCHAR(255)
                );
                },
                qq{
                create table files (
                        ID      SERIAL          PRIMARY KEY,
                        shareID INTEGER         NOT NULL references shares(id),
                        backupNum  INTEGER      NOT NULL,
                        name       VARCHAR(255) NOT NULL,
                        path       VARCHAR(255) NOT NULL,
                        date       integer      NOT NULL,
                        type       INTEGER      NOT NULL,
                        size       BIGINT       NOT NULL,
                        dvdid      INTEGER      references dvds(id)
                );
                },
        );

        foreach my $statement (@ddl) {
                $dbh->do($statement);
        }

        #do_index('backups_hostid,num_unique');

        print "creating indexes:";

        foreach my $index (qw(
                hosts_name
                backups_hostID
                backups_num
                shares_hostID
                shares_name
                files_shareID
                files_path
                files_name
                files_date
                files_size
        )) {
                print " $index";
                do_index($index);
        }
        print "...\n";

        $dbh->commit;

}
244
# -d: empty every table, then commit.
if ($opt{d}) {
        # Referencing tables are listed before the tables they point to.
        my @tables = qw(files dvds backups shares hosts);

        print "deleting ";
        for my $table (@tables) {
                print "$table ";
                $dbh->do(qq{ DELETE FROM $table });
        }
        print " done...\n";

        $dbh->commit;
}
255
256 #################################INSERT VALUES#############################
257
# Read the host list from BackupPC.
$hosts = $bpc->HostInfoRead();

# Database ids of the host and share currently being imported.
my $hostID;
my $shareID;

# Prepared statements, keyed by name.  The subs further down add their own
# statements to this hash on first use.
my $sth = {
        insert_hosts => $dbh->prepare(qq{
INSERT INTO hosts (name, IP) VALUES (?,?)
}),

        hosts_by_name => $dbh->prepare(qq{
SELECT ID FROM hosts WHERE name=?
}),

        backups_count => $dbh->prepare(qq{
SELECT COUNT(*)
FROM backups
WHERE hostID=? AND num=? AND shareid=?
}),

        insert_backups => $dbh->prepare(qq{
INSERT INTO backups (hostID, num, date, type, shareid, size)
VALUES (?,?,?,?,?,?)
}),

        insert_files => $dbh->prepare(qq{
INSERT INTO files
        (shareID, backupNum, name, path, date, type, size)
        VALUES (?,?,?,?,?,?,?)
}),
};
289
# Format a duration given in seconds as "MM:SS", or "<hours>hMM:SS" once it
# reaches an hour.  Fractions of a second are dropped.
#
# An undefined, zero or negative duration yields "00:00" (rather than an
# empty list, which silently removed an argument from the callers' printf),
# and hours are not wrapped at 24, so runs longer than a day are reported
# in full.
sub fmt_time {
        my $t = shift;
        $t = 0 if (!defined($t) || $t < 0);
        $t = int($t);

        my $hh = int($t / 3600);
        my $mm = int(($t % 3600) / 60);
        my $ss = $t % 60;

        my $out = $hh ? "${hh}h" : "";
        $out .= sprintf("%02d:%02d", $mm, $ss);
        return $out;
}
298
# Walk every host, every backup of that host and every share of that
# backup, importing the shares that are not in the database yet.
foreach my $hkey (keys %{$hosts}) {

        my $info = $hosts->{$hkey};
        my $hostname = $info->{'host'} || die "can't find host for $hkey";

        # look the host up, inserting it when it is not known yet
        $sth->{hosts_by_name}->execute($hostname);
        my @known = $sth->{hosts_by_name}->fetchrow_array();
        if (@known) {
                ($hostID) = @known;
        } else {
                $sth->{insert_hosts}->execute($hostname, $info->{'ip'});
                $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
        }

        print "host ".$hostname.": ";

        # get backups for a host
        my @backups = $bpc->BackupInfoRead($hostname);
        my $total = scalar @backups;
        print  "$total increments\n";

        # the cache of already handled files starts empty for every host
        $beenThere = {};

        my $seen = 0;
        foreach my $backup (@backups) {

                $seen++;
                # -m limits how many increments are imported per host
                last if ($opt{m} && $seen > $opt{m});

                my $backupNum = $backup->{'num'};

                printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
                        $hostname,
                        $seen, $total, $backupNum,
                        $backup->{type} || '?',
                        $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
                        strftime($t_fmt,localtime($backup->{startTime})),
                        fmt_time($backup->{endTime} - $backup->{startTime})
                );

                my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);
                foreach my $share ($files->shareList($backupNum)) {

                        my $share_start = time();

                        $shareID = getShareID($share, $hostID, $hostname);

                        # skip if already in database!
                        $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
                        my ($count) = $sth->{backups_count}->fetchrow_array();
                        next if ($count > 0);

                        # dump some log
                        print strftime($t_fmt,localtime())," ", $share;

                        my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

                        $sth->{insert_backups}->execute(
                                $hostID,
                                $backupNum,
                                $backup->{'endTime'},
                                $backup->{'type'},
                                $shareID,
                                $size,
                        );

                        print " commit";
                        $dbh->commit();

                        # never 0, it is used as a divisor for the rate below
                        my $dur = (time() - $share_start) || 1;
                        printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                                $nf, $f, $nd, $d,
                                ($size / 1024 / 1024),
                                ( ($f+$d) / $dur ),
                                fmt_time($dur)
                        );
                }

        }
}

# drop the prepared statements, then finish the database session
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;
388
# Return the database id of a share, inserting the share first if needed.
#
#   $share    - share name as reported for the backup
#   $hostID   - database id of the host the share belongs to
#   $hostname - host name, used to build the stored "host/share" string
#
# The sub used to be declared with an empty prototype "()", which claims it
# takes no arguments at all; the prototype is dropped because it always
# takes three.
sub getShareID {

        my ($share, $hostID, $hostname) = @_;

        $sth->{share_id} ||= $dbh->prepare(qq{
                SELECT ID FROM shares WHERE hostID=? AND name=?
        });

        $sth->{share_id}->execute($hostID,$share);

        my ($id) = $sth->{share_id}->fetchrow_array();

        return $id if (defined($id));

        # not known yet: insert it
        $sth->{insert_share} ||= $dbh->prepare(qq{
                INSERT INTO shares 
                        (hostID,name,share,localpath) 
                VALUES (?,?,?,?)
        });

        # "host/share" with runs of slashes collapsed into one
        my $drop_down = $hostname . '/' . $share;
        $drop_down =~ s#//+#/#g;

        # localpath is stored as NULL
        $sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
        return $dbh->last_insert_id(undef,undef,'shares',undef);
}
415
# Check whether a file is already recorded, inserting it when it is not.
#
#   $key  - cache key identifying the file within the current host
#   @data - the insert_files bind values, in order:
#           shareID, backupNum, name, path, date, type, size
#
# Returns the cached value when $key was handled before.  Otherwise returns
# 1 if a row with the same shareID, path, date and size exists in the files
# table, or 0 after inserting @data as a new row.  Either way $key is
# remembered in $beenThere.
sub found_in_db {

        my ($key, @data) = @_;

        my ($shareID, undef, undef, $path, $date, undef, $size) = @data;

        return $beenThere->{$key} if (defined($beenThere->{$key}));

        $sth->{file_in_db} ||= $dbh->prepare(qq{
                SELECT 1 FROM files
                WHERE shareID = ? and
                        path = ? and 
                        date = ? and
                        size = ?
                LIMIT 1
        });

        my @param = ($shareID,$path,$date,$size);
        $sth->{file_in_db}->execute(@param);

        # Fetch the row instead of asking for ->rows: the row count of a
        # SELECT is not dependable before its rows have been fetched.
        my ($hit) = $sth->{file_in_db}->fetchrow_array();
        $sth->{file_in_db}->finish();
        my $rows = $hit ? 1 : 0;

        print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

        $beenThere->{$key}++;

        $sth->{'insert_files'}->execute(@data) unless ($rows);
        return $rows;
}
444
####################################################
# Recurse through the directory structure of one   #
# share of one backup, recording every entry.      #
####################################################
#
#   $bpc, $hostname - only used for debug output here and passed along
#   $files          - object providing dirAttrib() for the backups
#   $backupNum      - backup number being imported
#   $share          - share name
#   $dir            - directory to list ("" for the top of the share)
#   $shareID        - database id of the share
#
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size): the number
# of files and directories seen below $dir, how many of each were new, and
# the summed size of the new entries.
#
# The sub used to carry the prototype ($$$$$$$$) -- eight scalars -- while
# every call passes seven arguments; the prototype is dropped.
sub recurseDir {

        my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

        print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

        my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

        # sub-directories found in $dir, descended into after the listing
        my @stack;

        print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
        my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

        # first, add all the entries in current directory
        foreach my $path_key (keys %{$filesInBackup}) {
                my $attr = $filesInBackup->{$path_key};
                print STDERR "# file ",Dumper($attr),"\n" if ($debug >= 3);

                my $is_dir = ($attr->{'type'} == BPC_FTYPE_DIR);

                # bind values for the insert_files statement
                my @data = (
                        $shareID,
                        $backupNum,
                        $path_key,
                        $attr->{'relPath'},
                        $attr->{'mtime'},
                        $attr->{'type'},
                        $attr->{'size'}
                );

                my $key = join(" ", (
                        $shareID,
                        $dir,
                        $path_key,
                        $attr->{'mtime'},
                        $attr->{'size'}
                ));

                # an entry counts as new when it is neither cached nor
                # found in the database (found_in_db inserts it then)
                if (! defined($beenThere->{$key}) && ! found_in_db($key, @data)) {
                        print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

                        if ($is_dir) {
                                $new_dirs++;
                                print STDERR " dir\n" if ($debug >= 2);
                        } else {
                                $new_files++;
                                print STDERR " file\n" if ($debug >= 2);
                        }
                        $size += $attr->{'size'} || 0;
                }

                if ($is_dir) {
                        $nr_dirs++;

                        my $full_path = $dir . '/' . $path_key;
                        push @stack, $full_path;
                        print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
                } else {
                        $nr_files++;
                }
        }

        print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

        # then descend into the sub-directories and add up their counts
        while (@stack) {
                my $subdir = shift @stack;
                my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
                print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
                $nr_files += $f;
                $new_files += $nf;
                $nr_dirs += $d;
                $new_dirs += $nd;
                $size += $s;
        }

        return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
532