added HyperEstraierIndex configuration option
[BackupPC.git] / bin / BackupPC_updatedb
1 #!/usr/local/bin/perl -w
2
3 use strict;
4 use lib "__INSTALLDIR__/lib";
5
6 use DBI;
7 use BackupPC::Lib;
8 use BackupPC::View;
9 use Data::Dumper;
10 use Getopt::Std;
11 use Time::HiRes qw/time/;
12 use File::Pid;
13 use POSIX qw/strftime/;
14
15 use constant BPC_FTYPE_DIR => 5;
16
# Verbosity of diagnostic output; overridden by the -v option.
my $debug = 0;
# Autoflush the selected output handle so progress text shows up immediately.
$|=1;

# Start of the run; used for the "total duration" line printed at the very end.
my $start_t = time();

# Refuse to run twice at once: a pid file records the active process.
my $pidfile = File::Pid->new;

if (my $pid = $pidfile->running ) {
        die "$0 already running: $pid\n";
} elsif ($pidfile->pid ne $$) {
        # running() reported nothing alive and pid() is not ours: discard the
        # existing pid file and start over with a fresh object.
        $pidfile->remove;
        $pidfile = File::Pid->new;
}
$pidfile->write;
print STDERR "$0 using pid ",$pidfile->pid," file ",$pidfile->file,"\n";

# strftime() layout used for timestamps in progress output.
my $t_fmt = '%Y-%m-%d %H:%M:%S';

my $hosts;
my $bpc = BackupPC::Lib->new || die;
my %Conf = $bpc->Conf();
my $TopDir = $bpc->TopDir();
# Cache of file keys already handled; reset for every host and consulted
# by found_in_db() and recurseDir().
my $beenThere = {};

# Search database settings from the BackupPC configuration.
my $dsn = $Conf{SearchDSN} || die "Need SearchDSN in config.pl\n";
my $user = $Conf{SearchUser} || '';
# Index location, later appended to $TopDir; only consulted by the -i branch.
my $index_path = $Conf{HyperEstraierIndex};

# Transactions are explicit (AutoCommit off); DBI errors raise exceptions.
my $dbh = DBI->connect($dsn, $user, "", { RaiseError => 1, AutoCommit => 0 });
46
# Command line switches as filled in by getopts(); -m and -v take a value.
my %opt;

if ( !getopts("cdm:v:i", \%opt ) ) {
        # The usage line shows -v with its argument only: the "v:" spec above
        # means a bare -v is never accepted on its own.
        print STDERR <<EOF;
usage: $0 [-c|-d] [-m num] [-v level] [-i]

Options:
        -c      create database on first use
        -d      delete database before import
        -m num  import just num increments for one host
        -v level set verbosity (debug) level (default $debug)
        -i      update HyperEstraier full text index
EOF
        exit 1;
}

# A level of 0 is the default already, so a false value needs no handling.
if ($opt{v}) {
        print "Debug level at $opt{v}\n";
        $debug = $opt{v};
}
67
## update index ##
# With -i, copy every file row joined with its host, share and backup into
# the HyperEstraier full text index, then exit without importing anything.
if ($opt{i}) {

        # Fail with a clear message rather than building a path from undef.
        die "Need HyperEstraierIndex in config.pl\n" unless ($index_path);

        print "updating HyperEstraier files ";

        my $sth = $dbh->prepare(qq{
                SELECT
                        files.id                        AS fid,
                        hosts.name                      AS hname,
                        shares.name                     AS sname,
                        shares.share                    AS sharename,
                        files.backupNum                 AS backupNum,
                        files.name                      AS filename,
                        files.path                      AS filepath,
                        files.date                      AS date,
                        files.type                      AS filetype,
                        files.size                      AS size,
                        files.shareid                   AS shareid
                FROM files
                        INNER JOIN shares       ON files.shareID=shares.ID
                        INNER JOIN hosts        ON hosts.ID = shares.hostID
                        INNER JOIN backups      ON backups.num = files.backupNum and backups.hostID = hosts.ID AND backups.shareID = shares.ID
        });

        $sth->execute();

        my $rows = $sth->rows;

        # Number of documents per progress marker. Clamped to at least 1:
        # with fewer than 15 rows the quotient is 0, and both the division
        # and the modulus below would die on a zero divisor.
        my $dot = int($rows / 15);
        $dot = 1 if ($dot < 1);

        print $rows, " files ($dot/#) ";

        # Format epoch seconds as a local-time ISO 8601 timestamp
        # (YYYY-MM-DDTHH:MM:SS). Returns nothing for a false argument.
        # NOTE(review): uses strftime() instead of BackupPC::Lib::timeStamp();
        # the library is used through $bpc-> methods elsewhere, so a plain
        # function call would hand the epoch over as the invocant - confirm
        # against BackupPC::Lib.
        sub fmt_date {
                my $t = shift || return;
                return strftime('%Y-%m-%dT%H:%M:%S', localtime($t));
        }

        my $i = 0;
        # Countdown value printed at each progress marker.
        my $max = int($rows / $dot);

        $index_path = $TopDir . '/' . $index_path;
        $index_path =~ s#//#/#g;

        print "index $index_path...";
        # "use" is processed at compile time, so the module is loaded even
        # when -i is not given.
        use HyperEstraier;
        my $db = HyperEstraier::Database->new();
        $db->open($index_path, $HyperEstraier::Database::DBWRITER | $HyperEstraier::Database::DBCREAT);

        # NAME_lc makes the hash keys lower case whatever case the driver
        # reports for the column aliases, so mixed-case names such as
        # backupNum can always be found.
        while (my $row = $sth->fetchrow_hashref('NAME_lc')) {

                # create a document object
                my $doc = HyperEstraier::Document->new;

                # add attributes to the document object
                $doc->add_attr('@uri', 'file:///' . $row->{'fid'});

                foreach my $c (qw/fid hname sname sharename backupNum filename filepath shareid/) {
                        my $value = $row->{lc $c};
                        # defined/non-empty instead of truthiness, so that a
                        # legitimate 0 (for example backup number 0) is kept
                        $doc->add_attr($c, $value) if (defined($value) && $value ne '');
                }

                # fmt_date() yields nothing for a missing date; skip the
                # attribute instead of calling add_attr() without a value
                my $date = fmt_date($row->{'date'});
                $doc->add_attr('date', $date) if (defined($date));

                # add the body text to the document object
                my $path = $row->{'filepath'};
                $doc->add_text($path);
                # hidden text is the same path with a space after every character
                $path =~ s/(.)/$1 /g;
                $doc->add_hidden_text($path);

                print STDERR $doc->dump_draft,"\n" if ($debug > 1);

                # register the document object to the database
                $db->put_doc($doc, $HyperEstraier::Database::PDCLEAN);

                $i++;
                if ($i % $dot == 0) {
                        print "$max ";
                        $max--;
                }

        }

        print "sync";
        $db->sync();
        print " close\n";
        $db->close();

        # Nothing was modified here; end the transaction explicitly before
        # disconnecting, and clean up the pid file like the normal exit path.
        $dbh->rollback();
        $dbh->disconnect();
        $pidfile->remove;

        exit;
}
157
###################################create tables############################

# With -c, create the search schema (hosts, shares, backups, dvds, files)
# and its indexes, then commit.
if ($opt{c}) {
        # Create an index described by a "table_column[_unique]" string.
        # The index is named after the whole string with commas turned into
        # underscores; a third "_"-separated part is placed between
        # "create" and "index" in the statement.
        sub do_index {
                my $index = shift || return;
                my ($table,$col,$unique) = split(/_/, $index);
                $unique ||= '';
                $index =~ s/,/_/g;
                $dbh->do(qq{ create $unique index $index on $table($col) });
        }

        print "creating tables...\n";

        $dbh->do(qq{
                create table hosts (
                        ID      SERIAL          PRIMARY KEY,
                        name    VARCHAR(30)     NOT NULL,
                        IP      VARCHAR(15)
                );
        });

        $dbh->do(qq{
                create table shares (
                        ID      SERIAL          PRIMARY KEY,
                        hostID  INTEGER         NOT NULL references hosts(id),
                        name    VARCHAR(30)     NOT NULL,
                        share   VARCHAR(200)    NOT NULL,
                        localpath VARCHAR(200)
                );
        });

        # size is BIGINT: it holds the byte total of a whole share backup,
        # which does not fit a 32-bit INTEGER once it passes 2 GiB.
        $dbh->do(qq{
                create table backups (
                        hostID  INTEGER         NOT NULL references hosts(id),
                        num     INTEGER         NOT NULL,
                        date    integer         NOT NULL,
                        type    CHAR(4)         not null,
                        shareID integer         not null references shares(id),
                        size    bigint          not null,
                        PRIMARY KEY(hostID, num, shareID)
                );
        });

        #do_index('backups_hostid,num_unique');

        $dbh->do(qq{
                create table dvds (
                        ID      SERIAL          PRIMARY KEY,
                        num     INTEGER         NOT NULL,
                        name    VARCHAR(255)    NOT NULL,
                        mjesto  VARCHAR(255)
                );
        });

        # size is BIGINT here as well, so single files over 2 GiB can be stored.
        $dbh->do(qq{
                create table files (
                        ID      SERIAL          PRIMARY KEY,
                        shareID INTEGER         NOT NULL references shares(id),
                        backupNum  INTEGER      NOT NULL,
                        name       VARCHAR(255) NOT NULL,
                        path       VARCHAR(255) NOT NULL,
                        date       integer      NOT NULL,
                        type       INTEGER      NOT NULL,
                        size       BIGINT       NOT NULL,
                        dvdid      INTEGER      references dvds(id)
                );
        });

        print "creating indexes:";

        foreach my $index (qw(
                hosts_name
                backups_hostID
                backups_num
                shares_hostID
                shares_name
                files_shareID
                files_path
                files_name
                files_date
                files_size
        )) {
                print " $index";
                do_index($index);
        }
        print "...\n";

        $dbh->commit;

}
248
# With -d, empty every table and commit before the import starts.
if ($opt{d}) {
        # Referencing tables come before the tables they point at.
        my @tables = qw(files dvds backups shares hosts);

        print "deleting ";
        for my $name (@tables) {
                print "$name ";
                $dbh->do(qq{ DELETE FROM $name });
        }
        print " done...\n";

        $dbh->commit;
}
259
#################################INSERT VALUES#############################

# get hosts
$hosts = $bpc->HostInfoRead();

# Ids of the host and share currently being imported.
my $hostID;
my $shareID;

# Hash reference of prepared statement handles, keyed by purpose. The subs
# further down add their own handles to it on first use.
my $sth;

{
        # name => SQL pairs, prepared in exactly this order
        my @statements = (
                insert_hosts => qq{
INSERT INTO hosts (name, IP) VALUES (?,?)
},
                hosts_by_name => qq{
SELECT ID FROM hosts WHERE name=?
},
                backups_count => qq{
SELECT COUNT(*)
FROM backups
WHERE hostID=? AND num=? AND shareid=?
},
                insert_backups => qq{
INSERT INTO backups (hostID, num, date, type, shareid, size)
VALUES (?,?,?,?,?,?)
},
                insert_files => qq{
INSERT INTO files
        (shareID, backupNum, name, path, date, type, size)
        VALUES (?,?,?,?,?,?,?)
},
        );

        while (my ($name, $sql) = splice(@statements, 0, 2)) {
                $sth->{$name} = $dbh->prepare($sql);
        }
}
293
# Format a duration given in seconds as "MM:SS", prefixed with "<hours>h"
# when it lasts an hour or more (for example "1h01:01").
#
# Always returns exactly one string, so the result is safe to use inside
# print/printf argument lists: a zero, missing or negative duration gives
# "00:00". Hours are counted directly and therefore do not wrap at 24;
# fractional seconds are truncated.
sub fmt_time {
        my $t = shift;
        $t = 0 if (!defined($t) || $t < 0);
        $t = int($t);

        my $hh = int($t / 3600);
        my $mm = int(($t % 3600) / 60);
        my $ss = $t % 60;

        my $out = "";
        $out .= "${hh}h" if ($hh);
        $out .= sprintf("%02d:%02d", $mm, $ss);
        return $out;
}
302
# Walk every configured host, register it in the database when it is new,
# and import each share of each backup that is not recorded yet.
for my $host_key (keys %{$hosts}) {

        my $host = $hosts->{$host_key};
        my $hostname = $host->{'host'} || die "can't find host for $host_key";

        # look the host up by name; insert it when no row comes back
        $sth->{hosts_by_name}->execute($hostname);
        my @host_row = $sth->{hosts_by_name}->fetchrow_array();

        if (@host_row) {
                $hostID = $host_row[0];
        } else {
                $sth->{insert_hosts}->execute($hostname, $host->{'ip'});
                $hostID = $dbh->last_insert_id(undef,undef,'hosts',undef);
        }

        print "host ".$hostname.": ";

        # get backups for a host
        my @backups = $bpc->BackupInfoRead($hostname);
        my $incs = scalar @backups;
        print  "$incs increments\n";

        # the seen-files cache starts empty for every host
        $beenThere = {};

        my $inc_nr = 0;

        for my $backup (@backups) {

                $inc_nr++;
                # -m limits how many increments are imported per host
                last if ($opt{m} && $inc_nr > $opt{m});

                my $backupNum = $backup->{'num'};

                printf("%-10s %2d/%-2d #%-2d %s %5s/%5s files (date: %s dur: %s)\n",
                        $hostname,
                        $inc_nr, $incs, $backupNum,
                        $backup->{type} || '?',
                        $backup->{nFilesNew} || '?', $backup->{nFiles} || '?',
                        strftime($t_fmt,localtime($backup->{startTime})),
                        fmt_time($backup->{endTime} - $backup->{startTime})
                );

                my $files = BackupPC::View->new($bpc, $hostname, \@backups, 1);

                for my $share ($files->shareList($backupNum)) {

                        my $t = time();

                        $shareID = getShareID($share, $hostID, $hostname);

                        # skip if already in database!
                        $sth->{backups_count}->execute($hostID, $backupNum, $shareID);
                        my ($count) = $sth->{backups_count}->fetchrow_array();
                        next if ($count > 0);

                        # dump some log
                        print strftime($t_fmt,localtime())," ", $share;

                        my ($f, $nf, $d, $nd, $size) = recurseDir($bpc, $hostname, $files, $backupNum, $share, "", $shareID);

                        $sth->{insert_backups}->execute(
                                $hostID,
                                $backupNum,
                                $backup->{'endTime'},
                                $backup->{'type'},
                                $shareID,
                                $size,
                        );

                        print " commit";
                        $dbh->commit();

                        # never let the rate calculation divide by zero
                        my $dur = (time() - $t) || 1;
                        printf(" %d/%d files %d/%d dirs %0.2f MB [%.2f/s dur: %s]\n",
                                $nf, $f, $nd, $d,
                                ($size / 1024 / 1024),
                                ( ($f+$d) / $dur ),
                                fmt_time($dur)
                        );
                }

        }
}

# Release the statement handles, then commit and close the connection.
undef $sth;
$dbh->commit();
$dbh->disconnect();

print "total duration: ",fmt_time(time() - $start_t),"\n";

$pidfile->remove;
392
# Return the id of the shares row for ($hostID, $share), inserting the row
# first when it does not exist yet.
#
#   $share    - share name, stored in shares.name
#   $hostID   - id of the owning hosts row
#   $hostname - host name, used to build the value stored in shares.share
#
# A new row gets share = "$hostname/$share" with runs of slashes collapsed
# to a single one, and a NULL localpath.
#
# No prototype is declared: the sub takes three arguments, which the former
# empty prototype "()" contradicted.
sub getShareID {

        my ($share, $hostID, $hostname) = @_;

        # statement handles are prepared once and reused on later calls
        $sth->{share_id} ||= $dbh->prepare(qq{
                SELECT ID FROM shares WHERE hostID=? AND name=?
        });

        $sth->{share_id}->execute($hostID,$share);

        my ($id) = $sth->{share_id}->fetchrow_array();

        return $id if (defined($id));

        $sth->{insert_share} ||= $dbh->prepare(qq{
                INSERT INTO shares
                        (hostID,name,share,localpath)
                VALUES (?,?,?,?)
        });

        my $drop_down = $hostname . '/' . $share;
        $drop_down =~ s#//+#/#g;

        $sth->{insert_share}->execute($hostID,$share, $drop_down ,undef);
        return $dbh->last_insert_id(undef,undef,'shares',undef);
}
419
# Check whether a file is already recorded in the files table and insert it
# when it is not.
#
#   $key  - cache key for $beenThere
#   @data - bind values for the insert_files statement, in order:
#           shareID, backupNum, name, path, date, type, size
#
# Returns the cached counter when $key was seen before; otherwise 1 when a
# row with the same shareID, path, date and size exists, and 0 after the
# file has been inserted.
sub found_in_db {

        my ($key, @data) = @_;

        return $beenThere->{$key} if (defined($beenThere->{$key}));

        my ($shareID, $path, $date, $size) = @data[0, 3, 4, 6];

        $sth->{file_in_db} ||= $dbh->prepare(qq{
                SELECT 1 FROM files
                WHERE shareID = ? and
                        path = ? and 
                        date = ? and
                        size = ?
                LIMIT 1
        });

        my @param = ($shareID,$path,$date,$size);
        $sth->{file_in_db}->execute(@param);

        # Fetch the row instead of asking rows(): DBI does not promise a
        # usable row count for SELECT statements before they are fetched.
        my ($hit) = $sth->{file_in_db}->fetchrow_array();
        $sth->{file_in_db}->finish();
        my $rows = $hit ? 1 : 0;

        print STDERR "## found_in_db($shareID,$path,$date,$size) ",( $rows ? '+' : '-' ), join(" ",@param), "\n" if ($debug >= 3);

        $beenThere->{$key}++;

        $sth->{'insert_files'}->execute(@data) unless ($rows);
        return $rows;
}
448
####################################################
# recursing through filesystem structure and       #
# returning flattened files list                   #
####################################################
#
# Import one directory of a backup and, recursively, everything below it.
#
#   $bpc       - BackupPC::Lib object (only passed on to recursive calls)
#   $hostname  - host name (used for debug output and recursion)
#   $files     - view object; its dirAttrib() lists a directory
#   $backupNum - backup number being imported
#   $share     - share name
#   $dir       - directory inside the share, "" for the top level
#   $shareID   - id of the shares row
#
# Returns ($nr_files, $new_files, $nr_dirs, $new_dirs, $size): all files and
# directories seen, how many of them were new to the database, and the
# summed size of the new entries.
#
# No prototype is declared: the former one listed eight scalars while every
# call passes seven.
sub recurseDir {

        my ($bpc, $hostname, $files, $backupNum, $share, $dir, $shareID) = @_;

        print STDERR "\nrecurse($hostname,$backupNum,$share,$dir,$shareID)\n" if ($debug >= 1);

        my ($nr_files, $new_files, $nr_dirs, $new_dirs, $size) = (0,0,0,0,0);

        # sub-directories found in this directory, descended into afterwards
        my @stack;

        print STDERR "# dirAttrib($backupNum, $share, $dir)\n" if ($debug >= 2);
        my $filesInBackup = $files->dirAttrib($backupNum, $share, $dir);

        # first, add all the entries in current directory
        foreach my $path_key (keys %{$filesInBackup}) {
                my $entry = $filesInBackup->{$path_key};
                print STDERR "# file ",Dumper($entry),"\n" if ($debug >= 3);

                # bind values for the insert_files statement
                my @data = (
                        $shareID,
                        $backupNum,
                        $path_key,
                        $entry->{'relPath'},
                        $entry->{'mtime'},
                        $entry->{'type'},
                        $entry->{'size'}
                );

                # identity of the entry in the $beenThere cache
                my $key = join(" ", (
                        $shareID,
                        $dir,
                        $path_key,
                        $entry->{'mtime'},
                        $entry->{'size'}
                ));

                my $is_dir = ($entry->{'type'} == BPC_FTYPE_DIR);

                # Only entries that are neither cached nor present in the
                # database count as new; found_in_db() inserts them.
                if (! defined($beenThere->{$key}) && ! found_in_db($key, @data)) {
                        print STDERR "# key: $key [", $beenThere->{$key},"]" if ($debug >= 2);

                        if ($is_dir) {
                                $new_dirs++;
                                print STDERR " dir\n" if ($debug >= 2);
                        } else {
                                $new_files++;
                                print STDERR " file\n" if ($debug >= 2);
                        }
                        $size += $entry->{'size'} || 0;
                }

                if ($is_dir) {
                        $nr_dirs++;

                        my $full_path = $dir . '/' . $path_key;
                        push @stack, $full_path;
                        print STDERR "### store to stack: $full_path\n" if ($debug >= 3);
                } else {
                        $nr_files++;
                }
        }

        print STDERR "## STACK ",join(", ", @stack),"\n" if ($debug >= 2);

        # then descend into every sub-directory and add up its totals
        while ( my $subdir = shift @stack ) {
                my ($f,$nf,$d,$nd, $s) = recurseDir($bpc, $hostname, $files, $backupNum, $share, $subdir, $shareID);
                print STDERR "# $subdir f: $f nf: $nf d: $d nd: $nd\n" if ($debug >= 1);
                $nr_files += $f;
                $new_files += $nf;
                $nr_dirs += $d;
                $new_dirs += $nd;
                $size += $s;
        }

        return ($nr_files, $new_files, $nr_dirs, $new_dirs, $size);
}
536