experimental parser for PgBits
[webpac2] / sql / pgbits.pl
1 #!/usr/bin/perl -w
2
3 use File::Find;
4 use File::Slurp;
5 use Class::DBI::Loader;
6 #use Class::DBI::AbstractSearch;
7
# Script-wide settings: database connection parameters plus the attributes
# of the catalog row that every parsed issue gets attached to.
my $self = {
        debug => 0,
        dsn => 'dbi:Pg:dbname=webpac2',
        user => 'dpavlin',
        passwd => '',
        # Passed as-is to find_or_create() on the catalog_webarchive class.
        catalog => {
                # Same directory that the find() call at the bottom of this
                # script walks (was misspelled "PgGeneratBits").
                # NOTE(review): a catalog row already stored under the old
                # spelling will no longer match find_or_create -- update it.
                path => '/rest/references/PgGeneralBits/bits',
                title   => 'PostgreSQL General Bits',
                uri     => 'http://www.varlena.com/varlena/GeneralBits/archive.php',
        },
};
19
# Options handed to Class::DBI::Loader; connection details come from $self.
my %loader_options = (
        namespace       => "WebPAC::Input::PgBits::CDBI",
        relationships   => 1,
        debug           => $self->{debug},
        dsn             => $self->{dsn},
        user            => $self->{user},
        password        => $self->{passwd},
        # Currently disabled loader options:
        #   additional_classes      => qw/Class::DBI::AbstractSearch/,
        #   additional_base_classes => qw/My::Stuff/,
);

# Loader object used everywhere below to look up the class for a table name.
my $l = Class::DBI::Loader->new(%loader_options);

# Fetch (or insert) the catalog row described in $self->{catalog}, then commit.
my $this_catalog = $l->find_class('catalog_webarchive')
        ->find_or_create( $self->{catalog} );
$this_catalog->dbi_commit;
33
# issue($file)
#
# Parse one archived General Bits page and store it in the database:
#   * one entries_pgbits row for the issue itself,
#   * one catalog_entry row linking that entry to $this_catalog,
#   * one items_pgbits row for every item found in the page.
# Every row goes through find_or_create followed by dbi_commit.
#
# Dies when called without a file name.  Warns and returns without touching
# the database when the page has no recognisable issue number/date header.
sub issue {
        my ($file) = @_;
        die "issue: no file name given\n" unless $file;

        my $html = read_file($file);

        my ($issue_no, $issue_date) = _parse_issue_header($html);
        unless (defined $issue_no) {
                warn "can't find issue number and date in $file, skipping\n";
                return;
        }

        print "## issue $issue_no on $issue_date [$file]\n";

        my $this_entry = $l->find_class('entries_pgbits')->find_or_create(
                date => $issue_date,
                issue => $issue_no,
                path => $file,
                title => $self->{catalog}->{title} . " :: $issue_no",
        );
        $this_entry->dbi_commit;

        $l->find_class('catalog_entry')->find_or_create(
                catalog_id => $this_catalog->id,
                entry_id => $this_entry->id,
                e_type => 'pgbits',
        )->dbi_commit;

        my $items_class = $l->find_class('items_pgbits');

        for my $row ( _parse_items($html) ) {
                # Attach the parsed item to the entry created above.
                $row->{entry_id} = $this_entry->id;
                $row->{i_type}   = 'pgbits';

                print $row->{title}," ", $row->{date},"\n";
                $items_class->find_or_create( $row )->dbi_commit;
        }

        return;
}

# Pull the issue number and date out of the "ISSUE Number/Date" marker.
# Returns ($issue_no, $issue_date), or an empty list if the marker is missing.
sub _parse_issue_header {
        my ($html) = @_;

        return unless $html =~ m#<!-- ISSUE Number/Date -->.+?(\d+-\w+-\d\d\d\d)\s+Issue:\s+(\d+)#s;

        # The date comes first in the page, the number second.
        return ($2, $1);
}

# Collect every item (IKEY marker through the closing </ICONT>) of a page.
# Returns a list of hash refs with the keys title, ikey, mytitle, ititle,
# date, html and contributors, in document order.
sub _parse_items {
        my ($html) = @_;

        my @items;

        # Scalar-context //g resumes after the previous match, so the page is
        # scanned once without rewriting $html on every iteration.
        # The [^<]+? captures are lazy on purpose: a greedy [^<]+ would also
        # swallow the whitespace in front of the closing tag and leave nothing
        # for the following \s* to trim.
        while ($html =~ m{
                <!--[ ]IKEY="([^"]+)"[ ]-->             .+?
                <MYTITLE> \s* ([^<]+?) \s* </MYTITLE>   .+?
                <ITITLE>  \s* ([^<]+?) \s* </ITITLE>    .+?
                <IDATE>   \s* ([^<]+?) \s* </IDATE>     .+?
                </TABLE>  \s* (.+?)    \s*
                <ICONT>   \s* (.+?)    \s* </ICONT>
        }gsix) {
                # Copy the captures right away, before any other regex runs.
                my ($ikey, $mytitle, $ititle, $idate, $body, $contributors)
                        = ($1, $2, $3, $4, $5, $6);

                push @items, {
                        title => $ikey . ( $mytitle ? " :: $mytitle" : "" ),

                        ikey => $ikey,
                        mytitle => $mytitle,
                        ititle => $ititle,
                        date => $idate,
                        html => $body,
                        contributors => $contributors,
                };
        }

        return @items;
}
84
# Walk the archive directory (following symlinks) and feed every issue page
# to issue().
find(
        {
                follow => 1,
                wanted => sub {
                        my $candidate = $File::Find::name;

                        # Only .php files are processed ...
                        return if $candidate !~ m#\.php$#i;
                        # ... and names with a digit run followed by "po." or
                        # "es." are skipped.
                        return if $candidate =~ m#\d+(?:po|es)\.#;

                        issue($candidate);
                },
        },
        '/rest/references/PgGeneralBits/bits/'
);