experimental parser for PgBits
[webpac2] / sql / pgbits.pl
diff --git a/sql/pgbits.pl b/sql/pgbits.pl
new file mode 100755 (executable)
index 0000000..af9431b
--- /dev/null
@@ -0,0 +1,90 @@
+#!/usr/bin/perl -w
+
+# Import saved "PostgreSQL General Bits" issue pages into the webpac2 database.
+
+use strict;
+
+use File::Find;
+use File::Slurp;
+use Class::DBI::Loader;
+#use Class::DBI::AbstractSearch;
+
+# Script-wide settings: database connection parameters and the column values
+# of the catalog row that every imported issue gets linked to.
+my $self = {
+       debug => 0,
+       dsn => 'dbi:Pg:dbname=webpac2',
+       user => 'dpavlin',
+       passwd => '',
+       catalog => {
+               # must name the same tree that the File::Find walk at the end
+               # of this script scans (was misspelled "PgGeneratBits")
+               path    => '/rest/references/PgGeneralBits/bits',
+               title   => 'PostgreSQL General Bits',
+               uri     => 'http://www.varlena.com/varlena/GeneralBits/archive.php',
+       },
+};
+
+# Loader for the database classes; the rest of the script fetches them with
+# $l->find_class(...), and they live under the namespace given here.
+my $l = Class::DBI::Loader->new(
+       debug           => $self->{'debug'},
+       dsn             => $self->{'dsn'},
+       user            => $self->{'user'},
+       password        => $self->{'passwd'},
+       namespace       => "WebPAC::Input::PgBits::CDBI",
+#      additional_classes      => qw/Class::DBI::AbstractSearch/,
+#      additional_base_classes => qw/My::Stuff/,
+       relationships   => 1,
+);
+
+# Catalog record shared by all issues: looked up by the values above and
+# created if it is not there yet.
+my $this_catalog = $l->find_class('catalog_webarchive')->find_or_create( $self->{catalog} );
+$this_catalog->dbi_commit;
+
+# issue($file)
+#
+# Parse one saved PgBits page and store it in the database:
+#   - one entries_pgbits row for the issue itself,
+#   - one catalog_entry row linking that entry to $this_catalog,
+#   - one items_pgbits row for every item (IKEY section) found in the page.
+# All rows go through find_or_create and are committed one by one.
+#
+# $file - path of the page to read; dies when it is missing or empty.
+#
+# A page without a recognisable issue number and date only produces a
+# warning and is skipped.  Progress is printed to STDOUT.  The return value
+# carries no information.
+sub issue {
+       my $file = shift;
+       die "issue() called without a file name" unless defined $file && length $file;
+
+       my $html = read_file($file);
+
+       # In the page header the date (digits-word-yyyy) precedes the issue number.
+       my ($issue_date, $issue_no) =
+               $html =~ m#<!-- ISSUE Number/Date -->.+?(\d+-\w+-\d\d\d\d)\s+Issue:\s+(\d+)#s;
+
+       unless (defined $issue_no) {
+               warn "can't find issue number and date in $file, skipping\n";
+               return;
+       }
+
+       print "## issue $issue_no on $issue_date [$file]\n";
+
+       my $this_entry = $l->find_class('entries_pgbits')->find_or_create(
+               date => $issue_date,
+               issue => $issue_no,
+               path => $file,
+               title => $self->{catalog}->{title} . " :: $issue_no",
+       );
+       $this_entry->dbi_commit;
+
+       # the same id is needed for the catalog link and for every item row
+       my $entry_id = $this_entry->id;
+
+       $l->find_class('catalog_entry')->find_or_create(
+               catalog_id => $this_catalog->id,
+               entry_id => $entry_id,
+               e_type => 'pgbits',
+       )->dbi_commit;
+
+       # One item of the issue.  The match is anchored at the start of the
+       # string and deleted after each round, so the loop below walks the
+       # page front to back.  The three tag captures are lazy so that the
+       # following \s* really strips trailing whitespace from the values.
+       # (No sigils or braces in the comments inside the pattern: they would
+       # be interpolated or confuse the delimiter.)
+       my $item_re = qr{
+               \A .*?                                          # junk before the item
+               <!--[ ]IKEY="([^"]+)"[ ]-->                     # 1: item key
+               .+? <MYTITLE> \s* ([^<]+?) \s* </MYTITLE>       # 2: my title
+               .+? <ITITLE>  \s* ([^<]+?) \s* </ITITLE>        # 3: item title
+               .+? <IDATE>   \s* ([^<]+?) \s* </IDATE>         # 4: item date
+               .+? </TABLE>  \s* (.+?) \s*                     # 5: item body, HTML
+               <ICONT> \s* (.+?) \s* </ICONT>                  # 6: contributors
+       }six;
+
+       while ($html =~ s/$item_re//) {
+               # copy the captures right away, before anything can clobber them
+               my ($ikey, $mytitle, $ititle, $idate, $body, $contributors)
+                       = ($1, $2, $3, $4, $5, $6);
+
+               my $row = {
+                       title => $ikey . ( $mytitle ? " :: $mytitle" : "" ),
+
+                       ikey => $ikey,
+                       mytitle => $mytitle,
+                       ititle => $ititle,
+                       date => $idate,
+                       html => $body,
+                       contributors => $contributors,
+
+                       entry_id => $entry_id,
+                       i_type => 'pgbits',
+               };
+
+               print $row->{title}," ", $row->{date},"\n";
+               $l->find_class('items_pgbits')->find_or_create( $row )->dbi_commit;
+       }
+
+       return;
+}
+
+# Callback for File::Find: hand every *.php page to issue(), except files
+# whose name contains digits followed by "po." or "es." (presumably
+# translated editions -- verify against the archive layout).
+my $import_page = sub {
+       my $candidate = $File::Find::name;
+
+       return if $candidate !~ m#\.php$#i;
+       return if $candidate =~ m#\d+(?:po|es)\.#;
+
+       issue($candidate);
+};
+
+# Walk the whole archive tree; follow => 1 makes File::Find follow symlinks.
+find(
+       {
+               wanted => $import_page,
+               follow => 1,
+       },
+       '/rest/references/PgGeneralBits/bits/',
+);