added header_first to WebPAC::Input::CSV
[webpac2] / sql / pgbits.pl
1 #!/usr/bin/perl -w
2
3 use File::Find;
4 use File::Slurp;
5 use Class::DBI::Loader;
6 #use Class::DBI::AbstractSearch;
7
# Script-wide configuration: database connection settings plus the
# attributes of the top-level catalog record under which every imported
# issue is filed.
my $self = {
	debug => 0,
	dsn => 'dbi:Pg:dbname=webpac2',
	user => 'dpavlin',
	passwd => '',
	# This hash is handed as-is to find_or_create on topics_webarchive,
	# so its keys are the column values of the catalog row.
	catalog => {
		name	=> 'PostgreSQL General Bits',
		# Was 'PgGeneratBits' (typo); it now matches the directory that
		# is actually scanned by find() at the bottom of this script.
		# NOTE(review): a catalog row stored under the old spelling will
		# no longer be found -- update it before re-running the import.
		path	=> '/rest/references/PgGeneralBits/bits',
		uri	=> 'http://www.varlena.com/varlena/GeneralBits/archive.php',
		type	=> 'pgbits',
	},
};
20
# Connect to the database and let Class::DBI::Loader generate the table
# classes inside the WebPAC::Input::PgBits::CDBI namespace.
# Options left disabled on purpose:
#	additional_classes	=> qw/Class::DBI::AbstractSearch/,
#	additional_base_classes	=> qw/My::Stuff/,
my %loader_options = (
	debug		=> $self->{debug},
	dsn		=> $self->{dsn},
	user		=> $self->{user},
	password	=> $self->{passwd},
	namespace	=> "WebPAC::Input::PgBits::CDBI",
	relationships	=> 1,
);
my $l = Class::DBI::Loader->new(%loader_options);

# Top-level catalog record; each issue row created later uses its id as
# parent_id.
my $archive_class = $l->find_class('topics_webarchive');
my $top = $archive_class->find_or_create( $self->{catalog} );
$top->dbi_commit;
34
# Import a single issue page into the database.
#
# Reads the HTML file, pulls the issue date and number out of the
# "ISSUE Number/Date" marker, and calls find_or_create on topics_pgbits
# for the issue (with parent_id set to $top). Each article delimited by
# the IKEY / MYTITLE / ITITLE / IDATE / ICONT markers is then stored via
# find_or_create on items_pgbits and linked to the issue through
# item_topics.
#
# Parameters:
#	$file - path of the HTML page to import; dies when it is missing.
# Returns nothing. Warns and returns early if the page carries no
# recognisable issue header.
sub issue {
	my $file = shift || die "issue() called without a file name";

	my $html = read_file($file);

	# List-context match: the captures come back as (date, number), or as
	# an empty list when the header is not present.
	my ($issue_date, $issue_no) =
		$html =~ m#<!-- ISSUE Number/Date -->.+?(\d+-\w+-\d\d\d\d)\s+Issue:\s+(\d+)#s;

	unless (defined $issue_no) {
		warn "can't find issue number and date in $file, skipping\n";
		return;
	}

	print "## issue $issue_no on $issue_date [$file]\n";

	# Lexical on purpose: this used to be an undeclared package global.
	my $issue = $l->find_class('topics_pgbits')->find_or_create(
		name => "issue $issue_no",
		date => $issue_date,
		path => $file,
		issue => $issue_no,
		type => 'pgbits',
		parent_id => $top->id,
	);
	$issue->dbi_commit;

	# Every successful substitution removes everything up to and including
	# one article from the front of $html, so the loop ends once no
	# further article marker is left.
	# NOTE(review): the ([^<]+) captures are greedy, so the \s* that
	# follows them cannot strip trailing whitespace from the titles and
	# date. Left unchanged because trimming would alter the values that
	# find_or_create matches against.
	while ($html =~ s#^.*?<!-- IKEY="([^"]+)" -->.+?<MYTITLE>\s*([^<]+)\s*</MYTITLE>.+?<ITITLE>\s*([^<]+)\s*</ITITLE>.+?<IDATE>\s*([^<]+)\s*</IDATE>.+?</TABLE>\s*(.+?)\s*<ICONT>\s*(.+?)\s*</ICONT>##si) {
		# Copy the captures right away, before anything else can run
		# another regex and clobber them.
		my ($ikey, $mytitle, $ititle, $idate, $body, $contributors)
			= ($1, $2, $3, $4, $5, $6);

		my $row = {
			name => $mytitle . ( $ititle ? " :: $ititle" : ""),

			ikey => $ikey,
			mytitle => $mytitle,
			ititle => $ititle,
			date => $idate,
			html => $body,
			contributors => $contributors,

			type => 'pgbits',
		};

		print $row->{name}," ", $row->{date},"\n";
		my $article = $l->find_class('items_pgbits')->find_or_create( $row );
		$article->dbi_commit;

		# Link the article to the issue it appeared in.
		$l->find_class('item_topics')->find_or_create(
			topic_id => $issue->id,
			item_id => $article->id,
		)->dbi_commit;
	}
}
87
# Walk the archive directory (following symlinks) and import every file
# that looks like an issue page.
my $import_if_issue_page = sub {
	my $candidate = $File::Find::name;

	# Only .php files are considered (extension matched case-insensitively).
	return if $candidate !~ m#\.php$#i;

	# Skip names with digits followed by "po." or "es."
	# NOTE(review): presumably translated editions of an issue -- confirm.
	return if $candidate =~ m#\d+(?:po|es)\.#;

	issue($candidate);
};

find(
	{ wanted => $import_if_issue_page, follow => 1 },
	'/rest/references/PgGeneralBits/bits/',
);