From 2f69a941ccd6c8dd7ffc1b558bedeff0e56681a3 Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Tue, 2 Aug 2005 15:20:44 +0000 Subject: [PATCH] implemented tree structure with topics, where each topic can have multiple items git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@37 07558da8-63fa-0310-ba24-9fe276d99e06 --- sql/Makefile | 2 + sql/mkindex.pl | 8 +-- sql/pgbits.pl | 41 ++++++++-------- sql/schema.sql | 103 ++++++++++++++++----------------------- sql/tree-func.sql | 119 +++++++++++++++++++++++++++++++++++++++++++++ sql/tree-views.sql | 26 ++++++++++ 6 files changed, 214 insertions(+), 85 deletions(-) create mode 100644 sql/tree-func.sql create mode 100644 sql/tree-views.sql diff --git a/sql/Makefile b/sql/Makefile index 581c1ee..1071174 100644 --- a/sql/Makefile +++ b/sql/Makefile @@ -4,8 +4,10 @@ init: dropdb $(db) || true createdb $(db) ./mkindex.pl schema.sql | psql $(db) + psql $(db) < tree-func.sql #test -f data.sql && psql $(db) < data.sql ./pgbits.pl + psql $(db) < tree-views.sql save: pg_dump -c $(db) > $(db).sql diff --git a/sql/mkindex.pl b/sql/mkindex.pl index 7730620..2cb05e6 100755 --- a/sql/mkindex.pl +++ b/sql/mkindex.pl @@ -34,9 +34,9 @@ while (<>) { $out->{inherits}->{$table} = $1; } - if (s/^\s*(\S+)(.+?)references\s+(\S+)\s*\((\S+)\)/\t$1$2/i) { + if (s/^\s*(\S+)(.+?)references\s+(\S+)\s*\((\S+)\)([^,]*)([,\s]*)$/\t$1$2$6/i) { # if (/^\s*(\S+)(.+?)references\s+(\S+)\s*\((\S+)\)/) { - @{ $out->{references}->{$table}->{$1} } = ( $3, $4 ); + @{ $out->{references}->{$table}->{$1} } = ( $3, $4, $5 ); } print "$_\n"; @@ -47,8 +47,8 @@ while (<>) { print STDERR Dumper($out); foreach my $table (keys %{ $out->{inherits} }) { - my $parent = $out->{inherits}->{$table} || die; - my $pk = $out->{table_pk}->{$parent} || die; + my $parent = $out->{inherits}->{$table} || die "$table doesn't inherit anything"; + my $pk = $out->{table_pk}->{$parent} || die "$parent doesn't have primary key"; my $seq = $parent . '_' . $pk . '_seq'; print qq{alter table $table alter column $pk set default nextval('$seq');\n}; } diff --git a/sql/pgbits.pl b/sql/pgbits.pl index ae3b4ec..4ce17a5 100755 --- a/sql/pgbits.pl +++ b/sql/pgbits.pl @@ -11,9 +11,10 @@ my $self = { user => 'dpavlin', passwd => '', catalog => { - path => '/rest/references/PgGeneratBits/bits', - title => 'PostgreSQL General Bits', + name => 'PostgreSQL General Bits', + path => '/rest/references/PgGeneratBits/bits', uri => 'http://www.varlena.com/varlena/GeneralBits/archive.php', + type => 'pgbits', }, }; @@ -28,8 +29,8 @@ my $l = Class::DBI::Loader->new( relationships => 1, ); -my $this_catalog = $l->find_class('catalog_webarchive')->find_or_create( $self->{catalog} ); -$this_catalog->dbi_commit; +my $top = $l->find_class('topics_webarchive')->find_or_create( $self->{catalog} ); +$top->dbi_commit; sub issue { my $file = shift || die; @@ -43,19 +44,16 @@ sub issue { $issue_date = $1; print "## issue $issue_no on $issue_date [$file]\n"; - $this_entry = $l->find_class('entries_pgbits')->find_or_create( + $issue = $l->find_class('topics_pgbits')->find_or_create( + name => "issue $issue_no", date => $issue_date, - issue => $issue_no, path => $file, - title => $self->{catalog}->{title} . " :: $issue_no", + issue => $issue_no, + type => 'pgbits', + parent_id => $top->id, ); - $this_entry->dbi_commit; + $issue->dbi_commit; - $l->find_class('catalog_entry')->find_or_create( - catalog_id => $this_catalog->id, - entry_id => $this_entry->id, - e_type => 'pgbits', - )->dbi_commit; } else { warn "can't find issue number and date in $file, skipping\n"; return; @@ -64,7 +62,7 @@ print "## issue $issue_no on $issue_date [$file]\n"; while($html =~ s#^.*?.+?\s*([^<]+)\s*.+?\s*([^<]+)\s*.+?\s*([^<]+)\s*.+?\s*(.+?)\s*\s*(.+?)\s*##si){ my $row = { - title => $2 . ( $3 ? " :: $3" : ""), + name => $2 . ( $3 ? " :: $3" : ""), ikey => $1, mytitle => $2, @@ -73,12 +71,17 @@ print "## issue $issue_no on $issue_date [$file]\n"; html => $5, contributors => $6, - entry_id => $this_entry->id, - i_type => 'pgbits', + type => 'pgbits', }; - - print $row->{title}," ", $row->{date},"\n"; - $l->find_class('items_pgbits')->find_or_create( $row )->dbi_commit; + + print $row->{name}," ", $row->{date},"\n"; + my $article = $l->find_class('items_pgbits')->find_or_create( $row ); + $article->dbi_commit; + + $l->find_class('item_topics')->find_or_create( + topic_id => $issue->id, + item_id => $article->id, + )->dbi_commit; } } diff --git a/sql/schema.sql b/sql/schema.sql index 70729cd..a026530 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -1,75 +1,66 @@ --- Catalogs -create table catalogs ( - id serial, - title text not null, - path text, - date timestamp not null default now(), - primary key(id) +begin; + +create table item_types( + name text not null, + primary key (name) ); -create table catalog_webarchive ( - uri text not null, -- unique index - last_crawled timestamp, +create table items ( + id SERIAL, + name text not null, + path text, -- index + date timestamp not null default now(), + type text not null references item_types(name), -- index primary key(id) -) inherits (catalogs) ; +); --- Entries in Catalog -create table entries ( - id serial, - title text, - path text, - date timestamp not null default now(), +create table topic_types( + name text not null, + primary key (name) +); + +create table topics ( + id SERIAL, + name text, + path text, + date timestamp not null default now(), + type text not null references topic_types(name), -- index + parent_id integer references topics(id), -- index primary key(id) ); -create table catalog_entry ( - catalog_id int references catalogs(id), - entry_id int references entries(id), - e_type text not null, -- index - primary key (catalog_id, entry_id) +create table item_topics ( + item_id integer references items(id) ON UPDATE CASCADE ON DELETE CASCADE, + topic_id integer references topics(id) ON UPDATE CASCADE ON DELETE CASCADE, + PRIMARY KEY (item_id, topic_id) ); --- Items for each Entry -create table items ( - id serial, - title text, - entry_id int references entries(id), - i_type text not null, - date timestamp not null default now(), +-- create inhertited topics and items + +insert into topic_types values ('webarchive'); +create table topics_webarchive ( + uri text not null, -- unique index + last_crawled timestamp, primary key(id) -); +) inherits (topics) ; -- HyperEstraier support table -create table items_est ( +insert into item_types values ('est'); +create table item_est ( path text, -- unique index uri text not null, -- unique index size int, primary key(id) ) inherits (items) ; --- Tags for Entries -create table tags ( - id serial, - title text, -- index - date timestamp not null default now(), - primary key(id) -); - -create table entry_tag ( - entry_id int references entries(id), - tag_id int references tags(id), - value text not null, - t_type text not null, -- index - date timestamp not null default now(), - primary key (entry_id, tag_id) -); - -- Pg General Bits -create table entries_pgbits ( +insert into topic_types values ('pgbits'); +create table topics_pgbits ( issue int not null, -- unique index primary key(id) -) inherits (entries) ; +) inherits (topics) ; +insert into item_types values ('pgbits'); create table items_pgbits ( mytitle text not null, ititle text not null, @@ -78,15 +69,3 @@ create table items_pgbits ( contributors text, primary key(id) ) inherits (items) ; - --- some views - -create view pgbits_articles as select - issue, - date(entries_pgbits.date) as issue_date, - ititle, - mytitle, - date(items_pgbits.date) as date -from items_pgbits -join entries_pgbits on entry_id = entries_pgbits.id ; - diff --git a/sql/tree-func.sql b/sql/tree-func.sql new file mode 100644 index 0000000..7c62767 --- /dev/null +++ b/sql/tree-func.sql @@ -0,0 +1,119 @@ +-- +-- Delete of a topic: update both items and child topics +-- +-- First, reassign the items of the topic to the topics parent +-- But if there is no parent, raise an error and abort the update +-- The items must be reassigned somewhere else sensibly by hand. +-- +-- Then update the children of the topic to now have their +-- grandparent (or NULL) as their parent. +-- +CREATE OR REPLACE function del_topic() +RETURNS TRIGGER AS $$ +DECLARE + r_rec RECORD; +BEGIN + FOR r_rec IN SELECT item_id, topic_id + FROM item_topics + WHERE topic_id = OLD.topic_id LOOP + IF OLD.parent_id IS NULL THEN + RAISE EXCEPTION + 'Cannot delete topic % until its records are reassigned.', + OLD.topic_name; + ELSE + UPDATE item_topics SET topic_id = OLD.parent_id + WHERE item_id = r_rec.item_id AND topic_id = r_rec.topic_id; + END IF; + END LOOP; + + UPDATE topics SET parent_id=OLD.parent_id + WHERE parent_id = OLD.topic_id; + RETURN OLD; +END; +$$ language 'plpgsql'; + +CREATE TRIGGER del_topic BEFORE DELETE ON topics + FOR EACH ROW EXECUTE PROCEDURE del_topic(); + + +-- show all items and thair topics +CREATE VIEW it AS +SELECT i.id as i_id, i.name as item, it.topic_id, t.parent_id, t.name as topic +FROM + item_topics it JOIN items i on i.id = item_id JOIN topics t on t.id = topic_id; + +-- +-- Create an aggregation of topics to form the tree path for any topic +-- +CREATE OR REPLACE FUNCTION get_topic_path( integer ) +RETURNS TEXT AS $$ +DECLARE + path text; + topic_r RECORD; +BEGIN + SELECT INTO topic_r name, parent_id FROM topics WHERE topics.id = $1; + path := topic_r.name; + IF topic_r.parent_id IS NOT NULL + THEN + path := (SELECT get_topic_path(topic_r.parent_id)) || ' >> ' || path; + END IF; + RETURN path; +END; +$$ LANGUAGE 'plpgsql'; + +-- +-- Re-Slice the previous query to be an ordered set of tuples +-- +DROP TYPE topic_node CASCADE; +CREATE TYPE topic_node AS (tn_id integer, tn_parent integer); + +CREATE or REPLACE FUNCTION get_topic_node( integer ) +RETURNS SETOF topic_node AS $$ +DECLARE + t topic_node; + t2 topic_node; +BEGIN + FOR t IN SELECT id, parent_id + FROM topics + WHERE id = $1 + LOOP + IF t.tn_parent IS NOT NULL + THEN + FOR t2 IN SELECT * FROM get_topic_node(t.tn_parent) LOOP + RETURN NEXT t2; + END LOOP; + END if; + RETURN NEXT t; + END LOOP; + RETURN; +END; +$$ language 'plpgsql'; + + +-- +-- Expand the previous query to work by item. +-- This enables the previous query work w/ a parameter +-- by encasing it in yet another function. +-- It also expands to allow selection of multiple items. +-- Qualify the item in the call to the query using item_path +-- +DROP TYPE item_path CASCADE; +CREATE TYPE item_path AS (item_id integer, topic_id integer); + +CREATE OR REPLACE FUNCTION item_path () +RETURNS SETOF item_path AS $$ +DECLARE + it item_path; + i_r record; + tn topic_node; +BEGIN + FOR i_r IN SELECT item_id, topic_id FROM item_topics LOOP + it.item_id = i_r.item_id; + FOR tn IN SELECT * FROM get_topic_node ( i_r.topic_id ) LOOP + it.topic_id = tn.tn_id; + RETURN NEXT it; + END LOOP; + END LOOP; + RETURN; +END; +$$ LANGUAGE 'plpgsql'; diff --git a/sql/tree-views.sql b/sql/tree-views.sql new file mode 100644 index 0000000..10315f6 --- /dev/null +++ b/sql/tree-views.sql @@ -0,0 +1,26 @@ +-- items in each topic +SELECT item, topic FROM it; + + +-- topic path for each topic +select name, get_topic_path( id ) from topics; + + +-- topic path for each item +select item_id, item_name, get_topic_path(topic_id) from it; + +-- +-- Select the parent nodes of a topic. +-- The parameter to get_topic_node must be hardcoded here. +-- The result tuples are ORDERED +-- +SELECT t.name AS base_topic, tn_id , t2.name +FROM topics t, get_topic_node(11) g, topics t2 +WHERE t.id = 11 AND t2.id = g.tn_id; + + +-- returns items... +SELECT it.item_id, i.name as item, it.topic_id, t.name as topic +FROM items i, topics t, item_path() it +WHERE it.item_id = i.id AND it.topic_id = t.id; + -- 2.20.1