2 # This file is part of Koha.
4 # Koha is free software; you can redistribute it and/or modify it under the
5 # terms of the GNU General Public License as published by the Free Software
6 # Foundation; either version 2 of the License, or (at your option) any later
9 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
10 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
11 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License along with
14 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
15 # Suite 330, Boston, MA 02111-1307 USA
25 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
26 use vars qw($ext_dict $select_all @fields);
32 &get_tag &get_tags &get_tag_rows
36 &delete_tag_rows_by_ids
44 $ext_dict = C4::Context->preference('TagsExternalDictionary');
47 import Data::Dumper qw(:DEFAULT);
48 print STDERR __PACKAGE__ . " external dictionary = " . ($ext_dict||'none') . "\n";
51 require Lingua::Ispell;
52 import Lingua::Ispell qw(spellcheck add_word_lc save_dictionary);
57 $ext_dict and $Lingua::Ispell::path = $ext_dict;
58 $debug and print STDERR "\$Lingua::Ispell::path = $Lingua::Ispell::path\n";
59 @fields = qw(tag_id borrowernumber biblionumber term language date_created);
60 $select_all = "SELECT " . join(',',@fields) . "\n FROM tags_all\n";
63 sub remove_tag ($;$) {
64 my $tag_id = shift or return undef;
65 my $user_id = (@_) ? shift : undef;
66 my $rows = (defined $user_id) ?
67 get_tag_rows({tag_id=>$tag_id, borrowernumber=>$user_id}) :
68 get_tag_rows({tag_id=>$tag_id}) ;
70 (scalar(@$rows) == 1) or return undef; # should never happen (duplicate ids)
71 my $row = shift(@$rows);
72 ($tag_id == $row->{tag_id}) or return 0;
73 my $tags = get_tags({term=>$row->{term}, biblionumber=>$row->{biblionumber}});
74 my $index = shift(@$tags);
75 $debug and print STDERR
76 sprintf "remove_tag: tag_id=>%s, biblionumber=>%s, weight=>%s, weight_total=>%s\n",
77 $row->{tag_id}, $row->{biblionumber}, $index->{weight}, $index->{weight_total};
78 if ($index->{weight} <= 1) {
79 delete_tag_index($row->{term},$row->{biblionumber});
81 decrement_weight($row->{term},$row->{biblionumber});
83 if ($index->{weight_total} <= 1) {
84 delete_tag_approval($row->{term});
86 decrement_weight_total($row->{term});
88 delete_tag_row_by_id($tag_id);
91 sub delete_tag_index ($$) {
93 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_index WHERE term = ? AND biblionumber = ? LIMIT 1");
95 return $sth->rows || 0;
97 sub delete_tag_approval ($) {
99 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_approval WHERE term = ? LIMIT 1");
100 $sth->execute(shift);
101 return $sth->rows || 0;
103 sub delete_tag_row_by_id ($) {
104 (@_) or return undef;
105 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_all WHERE tag_id = ? LIMIT 1");
106 $sth->execute(shift);
107 return $sth->rows || 0;
109 sub delete_tag_rows_by_ids (@) {
110 (@_) or return undef;
113 $i += delete_tag_row_by_id($_);
115 ($i == scalar(@_)) or
116 warn sprintf "delete_tag_rows_by_ids tried %s tag_ids, only succeeded on $i", scalar(@_);
120 sub get_tag_rows ($) {
121 my $hash = shift || {};
122 my @ok_fields = @fields;
123 push @ok_fields, 'limit'; # push the limit! :)
127 foreach my $key (keys %$hash) {
128 $debug and print STDERR "get_tag_rows arg. '$key' = ", $hash->{$key}, "\n";
129 unless (length $key) {
130 carp "Empty argument key to get_tag_rows: ignoring!";
133 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
134 carp "get_tag_rows received unreconized argument key '$key'.";
137 if ($key eq 'limit') {
138 my $val = $hash->{$key};
139 unless ($val =~ /^(\d+,)?\d+$/) {
140 carp "Non-nuerical limit value '$val' ignored!";
143 $limit = " LIMIT $val\n";
145 $wheres .= ($wheres) ? " AND $key = ?\n" : " WHERE $key = ?\n";
146 push @exe_args, $hash->{$key};
149 my $query = $select_all . ($wheres||'') . $limit;
150 $debug and print STDERR "get_tag_rows query:\n $query\n",
151 "get_tag_rows query args: ", join(',', @exe_args), "\n";
152 my $sth = C4::Context->dbh->prepare($query);
154 $sth->execute(@exe_args);
158 return $sth->fetchall_arrayref({});
161 sub get_tags (;$) { # i.e., from tags_index
162 my $hash = shift || {};
163 my @ok_fields = qw(term biblionumber weight limit sort);
168 foreach my $key (keys %$hash) {
169 $debug and print STDERR "get_tags arg. '$key' = ", $hash->{$key}, "\n";
170 unless (length $key) {
171 carp "Empty argument key to get_tags: ignoring!";
174 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
175 carp "get_tags received unreconized argument key '$key'.";
178 if ($key eq 'limit') {
179 my $val = $hash->{$key};
180 unless ($val =~ /^(\d+,)?\d+$/) {
181 carp "Non-nuerical limit value '$val' ignored!";
184 $limit = " LIMIT $val\n";
185 } elsif ($key eq 'sort') {
186 foreach my $by (split /\,/, $hash->{$key}) {
188 $by =~ /^([-+])?(term)/ or
189 $by =~ /^([-+])?(biblionumber)/ or
190 $by =~ /^([-+])?(weight)/
192 carp "get_tags received illegal sort order '$by'";
198 $order = " ORDER BY ";
200 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
204 my $whereval = $hash->{$key};
205 my $longkey = ($key eq 'term') ? 'tags_index.term' : $key;
206 my $op = ($whereval =~ s/^(>=|<=)// or
207 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
208 $wheres .= ($wheres) ? " AND $longkey $op ?\n" : " WHERE $longkey $op ?\n";
209 push @exe_args, $whereval;
213 SELECT tags_index.term as term,biblionumber,weight,weight_total
215 LEFT JOIN tags_approval
216 ON tags_index.term = tags_approval.term
217 " . ($wheres||'') . $order . $limit;
218 $debug and print STDERR "get_tags query:\n $query\n",
219 "get_tags query args: ", join(',', @exe_args), "\n";
220 my $sth = C4::Context->dbh->prepare($query);
222 $sth->execute(@exe_args);
226 return $sth->fetchall_arrayref({});
229 sub get_approval_rows (;$) { # i.e., from tags_approval
230 my $hash = shift || {};
231 my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort);
236 foreach my $key (keys %$hash) {
237 $debug and print STDERR "get_approval_rows arg. '$key' = ", $hash->{$key}, "\n";
238 unless (length $key) {
239 carp "Empty argument key to get_approval_rows: ignoring!";
242 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
243 carp "get_approval_rows received unreconized argument key '$key'.";
246 if ($key eq 'limit') {
247 my $val = $hash->{$key};
248 unless ($val =~ /^(\d+,)?\d+$/) {
249 carp "Non-nuerical limit value '$val' ignored!";
252 $limit = " LIMIT $val\n";
253 } elsif ($key eq 'sort') {
254 foreach my $by (split /\,/, $hash->{$key}) {
256 $by =~ /^([-+])?(term)/ or
257 $by =~ /^([-+])?(biblionumber)/ or
258 $by =~ /^([-+])?(weight_total)/ or
259 $by =~ /^([-+])?(approved(_by)?)/ or
260 $by =~ /^([-+])?(date_approved)/
262 carp "get_approval_rows received illegal sort order '$by'";
268 $order = " ORDER BY " unless $order;
270 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
274 my $whereval = $hash->{$key};
275 my $op = ($whereval =~ s/^(>=|<=)// or
276 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
277 $wheres .= ($wheres) ? " AND $key $op ?\n" : " WHERE $key $op ?\n";
278 push @exe_args, $whereval;
282 SELECT tags_approval.term AS term,
283 tags_approval.approved AS approved,
284 tags_approval.date_approved AS date_approved,
285 tags_approval.approved_by AS approved_by,
286 tags_approval.weight_total AS weight_total,
287 CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
290 ON tags_approval.approved_by = borrowers.borrowernumber ";
291 $query .= ($wheres||'') . $order . $limit;
292 $debug and print STDERR "get_approval_rows query:\n $query\n",
293 "get_approval_rows query args: ", join(',', @exe_args), "\n";
294 my $sth = C4::Context->dbh->prepare($query);
296 $sth->execute(@exe_args);
300 return $sth->fetchall_arrayref({});
303 sub is_approved ($) {
304 my $term = shift or return undef;
305 my $sth = C4::Context->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
306 $sth->execute($term);
307 unless ($sth->rows) {
308 $ext_dict and return (spellcheck($term) ? 0 : 1); # spellcheck returns empty on OK word
314 sub get_tag_index ($;$) {
315 my $term = shift or return undef;
318 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?");
319 $sth->execute($term,shift);
321 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ?");
322 $sth->execute($term);
324 return $sth->fetchrow_hashref;
328 my $operator = shift;
329 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
332 spellcheck($_) or next;
337 my $aref = get_approval_rows({term=>$_});
338 if ($aref and scalar @$aref) {
339 mod_tag_approval($operator,$_,1);
341 add_tag_approval($_,$operator);
346 # note: there is no "unwhitelist" operation because there is no remove for Ispell.
347 # The blacklist regexps should operate "in front of" the whitelist, so if you approve
348 # a term mistakenly, you can still reverse it. But there is no going back to "neutral".
350 my $operator = shift;
351 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
353 my $aref = get_approval_rows({term=>$_});
354 if ($aref and scalar @$aref) {
355 mod_tag_approval($operator,$_,-1);
357 add_tag_approval($_,$operator,-1);
363 my $operator = shift;
364 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
365 my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
366 # my $sth = C4::Context->dbh->prepare($query);
370 my $operator = shift;
371 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
372 my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
373 # my $sth = C4::Context->dbh->prepare($query);
374 # $sth->execute($term);
378 sub add_tag_approval ($;$$) { # or disapproval
379 my $term = shift or return undef;
380 my $query = "SELECT * FROM tags_approval WHERE term = ?";
381 my $sth = C4::Context->dbh->prepare($query);
382 $sth->execute($term);
383 ($sth->rows) and return increment_weight_total($term);
384 my $operator = (@_ ? shift : 0);
386 my $approval = (@_ ? shift : 1); # default is to approve
387 $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
388 $debug and print STDERR "add_tag_approval query:\n$query\nadd_tag_approval args: ($term,$operator,$approval)\n";
389 $sth = C4::Context->dbh->prepare($query);
390 $sth->execute($term,$operator,$approval);
392 $query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
393 $debug and print STDERR "add_tag_approval query:\n$query\nadd_tag_approval args: ($term)\n";
394 $sth = C4::Context->dbh->prepare($query);
395 $sth->execute($term);
400 sub mod_tag_approval ($$$) {
401 my $operator = shift;
402 defined $operator or return undef; # have to test defined to allow =0 (kohaadmin)
403 my $term = shift or return undef;
404 my $approval = (@_ ? shift : 1); # default is to approve
405 my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
406 $debug and print STDERR "mod_tag_approval query:\n$query\nmod_tag_approval args: ($operator,$approval,$term)\n";
407 my $sth = C4::Context->dbh->prepare($query);
408 $sth->execute($operator,$approval,$term);
411 sub add_tag_index ($$;$) {
412 my $term = shift or return undef;
413 my $biblionumber = shift or return undef;
414 my $query = "SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?";
415 my $sth = C4::Context->dbh->prepare($query);
416 $sth->execute($term,$biblionumber);
417 ($sth->rows) and return increment_weight($term,$biblionumber);
418 $query = "INSERT INTO tags_index (term,biblionumber) VALUES (?,?)";
419 $debug and print "add_tag_index query:\n$query\nadd_tag_index args: ($term,$biblionumber)\n";
420 $sth = C4::Context->dbh->prepare($query);
421 $sth->execute($term,$biblionumber);
425 sub get_tag ($) { # by tag_id
426 (@_) or return undef;
427 my $sth = C4::Context->dbh->prepare("$select_all WHERE tag_id = ?");
428 $sth->execute(shift);
429 return $sth->fetchrow_hashref;
432 sub rectify_weights (;$) {
433 my $dbh = C4::Context->dbh;
436 SELECT term,biblionumber,count(*) as count
439 (@_) and $query .= " WHERE term =? ";
440 $query .= " GROUP BY term,biblionumber ";
441 $sth = $dbh->prepare($query);
443 $sth->execute(shift);
447 my $results = $sth->fetchall_arrayref({}) or return undef;
449 foreach (@$results) {
450 _set_weight($_->{count},$_->{term},$_->{biblionumber});
451 $tally{$_->{term}} += $_->{count};
453 foreach (keys %tally) {
454 _set_weight_total($tally{$_},$_);
456 return ($results,\%tally);
459 sub increment_weights ($$) {
460 increment_weight(@_);
461 increment_weight_total(shift);
463 sub decrement_weights ($$) {
464 decrement_weight(@_);
465 decrement_weight_total(shift);
467 sub increment_weight_total ($) {
468 _set_weight_total('weight_total+1',shift);
470 sub increment_weight ($$) {
471 _set_weight('weight+1',shift,shift);
473 sub decrement_weight_total ($) {
474 _set_weight_total('weight_total-1',shift);
476 sub decrement_weight ($$) {
477 _set_weight('weight-1',shift,shift);
479 sub _set_weight_total ($$) {
480 my $sth = C4::Context->dbh->prepare("
482 SET weight_total=" . (shift) . "
484 "); # note: CANNOT use "?" for weight_total (see the args above).
485 $sth->execute(shift); # just the term
487 sub _set_weight ($$$) {
488 my $dbh = C4::Context->dbh;
489 my $sth = $dbh->prepare("
491 SET weight=" . (shift) . "
498 sub add_tag ($$;$$) { # biblionumber,term,[borrowernumber,approvernumber]
499 my $biblionumber = shift or return undef;
500 my $term = shift or return undef;
501 my $borrowernumber = (@_) ? shift : 0; # the user, default to kohaadmin
503 # first, add to tags regardless of approaval
504 my $query = "INSERT INTO tags_all
505 (borrowernumber,biblionumber,term,date_created)
506 VALUES (?,?,?,NOW())";
507 $debug and print STDERR "add_tag query:\n $query\n",
508 "add_tag query args: ($borrowernumber,$biblionumber,$term)\n";
509 my $sth = C4::Context->dbh->prepare($query);
510 $sth->execute($borrowernumber,$biblionumber,$term);
513 if (@_) { # if an arg remains, it is the borrowernumber of the approver: tag is pre-approved.
514 my $approver = shift;
515 add_tag_approval($term,$approver);
516 add_tag_index($term,$biblionumber,$approver);
517 } elsif (is_approved($term)) {
518 add_tag_approval($term,1);
519 add_tag_index($term,$biblionumber,1);
521 add_tag_approval($term);
522 add_tag_index($term,$biblionumber);
529 =head1 C4::Tags.pm - Support for user tagging of biblios.
531 More verose debugging messages are sent in the presence of non-zero $ENV{"DEBUG"}.
533 =head2 add_tag(biblionumber,term[,borrowernumber])
535 =head3 TO DO: Add real perldoc
537 =head2 External Dictionary (Ispell) [Recommended]
539 An external dictionary can be used as a means of "pre-populating" and tracking
540 allowed terms based on the widely available Ispell dictionary. This can be the system
541 dictionary or a personal version, but in order to support whitelisting, it must be
542 editable to the process running Koha.
544 To enable, enter the absolute path to the ispell dictionary in the system
545 preference "TagsExternalDictionary".
547 Using external Ispell is recommended for both ease of use and performance. Note that any
548 language version of Ispell can be installed. It is also possible to modify the dictionary
549 at the command line to affect the desired content.
551 =head2 Table Structure
553 The tables used by tags are:
559 Your first thought may be that this looks a little complicated. It is, but only because
560 it has to be. I'll try to explain.
562 tags_all - This table would be all we really need if we didn't care about moderation or
563 performance or tags disappearing when borrowers are removed. Too bad, we do. Otherwise
564 though, it contains all the relevant info about a given tag:
565 tag_id - unique id number for it
566 borrowernumber - user that entered it
567 biblionumber - book record it is attached to
568 term - tag "term" itself
569 language - perhaps used later to influence weighting
570 date_created - date and time it was created
572 tags_approval - Since we need to provide moderation, this table is used to track it. If no
573 external dictionary is used, this table is the sole reference for approval and rejection.
574 With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
575 This could be called an "approved terms" table. See above regarding the External Dictionary.
576 term - tag "term" itself
577 approved - Negative, 0 or positive if tag is rejected, pending or approved.
578 date_approved - date of last action
579 approved_by - staffer performing the last action
580 weight_total - total occurance of term in any biblio by any users
582 tags_index - This table is for performance, because by far the most common operation will
583 be fetching tags for a list of search results. We will have a set of biblios, and we will
584 want ONLY their approved tags and overall weighting. While we could implement a query that
585 would traverse tags_all filtered against tags_approval, the performance implications of
586 trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
587 term - approved term as it appears in tags_approval
588 biblionumber - book record it is attached to
589 weight - number of times tag applied by any user
591 tags_blacklist - TODO
593 So the best way to think about the different tabes is that they are each tailored to a certain
594 use. Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
595 the tag population can continue to grow even if a user is removed, along with the corresponding
600 If you want to auto-populate some tags for debugging, do something like this:
602 mysql> select biblionumber from biblio where title LIKE "%Health%";
633 26 rows in set (0.00 sec)
635 Then, take those numbers and type them into this perl command line:
636 perl -ne 'use C4::Tags qw(get_tags add_tag); use Data::Dumper;chomp; add_tag($_,"health",51,1); print Dumper get_tags({limit=>5,term=>"health",});'