ported r248:252 from hidra branch:
authorDobrica Pavlinusic <dpavlin@rot13.org>
Tue, 9 Mar 2004 12:17:05 +0000 (12:17 +0000)
committerDobrica Pavlinusic <dpavlin@rot13.org>
Tue, 9 Mar 2004 12:17:05 +0000 (12:17 +0000)
r248: much improved installation instructions, especially for Debian
      GNU/Linux distributions
r249: changed use of Spreadsheet::ParseExcel and MARC to require/import so
      that dependency on those modules can be resolved at runtime.
r250: finished installation documentation
r251: removing dependency on HTML::Parser would ease installation
r252: smaller eval{} fixes. eval{} logic should really move to
      parse_format.pm

git-svn-id: file:///home/dpavlin/private/svn/webpac/trunk@255 13eb9ef6-21d5-0310-b721-a9d68796d827

INSTALL
TODO
all2xml.pl

diff --git a/INSTALL b/INSTALL
index 0217d75..973f7b5 100644 (file)
--- a/INSTALL
+++ b/INSTALL
@@ -1,4 +1,4 @@
-beta-beta-beta INSTALL INSTRUCTIONS
+INSTALL INSTRUCTIONS
 
 Feel free to contact me via e-mail to dpavlin@rot13.org if those instructions
 don't work for you.
@@ -50,20 +50,21 @@ don't work for you.
                                to get this one from
                                http://savannah.nongnu.org/projects/unac/
        Config::IniFiles
-       DBD::Pg                 or some other DBD driver, you can also
-                               use the one which came with distribution
-                               (e.g. libdbd-pg-perl on Debian)
+       DBD::Pg                 or some other DBD driver like DBD::SQLite
        CGI::Application
        HTML::Pager
        HTML::Template
        HTML::FillInForm
        SWISH
        SWISH::Fork             and of course, swish-e executable
-                               (e.g. swish-e package on Debian)
        XML::Simple
+       Text::Iconv
+       TDB_File
+       HTML::Entities          (part of HTML::Parser)
 
    CPAN shell will also download some more modules to satisfy dependencies.
 
+
    If you plan to use M$ Excel files for import (type=excel), you will need:
 
        Spreadsheet::ParseExcel
@@ -72,26 +73,81 @@ don't work for you.
 
        MARC
 
+
+2.1 Installation on Debian GNU/Linux
+
+   You will need the following packages to get started:
+
+       perl
+       swish-e
+
+   and all additional packages which are dependencies.
+
+   You also don't have to install all CPAN modules manually. Just use
+   the following Debian packages:
+
+       libtext-unaccent-perl
+       libconfig-inifiles-perl
+       libdbd-pg-perl          or some other DBD driver
+       libdbd-sqlite-perl      like DBD::SQLite
+       libhtml-template-perl
+       libxml-simple-perl
+       libtext-iconv-perl
+       tdb-dev                 (for TDB_File module later)
+       libhtml-parser-perl
+
+   and install the following packages by hand from CPAN
+   because they are not part of the Debian distribution:
+
+       CGI::Application
+       HTML::Pager
+       HTML::FillInForm
+       SWISH
+       SWISH::Fork
+
+   and optionally some of those modules:
+
+       Spreadsheet::ParseExcel
+       MARC
+
+   For compilation of OpenIsis in the next step, you will also need the
+   following packages:
+
+       make
+       gcc
+       libc-dev
+
 3. You will need OpenIsis if you are using ISIS as an import format.
    Currently, WebPAC uses OpenIsis 0.9.0 which *HAS TO BE PATCHED*
    with a special patch so that the perl module OpenIsis.pm has a close call
    (because there is a hard limit of 32 ISIS files in OpenIsis.pm, and that
    is too low for our use).
 
-   Get OpenIsis from:
+   You can do that yourself, or if you checked out our subversion repository
+   you will already have the latest OpenIsis in the webpac/openisis/ directory.
+
+   If you want to do it yourself, first get OpenIsis from:
        http://openisis.org/Doc/GetIt
 
-   Get patch for close from:
+   Then get patch for close from:
        http://www.rot13.org/~dpavlin/projects/openisis-0.9.0-perl_close.diff
 
-   Unpack OpenIsis archive and apply patch -p0 to source tree. Type make, and
-   than:
+   Unpack OpenIsis archive and apply patch -p0 to source tree.
+
+   Either way, you now have OpenIsis 0.9.0 with close support for perl. So,
+   first compile the C parts:
+
+       make
 
+   And then compile perl module and install it:
+
+       make perl
        cd perl
-       perl Makefile.PL
-       make
        sudo make install
 
+   Since you need development tools on target machine to compile OpenIsis,
+   you might want to compile it on another machine and just copy perl module.
+
 4. edit global.conf and all2xml.conf to suit your needs. Comments inside
    those files should help get you started.
 
diff --git a/TODO b/TODO
index ae0372e..39a3c26 100644 (file)
--- a/TODO
+++ b/TODO
@@ -57,3 +57,5 @@ should be better way...)
 
 - add support for transcripts of a,o,u umlauts (ae ou ue)
   a -> (a|ae)
+
+- remove dependency on HTML::Parser (HTML::Entities used in index_DBI_cache)
index 8c3c1f7..bc764df 100755 (executable)
@@ -211,8 +211,9 @@ sub data2xml {
                        # placeholder for all repeatable entries for index
 
                        sub chk_eval($) {
-                               my $data = shift || return;
-                               if ($data =~ s/eval{([^}]+)}//) {
+                               my $data = shift;
+                               return if (! defined($data));
+                               if ($data && $data =~ s/\s*eval{([^}]+)}\s*//) {
                                        if (eval "$1") {
                                                return $data;
                                        } else {
@@ -270,7 +271,7 @@ sub data2xml {
                                                                $display = $new_display;
                                                                $cache->{lhash}->{$display} = $new_display;
                                                        } else {
-                                                               print STDERR "WARNING: lookup for '$display' didn't find anything.\n";
+#                                                              print STDERR "WARNING: lookup for '$display' didn't find anything.\n";
                                                                $display = "";
                                                                $cache->{lhash}->{$display} = $null;
                                                        }
@@ -291,16 +292,19 @@ sub data2xml {
                                }
                                # type="swish" ; field for swish
                                if ($swish) {
-                                       my $tmp;
                                        if ($filter && ($s || $se)) {
                                                no strict 'refs';
                                                my $tmp = join(" ",&$filter($swish)) if ($s || $se);
+                                               $swish_data .= $tmp if ($s);
+                                               $swish_exact_data .= "xxbxx $tmp xxexx " if ($se && $tmp ne "");
+
                                        } else {
-                                               $tmp = $swish;
+                                               $swish_data .= $swish if ($s);
+                                               $swish_exact_data .= "xxbxx $swish xxexx " if ($se && $swish ne "");
                                        }
 
                                        $tmp = chk_eval($tmp);
-                                       $swish_data .= $tmp if ($s);
+                                       $swish_data .= $tmp if ($s && $tmp);
                                        $swish_exact_data .= "xxbxx $tmp xxexx " if ($se && $tmp ne "");
                                }
 
@@ -334,7 +338,8 @@ sub data2xml {
                                                no strict 'refs';
                                                $idisplay = &$filter($idisplay);
                                        }
-                                       push @index_data, $idisplay if (! $iterate_by_page);
+                                       $idisplay = chk_eval($idisplay);
+                                       push @index_data, $idisplay if ($idisplay && !$iterate_by_page);
                                }
 
                                # store fields in lookup
@@ -741,8 +746,9 @@ print STDERR "using: $type...\n";
                print STDERR "\n";
 
        } elsif ($type_base eq "excel") {
-               use Spreadsheet::ParseExcel;
-               use Spreadsheet::ParseExcel::Utility qw(int2col);
+               require Spreadsheet::ParseExcel;
+               require Spreadsheet::ParseExcel::Utility;
+               import Spreadsheet::ParseExcel::Utility qw(int2col);
                
                $import2cp = Text::Iconv->new($config->{excel_codepage},$codepage);
                my $excel_file = $cfg -> val($database, 'excel_file') || die "$database doesn't have 'excel_file' defined!";
@@ -796,7 +802,7 @@ print STDERR "using: $type...\n";
                }
        } elsif ($type_base eq "marc") {
 
-               use MARC;
+               require MARC;
                
                $import2cp = Text::Iconv->new($config->{marc_codepage},$codepage);
                my $marc_file = $cfg -> val($database, 'marc_file') || die "$database doesn't have 'marc_file' defined!";