ported r248:252 from hidra branch:
authorDobrica Pavlinusic <dpavlin@rot13.org>
Tue, 9 Mar 2004 12:17:05 +0000 (12:17 +0000)
committerDobrica Pavlinusic <dpavlin@rot13.org>
Tue, 9 Mar 2004 12:17:05 +0000 (12:17 +0000)
r248: much improved installation instructions, especially for Debian
      GNU/Linux distributions
r249: changed use of Spreadsheet::ParseExcel and MARC to require/import so
      that dependency on those modules can be resolved in runtime.
r250: finished installation documentation
r251: removing dependency on HTML::Parser would ease installation
r252: smaller eval{} fixes. eval{} logic should really move to
      parse_format.pm

git-svn-id: file:///home/dpavlin/private/svn/webpac/trunk@255 13eb9ef6-21d5-0310-b721-a9d68796d827

INSTALL
TODO
all2xml.pl

diff --git a/INSTALL b/INSTALL
index 0217d75..973f7b5 100644 (file)
--- a/INSTALL
+++ b/INSTALL
@@ -1,4 +1,4 @@
-beta-beta-beta INSTALL INSTRUCTIONS
+INSTALL INSTRUCTIONS
 
 Feel free to contact me via e-mail to dpavlin@rot13.org if those instructions
 don't work for you.
 
 Feel free to contact me via e-mail to dpavlin@rot13.org if those instructions
 don't work for you.
@@ -50,20 +50,21 @@ don't work for you.
                                to get this one from
                                http://savannah.nongnu.org/projects/unac/
        Config::IniFiles
                                to get this one from
                                http://savannah.nongnu.org/projects/unac/
        Config::IniFiles
-       DBD::Pg                 or some other DBD driver, you can also
-                               use the one which came with distribution
-                               (e.g. libdbd-pg-perl on Debian)
+       DBD::Pg                 or some other DBD driver like DBD::SQLite
        CGI::Application
        HTML::Pager
        HTML::Template
        HTML::FillInForm
        SWISH
        SWISH::Fork             and of course, swish-e executable
        CGI::Application
        HTML::Pager
        HTML::Template
        HTML::FillInForm
        SWISH
        SWISH::Fork             and of course, swish-e executable
-                               (e.g. swish-e package on Debian)
        XML::Simple
        XML::Simple
+       Text::Iconv
+       TDB_File
+       HTML::Entities          (part of HTML::Parser)
 
    CPAN shell will also download some more modules to satisfy dependencies.
 
 
    CPAN shell will also download some more modules to satisfy dependencies.
 
+
    If you plan to use M$ Excel files for import (type=excel), you will need:
 
        Spreadsheet::ParseExcel
    If you plan to use M$ Excel files for import (type=excel), you will need:
 
        Spreadsheet::ParseExcel
@@ -72,26 +73,81 @@ don't work for you.
 
        MARC
 
 
        MARC
 
+
+2.1 Installation on Debian GNU/Linux
+
+   You will need the following packages to get started:
+
+       perl
+       swish-e
+
+   and all additional packages which are dependencies.
+
+   You also don't have to install all CPAN modules manually. Just use
+   following Debian packages:
+
+       libtext-unaccent-perl
+       libconfig-inifiles-perl
+       libdbd-pg-perl          or some other DBD driver
+       libdbd-sqlite-perl      like DBD::SQLite
+       libhtml-template-perl
+       libxml-simple-perl
+       libtext-iconv-perl
+       tdb-dev                 (for TDB_File module later)
+       libhtml-parser-perl
+
+   and install following packages by hand from CPAN
+   because they are not part of Debian distribution:
+
+       CGI::Application
+       HTML::Pager
+       HTML::FillInForm
+       SWISH
+       SWISH::Fork
+
+   and optionally some of those modules:
+
+       Spreadsheet::ParseExcel
+       MARC
+
+   For compilation of OpenIsis in next step, you will also need following
+   packages:
+
+       make
+       gcc
+       libc-dev
+
 3. You will need OpenIsis if you are using ISIS as an import format.
    Currently, WebPAC uses OpenIsis 0.9.0 which *HAVE TO BE PATCHED*
    with special patch so that perl module OpenIsis.pm have close call
    (because there is hard-limit of 32 ISIS files in OpenIsis.pm, and that
    is too low for our use).
 
 3. You will need OpenIsis if you are using ISIS as an import format.
    Currently, WebPAC uses OpenIsis 0.9.0 which *HAVE TO BE PATCHED*
    with special patch so that perl module OpenIsis.pm have close call
    (because there is hard-limit of 32 ISIS files in OpenIsis.pm, and that
    is too low for our use).
 
-   Get OpenIsis from:
+   You can do that yourself, or, if you checked out our subversion repository,
+   you will already have the latest OpenIsis in the webpac/openisis/ directory.
+
+   If you want to do it yourself, first get OpenIsis from:
        http://openisis.org/Doc/GetIt
 
        http://openisis.org/Doc/GetIt
 
-   Get patch for close from:
+   Then get patch for close from:
        http://www.rot13.org/~dpavlin/projects/openisis-0.9.0-perl_close.diff
 
        http://www.rot13.org/~dpavlin/projects/openisis-0.9.0-perl_close.diff
 
-   Unpack OpenIsis archive and apply patch -p0 to source tree. Type make, and
-   than:
+   Unpack OpenIsis archive and apply patch -p0 to source tree.
+
+   Either way, you now have OpenIsis 0.9.0 with close support for perl. So,
+   first compile C parts:
+
+       make
 
 
+   And then compile perl module and install it:
+
+       make perl
        cd perl
        cd perl
-       perl Makefile.PL
-       make
        sudo make install
 
        sudo make install
 
+   Since you need development tools on the target machine to compile OpenIsis,
+   you might want to compile it on another machine and just copy the perl module.
+
 4. edit global.conf and all2xml.conf to suit your needs. Comments inside
    those files should help get you started.
 
 4. edit global.conf and all2xml.conf to suit your needs. Comments inside
    those files should help get you started.
 
diff --git a/TODO b/TODO
index ae0372e..39a3c26 100644 (file)
--- a/TODO
+++ b/TODO
@@ -57,3 +57,5 @@ should be better way...)
 
 - add support for transcripts of a,o,u umlauts (ae ou ue)
   a -> (a|ae)
 
 - add support for transcripts of a,o,u umlauts (ae ou ue)
   a -> (a|ae)
+
+- remove dependency on HTML::Parser (HTML::Entities used in index_DBI_cache)
index 8c3c1f7..bc764df 100755 (executable)
@@ -211,8 +211,9 @@ sub data2xml {
                        # placeholder for all repeatable entries for index
 
                        sub chk_eval($) {
                        # placeholder for all repeatable entries for index
 
                        sub chk_eval($) {
-                               my $data = shift || return;
-                               if ($data =~ s/eval{([^}]+)}//) {
+                               my $data = shift;
+                               return if (! defined($data));
+                               if ($data && $data =~ s/\s*eval{([^}]+)}\s*//) {
                                        if (eval "$1") {
                                                return $data;
                                        } else {
                                        if (eval "$1") {
                                                return $data;
                                        } else {
@@ -270,7 +271,7 @@ sub data2xml {
                                                                $display = $new_display;
                                                                $cache->{lhash}->{$display} = $new_display;
                                                        } else {
                                                                $display = $new_display;
                                                                $cache->{lhash}->{$display} = $new_display;
                                                        } else {
-                                                               print STDERR "WARNING: lookup for '$display' didn't find anything.\n";
+#                                                              print STDERR "WARNING: lookup for '$display' didn't find anything.\n";
                                                                $display = "";
                                                                $cache->{lhash}->{$display} = $null;
                                                        }
                                                                $display = "";
                                                                $cache->{lhash}->{$display} = $null;
                                                        }
@@ -291,16 +292,19 @@ sub data2xml {
                                }
                                # type="swish" ; field for swish
                                if ($swish) {
                                }
                                # type="swish" ; field for swish
                                if ($swish) {
-                                       my $tmp;
                                        if ($filter && ($s || $se)) {
                                                no strict 'refs';
                                                my $tmp = join(" ",&$filter($swish)) if ($s || $se);
                                        if ($filter && ($s || $se)) {
                                                no strict 'refs';
                                                my $tmp = join(" ",&$filter($swish)) if ($s || $se);
+                                               $swish_data .= $tmp if ($s);
+                                               $swish_exact_data .= "xxbxx $tmp xxexx " if ($se && $tmp ne "");
+
                                        } else {
                                        } else {
-                                               $tmp = $swish;
+                                               $swish_data .= $swish if ($s);
+                                               $swish_exact_data .= "xxbxx $swish xxexx " if ($se && $swish ne "");
                                        }
 
                                        $tmp = chk_eval($tmp);
                                        }
 
                                        $tmp = chk_eval($tmp);
-                                       $swish_data .= $tmp if ($s);
+                                       $swish_data .= $tmp if ($s && $tmp);
                                        $swish_exact_data .= "xxbxx $tmp xxexx " if ($se && $tmp ne "");
                                }
 
                                        $swish_exact_data .= "xxbxx $tmp xxexx " if ($se && $tmp ne "");
                                }
 
@@ -334,7 +338,8 @@ sub data2xml {
                                                no strict 'refs';
                                                $idisplay = &$filter($idisplay);
                                        }
                                                no strict 'refs';
                                                $idisplay = &$filter($idisplay);
                                        }
-                                       push @index_data, $idisplay if (! $iterate_by_page);
+                                       $idisplay = chk_eval($idisplay);
+                                       push @index_data, $idisplay if ($idisplay && !$iterate_by_page);
                                }
 
                                # store fields in lookup
                                }
 
                                # store fields in lookup
@@ -741,8 +746,9 @@ print STDERR "using: $type...\n";
                print STDERR "\n";
 
        } elsif ($type_base eq "excel") {
                print STDERR "\n";
 
        } elsif ($type_base eq "excel") {
-               use Spreadsheet::ParseExcel;
-               use Spreadsheet::ParseExcel::Utility qw(int2col);
+               require Spreadsheet::ParseExcel;
+               require Spreadsheet::ParseExcel::Utility;
+               import Spreadsheet::ParseExcel::Utility qw(int2col);
                
                $import2cp = Text::Iconv->new($config->{excel_codepage},$codepage);
                my $excel_file = $cfg -> val($database, 'excel_file') || die "$database doesn't have 'excel_file' defined!";
                
                $import2cp = Text::Iconv->new($config->{excel_codepage},$codepage);
                my $excel_file = $cfg -> val($database, 'excel_file') || die "$database doesn't have 'excel_file' defined!";
@@ -796,7 +802,7 @@ print STDERR "using: $type...\n";
                }
        } elsif ($type_base eq "marc") {
 
                }
        } elsif ($type_base eq "marc") {
 
-               use MARC;
+               require MARC;
                
                $import2cp = Text::Iconv->new($config->{marc_codepage},$codepage);
                my $marc_file = $cfg -> val($database, 'marc_file') || die "$database doesn't have 'marc_file' defined!";
                
                $import2cp = Text::Iconv->new($config->{marc_codepage},$codepage);
                my $marc_file = $cfg -> val($database, 'marc_file') || die "$database doesn't have 'marc_file' defined!";