-# global configuration file
-
-# global options
-[global]
- # define database to use for indexes
- dbi_dbd=Pg
- dbi_dsn=dbname=webpac
- dbi_user=dpavlin
- dbi_passwd=xx
-
# describe your databases here
-[ps-libri]
- # directory for isis databases
- isis_db=/data/isis_data/ps/LIBRI/LIBRI
- # this will use xml file in import_xml/ dir
- type=isis
- # this is arbirary xml added to each record (e.g. Library identifier
- # or something like that
- xml=<type>Knjiga</type>
-#[ge-libri]
-# isis_db=/data/isis_data/ge/LIBRI/LIBRI
+### ISIS ###
+#
+#[ps-libri]
+# # directory for isis databases
+# isis_db=/data/isis_data/ps/LIBRI/LIBRI
+# # this will use xml file in import_xml/ dir
# type=isis
+# # this is arbirary xml added to each record (e.g. Library identifier
+# # or something like that
# xml=<type>Knjiga</type>
+#
+##[ge-libri]
+## isis_db=/data/isis_data/ge/LIBRI/LIBRI
+## type=isis
+## xml=<type>Knjiga</type>
+#
+#
+#[ps-peri]
+# isis_db=/data/isis_data/ps/PERI/PERI
+# type=isis
+# xml=<type>Èasopis</type>
+#
+#[kk-arti]
+# isis_db=/data/isis_data/kk/ARTI/ARTI
+# type=isis
+# xml=<type>Èlanak</type>
+#
+### M$ Excel ###
-
-[ps-peri]
- isis_db=/data/isis_data/ps/PERI/PERI
- type=isis
+[ffzg-journals]
+ type=excel
+ excel_file=/data/isis_data/excel/tekuci_FF.xls
xml=<type>Èasopis</type>
-[kk-arti]
- isis_db=/data/isis_data/kk/ARTI/ARTI
- type=isis
- xml=<type>Èlanak</type>
-
-
return $utf2cp->convert($_[0]);
}
-# decode isis import codepage
-my $isis2cp;
+# decode isis/excel or other import codepage
+my $import2cp;
# outgoing xml must be in UTF-8
my $cp2utf = Text::Iconv->new($codepage,'UTF-8');
-sub isis2xml {
+# mapping between data type and tag which specify
+# format in XML file
+my %type2tag = (
+ 'isis' => 'isis',
+ 'excel' => 'column'
+);
+
+sub data2xml {
use xmlify;
+ my $type = shift @_;
my $row = shift @_;
my $add_xml = shift @_;
my ($swish,$display);
- foreach my $x (@{$config->{indexer}->{$field}->{isis}}) {
+ my $tag = $type2tag{$type} || die "can't find which tag to use for type $type";
+ foreach my $x (@{$config->{indexer}->{$field}->{$tag}}) {
my $format = x($x->{content});
my $delimiter = x($x->{delimiter}) || ' ';
- my $isis_i = 0; # isis repeatable offset
+ my $repeat_off = 0; # repeatable offset
my ($s,$d,$i) = (1,1,0); # swish, display default
$s = 0 if (lc($x->{type}) eq "display");
# init vars so that we go into while...
($swish,$display) = (1,1);
- while ($swish || $display) {
- ($swish,$display) = parse_format($format,$row,$isis_i++,$isis2cp);
-
+ if ($swish || $display) {
+ ($swish,$display) = parse_format($type, $format,$row,$repeat_off++,$import2cp);
# filter="name" ; filter this field through
# filter/[name].pm
my $filter = $x->{filter};
##########################################################################
+# read configuration for this script
my $cfg = new Config::IniFiles( -file => $config_file );
+# read global.conf configuration
+my $cfg_global = new Config::IniFiles( -file => 'global.conf' );
+
# open index
$index = new index_DBI(
- $cfg->val('global', 'dbi_dbd'),
- $cfg->val('global', 'dbi_dsn'),
- $cfg->val('global', 'dbi_user'),
- $cfg->val('global', 'dbi_passwd') || '',
+ $cfg_global->val('global', 'dbi_dbd'),
+ $cfg_global->val('global', 'dbi_dsn'),
+ $cfg_global->val('global', 'dbi_user'),
+ $cfg_global->val('global', 'dbi_passwd') || '',
);
-# delete [global] section to leave just databases sections
-$cfg->DeleteSection('global');
-
foreach my $database ($cfg->Sections) {
- my $isis_db = $cfg -> val($database, 'isis_db') || die "$database doesn't have 'isis_db' defined!";
- my $type = $cfg -> val($database, 'type') || die "$database doesn't have 'type' defined";
+ my $type = lc($cfg -> val($database, 'type')) || die "$database doesn't have 'type' defined";
my $add_xml = $cfg -> val($database, 'xml'); # optional
- $config=XMLin("./import_xml/$type.xml", forcearray => [ 'isis' ], forcecontent => 1);
+print STDERR "reading ./import_xml/$type.xml\n";
- $isis2cp = Text::Iconv->new($config->{isis_codepage},$codepage);
+ $config=XMLin("./import_xml/$type.xml", forcearray => [ $type2tag{$type} ], forcecontent => 1);
- my $db = OpenIsis::open( $isis_db );
- if (0) {
-# # FIX
-# if (! $db ) {
- print STDERR "WARNING: can't open '$isis_db'\n";
- next ;
+ # output current progress indicator
+ my $last_p = 0;
+ sub progress {
+ #return if (! $opts{q}); # FIXME
+ my $current = shift;
+ my $total = shift;
+ my $p = int($current * 100 / $total);
+ if ($p != $last_p) {
+ printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$current,$total,"=" x ($p/2).">", $p );
+ $last_p = $p;
+ }
}
- my $max_rowid = OpenIsis::maxRowid( $db );
+ # now read database
+print STDERR "using: $type...\n";
- print STDERR "Reading database: $isis_db [$max_rowid rows]\n";
+ if ($type eq "isis") {
+ my $isis_db = $cfg -> val($database, 'isis_db') || die "$database doesn't have 'isis_db' defined!";
- my $path = $database; # was $isis_db
+ $import2cp = Text::Iconv->new($config->{isis_codepage},$codepage);
+ my $db = OpenIsis::open( $isis_db );
- my $last_p = 0;
+ my $max_rowid = OpenIsis::maxRowid( $db );
+
+ print STDERR "Reading database: $isis_db [$max_rowid rows]\n";
+
+ my $path = $database;
+
+ for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) {
+ my $row = OpenIsis::read( $db, $row_id );
+ if ($row && $row->{mfn}) {
+
+ progress($row->{mfn}, $max_rowid);
+
+ my $swishpath = $path."#".int($row->{mfn});
+
+ if (my $xml = data2xml($type,$row,$add_xml)) {
+ $xml = $cp2utf->convert($xml);
+ use bytes; # as opposed to chars
+ print "Path-Name: $swishpath\n";
+ print "Content-Length: ".(length($xml)+1)."\n";
+ print "Document-Type: XML\n\n$xml\n";
+ }
+ }
+ }
+ print STDERR "\n";
+
+ } elsif ($type eq "excel") {
+ use Spreadsheet::ParseExcel;
+ use Spreadsheet::ParseExcel::Utility qw(int2col);
+
+ $import2cp = Text::Iconv->new($config->{excel_codepage},$codepage);
+ my $excel_file = $cfg -> val($database, 'excel_file') || die "$database doesn't have 'excel_file' defined!";
-# { my $row_id = 4514;
-# FIX
- for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) {
- my $row = OpenIsis::read( $db, $row_id );
- if ($row && $row->{mfn}) {
- # output current process indicator
- my $p = int($row->{mfn} * 100 / $max_rowid);
- if ($p != $last_p) {
- printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$row->{mfn},$max_rowid,"=" x ($p/2).">", $p ) if (! $opts{q});
- $last_p = $p;
+ my $sheet = x($config->{sheet}) || die "no sheet in $type.xml";
+ my $start_row = x($config->{start_row}) || die "no start_row in $type.xml";
+
+ my $oBook = Spreadsheet::ParseExcel::Workbook->Parse($excel_file) || die "can't open Excel file '$excel_file'";
+
+ my $sheet_nr = 0;
+ foreach my $oWks (@{$oBook->{Worksheet}}) {
+ #print STDERR "-- SHEET $sheet_nr:", $oWks->{Name}, "\n";
+ last if ($oWks->{Name} eq $sheet);
+ $sheet_nr++;
+ }
+
+ my $oWorksheet = $oBook->{Worksheet}[$sheet_nr];
+
+ print STDERR "using sheet: ",$oWorksheet->{Name},"\n";
+ defined ($oWorksheet) || die "can't find sheet '$sheet' in $excel_file";
+ my $end_row = x($config->{end_row}) || $oWorksheet->{MaxRow};
+
+ for(my $iR = $oWorksheet->{MinRow} ; defined $end_row && $iR <= $end_row ; $iR++) {
+ my $row;
+ for(my $iC = $oWorksheet->{MinCol} ; defined $oWorksheet->{MaxCol} && $iC <= $oWorksheet->{MaxCol} ; $iC++) {
+ my $cell = $oWorksheet->{Cells}[$iR][$iC];
+ if ($cell) {
+ $row->{int2col($iC)} = $cell->Value;
+ }
}
- my $swishpath = $path."#".int($row->{mfn});
+ progress($iR, $end_row);
+
+# print "row[$iR/$end_row] ";
+# foreach (keys %{$row}) {
+# print "$_: ",$row->{$_},"\t";
+# }
+# print "\n";
+
+ my $swishpath = $database."#".$iR;
+
+ next if (! $row);
- if (my $xml = isis2xml($row,$add_xml)) {
+ if (my $xml = data2xml($type,$row,$add_xml)) {
$xml = $cp2utf->convert($xml);
use bytes; # as opposed to chars
print "Path-Name: $swishpath\n";
}
}
}
- print STDERR "\n";
}
# call this to commit index
--- /dev/null
+<?xml version="1.0" encoding="ISO-8859-2"?>
+<!--
+ parametars for M$ Excel import from spreadsheet
+
+ column letters must be in UPPER CASE
+ end_row configuration option is optional, but you should use it,
+ because it will use Excel MaxRow property which is
+ often set at 31999
+-->
+<config
+ excel_codepage="windows-1250"
+ sheet="Sheet1"
+ start_row="2"
+ end_row="500"
+>
+
+<!--
+ <isis type="swish|display|index">_pre_000x_sep_000x_sep_000x_post_</isis>
+-->
+ <indexer>
+ <headline>
+ <column type="display">|A|</column>
+ </headline>
+
+ <IdentificationNumbers name="ISN">
+ <column type="swish" filter="isn_swish">|B|</column>
+ <column type="index">|B|</column>
+ </IdentificationNumbers>
+
+<!--
+ <ISBN name="ISBN" order="10">
+ <column type="display">10</column>
+ </ISBN>
+
+-->
+ <ISSN name="ISSN" order="11">
+ <column type="display">|B|</column>
+ </ISSN>
+
+<!--
+ <MFN name="MFN">
+ <column>mfn</column>
+ <column type="index">mfn</column>
+ </MFN>
+
+ <PersonalName name="Autor" order="15">
+ <column type="swish">700 701 702 710 711 503 200f 200g</column>
+ <column type="display">700a, 700b</column>
+ <column type="display">710a. 710b (710d ; 710f ; 710e)</column>
+ <column type="display">503a, 503b</column>
+ <column type="index">700a, 700b</column>
+ <column type="index">701a, 701b</column>
+ <column type="index">702a, 702b</column>
+ <column type="index">710a</column>
+ <column type="index">503a, 503b</column>
+ </PersonalName>
+
+-->
+ <TitleAndResponsibility name="Naslov" order="20">
+ <column>|A|</column>
+ <column type="index">|A|</column>
+ </TitleAndResponsibility>
+
+<!--
+ <EditionStatement name="Izdanje" order="25">
+ <column type="display">205a</column>
+ </EditionStatement>
+
+ <Date name="Godina izdavanja" order="30">
+ <column type="swish">210d</column>
+ </Date>
+
+ <Publisher name="Izdavaè" order="35">
+ <column type="swish">210a 210c</column>
+ <column type="index">210c, 210a</column>
+ </Publisher>
+
+ <Publication name="Impresum" order="40">
+ <column type="display">210a : 210c, 210d</column>
+ </Publication>
+
+ <PhisicalDescription name="Materijalni opis" order="45">
+ <column type="swish">215a : 215c ; 215d</column>
+ <column type="display">215a : 215c ; 215d</column>
+ </PhisicalDescription>
+
+ <Language name="Jezik" order="50">
+ <column type="swish" filter="lang_swish">101</column>
+ <column type="display" filter="lang_display">101</column>
+ <column type="index" filter="lang_display">101</column>
+ </Language>
+
+ <Series name="Nakladnièka cjelina" order="55">
+ <column>225a : 225e ; 225v. 225h, 225i ; 225w</column>
+ <column type="index">225a</column>
+ </Series>
+
+ <Note name="Napomena" order="60">
+ <column delimiter=". -">300</column>
+ <column append="1" delimiter=". - ">320</column>
+ <column append="1" delimiter=". - ">327</column>
+ <column append="1" delimiter=". - ">329</column>
+ <column type="index">300</column>
+ <column type="index">320</column>
+ <column type="index">327</column>
+ <column type="index">329</column>
+ </Note>
+
+ <UncontrolledTerms name="Tema" order="65">
+ <column delimiter=" * " format="upper">610 </column>
+ <column type="index">610</column>
+ </UncontrolledTerms>
+
+ <contents name="Sadr¾aj ¹ðè澩ÐÈÆ®" order="70">
+ <column>330 ¹ðè澩ÐÈÆ®</column>
+ <column type="index">330</column>
+ </contents>
+
+ <title_vol name="Podaci o svescima" order="75">
+ <column type="display">230v : 230a. - 250. - 260. ISBN 290</column>
+ <column type="display">231v : 231a. - 251. - 261. ISBN 291</column>
+ <column type="display">232v : 232a. - 252. - 262. ISBN 292</column>
+ <column type="display">233v : 233a. - 253. - 263. ISBN 293</column>
+ </title_vol>
+
+ <UDC name="UDK" order="80">
+ <column>675a</column>
+ <column type="index">675a</column>
+ </UDC>
+
+ <OtherClass name="APA CC" order="81">
+ <column>686</column>
+ <column type="index">686</column>
+ </OtherClass>
+-->
+
+ <library name="Dostupno u" order="85">
+ <column>|C|</column>
+ </library>
+
+<!--
+ <shelfNumber name="Signatura" order="90">
+ <column>990</column>
+ <column type="index">990</column>
+ </shelfNumber>
+
+ <invNo name="Inventarni broj" order="95">
+ <column delimiter=", ">991</column>
+ <column type="index">991</column>
+ </invNo>
+-->
+
+ <type name="Tip graðe" type="display" order="100">
+ <!-- added via xml tag in .conf -->
+ </type>
+
+
+ </indexer>
+</config>
#-------------------------------------------------------------
#
-# parse_format('format',$isis_row);
+# parse_format(...)
#
-use isis_sf;
sub parse_format {
+ my $type = shift || die "parset_format must be called with type!";
my $format = shift || die "parse_format must be called with format!";
my $row = shift || die "parse_format must be called with row!";
my $i = shift || 0; # isis repeatable number
my $codepage = shift || die "parse_format must be called with codepage!";
+ if ($type eq "isis") {
+ return parse_isis_format($format,$row,$i,$codepage);
+ } elsif ($type eq "excel") {
+ return parse_excel_format($format,$row,$i,$codepage);
+ }
+}
+
+#-------------------------------------------------------------
+
+sub parse_isis_format {
+ use isis_sf;
+
+ my $format = shift;
+ my $row = shift;
+ my $i = shift;
+ my $codepage = shift;
my $out;
my $out_swish;
}
#-------------------------------------------------------------
+
+sub parse_excel_format {
+ my $format = shift;
+ my $row = shift;
+ my $i = shift;
+ my $codepage = shift;
+
+ my $out;
+ my $out_swish;
+
+ my $prefix = "";
+ if ($format =~ s/^([^A-Z\|]{1,3})//) {
+ $prefix = $1;
+ }
+
+ my $display;
+ my $swish;
+
+ while ($format && length($format) > 0) {
+#print STDERR "\n#### $format #";
+ if ($format =~ s/^\|([A-Z]{1,2})\|//) {
+#print STDERR "--$1-> $format -[",length($format),"] ";
+ if ($row->{$1}) {
+ my $tmp = $row->{$1};
+ $tmp = $codepage->convert($tmp) if ($codepage);
+ $display .= $prefix . $tmp;
+ $swish .= $tmp." ";
+#print STDERR " == $tmp";
+ }
+ $prefix = "";
+ } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) {
+ $prefix .= $1 if ($display);
+ } else {
+ print STDERR "unparsed format: $format\n";
+ $prefix .= $format;
+ $format = "";
+ }
+#print STDERR " display: $display swish: $swish [format: $format]";
+ }
+ # add suffix
+ $display .= $prefix if ($display);
+
+ return ($swish,$display);
+}
+
1;
# Enable alternate search syntax.
# This means a search query can contain ``+'' and ``-'' as syntax parameter.
-EnableAltSearchSyntax yes
+# this directive is removed in swish 2.4
+#EnableAltSearchSyntax yes
#