From: Dobrica Pavlinusic Date: Mon, 28 Feb 2005 10:01:34 +0000 (+0000) Subject: added import_xml_file and import_xml_tag into configuration file, X-Git-Url: http://git.rot13.org/?p=webpac;a=commitdiff_plain;h=692bb1a2dfe2bc58103e8999e1fc2b2d7d01acc8 added import_xml_file and import_xml_tag into configuration file, documentation for .dbf import git-svn-id: file:///home/dpavlin/private/svn/webpac/trunk@679 13eb9ef6-21d5-0310-b721-a9d68796d827 --- diff --git a/all2xml.pl b/all2xml.pl index 8f6438f..c2d258c 100755 --- a/all2xml.pl +++ b/all2xml.pl @@ -64,7 +64,6 @@ my %type2tag = ( 'excel' => 'column', 'marc' => 'marc', 'feed' => 'feed', - 'dbf' => 'isis', # special case, re-use isis import_xml ); my $cache; # for cacheing @@ -226,7 +225,7 @@ sub data2xml { my ($swish,$display); - my $tag = $type2tag{$type} || die "can't find which tag to use for type $type"; + my $tag = $cfg->val($database, 'import_xml_tag') || $type2tag{$type} || die "can't find which tag to use for type $type"; # is this field page-by-page? my $iterate_by_page = $config->{indexer}->{$field}->{iterate_by_page}; @@ -668,7 +667,8 @@ foreach my $database ($cfg->Sections) { print STDERR "opening lookup file '$lookup_file'\n"; } - my $import_xml_file = "./import_xml/$type.xml"; + my $import_xml_type = $cfg->val($database, 'import_xml_file') || $type; + my $import_xml_file = "./import_xml/$import_xml_type.xml"; if (! 
-r $import_xml_file) { print STDERR "ERROR: file $import_xml_file not readable skipping!\n"; @@ -681,7 +681,8 @@ foreach my $database ($cfg->Sections) { my $type_base = $type; $type_base =~ s/_.+$//g; - $config=XMLin($import_xml_file, ForceArray => [ $type2tag{$type_base}, 'config', 'format' ], ForceContent => 1 ); + my $tag = $cfg->val($database, 'import_xml_tag') || $type2tag{$type_base} || die "can't find which tag to use for type $type"; + $config=XMLin($import_xml_file, ForceArray => [ $tag, 'config', 'format' ], ForceContent => 1 ); # helper for progress bar sub fmt_time { @@ -743,7 +744,6 @@ foreach my $database ($cfg->Sections) { } # now read database -print STDERR "using: $type...\n"; # erase cache for tags by order in this database delete $cache->{tags_by_order}; diff --git a/doc/dbf_import.pod b/doc/dbf_import.pod new file mode 100644 index 0000000..2cac05d --- /dev/null +++ b/doc/dbf_import.pod @@ -0,0 +1,69 @@ +=head1 Import DBF files into WebPAC + +The dBase file format C<.dbf> is supported using the C<XBase> perl module. +Configuration in C<all2xml.conf> for this type looks like this: + + [hda] + dbf_file=/data/drustvene/hda/ISO.DBF + type=dbf + import_xml_file=isis + import_xml_tag=isis + dbf_codepage=cp852 + dbf_mapping=<<_END_OF_MAP_ + ID_BROJ mfn + ISBN_BROJ 010 + SKUPINA1 200 + SKUPINA2 205 + SKUPINA4 210 + SKUPINA5 215 + SKUPINA6 225 + SKUPINA7 300 + ANOTACIJA 330 + PREDMET1 610 + PREDMET2 610 + PREDMET3 510 + UDK 675 + REDALICA 700 + SIGNATURA 990 + _END_OF_MAP_ + +The options are as follows: + +=over 4 + +=item dbf_file + +Full path to the C<.dbf> file that you want to import. + +=item type + +It should be C<dbf> for dBase files. + +=item import_xml_file + +If you want to re-use an existing import_xml file, you can use this option +to specify which C<import_xml/I<name>.xml> file to use. + +If not used, you will have to create an C<import_xml/dbf.xml> file. + +=item import_xml_tag + +If using C<import_xml_file> you will probably want to specify which tag to use +for C<import_xml> data using this option. + +=item dbf_codepage + +Override the codepage set in C<all2xml.conf> for this C<.dbf> database. 
+ +=item dbf_mapping + +Specify mapping from C<.dbf> fields to ISO fields. Each field is assumed to +have MARC/ISO subfields in the form C<^a> or C<$a> and so on. + +The mapping must be specified using here-document notation in the config file, and +each line must have two fields: the original column name from the C<.dbf> file and the new field name +which is used in the C<import_xml> file. + +=back + +=cut