3 # This script will try (hard) to convert database from
4 # PhpMyLibrary (http://phpmylibrary.sourceforge.net/) back
5 # to MARC format (ISO 2709)
7 # 2003-01-31 Dobrica Pavlinusic <dpavlin@rot13.org>
9 # This script is written for WebPac project available at
10 # http://webpac.sourceforge.net/
12 # MARC file format documentation is taken from
13 # http://www.ariadne.ac.uk/issue7/marc/
# Connection defaults. The GetOptions call below binds command-line switches
# to these variables (only the --database and --password bindings are visible
# in this excerpt), so the values here are used when a switch is not given.
21 my $database = "postnuke";
22 my $host = "localhost";
29 my $result = GetOptions(
30 "database=s" => \$database,
33 "password=s" => \$passwd,
# Usage message. It is a qq{} string, so the current values of $database,
# $host, $user and $passwd are interpolated into the example command line.
39 print qq{usage: $0 [--database="$database" --host="$host" --user="$user" --password="$passwd"] > file.marc\n
40 This script will convert PhpMyLibrary database to standard UNIMARC format\n};
# Build the DBI data source name for the mysql driver and connect.
# RaiseError => 1 asks DBI to die on database errors rather than return
# failure codes, so later DBI calls in this script are not checked by hand.
44 my $dsn = "DBI:mysql:database=$database;host=$host";
45 my $dbh = DBI->connect($dsn, $user, $passwd, {'RaiseError' => 1});
47 # UNIMARC leader format
48 #my $leader_fmt = qq{%05diam0 22%05d 45 };
# Active leader template, filled in with sprintf once per record: the first
# %05d receives the new record length and the second %05d receives the base
# address of data. The commented-out variant above is kept for reference.
50 my $leader_fmt = qq{%05dcas 22%05d a 4500};
# Select the stored MARC string (column "marc") from every row of tblbib.
53 my $sth = $dbh->prepare("SELECT marc FROM tblbib");
# Main conversion loop: one iteration per database row. Each stored record is
# split into leader, directory and field data, the directory is re-emitted
# entry by entry, and a new leader plus record terminators are assembled.
59 while (my $row = $sth->fetchrow_hashref()) {
60 my $marc = $row->{'marc'};
# Actual length of the stored record string, as opposed to the length the
# record claims in its own leader ($reclen below).
63 my $real_len = length($marc);
65 my $skip = 0; # skip this record?
67 # fix PhpMyLibrary MARC (why do I have to do this? It's MARC,
73 # 5 Status (n=new, c=corrected and d=deleted)
74 # 6 Type of Record (a=printed material)
75 # 7 Bibliographic Level (m=monograph)
77 # 10 Indicator count (2 for monographs)
78 # 11 Subfield code count (2 - 0x1F+subfield code itself)
79 # 12-16 Base address of data
80 # 17 Encoding level (blank=full level, 1=sublevel 1, 2=sublevel 2,
82 # 18 Descriptive Cataloguing Form (blank=record is full ISBD,
83 # n=record is in non-ISBD format, i=record is in
84 # an incomplete ISBD format)
86 # 20 Length of length field in directory (always 4 in UNIMARC)
87 # 21 Length of Starting Character Position in directory (always
89 # 22 Length of implementation defined portion in directory (always
93 # |0 4|5 89 |12 16|1n 450 |
94 # (xxxxx)nam 22(.....) 450 <---
# Parse the stored leader: the first 5 characters are captured as the record
# length, the next 6 are skipped, the following 5 are captured as the base
# address, and the literal text "polerioj" must come right after. Any record
# that does not match aborts the whole run via die.
# NOTE(review): "polerioj" is presumably the fixed leader tail that
# PhpMyLibrary writes -- confirm against real data.
95 $marc =~ m/^(.....)......(.....)polerioj/ || die "record: '$marc' unparsable!";
96 my ($reclen,$base_addr) = ($1,$2);
# The directory is everything between offset 24 and the base address.
# Field data is taken from one character before the base address.
# NOTE(review): the -1 presumably keeps the leading 0x1e field terminator in
# $fields so that per-field offsets line up -- confirm.
98 my $directory = substr($marc,24,$base_addr-24);
99 my $fields = substr($marc,$base_addr-1);
101 print STDERR "# $rec_nr fields: '$fields'\n" if ($debug);
102 print STDERR "# $rec_nr directory: [",length($directory),"]\n" if ($debug);
104 # PhpMyLibrary MARC records don't have indicators, so we'll add them
# Consume the directory one 12-digit entry at a time (3-digit tag, 4-digit
# field length, 5-digit start address) until it is exhausted or the record
# has been marked to be skipped.
110 while (!$skip && $directory =~ s/(\d{3})(\d{4})(\d{5})//) {
111 my ($tag,$len,$addr) = ($1,$2,$3);
113 print STDERR "tag/len/addr: $tag $len $addr\n" if ($debug);
# Sanity check that a field starts with the 0x1e delimiter (character 30).
# NOTE(review): $f is read here, but the only visible declaration of $f is
# further down; lines between are missing from this excerpt, so confirm which
# scope these two statements belong to.
117 my $del = substr($f,0,1);
119 die "expected 0x1e, got '$del' (".ord($del)."): '$f'" if (ord($del) != 30);
# A directory entry that points past the end of the field data means the
# directory is inconsistent; the warning is printed only in debug mode.
122 if (($addr+$len) > length($fields)) {
123 print STDERR "WARNING: error in dictionary on record $rec_nr skipping...\n" if ($debug);
# Extract the field body named by this directory entry.
129 my $f = substr($fields,$addr,$len);
130 print STDERR "data $tag [$len] $addr: '$f'\n" if ($debug);
# The character immediately after the field should be the 0x1e delimiter.
132 my $del = substr($fields,$addr+$len,1);
134 # check field delimiters...
135 if ($del ne chr(30)) {
136 print STDERR "WARNING: skipping record $rec_nr, can't find delimiters got: '$del'\n" if ($debug);
143 if ($tag =~ m/^00/) {
144 # fields 001-008 don't have indicators
145 $new_dictionary .= sprintf("%03d%04d%05d",$tag,$len,$addr);
# All other tags: the directory entry is rewritten with the length increased
# by 2 and the address shifted by $o (maintained in lines not shown here),
# and the field is re-emitted as 0x1e, the blank padding from the string
# literal, then the original field data without its first character.
148 $new_dictionary .= sprintf("%03d%04d%05d",$tag,($len+2),($addr+$o));
149 $new_fields.=chr(30)." ".substr($f,1);
# Assemble the converted record. The record length placed in the leader is
# 24 (leader) + rebuilt directory + rebuilt fields + 2, where the 2 matches
# the two terminator characters appended below (chr(30) and chr(29)).
# The base address is carried over from the original leader.
155 my $new_leader = sprintf($leader_fmt,24+length($new_dictionary.$new_fields)+2,$base_addr);
156 my $new_marc = $new_leader . $new_dictionary . $new_fields . chr(30);
157 $new_marc .= chr(29); # end of record
159 print STDERR "original and new marc: [$rec_nr]\n$marc\n$new_marc\n\n" if ($debug);
164 # last if ($count > 100);
# Final summary goes to STDERR, so it stays out of the MARC data that the
# usage text shows being redirected from STDOUT into a file.
170 print STDERR "$count records from database $database converted...\n";