5 use Data::Dump qw/dump/;
9 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
12 #Give a hoot don't pollute, do not export more than needed by default
20 MARC::Fast - Very fast implementation of MARC database reader
26 my $marc = new MARC::Fast(
27 marcdb => 'unimarc.iso',
30 foreach my $mfn ( 1 .. $marc->count ) {
31 print $marc->to_ascii( $mfn );
34 For longer example with command line options look at L<scripts/dump_fastmarc.pl>
38 This is very fast alternative to C<MARC> and C<MARC::Record> modules.
40 It's is also very subtable for random access to MARC records (as opposed to
49 my $marc = new MARC::Fast(
50 marcdb => 'unimarc.iso',
55 my ($t, $record_number) = @_;
63 ################################################## subroutine header end ##
69 bless ($self, $class);
71 croak "need marcdb parametar" unless ($self->{marcdb});
73 print STDERR "# opening ",$self->{marcdb},"\n" if ($self->{debug});
75 open($self->{fh}, $self->{marcdb}) || croak "can't open ",$self->{marcdb},": $!";
80 while (! eof($self->{fh})) {
83 # save record position
84 push @{$self->{fh_offset}}, tell($self->{fh});
87 my $len = read($self->{fh}, $leader, 24);
90 warn "short read of leader, aborting\n";
98 # 5 Status (n=new, c=corrected and d=deleted)
99 # 6 Type of Record (a=printed material)
100 # 7 Bibliographic Level (m=monograph)
102 # 10 Indictator count (2 for monographs)
103 # 11 Subfield code count (2 - 0x1F+subfield code itself)
104 # 12-16 Base address of data
105 # 17 Encoding level (blank=full level, 1=sublevel 1, 2=sublevel 2,
107 # 18 Descriptive Cataloguing Form (blank=record is full ISBD,
108 # n=record is in non-ISBD format, i=record is in
109 # an incomplete ISBD format)
111 # 20 Length of length field in directory (always 4 in UNIMARC)
112 # 21 Length of Starting Character Position in directory (always
114 # 22 Length of implementation defined portion in directory (always
118 # |0 45 89 |12 16|1n 450 |
119 # |xxxxxnam 22(.....) 45 <---
121 print STDERR "REC ",$self->{count},": $leader\n" if ($self->{debug});
123 # store leader for later
124 push @{$self->{leader}}, $leader;
126 # skip to next record
127 my $o = substr($leader,0,5);
128 warn "# in record ", $self->{count}," record length isn't number but: ",dump($o),"\n" unless $o =~ m/^\d+$/;
130 seek($self->{fh},$o-24,1) if ($o);
142 Return number of records in database
150 return $self->{count};
155 Fetch record from database
157 my $hash = $marc->fetch(42);
159 First record number is C<1>
169 $self->{last_leader} = undef;
173 my $leader = $self->{leader}->[$rec_nr - 1];
174 $self->{last_leader} = $leader;
176 carp "can't find record $rec_nr";
179 my $offset = $self->{fh_offset}->[$rec_nr - 1];
180 unless (defined($offset)) {
181 carp "can't find offset for record $rec_nr";
185 my $reclen = substr($leader,0,5);
186 my $base_addr = substr($leader,12,5);
188 print STDERR "# $rec_nr leader: '$leader' reclen: $reclen base addr: $base_addr [dir: ",$base_addr - 24,"]\n" if ($self->{debug});
192 print STDERR "# seeking to $offset + 24\n" if ($self->{debug});
194 if ( ! seek($self->{fh}, $offset+24, 0) ) {
195 carp "can't seek to $offset: $!";
199 print STDERR "# reading ",$base_addr-24," bytes of dictionary\n" if ($self->{debug});
202 if( ! read($self->{fh},$directory,$base_addr-24) ) {
203 carp "can't read directory: $!";
206 print STDERR "# $rec_nr directory: [",length($directory),"] '$directory'\n" if ($self->{debug});
209 print STDERR "# reading ",$reclen-$base_addr," bytes of fields\n" if ($self->{debug});
212 if( ! read($self->{fh},$fields,$reclen-$base_addr) ) {
213 carp "can't read fields: $!";
216 print STDERR "# $rec_nr fields: '$fields'\n" if ($self->{debug});
221 while (!$skip && $directory =~ s/(\d{3})(\d{4})(\d{5})//) {
222 my ($tag,$len,$addr) = ($1,$2,$3);
224 if (($addr+$len) > length($fields)) {
225 print STDERR "WARNING: error in dictionary on record $rec_nr skipping...\n" if (! $self->{quiet});
231 my $f = substr($fields,$addr,$len);
232 print STDERR "tag/len/addr $tag [$len] $addr: '$f'\n" if ($self->{debug});
234 push @{ $row->{$tag} }, $f;
236 my $del = substr($fields,$addr+$len-1,1);
238 # check field delimiters...
239 if ($self->{assert} && $del ne chr(30)) {
240 print STDERR "WARNING: skipping record $rec_nr, can't find delimiter 30 got: '$del'\n" if (! $self->{quiet});
245 if ($self->{assert} && length($f) < 2) {
246 print STDERR "WARNING: skipping field $tag from record $rec_nr because it's too short!\n" if (! $self->{quiet});
258 Returns leader of last record L<fetch>ed
260 print $marc->last_leader;
262 Added in version 0.08 of this module, so if you need it use:
266 to be sure that it's supported.
272 return $self->{last_leader};
278 Read record with specified MFN and convert it to hash
280 my $hash = $marc->to_hash( $mfn, include_subfields => 1, );
282 It has ability to convert characters (using C<hash_filter>) from MARC
283 database before creating structures enabling character re-mapping or quick
286 This function returns hash which is like this:
293 'f' => 'Valdo D\'Arienzo',
294 'e' => 'tipografie e tipografi nel XVI secolo',
298 This method will also create additional field C<000> with MFN.
305 my $mfn = shift || confess "need mfn!";
309 # init record to include MFN as field 000
310 my $rec = { '000' => [ $mfn ] };
312 my $row = $self->fetch($mfn) || return;
314 foreach my $tag (keys %{$row}) {
315 foreach my $l (@{$row->{$tag}}) {
321 $l = $self->{'hash_filter'}->($l, $tag) if ($self->{'hash_filter'});
326 ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\x1F/\x1F/);
333 foreach my $t (split(/\x1F/,$l)) {
335 my $f = substr($t,0,1);
336 my $v = substr($t,1);
338 push @subfields, ( $f, $sf_usage->{$f}++ || 0 );
340 # repeatable subfiled -- convert it to array
341 if ( defined $val->{$f} ) {
342 if ( ref($val->{$f}) ne 'ARRAY' ) {
343 $val->{$f} = [ $val->{$f}, $v ];
345 push @{$val->{$f}}, $v;
351 $val->{subfields} = [ @subfields ] if $args->{include_subfields};
356 push @{$rec->{$tag}}, $val;
365 print $marc->to_ascii( 42 );
372 my $mfn = shift || confess "need mfn";
373 my $row = $self->fetch($mfn) || return;
377 foreach my $f (sort keys %{$row}) {
378 my $dump = join('', @{ $row->{$f} });
380 $dump =~ s/\x1f/\$/g;
381 $out .= "$f\t$dump\n";
390 =head1 UTF-8 ENCODING
392 This module does nothing with encoding. But, since MARC format is byte
393 oriented even when using UTF-8 which has variable number of bytes for each
394 character, file is opened in binary mode.
396 As a result, all scalars recturned to perl don't have utf-8 flag. Solution is
397 to use C<hash_filter> and L<Encode> to decode utf-8 encoding like this:
401 my $marc = new MARC::Fast(
402 marcdb => 'utf8.marc',
404 Encode::decode( 'utf-8', $_[0] );
408 This will affect C<to_hash>, but C<fetch> will still return binary representation
409 since it doesn't support C<hash_filter>.
416 http://www.rot13.org/~dpavlin/
420 This program is free software; you can redistribute
421 it and/or modify it under the same terms as Perl itself.
423 The full text of the license can be found in the
424 LICENSE file included with this module.
429 L<Biblio::Isis>, perl(1).