9 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
12 #Give a hoot don't pollute, do not export more than needed by default
20 MARC::Fast - Very fast implementation of MARC database reader
26 my $marc = new MARC::Fast(
27 marcdb => 'unimarc.iso',
30 foreach my $mfn ( 1 .. $marc->count ) {
31 print $marc->to_ascii( $mfn );
34 For longer example with command line options look at L<scripts/dump_fastmarc.pl>
38 This is very fast alternative to C<MARC> and C<MARC::Record> modules.
40 It's is also very subtable for random access to MARC records (as opposed to
49 my $marc = new MARC::Fast(
50 marcdb => 'unimarc.iso',
55 my ($t, $record_number) = @_;
63 ################################################## subroutine header end ##
69 bless ($self, $class);
71 croak "need marcdb parametar" unless ($self->{marcdb});
73 print STDERR "# opening ",$self->{marcdb},"\n" if ($self->{debug});
75 open($self->{fh}, $self->{marcdb}) || croak "can't open ",$self->{marcdb},": $!";
80 while (! eof($self->{fh})) {
83 # save record position
84 push @{$self->{fh_offset}}, tell($self->{fh});
87 my $len = read($self->{fh}, $leader, 24);
90 carp "short read of leader, aborting\n";
97 # 5 Status (n=new, c=corrected and d=deleted)
98 # 6 Type of Record (a=printed material)
99 # 7 Bibliographic Level (m=monograph)
101 # 10 Indictator count (2 for monographs)
102 # 11 Subfield code count (2 - 0x1F+subfield code itself)
103 # 12-16 Base address of data
104 # 17 Encoding level (blank=full level, 1=sublevel 1, 2=sublevel 2,
106 # 18 Descriptive Cataloguing Form (blank=record is full ISBD,
107 # n=record is in non-ISBD format, i=record is in
108 # an incomplete ISBD format)
110 # 20 Length of length field in directory (always 4 in UNIMARC)
111 # 21 Length of Starting Character Position in directory (always
113 # 22 Length of implementation defined portion in directory (always
117 # |0 45 89 |12 16|1n 450 |
118 # |xxxxxnam 22(.....) 45 <---
120 print STDERR "REC ",$self->{count},": $leader\n" if ($self->{debug});
122 # store leader for later
123 push @{$self->{leader}}, $leader;
125 # skip to next record
126 my $o = substr($leader,0,5);
128 seek($self->{fh},$o-24,1) if ($o);
140 Return number of records in database
148 return $self->{count};
153 Fetch record from database
155 my $hash = $marc->fetch(42);
157 First record number is C<1>
167 $self->{last_leader} = undef;
171 my $leader = $self->{leader}->[$rec_nr - 1];
172 $self->{last_leader} = $leader;
174 carp "can't find record $rec_nr";
177 my $offset = $self->{fh_offset}->[$rec_nr - 1];
178 unless (defined($offset)) {
179 carp "can't find offset for record $rec_nr";
183 my $reclen = substr($leader,0,5);
184 my $base_addr = substr($leader,12,5);
186 print STDERR "# $rec_nr leader: '$leader' reclen: $reclen base addr: $base_addr [dir: ",$base_addr - 24,"]\n" if ($self->{debug});
190 print STDERR "# seeking to $offset + 24\n" if ($self->{debug});
192 if ( ! seek($self->{fh}, $offset+24, 0) ) {
193 carp "can't seek to $offset: $!";
197 print STDERR "# reading ",$base_addr-24," bytes of dictionary\n" if ($self->{debug});
200 if( ! read($self->{fh},$directory,$base_addr-24) ) {
201 carp "can't read directory: $!";
204 print STDERR "# $rec_nr directory: [",length($directory),"] '$directory'\n" if ($self->{debug});
207 print STDERR "# reading ",$reclen-$base_addr," bytes of fields\n" if ($self->{debug});
210 if( ! read($self->{fh},$fields,$reclen-$base_addr) ) {
211 carp "can't read fields: $!";
214 print STDERR "# $rec_nr fields: '$fields'\n" if ($self->{debug});
219 while (!$skip && $directory =~ s/(\d{3})(\d{4})(\d{5})//) {
220 my ($tag,$len,$addr) = ($1,$2,$3);
222 if (($addr+$len) > length($fields)) {
223 print STDERR "WARNING: error in dictionary on record $rec_nr skipping...\n" if (! $self->{quiet});
229 my $f = substr($fields,$addr,$len);
230 print STDERR "tag/len/addr $tag [$len] $addr: '$f'\n" if ($self->{debug});
232 push @{ $row->{$tag} }, $f;
234 my $del = substr($fields,$addr+$len-1,1);
236 # check field delimiters...
237 if ($self->{assert} && $del ne chr(30)) {
238 print STDERR "WARNING: skipping record $rec_nr, can't find delimiter 30 got: '$del'\n" if (! $self->{quiet});
243 if ($self->{assert} && length($f) < 2) {
244 print STDERR "WARNING: skipping field $tag from record $rec_nr because it's too short!\n" if (! $self->{quiet});
256 Returns leader of last record L<fetch>ed
258 print $marc->last_leader;
260 Added in version 0.08 of this module, so if you need it use:
264 to be sure that it's supported.
270 return $self->{last_leader};
276 Read record with specified MFN and convert it to hash
278 my $hash = $marc->to_hash( $mfn, include_subfields => 1, );
280 It has ability to convert characters (using C<hash_filter>) from MARC
281 database before creating structures enabling character re-mapping or quick
284 This function returns hash which is like this:
291 'f' => 'Valdo D\'Arienzo',
292 'e' => 'tipografie e tipografi nel XVI secolo',
296 This method will also create additional field C<000> with MFN.
303 my $mfn = shift || confess "need mfn!";
307 # init record to include MFN as field 000
308 my $rec = { '000' => [ $mfn ] };
310 my $row = $self->fetch($mfn) || return;
312 foreach my $rec_nr (keys %{$row}) {
313 foreach my $l (@{$row->{$rec_nr}}) {
319 $l = $self->{'hash_filter'}->($l, $rec_nr) if ($self->{'hash_filter'});
324 ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\x1F/\x1F/);
331 foreach my $t (split(/\x1F/,$l)) {
333 my $f = substr($t,0,1);
335 push @subfields, ( $f, $sf_usage->{$f}++ || 0 );
337 # repeatable subfiled -- convert it to array
339 if ( $sf_usage->{$f} == 2 ) {
340 $val->{$f} = [ $val->{$f}, $val ];
342 push @{$val->{$f}}, $val;
345 $val->{substr($t,0,1)} = substr($t,1);
347 $val->{subfields} = [ @subfields ] if $args->{include_subfields};
352 push @{$rec->{$rec_nr}}, $val;
361 print $marc->to_ascii( 42 );
368 my $mfn = shift || confess "need mfn";
369 my $row = $self->fetch($mfn) || return;
373 foreach my $f (sort keys %{$row}) {
374 my $dump = join('', @{ $row->{$f} });
376 $dump =~ s/\x1f/\$/g;
377 $out .= "$f\t$dump\n";
391 http://www.rot13.org/~dpavlin/
395 This program is free software; you can redistribute
396 it and/or modify it under the same terms as Perl itself.
398 The full text of the license can be found in the
399 LICENSE file included with this module.
404 L<Biblio::Isis>, perl(1).