9 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
12 #Give a hoot don't pollute, do not export more than needed by default
20 MARC::Fast - Very fast implementation of MARC database reader
29 This is very fast alternative to C<MARC> and C<MARC::Record> modules.
31 It's is also very sutable for random access to MARC records (as opposed to
40 my $marc = new MARC::Fast(
41 marcdb => 'unimarc.iso',
49 ################################################## subroutine header end ##
55 bless ($self, $class);
57 croak "need marcdb parametar" unless ($self->{marcdb});
59 print STDERR "# opening ",$self->{marcdb},"\n" if ($self->{debug});
61 open($self->{fh}, $self->{marcdb}) || croak "can't open ",$self->{marcdb},": $!";
65 while (! eof($self->{fh})) {
68 # save record position
69 push @{$self->{fh_offset}}, tell($self->{fh});
72 read($self->{fh}, $leader, 24);
77 # 5 Status (n=new, c=corrected and d=deleted)
78 # 6 Type of Record (a=printed material)
79 # 7 Bibliographic Level (m=monograph)
81 # 10 Indictator count (2 for monographs)
82 # 11 Subfield code count (2 - 0x1F+subfield code itself)
83 # 12-16 Base address of data
84 # 17 Encoding level (blank=full level, 1=sublevel 1, 2=sublevel 2,
86 # 18 Descriptive Cataloguing Form (blank=record is full ISBD,
87 # n=record is in non-ISBD format, i=record is in
88 # an incomplete ISBD format)
90 # 20 Length of length field in directory (always 4 in UNIMARC)
91 # 21 Length of Starting Character Position in directory (always
93 # 22 Length of implementation defined portion in directory (always
97 # |0 45 89 |12 16|1n 450 |
98 # |xxxxxnam 22(.....) 45 <---
100 print STDERR "REC ",$self->{count},": $leader\n" if ($self->{debug});
102 # store leader for later
103 push @{$self->{leaders}}, $leader;
105 # skip to next record
106 seek($self->{fh},substr($leader,0,5)-24,1);
115 Return number of records in database
123 return $self->{count};
128 Fetch record from database
130 my $hash = $marc->fetch(42);
137 my $rec_nr = shift || return;
139 my $leader = $self->{leaders}->[$rec_nr - 1];
141 carp "can't find record $rec_nr";
144 my $offset = $self->{fh_offset}->[$rec_nr - 1];
145 unless (defined($offset)) {
146 carp "can't find offset for record $rec_nr";
150 my $reclen = substr($leader,0,5);
151 my $base_addr = substr($leader,12,5);
153 print STDERR "# $rec_nr leader: '$leader' reclen: $reclen base addr: $base_addr [dir: ",$base_addr - 24,"]\n" if ($self->{debug});
157 print STDERR "# seeking to $offset + 24\n" if ($self->{debug});
159 if ( ! seek($self->{fh}, $offset+24, 0) ) {
160 carp "can't seek to $offset: $!";
164 print STDERR "# reading ",$base_addr-24," bytes of dictionary\n" if ($self->{debug});
167 if( ! read($self->{fh},$directory,$base_addr-24) ) {
168 carp "can't read directory: $!";
171 print STDERR "# $rec_nr directory: [",length($directory),"] '$directory'\n" if ($self->{debug});
174 print STDERR "# reading ",$reclen-$base_addr," bytes of fields\n" if ($self->{debug});
177 if( ! read($self->{fh},$fields,$reclen-$base_addr) ) {
178 carp "can't read fields: $!";
181 print STDERR "# $rec_nr fields: '$fields'\n" if ($self->{debug});
186 while (!$skip && $directory =~ s/(\d{3})(\d{4})(\d{5})//) {
187 my ($tag,$len,$addr) = ($1,$2,$3);
189 if (($addr+$len) > length($fields)) {
190 print STDERR "WARNING: error in dictionary on record $rec_nr skipping...\n" if (! $self->{quiet});
196 my $f = substr($fields,$addr,$len);
197 print STDERR "tag/len/addr $tag [$len] $addr: '$f'\n" if ($self->{debug});
201 my $del = substr($fields,$addr+$len-1,1);
203 # check field delimiters...
204 if ($self->{assert} && $del ne chr(30)) {
205 print STDERR "WARNING: skipping record $rec_nr, can't find delimiter 30 got: '$del'\n" if (! $self->{quiet});
210 if ($self->{assert} && length($f) < 2) {
211 print STDERR "WARNING: skipping field $tag from record $rec_nr because it's too short!\n" if (! $self->{quiet});
236 http://www.rot13.org/~dpavlin/
240 This program is free software; you can redistribute
241 it and/or modify it under the same terms as Perl itself.
243 The full text of the license can be found in the
244 LICENSE file included with this module.