7 use Data::Dump qw(dump);
# csv_file( $file, $parse ) -- excerpt of the sub body; the `sub` header, the
# line-reading loop and the closing braces are not part of this excerpt.
#
#   $file  - path of the CSV export to read
#   $parse - code ref called with the current line in $_; it is expected to
#            return ( $idsl, $tag, $tagno, $id, $sfi, $sfino, $text )
#
# Each parsed row is stored as one subfield in the shared $data structure:
#   $data->{$idsl}->{$tag}->[$tagno] = [ $i1, $i2, $sfi, $text, $sfi, $text, ... ]
13 my ($file,$parse) = @_;
15 print STDERR "# reading $file ";
# Fail loudly when the export is missing or unreadable instead of silently
# reading nothing from an unopened handle.
18 open(my $fh, '<', $file) || die "can't open $file: $!";
# The first line is read on its own as the column header.
19 my $h = <$fh>; # header
# The callback splits the current line ($_) into its columns.
22 my ( $idsl, $tag, $tagno, $id, $sfi, $sfino, $text ) = $parse->($_);
# NOTE(review): the condition guarding this SKIP message is outside the
# excerpt; $lines is a line counter maintained by code not shown here.
26 print STDERR "\nSKIP $file +$lines [$_] " if $idsl;
# Pad $id to at least two characters. The statement is repeated on purpose:
# each pass appends at most one blank, so an empty $id becomes two blanks.
33 $id .= " " if length $id < 2;
34 $id .= " " if length $id < 2;
# First character goes to $i1, the remainder to $i2 (limit 2 => two pieces).
35 my ($i1, $i2) = split(//, $id, 2);
# Subfield codes arrive prefixed with '$'; strip it or abort on anything else.
37 $sfi =~ s/^\$// || die "can't fix subfield [$sfi]";
# Map the ten 7-bit CROASCII punctuation code points to Croatian letters.
# Both lists must hold exactly ten characters, in this order:
#   ^ ~ ] } \ | [ { @ `   =>   Č č Ć ć Đ đ Š š Ž ž
# tr/// pads a short replacement list with its last character, so a
# truncated list would silently turn several letters into the final one.
# NOTE(review): the non-ASCII replacement characters need `use utf8` in the
# file preamble (not visible in this excerpt) to be treated as characters.
39 $text =~ tr/^~]}\\|[{@`/ČčĆćĐđŠšŽž/; # CROASCII (YUS|HRN) B1.002:1982
# Slots 0 and 1 hold the two characters of $id; subfield number $sfino then
# occupies the code/value pair at slots ($sfino * 2) + 2 and ($sfino * 2) + 3.
41 $data->{$idsl}->{$tag}->[ $tagno ]->[ 0 ] = $i1;
42 $data->{$idsl}->{$tag}->[ $tagno ]->[ 1 ] = $i2;
43 $data->{$idsl}->{$tag}->[ $tagno ]->[ ( $sfino * 2 ) + 2 ] = $sfi;
44 $data->{$idsl}->{$tag}->[ $tagno ]->[ ( $sfino * 2 ) + 3 ] = $text;
# Progress indicator: print the counter every 1000 lines.
46 print STDERR "$lines " if $lines % 1000 == 0;
# Load TEKTAG.csv: comma-separated rows whose text is split over two columns
# ($textkey and $textres) that are concatenated back into a single value.
# The closing of this call is not part of this excerpt.
51 csv_file( 'TEKTAG.csv', sub {
# NOTE(review): split has no explicit limit, so a comma inside the text
# columns would push the remainder into extra fields that are discarded --
# confirm the export never contains commas in the text.
54 my ( $idsl, $tag, $tagno, $id, $sfi, $sfino, $textkey, $textres ) = split(/,/,$_);
56 my $text = $textkey . $textres; # FIXME fix CAPITAL letters in $textkey
# Hand back the seven columns in the order csv_file unpacks them.
58 return ( $idsl, $tag, $tagno, $id, $sfi, $sfino, $text );
# Load LONTAG.csv: semicolon-separated rows. The split limit of 7 keeps every
# semicolon after the sixth inside $text, so the trailing columns stay
# together as one value. The closing of this call is not part of this excerpt.
61 csv_file( 'LONTAG.csv', sub {
64 my ( $idsl, $tag, $tagno, $id, $sfi, $sfino, $text ) = split(/;/,$_, 7);
# The remaining column separators become line breaks inside the text.
67 $text =~ s/;/\n/g; # join OPIS[1-11]
# Hand back the seven columns in the order csv_file unpacks them.
69 return ( $idsl, $tag, $tagno, $id, $sfi, $sfino, $text );
# Load IDNTAG.csv: each row supplies the values from which the record leader
# is assembled and stored under $data->{$idsl}->{'leader'}.
# NOTE(review): the code that fills @leader from the parsed columns, and the
# closing of this call, are not part of this excerpt.
72 csv_file( 'IDNTAG.csv', sub {
75 my ( $idsl, $tag, $STSL, $KZVS, $BIBRAZ, $HIRAZ, $KPS, $OKO ) = split(/,/,$_);
# Index 23 is forced to a blank so the joined string is 24 characters long
# when @leader has no higher index.
85 $leader[23] = ' '; # last char;
# Any position that was never assigned is emitted as a blank.
87 my $full = join('', map { defined $_ ? $_ : ' ' } @leader);
# Stored next to the tag keys; the output loop passes it to $rec->leader.
88 $data->{$idsl}->{'leader'} = $full;
# Load OBRTAG.csv: comma-separated rows that already have the seven-column
# layout csv_file expects, so the columns are returned unchanged.
# The closing of this call is not part of this excerpt.
92 csv_file( 'OBRTAG.csv', sub {
95 my ( $IDSL, $TAG, $TAGNO, $ID, $SFI, $SFINO, $CODINF ) = split(/,/,$_);
96 return ( $IDSL, $TAG, $TAGNO, $ID, $SFI, $SFINO, $CODINF );
# Output stage: turn every record collected in $data into a MARC::Record and
# append it to $marc_file. Loop/branch closers and the $number counter are
# maintained by lines that are not part of this excerpt.
99 print STDERR "\n# getting all ids ";
100 my @ids = keys %$data;
101 print STDERR scalar(@ids), " found\n";
103 my $marc_file = 'liberated.marc';
# Abort when the output file cannot be created; otherwise every later print
# to the handle would fail with nothing more than a warning.
104 open(my $marc_fh, '>:encoding(UTF-8)', $marc_file) || die "can't open $marc_file: $!";
107 foreach my $id ( @ids ) {
108 my $rec = MARC::Record->new;
109 $rec->encoding( 'UTF-8' );
# The record id becomes control field 001.
110 $rec->add_fields( [ '001', $id ] );
# Keys of $data->{$id} are field tags plus the special 'leader' entry.
112 foreach my $field ( sort keys %{ $data->{$id} } ) {
113 if ( $field eq 'leader' ) {
114 $rec->leader( $data->{$id}->{$field} );
# Every other key holds one array per field occurrence, laid out as
# ( ind1, ind2, code, value, code, value, ... ).
117 foreach my $arr ( @{ $data->{$id}->{$field} } ) {
# NOTE(review): the condition that triggers this SKIPPED message is not
# visible in this excerpt.
119 print STDERR "SKIPPED $id $field ",dump( $data->{$id}->{$field} ), "\n";
122 $rec->add_fields( $field, @$arr );
126 #print $rec->as_formatted;
127 #print "# $id ",dump($data->{$id});
129 print $marc_fh $rec->as_usmarc;
# Progress indicator on STDOUT every 1000 records.
131 print "$number " if $number % 1000 == 0;
# Report the size of the written file.
135 print "$marc_file ",-s $marc_file, " bytes\n";