7 use Data::Dump qw(dump);
12 sub vc_casefix { # partial reverse-engeenered implementation of case mapping
13 my ( $tag, $sfi, $vc, $k, $rest ) = @_;
15 $vc .= " " x (4 - length($vc));
17 $k =~ tr/^~]}\\|[{@`/ČčĆćĐ𩹮ž/; # CROASCII (YUS|HRN) B1.002:1982
19 my @m = map { ord($_) } split(//, $vc);
21 my $mask = ( $m[3] & 0b00001111 ) << 4 | ( $m[2] & 0b00001111 );
25 my @chars = split(//, $k);
26 foreach my $c ( @chars ) {
27 die "key [$k]" unless defined $c;
28 $fixed .= $mask & $sel ? uc($c) : lc($c);
32 if ( substr($vc,0,1) eq 'x' || ( $tag =~ m/^(200|210|225|700|701|702|710|711|712)$/ ) ) { #&& $sfi eq '$a' )) {
33 $fixed = ucfirst $fixed;
36 if ( $rest && length($fixed) < 8 ) {
37 # warn "# padding $fixed|$rest";
38 $fixed .= " " x ( 8 - length($fixed) );
41 return $fixed . $rest;
45 my ($file,$parse) = @_;
47 print STDERR "# reading $file ";
50 open(my $fh, '<:encoding(UTF-8)', $file);
51 my $h = <$fh>; # header
55 # tr/^~]}\\|[{@`/ČčĆćĐ𩹮ž/; # CROASCII (YUS|HRN) B1.002:1982
57 my ( $idsl, $tag, $tagno, $id, $sfi, $sfino, $text ) = $parse->($_);
61 print STDERR "\nSKIP $file +$lines [$_] " if $idsl;
68 $id .= " " if length $id < 2;
69 $id .= " " if length $id < 2;
70 my ($i1, $i2) = split(//, $id, 2);
72 $sfi =~ s/^\$// || die "can't fix subfield [$sfi]";
74 $text =~ tr/^~]}\\|[{@`/ČčĆćĐ𩹮ž/; # CROASCII (YUS|HRN) B1.002:1982
76 $data->{$idsl}->{$tag}->[ $tagno ]->[ 0 ] = $i1;
77 $data->{$idsl}->{$tag}->[ $tagno ]->[ 1 ] = $i2;
78 $data->{$idsl}->{$tag}->[ $tagno ]->[ ( $sfino * 2 ) + 2 ] = $sfi;
79 $data->{$idsl}->{$tag}->[ $tagno ]->[ ( $sfino * 2 ) + 3 ] = $text;
81 print STDERR "$lines " if $lines % 1000 == 0;
86 csv_file( 'tsv/TEKTAG.csv', sub {
89 my ( $idsl, $tag, $tagno, $id, $sfi, $sfino, $textkey, $textres, $vc ) = split(/\t/,$_);
91 my $text = vc_casefix( $tag, $sfi, $vc, $textkey, $textres );
93 return ( $idsl, $tag, $tagno, $id, $sfi, $sfino, $text );
96 csv_file( 'tsv/NUMTAG.csv', sub {
99 my ( $IDSL, $TAG, $TAGNO, $ID, $SFI, $SFINO, $LTAG, $LTAGNO, $LID, $LSFI, $TXKEY, $TXRES, $VC, $STOPW ) = split(/\t/,$_);
101 my $text = vc_casefix( $TAG, $SFI, $VC, $TXKEY, $TXRES );
103 return ( $IDSL, $TAG, $TAGNO, $ID, $SFI, $SFINO, $text );
106 csv_file( 'tsv/LONTAG.csv', sub {
109 my ( $idsl, $tag, $tagno, $id, $sfi, $sfino, $text ) = split(/\t/,$_, 7);
112 $text =~ s/\t/\n/g; # join OPIS[1-11]
114 return ( $idsl, $tag, $tagno, $id, $sfi, $sfino, $text );
117 csv_file( 'tsv/IDNTAG.csv', sub {
120 my ( $idsl, $tag, $STSL, $KZVS, $BIBRAZ, $HIRAZ, $KPS, $OKO ) = split(/\t/,$_);
125 $leader[7] = $BIBRAZ;
130 $leader[23] = ' '; # last char;
132 my $full = join('', map { defined $_ ? $_ : ' ' } @leader);
133 $data->{$idsl}->{'leader'} = $full;
137 csv_file( 'tsv/OBRTAG.csv', sub {
140 my ( $IDSL, $TAG, $TAGNO, $ID, $SFI, $SFINO, $CODINF ) = split(/\t/,$_);
141 return ( $IDSL, $TAG, $TAGNO, $ID, $SFI, $SFINO, $CODINF );
144 print STDERR "\n# getting all ids ";
145 my @ids = sort keys %$data;
146 print STDERR scalar(@ids), " found\n";
148 my $marc_file = 'liberated.marc';
149 open(my $marc_fh, '>:encoding(UTF-8)', $marc_file);
152 foreach my $id ( @ids ) {
153 my $rec = MARC::Record->new;
154 $rec->encoding( 'UTF-8' );
155 $rec->add_fields( [ '001', $id ] );
157 foreach my $field ( sort keys %{ $data->{$id} } ) {
158 if ( $field eq 'leader' ) {
159 $rec->leader( $data->{$id}->{$field} );
162 foreach my $arr ( @{ $data->{$id}->{$field} } ) {
164 # print STDERR "SKIPPED $id $field ",dump( $data->{$id}->{$field} ), "\n";
167 $arr = [ $arr->[3] ] if ( $field < 010 ); # control fields don't have idicators or subfields
168 $rec->add_fields( $field, @$arr );
172 #print $rec->as_formatted;
173 #print "# $id ",dump($data->{$id});
175 print $marc_fh $rec->as_usmarc;
177 print "$number " if $number % 1000 == 0;
181 print "$marc_file ",-s $marc_file, " bytes\n";