X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=mtoolkit%2Futf8-fix.pl;h=b818870817a7eb1b11162e3a935822145cd2ebbd;hb=02929a67d6018ca3e9ff8314e5117b81471d8215;hp=f78aa3cbb522ed9b981bab07621ddbbf1c0639d3;hpb=0a3f750c58612afecc5f9e43ea007e47f1cb0e52;p=eprints3-migration.git diff --git a/mtoolkit/utf8-fix.pl b/mtoolkit/utf8-fix.pl index f78aa3c..b818870 100755 --- a/mtoolkit/utf8-fix.pl +++ b/mtoolkit/utf8-fix.pl @@ -33,9 +33,17 @@ s/\xC3\x83\xC2\x83\xC3\xA2\xC2\x80\xC2\x99/V/gs && $stat->{'V fake'}++; s/\xC3\x83\xC2\x83\xC3\x85\xC2\xB8/ß/gs && $stat->{'ss'}++; s/$junk\xC3\x82\xC2\x8D/č/gs && $stat->{'cv'}++; +s/\xC3\x84\xC2\x8D/č/gs && $stat->{'cv 84'}++; + s/$junk\xC2\x80\xC2\xA1/ć/gs && $stat->{"c"}++; +# #Buri\xC3\x84\xC2\x87.pdf +s/\xC3\x84\xC2\x87/ć/gs && $stat->{'c C384C287'}++; + s/$junk\xC3\x82\xC2\xA1/š/gs && $stat->{'s'}++; +#Ma\xC3\x85\xC2\xA1a +s/\xC3\x85\xC2\xA1/š/gs && $stat->{'s C385C2'}++; s/$junk\xC2\x80\xC4\xA1/š/gs && $stat->{'s C4'}++; + s/$junk\xC3\x82\xC2\xBE/ž/gs && $stat->{'z'}++; s/$junk\xC2\x80\xC2\x98/đ/gs && $stat->{'d'}++; @@ -64,9 +72,12 @@ s/$junk\xC2\xAD/-/gs && $stat->{'-'}++; s/$junk\xC2\x80\xC2\x9C/-/gs && $stat->{'--'}++; s/$junk\xC2\x9D/-/gs && $stat->{'-- ?'}++; +s/$junk\xC3\x82\xC2\xA2/- /gs && $stat->{'dot'}++; + + my $e = $_; - if ( $e =~ m/([\xC0-\xC4][\x80-\xff]{4,8})/s ) { + if ( $e =~ m/([\xC0-\xC4][\x80-\xff]{3,99})/s ) { warn "XXX ", dump($e), "\n$e\n"; } print $e;