recognize utf-8 bom marker and force encoding
author: Dobrica Pavlinusic <dpavlin@rot13.org>
Sat, 5 Sep 2015 18:35:54 +0000 (20:35 +0200)
committer: Dobrica Pavlinusic <dpavlin@rot13.org>
Sat, 5 Sep 2015 18:37:02 +0000 (20:37 +0200)
lib/WebPAC/Input/ISI.pm

index 1a9a2de..db35940 100644 (file)
@@ -87,8 +87,12 @@ sub new {
 
        my $line = <$fh>;
        chomp($line);
-       if ( $line =~ /^FN\s(.+)$/) {
-               $format = $1;
+       if ( $line =~ /^(\xEF\xBB\xBF)FN\s(.+)$/) {
+               $format = $2;
+               if ( defined $1 && $self->{encoding} !~ m/utf-8/ ) {
+                       warn "E: file ", $self->{path}, " is in utf-8 encoding, but config is ", $self->{encoding}, " forcing utf-8\n";
+                       $self->{encoding} = 'utf-8';
+               }
        } else {
                die "first line of $arg->{path} has to be FN, but is: $line";
        }