Various corrections

[koha.git] / misc / translator / tmpl_process3.pl
diff --git a/misc/translator/tmpl_process3.pl b/misc/translator/tmpl_process3.pl

index 4653c1a..e76a781 100755 (executable)
--- a/misc/translator/tmpl_process3.pl
+++ b/misc/translator/tmpl_process3.pl
@@ -6,7 +6,7 @@
  
  =head1 NAME
  
-tmpl_process3.pl - Experimental version of tmpl_process.pl
+tmpl_process3.pl - Alternative version of tmpl_process.pl
  using gettext-compatible translation files
  
  =cut
@@ -16,7 +16,7 @@ use Getopt::Long;
  use Locale::PO;
  use File::Temp qw( :POSIX );
  use TmplTokenizer;
-use VerboseWarnings qw( error_normal warn_normal );
+use VerboseWarnings qw( :warn :die );
  
  ###############################################################################
  
@@ -26,12 +26,18 @@ use vars qw( $recursive_p );
  use vars qw( $pedantic_p );
  use vars qw( $href );
  use vars qw( $type );  # file extension (DOS form without the dot) to match
+use vars qw( $charset_in $charset_out );
  
  ###############################################################################
  
  sub find_translation ($) {
      my($s) = @_;
-    my $key = TmplTokenizer::quote_po($s) if $s =~ /\S/;
+    my $key = $s;
+    if ($s =~ /\S/s) {
+       $key = TmplTokenizer::string_canon($key);
+       $key = TmplTokenizer::charset_convert($key, $charset_in, $charset_out);
+       $key = TmplTokenizer::quote_po($key);
+    }
      return defined $href->{$key}
                 && !$href->{$key}->fuzzy
                 && length Locale::PO->dequote($href->{$key}->msgstr)?
@@ -48,11 +54,10 @@ sub text_replace_tag ($$) {
         if ($attr->{$a}) {
             next if $a eq 'content' && $tag ne 'meta';
             next if $a eq 'value' && ($tag ne 'input'
-               || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:hidden|radio)$/)); # FIXME
+               || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:hidden|radio|text)$/)); # FIXME
             my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
-           my($pre, $trimmed, $post) = TmplTokenizer::trim $val;
             if ($val =~ /\S/s) {
-               my $s = $pre . find_translation($trimmed) . $post;
+               my $s = find_translation($val);
                 if ($attr->{$a}->[1] ne $s) { #FIXME
                     $attr->{$a}->[1] = $s; # FIXME
                     $attr->{$a}->[2] = ($s =~ /"/s)? "'$s'": "\"$s\""; #FIXME
@@ -82,23 +87,32 @@ sub text_replace (**) {
      last unless defined $s;
         my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
         if ($kind eq TmplTokenType::TEXT) {
-           my($pre, $trimmed, $post) = TmplTokenizer::trim $t;
-           print $output $pre, find_translation($trimmed), $post;
+           print $output find_translation($t);
         } elsif ($kind eq TmplTokenType::TEXT_PARAMETRIZED) {
             my $fmt = find_translation($s->form);
-           print $output TmplTokenizer::parametrize($fmt, map {
+           print $output TmplTokenizer::parametrize($fmt, 1, $s, sub {
+               $_ = $_[0];
                 my($kind, $t, $attr) = ($_->type, $_->string, $_->attributes);
                 $kind == TmplTokenType::TAG && %$attr?
-                   text_replace_tag($t, $attr): $t } $s->parameters);
+                   text_replace_tag($t, $attr): $t });
         } elsif ($kind eq TmplTokenType::TAG && %$attr) {
             print $output text_replace_tag($t, $attr);
+       } elsif ($s->has_js_data) {
+           for my $t (@{$s->js_data}) {
+               # FIXME for this whole block
+               if ($t->[0]) {
+                   printf $output "%s%s%s", $t->[2], find_translation $t->[3],
+                           $t->[2];
+               } else {
+                   print $output $t->[1];
+               }
+           }
         } elsif (defined $t) {
             print $output $t;
         }
      }
  }
  
-# FIXME: Should we use the GNOME convention of using POTFILES.in instead?
  sub listfiles ($$) {
      my($dir, $type) = @_;
      my @it = ();
@@ -124,6 +138,35 @@ sub listfiles ($$) {
  
  ###############################################################################
  
+sub usage ($) {
+    my($exitcode) = @_;
+    my $h = $exitcode? *STDERR: *STDOUT;
+    print $h <<EOF;
+Usage: $0 create [OPTION]
+  or:  $0 update [OPTION]
+  or:  $0 install [OPTION]
+  or:  $0 --help
+Create or update PO files from templates, or install translated templates.
+
+  -i, --input=SOURCE          Get or update strings from SOURCE file.
+                              SOURCE is a directory if -r is also specified.
+  -o, --outputdir=DIRECTORY   Install translation(s) to specified DIRECTORY
+      --pedantic-warnings     Issue warnings even for detected problems
+                              which are likely to be harmless
+  -r, --recursive             SOURCE in the -i option is a directory
+  -s, --str-file=FILE         Specify FILE as the translation (po) file
+                              for input (install) or output (create, update)
+  -x, --exclude=REGEXP        Exclude files matching the given REGEXP
+      --help                  Display this help and exit
+
+The -o option is ignored for the "create" and "update" actions.
+Try `perldoc $0' for perhaps more information.
+EOF
+    exit($exitcode);
+}
+
+###############################################################################
+
  sub usage_error (;$) {
      for my $msg (split(/\n/, $_[0])) {
         print STDERR "$msg\n";
@@ -141,11 +184,18 @@ GetOptions(
      'str-file|s=s'                     => \$str_file,
      'exclude|x=s'                      => \@excludes,
      'pedantic-warnings|pedantic'       => sub { $pedantic_p = 1 },
+    'help'                             => \&usage,
  ) || usage_error;
  
  VerboseWarnings::set_application_name $0;
  VerboseWarnings::set_pedantic_mode $pedantic_p;
  
+# keep the buggy Locale::PO quiet if it says stupid things
+$SIG{__WARN__} = sub {
+       my($s) = @_;
+       print STDERR $s unless $s =~ /^Strange line in [^:]+: #~/s
+    };
+
  my $action = shift or usage_error('You must specify an ACTION.');
  usage_error('You must at least specify input and string list filenames.')
      if !@in_files || !defined $str_file;
@@ -179,29 +229,75 @@ if (-d $in_files[0]) {
      }
  }
  
+# restores the string list from file
+$href = Locale::PO->load_file_ashash($str_file);
+
+# guess the charsets. HTML::Template defaults to iso-8859-1
+if (defined $href) {
+    die "$str_file: PO file is corrupted, or not a PO file\n"
+           unless defined $href->{'""'};
+    $charset_out = TmplTokenizer::charset_canon $2
+           if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/;
+    for my $msgid (keys %$href) {
+       if ($msgid =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/) {
+           my $candidate = TmplTokenizer::charset_canon $2;
+           die "Conflicting charsets in msgid: $charset_in vs $candidate\n"
+                   if defined $charset_in && $charset_in ne $candidate;
+           $charset_in = $candidate;
+       }
+    }
+}
+if (!defined $charset_in) {
+    $charset_in = TmplTokenizer::charset_canon 'iso8859-1';
+    warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n";
+}
+
+my $xgettext = './xgettext.pl';        # actual text extractor script
+my $st;
+
  if ($action eq 'create')  {
      # updates the list. As the list is empty, every entry will be added
-    die "$str_file: Output file already exists" if -f $str_file;
+    if (!-s $str_file) {
+       warn "Removing empty file $str_file\n";
+       unlink $str_file || die "$str_file: $!\n";
+    }
+    die "$str_file: Output file already exists\n" if -f $str_file;
      my($tmph, $tmpfile) = tmpnam();
+    # Generate the temporary file that acts as <MODULE>/POTFILES.in
      for my $input (@in_files) {
         print $tmph "$input\n";
      }
      close $tmph;
-    system {'./xgettext.pl'} ('xgettext.pl', '-s', '-f', $tmpfile, '-o', $str_file);
-    unlink $tmpfile || warn_normal "$tmpfile: unlink failed: $!\n", undef;
+    # Generate the specified po file ($str_file)
+    $st = system ($xgettext, '-s', '-f', $tmpfile, '-o', $str_file);
+    warn_normal "Text extraction failed: $xgettext: $!\n", undef if $st != 0;
+#   unlink $tmpfile || warn_normal "$tmpfile: unlink failed: $!\n", undef;
  
  } elsif ($action eq 'update') {
      my($tmph1, $tmpfile1) = tmpnam();
      my($tmph2, $tmpfile2) = tmpnam();
      close $tmph2; # We just want a name
+    # Generate the temporary file that acts as <MODULE>/POTFILES.in
      for my $input (@in_files) {
         print $tmph1 "$input\n";
      }
      close $tmph1;
-    system('./xgettext.pl', '-s', '-f', $tmpfile1, '-o', $tmpfile2);
-    system('msgmerge', '-U', '-s', $str_file, $tmpfile2);
-    unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
-    unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef;
+    # Generate the temporary file that acts as <MODULE>/<LANG>.pot
+    $st = system($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2,
+           '--po-mode',
+           (defined $charset_in? ('-I', $charset_in): ()),
+           (defined $charset_out? ('-O', $charset_out): ()));
+    if ($st == 0) {
+       # Merge the temporary "pot file" with the specified po file ($str_file)
+       # FIXME: msgmerge(1) is a Unix dependency
+       # FIXME: need to check the return value
+       $st = system('msgmerge', '-U', '-s', $str_file, $tmpfile2);
+    } else {
+       error_normal "Text extraction failed: $xgettext: $!\n", undef;
+       error_additional "Will not run msgmerge\n", undef;
+    }
+#   unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef;
+#   unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef;
  
  } elsif ($action eq 'install') {
      if(!defined($out_dir)) {
@@ -221,9 +317,6 @@ if ($action eq 'create')  {
      open(INPUT, "<$str_file") || die "$str_file: $!\n";
      close INPUT;
  
-    # restores the string list from file
-    $href = Locale::PO->load_file_ashash($str_file);
-
      # creates the new tmpl file using the new translation
      for my $input (@in_files) {
         die "Assertion failed"
@@ -249,6 +342,12 @@ if ($action eq 'create')  {
  } else {
      usage_error('Unknown action specified.');
  }
+
+if ($st == 0) {
+    printf "The %s seems to be successful.\n", $action;
+} else {
+    printf "%s FAILED.\n", "\u$action";
+}
  exit 0;
  
  ###############################################################################
@@ -259,28 +358,87 @@ exit 0;
  
  =head1 DESCRIPTION
  
-This is an experimental version of the tmpl_process.pl script,
-using standard gettext-style PO files.
+This is an alternative version of the tmpl_process.pl script,
+using standard gettext-style PO files.  While there still might
+be changes made to the way it extracts strings, at this moment
+it should be stable enough for general use; it is already being
+used for the Chinese and Polish translations.
  
  Currently, the create, update, and install actions have all been
  reimplemented and seem to work.
  
+=head2 Features
+
+=over
+
+=item -
+
+Translation files in standard Uniforum PO format.
+All standard tools including all gettext tools,
+plus PO file editors like kbabel(1) etc.
+can be used.
+
+=item -
+
+Minor changes in whitespace in source templates
+do not generally require strings to be re-translated.
+
+=item -
+
+Able to handle <TMPL_VAR> variables in the templates;
+<TMPL_VAR> variables are usually extracted in proper context,
+represented by a short %s placeholder.
+
+=item -
+
+Able to handle text input and radio button INPUT elements
+in the templates; these INPUT elements are also usually
+extracted in proper context,
+represented by a short %S or %p placeholder.
+
+=item -
+
+Automatic comments in the generated PO files to provide
+even more context (line numbers, and the names and types
+of the variables).
+
+=item -
+
+The %I<n>$s (or %I<n>$p, etc.) notation can be used
+to change the ordering of the variables,
+if such a reordering is required for correct translation.
+
+=item -
+
+If a particular <TMPL_VAR> should not appear in the
+translation, it can be suppressed with the %0.0s notation.
+
+=item -
+
+Using the PO format also means translators can add their
+own comments in the translation files, if necessary.
+
+=item -
+
+Create, update, and install actions are all based on the
+same scanner module. This ensures that update and install
+have the same idea of what is a translatable string;
+attribute names in tags, for example, will not be
+accidentally translated.
+
+=back
+
+=head1 NOTES
+
+Anchors are represented by an <AI<n>> notation.
+The meaning of this non-standard notation might not be obvious.
+
  The create action calls xgettext.pl to do the actual work;
  the update action calls xgettext.pl and msgmerge(1) to do the
  actual work.
  
-The script can detect <TMPL_VAR> directives embedded inside what
-appears to be a full sentence (this actual work being done by
-TmplTokenizer(3)); these larger patterns appear in the translation
-file as c-format strings with %s.
-
  =head1 BUGS
  
-The --help option has not been implemented yet.
-
-There are probably some real bugs too, since this has not been
-tested very much.
-
  xgettext.pl must be present in the current directory; the
  msgmerge(1) command must also be present in the search path.
  The script currently does not check carefully whether these
@@ -291,11 +449,19 @@ generate GNU PO files properly; a couple of workarounds have
  been written in TmplTokenizer and more is likely to be needed
  (e.g., to get rid of the "Strange line" warning for #~).
  
+This script may not work on Windows.
+
+There are probably some other bugs too, since this has not been
+tested very much.
+
  =head1 SEE ALSO
  
  xgettext.pl,
+TmplTokenizer.pm,
  msgmerge(1),
  Locale::PO(3),
  translator_doc.txt
  
+http://www.saas.nsw.edu.au/koha_wiki/index.php?page=DifficultTerms
+
  =cut