X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=misc%2Ftranslator%2Ftmpl_process3.pl;h=b543a91c6ded176e6d9ae97010dc6faaf87bc470;hb=278e916c25b3ff0a5441d131ef999ab0450b027d;hp=345d6fdcaff427086a8b19c03efc35f29b645b76;hpb=a399dcefad193fc21ef2dc1fe31d07686ab2da46;p=koha.git diff --git a/misc/translator/tmpl_process3.pl b/misc/translator/tmpl_process3.pl index 345d6fdcaf..b543a91c6d 100755 --- a/misc/translator/tmpl_process3.pl +++ b/misc/translator/tmpl_process3.pl @@ -6,7 +6,7 @@ =head1 NAME -tmpl_process3.pl - Experimental version of tmpl_process.pl +tmpl_process3.pl - Alternative version of tmpl_process.pl using gettext-compatible translation files =cut @@ -20,12 +20,12 @@ use VerboseWarnings qw( :warn :die ); ############################################################################### -use vars qw( @in_files $in_dir $str_file $out_dir ); +use vars qw( @in_files $in_dir $str_file $out_dir $quiet ); use vars qw( @excludes $exclude_regex ); use vars qw( $recursive_p ); use vars qw( $pedantic_p ); use vars qw( $href ); -use vars qw( $type ); # file extension (DOS form without the dot) to match +use vars qw( $type ); # file extension (DOS form without the dot) to match use vars qw( $charset_in $charset_out ); ############################################################################### @@ -34,14 +34,14 @@ sub find_translation ($) { my($s) = @_; my $key = $s; if ($s =~ /\S/s) { - $key = TmplTokenizer::string_canon($key); - $key = TmplTokenizer::charset_convert($key, $charset_in, $charset_out); - $key = TmplTokenizer::quote_po($key); + $key = TmplTokenizer::string_canon($key); + $key = TmplTokenizer::charset_convert($key, $charset_in, $charset_out); + $key = TmplTokenizer::quote_po($key); } return defined $href->{$key} - && !$href->{$key}->fuzzy - && length Locale::PO->dequote($href->{$key}->msgstr)? - Locale::PO->dequote($href->{$key}->msgstr): $s; + && !$href->{$key}->fuzzy + && length Locale::PO->dequote($href->{$key}->msgstr)? + Locale::PO->dequote($href->{$key}->msgstr): $s; } sub text_replace_tag ($$) { @@ -51,31 +51,31 @@ sub text_replace_tag ($$) { my $tag = lc($1) if $t =~ /^<(\S+)/s; my $translated_p = 0; for my $a ('alt', 'content', 'title', 'value') { - if ($attr->{$a}) { - next if $a eq 'content' && $tag ne 'meta'; - next if $a eq 'value' && ($tag ne 'input' - || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:hidden|radio)$/)); # FIXME - my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME - if ($val =~ /\S/s) { - my $s = find_translation($val); - if ($attr->{$a}->[1] ne $s) { #FIXME - $attr->{$a}->[1] = $s; # FIXME - $attr->{$a}->[2] = ($s =~ /"/s)? "'$s'": "\"$s\""; #FIXME - $translated_p = 1; - } - } - } + if ($attr->{$a}) { + next if $a eq 'content' && $tag ne 'meta'; + next if $a eq 'value' && ($tag ne 'input' + || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:hidden|radio|text|submit)$/)); # FIXME + my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME + if ($val =~ /\S/s) { + my $s = find_translation($val); + if ($attr->{$a}->[1] ne $s) { #FIXME + $attr->{$a}->[1] = $s; # FIXME + $attr->{$a}->[2] = ($s =~ /"/s)? "'$s'": "\"$s\""; #FIXME + $translated_p = 1; + } + } + } } if ($translated_p) { - $it = "<$tag" - . join('', map { - sprintf(' %s=%s', $_, $attr->{$_}->[2]) #FIXME - } sort { - $attr->{$a}->[3] <=> $attr->{$b}->[3] #FIXME - } keys %$attr) - . '>'; + $it = "<$tag" + . join('', map { + sprintf(' %s=%s', $_, $attr->{$_}->[2]) #FIXME + } sort { + $attr->{$a}->[3] <=> $attr->{$b}->[3] #FIXME + } keys %$attr) + . '>'; } else { - $it = $t; + $it = $t; } return $it; } @@ -83,50 +83,76 @@ sub text_replace_tag ($$) { sub text_replace (**) { my($h, $output) = @_; for (;;) { - my $s = TmplTokenizer::next_token $h; + my $s = TmplTokenizer::next_token $h; last unless defined $s; - my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes); - if ($kind eq TmplTokenType::TEXT) { - print $output find_translation($t); - } elsif ($kind eq TmplTokenType::TEXT_PARAMETRIZED) { - my $fmt = find_translation($s->form); - print $output TmplTokenizer::parametrize($fmt, [ map { - my($kind, $t, $attr) = ($_->type, $_->string, $_->attributes); - $kind == TmplTokenType::TAG && %$attr? - text_replace_tag($t, $attr): $t } $s->parameters ], [ $s->anchors ]); - } elsif ($kind eq TmplTokenType::TAG && %$attr) { - print $output text_replace_tag($t, $attr); - } elsif (defined $t) { - print $output $t; - } + my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes); + if ($kind eq TmplTokenType::TEXT) { + print $output find_translation($t); + } elsif ($kind eq TmplTokenType::TEXT_PARAMETRIZED) { + my $fmt = find_translation($s->form); + print $output TmplTokenizer::parametrize($fmt, 1, $s, sub { + $_ = $_[0]; + my($kind, $t, $attr) = ($_->type, $_->string, $_->attributes); + $kind == TmplTokenType::TAG && %$attr? + text_replace_tag($t, $attr): $t }); + } elsif ($kind eq TmplTokenType::TAG && %$attr) { + print $output text_replace_tag($t, $attr); + } elsif ($s->has_js_data) { + for my $t (@{$s->js_data}) { + # FIXME for this whole block + if ($t->[0]) { + printf $output "%s%s%s", $t->[2], find_translation $t->[3], + $t->[2]; + } else { + print $output $t->[1]; + } + } + } elsif (defined $t) { + print $output $t; + } } } -sub listfiles ($$) { - my($dir, $type) = @_; +sub listfiles ($$$) { + my($dir, $type, $action) = @_; my @it = (); if (opendir(DIR, $dir)) { - my @dirent = readdir DIR; # because DIR is shared when recursing - closedir DIR; - for my $dirent (@dirent) { - my $path = "$dir/$dirent"; - if ($dirent =~ /^\./ || $dirent eq 'CVS' || $dirent eq 'RCS' - || (defined $exclude_regex && $dirent =~ /^(?:$exclude_regex)$/)) { - ; - } elsif (-f $path) { - push @it, $path if !defined $type || $dirent =~ /\.(?:$type)$/; - } elsif (-d $path && $recursive_p) { - push @it, listfiles($path, $type); - } - } + my @dirent = readdir DIR; # because DIR is shared when recursing + closedir DIR; + for my $dirent (@dirent) { + my $path = "$dir/$dirent"; + if ($dirent =~ /^\./ || $dirent eq 'CVS' || $dirent eq 'RCS' + || (defined $exclude_regex && $dirent =~ /^(?:$exclude_regex)$/)) { + ; + } elsif (-f $path) { + push @it, $path if (!defined $type || $dirent =~ /\.(?:$type)$/) || $action eq 'install'; + } elsif (-d $path && $recursive_p) { + push @it, listfiles($path, $type, $action); + } + } } else { - warn_normal "$dir: $!", undef; + warn_normal "$dir: $!", undef; } return @it; } ############################################################################### +sub mkdir_recursive ($) { + my($dir) = @_; + local($`, $&, $', $1); + $dir = $` if $dir ne /^\/+$/ && $dir =~ /\/+$/; + my ($prefix, $basename) = ($dir =~ /\/([^\/]+)$/s)? ($`, $1): ('.', $dir); + mkdir_recursive($prefix) if $prefix ne '.' && !-d $prefix; + if (!-d $dir) { + print STDERR "Making directory $dir..." unless $quiet; + # creates with rwxrwxr-x permissions + mkdir($dir, 0775) || warn_normal "$dir: $!", undef; + } +} + +############################################################################### + sub usage ($) { my($exitcode) = @_; my $h = $exitcode? *STDERR: *STDOUT; @@ -147,33 +173,35 @@ Create or update PO files from templates, or install translated templates. for input (install) or output (create, update) -x, --exclude=REGEXP Exclude files matching the given REGEXP --help Display this help and exit + -q, --quiet no output to screen (except for errors) The -o option is ignored for the "create" and "update" actions. -Try `perldoc $0' for perhaps more information. +Try `perldoc $0 for perhaps more information. EOF exit($exitcode); -} +}#` ############################################################################### sub usage_error (;$) { for my $msg (split(/\n/, $_[0])) { - print STDERR "$msg\n"; + print STDERR "$msg\n"; } - print STDERR "Try `$0 --help' for more information.\n"; + print STDERR "Try `$0 --help for more information.\n"; exit(-1); } ############################################################################### GetOptions( - 'input|i=s' => \@in_files, - 'outputdir|o=s' => \$out_dir, - 'recursive|r' => \$recursive_p, - 'str-file|s=s' => \$str_file, - 'exclude|x=s' => \@excludes, - 'pedantic-warnings|pedantic' => sub { $pedantic_p = 1 }, - 'help' => \&usage, + 'input|i=s' => \@in_files, + 'outputdir|o=s' => \$out_dir, + 'recursive|r' => \$recursive_p, + 'str-file|s=s' => \$str_file, + 'exclude|x=s' => \@excludes, + 'quiet|q' => \$quiet, + 'pedantic-warnings|pedantic' => sub { $pedantic_p = 1 }, + 'help' => \&usage, ) || usage_error; VerboseWarnings::set_application_name $0; @@ -181,8 +209,8 @@ VerboseWarnings::set_pedantic_mode $pedantic_p; # keep the buggy Locale::PO quiet if it says stupid things $SIG{__WARN__} = sub { - my($s) = @_; - print STDERR $s unless $s =~ /^Strange line in [^:]+: #~/s + my($s) = @_; + print STDERR $s unless $s =~ /^Strange line in [^:]+: #~/s }; my $action = shift or usage_error('You must specify an ACTION.'); @@ -190,13 +218,13 @@ usage_error('You must at least specify input and string list filenames.') if !@in_files || !defined $str_file; # Type match defaults to *.tmpl plus *.inc if not specified -$type = "tmpl|inc" if !defined($type); +$type = "tmpl|inc|js|xsl|def" if !defined($type); # Check the inputs for being files or directories for my $input (@in_files) { usage_error("$input: Input must be a file or directory.\n" - . "(Symbolic links are not supported at the moment)") - unless -d $input || -f $input;; + . "(Symbolic links are not supported at the moment)") + unless -d $input || -f $input;; } # Generates the global exclude regular expression @@ -205,16 +233,16 @@ $exclude_regex = '(?:'.join('|', @excludes).')' if @excludes; # Generate the list of input files if a directory is specified if (-d $in_files[0]) { die "If you specify a directory as input, you must specify only it.\n" - if @in_files > 1; + if @in_files > 1; # input is a directory, generates list of files to process $in_dir = $in_files[0]; $in_dir =~ s/\/$//; # strips the trailing / if any - @in_files = listfiles($in_dir, $type); + @in_files = listfiles($in_dir, $type, $action); } else { for my $input (@in_files) { - die "You cannot specify input files and directories at the same time.\n" - unless -f $input; + die "You cannot specify input files and directories at the same time.\n" + unless -f $input; } } @@ -223,44 +251,79 @@ $href = Locale::PO->load_file_ashash($str_file); # guess the charsets. HTML::Templates defaults to iso-8859-1 if (defined $href) { - die "$str_file: PO file is corrupted, or not a PO file\n" - unless defined $href->{'""'}; - $charset_out = TmplTokenizer::charset_canon $2 - if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/; - for my $msgid (keys %$href) { - if ($msgid =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/) { - my $candidate = TmplTokenizer::charset_canon $2; - die "Conflicting charsets in msgid: $charset_in vs $candidate\n" - if defined $charset_in && $charset_in ne $candidate; - $charset_in = $candidate; - } - } + die "$str_file: PO file is corrupted, or not a PO file\n" unless defined $href->{'""'}; + $charset_out = TmplTokenizer::charset_canon $2 if $href->{'""'}->msgstr =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/; + $charset_in = $charset_out; + warn "Charset in/out: ".$charset_out; +# for my $msgid (keys %$href) { +# if ($msgid =~ /\bcharset=(["']?)([^;\s"'\\]+)\1/) { +# my $candidate = TmplTokenizer::charset_canon $2; +# die "Conflicting charsets in msgid: $charset_in vs $candidate => $msgid\n" +# if defined $charset_in && $charset_in ne $candidate; +# $charset_in = $candidate; +# } +# } } + +# set our charset in to UTF-8 if (!defined $charset_in) { - $charset_in = TmplTokenizer::charset_canon 'iso8859-1'; + $charset_in = TmplTokenizer::charset_canon 'UTF-8'; warn "Warning: Can't determine original templates' charset, defaulting to $charset_in\n"; } - -my $xgettext = './xgettext.pl'; # actual text extractor script +# set our charset out to UTF-8 +if (!defined $charset_out) { + $charset_out = TmplTokenizer::charset_canon 'UTF-8'; + warn "Warning: Charset Out defaulting to $charset_out\n"; +} +my $xgettext = './xgettext.pl'; # actual text extractor script my $st; if ($action eq 'create') { # updates the list. As the list is empty, every entry will be added if (!-s $str_file) { - warn "Removing empty file $str_file\n"; - unlink $str_file || die "$str_file: $!\n"; + warn "Removing empty file $str_file\n"; + unlink $str_file || die "$str_file: $!\n"; } die "$str_file: Output file already exists\n" if -f $str_file; - my($tmph, $tmpfile) = tmpnam(); + my($tmph1, $tmpfile1) = tmpnam(); + my($tmph2, $tmpfile2) = tmpnam(); + close $tmph2; # We just want a name # Generate the temporary file that acts as /POTFILES.in for my $input (@in_files) { - print $tmph "$input\n"; + print $tmph1 "$input\n"; } - close $tmph; + close $tmph1; + warn "I $charset_in O $charset_out"; # Generate the specified po file ($str_file) - $st = system ($xgettext, '-s', '-f', $tmpfile, '-o', $str_file); - warn_normal "Text extraction failed: $xgettext: $!\n", undef if $st != 0; -# unlink $tmpfile || warn_normal "$tmpfile: unlink failed: $!\n", undef; + $st = system ($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2, + (defined $charset_in? ('-I', $charset_in): ()), + (defined $charset_out? ('-O', $charset_out): ()) + ); + # Run msgmerge so that the pot file looks like a real pot file + # We need to help msgmerge a bit by pre-creating a dummy po file that has + # the headers and the "" msgid & msgstr. It will fill in the rest. + if ($st == 0) { + # Merge the temporary "pot file" with the specified po file ($str_file) + # FIXME: msgmerge(1) is a Unix dependency + # FIXME: need to check the return value + unless (-f $str_file) { + local(*INPUT, *OUTPUT); + open(INPUT, "<$tmpfile2"); + open(OUTPUT, ">$str_file"); + while () { + print OUTPUT; + last if /^\n/s; + } + close INPUT; + close OUTPUT; + } + $st = system('msgmerge', '-U', '-s', $str_file, $tmpfile2); + } else { + error_normal "Text extraction failed: $xgettext: $!\n", undef; + error_additional "Will not run msgmerge\n", undef; + } +# unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef; +# unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef; } elsif ($action eq 'update') { my($tmph1, $tmpfile1) = tmpnam(); @@ -268,34 +331,34 @@ if ($action eq 'create') { close $tmph2; # We just want a name # Generate the temporary file that acts as /POTFILES.in for my $input (@in_files) { - print $tmph1 "$input\n"; + print $tmph1 "$input\n"; } close $tmph1; # Generate the temporary file that acts as /.pot $st = system($xgettext, '-s', '-f', $tmpfile1, '-o', $tmpfile2, - '--po-mode', - (defined $charset_in? ('-I', $charset_in): ()), - (defined $charset_out? ('-O', $charset_out): ())); + '--po-mode', + (defined $charset_in? ('-I', $charset_in): ()), + (defined $charset_out? ('-O', $charset_out): ())); if ($st == 0) { - # Merge the temporary "pot file" with the specified po file ($str_file) - # FIXME: msgmerge(1) is a Unix dependency - # FIXME: need to check the return value - $st = system('msgmerge', '-U', '-s', $str_file, $tmpfile2); + # Merge the temporary "pot file" with the specified po file ($str_file) + # FIXME: msgmerge(1) is a Unix dependency + # FIXME: need to check the return value + $st = system('msgmerge', '-U', '-s', $str_file, $tmpfile2); } else { - error_normal "Text extraction failed: $xgettext: $!\n", undef; - error_additional "Will not run msgmerge\n", undef; + error_normal "Text extraction failed: $xgettext: $!\n", undef; + error_additional "Will not run msgmerge\n", undef; } # unlink $tmpfile1 || warn_normal "$tmpfile1: unlink failed: $!\n", undef; # unlink $tmpfile2 || warn_normal "$tmpfile2: unlink failed: $!\n", undef; } elsif ($action eq 'install') { if(!defined($out_dir)) { - usage_error("You must specify an output directory when using the install method."); + usage_error("You must specify an output directory when using the install method."); } - + if ($in_dir eq $out_dir) { - warn "You must specify a different input and output directory.\n"; - exit -1; + warn "You must specify a different input and output directory.\n"; + exit -1; } # Make sure the output directory exists @@ -308,24 +371,29 @@ if ($action eq 'create') { # creates the new tmpl file using the new translation for my $input (@in_files) { - die "Assertion failed" - unless substr($input, 0, length($in_dir) + 1) eq "$in_dir/"; - - my $h = TmplTokenizer->new( $input ); - $h->set_allow_cformat( 1 ); - VerboseWarnings::set_input_file_name $input; - - my $target = $out_dir . substr($input, length($in_dir)); - my $targetdir = $` if $target =~ /[^\/]+$/s; - if (!-d $targetdir) { - print STDERR "Making directory $targetdir..."; - # creates with rwxrwxr-x permissions - mkdir($targetdir, 0775) || warn_normal "$targetdir: $!", undef; - } - print STDERR "Creating $target...\n"; - open( OUTPUT, ">$target" ) || die "$target: $!\n"; - text_replace( $h, *OUTPUT ); - close OUTPUT; + die "Assertion failed" + unless substr($input, 0, length($in_dir) + 1) eq "$in_dir/"; +# print "$input / $type\n"; + if (!defined $type || $input =~ /\.(?:$type)$/) { + my $h = TmplTokenizer->new( $input ); + $h->set_allow_cformat( 1 ); + VerboseWarnings::set_input_file_name $input; + + my $target = $out_dir . substr($input, length($in_dir)); + my $targetdir = $` if $target =~ /[^\/]+$/s; + mkdir_recursive($targetdir) unless -d $targetdir; + print STDERR "Creating $target...\n" unless $quiet; + open( OUTPUT, ">$target" ) || die "$target: $!\n"; + text_replace( $h, *OUTPUT ); + close OUTPUT; + } else { + # just copying the file + my $target = $out_dir . substr($input, length($in_dir)); + my $targetdir = $` if $target =~ /[^\/]+$/s; + mkdir_recursive($targetdir) unless -d $targetdir; + system("cp -f $input $target"); + print STDERR "Copying $input...\n" unless $quiet; + } } } else { @@ -333,10 +401,9 @@ if ($action eq 'create') { } if ($st == 0) { - printf "The %s seems to be successful, with %d warning(s).\n", - $action, VerboseWarnings::warned; + printf "The %s seems to be successful.\n", $action unless $quiet; } else { - printf "%s FAILED.\n", "\u$action"; + printf "%s FAILED.\n", "\u$action" unless $quiet; } exit 0; @@ -348,27 +415,85 @@ exit 0; =head1 DESCRIPTION -This is an experimental version of the tmpl_process.pl script, -using standard gettext-style PO files. Note that the behaviour -of this script should still be considered unstable. +This is an alternative version of the tmpl_process.pl script, +using standard gettext-style PO files. While there still might +be changes made to the way it extracts strings, at this moment +it should be stable enough for general use; it is already being +used for the Chinese and Polish translations. Currently, the create, update, and install actions have all been reimplemented and seem to work. +=head2 Features + +=over + +=item - + +Translation files in standard Uniforum PO format. +All standard tools including all gettext tools, +plus PO file editors like kbabel(1) etc. +can be used. + +=item - + +Minor changes in whitespace in source templates +do not generally require strings to be re-translated. + +=item - + +Able to handle variables in the templates; + variables are usually extracted in proper context, +represented by a short %s placeholder. + +=item - + +Able to handle text input and radio button INPUT elements +in the templates; these INPUT elements are also usually +extracted in proper context, +represented by a short %S or %p placeholder. + +=item - + +Automatic comments in the generated PO files to provide +even more context (line numbers, and the names and types +of the variables). + +=item - + +The %I$s (or %I$p, etc.) notation can be used +for change the ordering of the variables, +if such a reordering is required for correct translation. + +=item - + +If a particular should not appear in the +translation, it can be suppressed with the %0.0s notation. + +=item - + +Using the PO format also means translators can add their +own comments in the translation files, if necessary. + +=item - + +Create, update, and install actions are all based on the +same scanner module. This ensures that update and install +have the same idea of what is a translatable string; +attribute names in tags, for example, will not be +accidentally translated. + +=back + +=head1 NOTES + +Anchors are represented by an > notation. +The meaning of this non-standard notation might not be obvious. + The create action calls xgettext.pl to do the actual work; the update action calls xgettext.pl and msgmerge(1) to do the actual work. -The script can detect directives embedded inside what -appears to be a full sentence (this actual work being done by -TmplTokenizer(3)); these larger patterns appear in the translation -file as c-format strings with %s. - -Whitespace in extracted strings are folded to single blanks, in -order to prevent new strings from appearing when minor changes in -the original templates occur, and to prevent overly difficult to -read strings in the PO file. - =head1 BUGS xgettext.pl must be present in the current directory; the @@ -381,14 +506,19 @@ generate GNU PO files properly; a couple of workarounds have been written in TmplTokenizer and more is likely to be needed (e.g., to get rid of the "Strange line" warning for #~). +This script may not work in Windows. + There are probably some other bugs too, since this has not been tested very much. =head1 SEE ALSO xgettext.pl, +TmplTokenizer.pm, msgmerge(1), Locale::PO(3), translator_doc.txt +http://www.saas.nsw.edu.au/koha_wiki/index.php?page=DifficultTerms + =cut