X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=misc%2Ftranslator%2Fxgettext.pl;h=2da236360f38ca0f502a4d16a5e622723f432962;hb=09ab9d4769a978769d9e0e17c8d83aaa6876aa13;hp=3d5d11b042f814f959b4778ca574923ccbe26811;hpb=fb1cfd3dd395ba68d2344449f44357263ef62d04;p=koha.git diff --git a/misc/translator/xgettext.pl b/misc/translator/xgettext.pl index 3d5d11b042..2da236360f 100755 --- a/misc/translator/xgettext.pl +++ b/misc/translator/xgettext.pl @@ -21,26 +21,44 @@ use vars qw( $charset_in $charset_out ); ############################################################################### -sub negligible_p ($) { +sub string_negligible_p ($) { my($t) = @_; # a string # Don't emit pure whitespace, pure numbers, pure punctuation, # single letters, or TMPL_VAR's. # Punctuation should arguably be translated. But without context - # they are untranslatable. + # they are untranslatable. Note that $t is a string, not a token object. return !$extract_all_p && ( - TmplTokenizer::blank_p($t) # blank or TMPL_VAR + TmplTokenizer::blank_p($t) # blank or TMPL_VAR || $t =~ /^\d+$/ # purely digits - || $t =~ /^[-\.,:;'"%\(\)\[\]\|]+$/ # pure punctuation w/o context + || $t =~ /^[-\+\.,:;!\?'"%\(\)\[\]\|]+$/ # punctuation w/o context || $t =~ /^[A-Za-z]$/ # single letters - ); + ) +} + +sub token_negligible_p( $ ) { + my($x) = @_; + my $t = $x->type; + return !$extract_all_p && ( + $t == TmplTokenType::TEXT? string_negligible_p( $x->string ): + $t == TmplTokenType::DIRECTIVE? 1: + $t == TmplTokenType::TEXT_PARAMETRIZED + && join( '', map { my $t = $_->type; + $t == TmplTokenType::DIRECTIVE? + '1': $t == TmplTokenType::TAG? + '': token_negligible_p( $_ )? + '': '1' } @{$x->children} ) eq '' ); } ############################################################################### sub remember ($$) { my($token, $string) = @_; - $text{$string} = [] unless defined $text{$string}; - push @{$text{$string}}, $token; + # If we determine that the string is negligible, don't bother to remember + unless (string_negligible_p( $string ) || token_negligible_p( $token )) { + my $key = TmplTokenizer::string_canon( $string ); + $text{$key} = [] unless defined $text{$key}; + push @{$text{$key}}, $token; + } } ############################################################################### @@ -69,10 +87,8 @@ sub text_extract (*) { last unless defined $s; my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes); if ($kind eq TmplTokenType::TEXT) { - #$t = TmplTokenizer::trim $t; remember( $s, $t ) if $t =~ /\S/s; } elsif ($kind eq TmplTokenType::TEXT_PARAMETRIZED) { - #$t = TmplTokenizer::trim $t; remember( $s, $s->form ) if $s->form =~ /\S/s; } elsif ($kind eq TmplTokenType::TAG && %$attr) { # value [tag=input], meta @@ -96,7 +112,7 @@ sub text_extract (*) { sub generate_strings_list () { # Emit all extracted strings. for my $t (string_list) { - printf OUTPUT "%s\n", $t unless negligible_p($t); + printf OUTPUT "%s\n", $t # unless negligible_p($t); } } @@ -127,7 +143,7 @@ msgstr "" EOF my $directory_re = quotemeta("$directory/"); for my $t (string_list) { - next if negligible_p($t); + #next if negligible_p($t); my $cformat_p; for my $token (@{$text{$t}}) { my $pathname = $token->pathname; @@ -138,6 +154,7 @@ EOF } printf OUTPUT "#, c-format\n" if $cformat_p; printf OUTPUT "msgid %s\n", TmplTokenizer::quote_po + TmplTokenizer::string_canon TmplTokenizer::charset_convert $t, $charset_in, $charset_out; printf OUTPUT "msgstr %s\n\n", (defined $translation{$t}? TmplTokenizer::quote_po( $translation{$t} ): "\"\""); @@ -316,17 +333,11 @@ the gettext format. =back -Right now it does about the same thing as text-extract2.pl but -generates gettext-style output; however, because it is scanner- -instead of parser-based, it is able to address the 4 weaknesses -listed in translator_doc.txt. Ultimately, the goal is to make -this able to do some kind of simple analysis on the input to -produce gettext-style output with c-format strings, in order to -facilitate translation to languages with a different word order -than English. - -When the above is finished, the generated po file may contain -some HTML tags in addition to %s strings. +Note that this script is experimental and should still be +considered unstable. + +Please refer to the explanation in tmpl_process3 for further +details. If you want to generate GNOME-style POTFILES.in files, such files (passed to -f) can be generated thus: