X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=misc%2Ftranslator%2Fxgettext.pl;h=0343ed29c23103a9cdaff9e538ae8395b04eb23e;hb=cd81bdc6a0b0250d97cbb5c49bcb90290d29e34a;hp=abe315e5d7f1e3856ef342eeb1c37e9040a0ff7d;hpb=2ea6fdec156419fbbdd5f64da927d0b2b45e751c;p=koha.git

diff --git a/misc/translator/xgettext.pl b/misc/translator/xgettext.pl
index abe315e5d7..0343ed29c2 100755
--- a/misc/translator/xgettext.pl
+++ b/misc/translator/xgettext.pl
@@ -2,10 +2,13 @@
 
 =head1 NAME
 
-xgettext.pl - xgettext(1)-like interface for .tmpl strings extraction
+xgettext.pl - xgettext(1)-like interface for .tt strings extraction
 
 =cut
 
+use FindBin;
+use lib $FindBin::Bin;
+
 use strict;
 use warnings;
 use Getopt::Long;
@@ -24,9 +27,11 @@ use vars qw( $disable_fuzzy_p );
 use vars qw( $verbose_p );
 use vars qw( $po_mode_p );
 
+our $OUTPUT;
+
 ###############################################################################
 
-sub string_negligible_p ($) {
+sub string_negligible_p {
     my($t) = @_;				# a string
     # Don't emit pure whitespace, pure numbers, pure punctuation,
     # single letters, or TMPL_VAR's.
@@ -37,38 +42,46 @@ sub string_negligible_p ($) {
 	    || $t =~ /^\d+$/			# purely digits
 	    || $t =~ /^[-\+\.,:;!\?'"%\(\)\[\]\|]+$/ # punctuation w/o context
 	    || $t =~ /^[A-Za-z]$/		# single letters
+            || $t =~ /^(&[a-z]+;|&#\d+;|&#x[0-9a-fA-F]+;|%%|%s|\s|[[:punct:]])*$/ # html entities,placeholder,punct, ...
+        || ( $t =~ /^\[\%.*\%\]$/ and $t !~ /\%\].*\[\%/ )    # pure TT entities
 	)
 }
 
-sub token_negligible_p( $ ) {
-    my($x) = @_;
+sub token_negligible_p {
+    my ($x) = @_;
     my $t = $x->type;
     return !$extract_all_p && (
-	    $t == TmplTokenType::TEXT? string_negligible_p( $x->string ):
-	    $t == TmplTokenType::DIRECTIVE? 1:
-	    $t == TmplTokenType::TEXT_PARAMETRIZED
-		&& join( '', map { my $t = $_->type;
-			$t == TmplTokenType::DIRECTIVE?
-				'1': $t == TmplTokenType::TAG?
-					'': token_negligible_p( $_ )?
-					'': '1' } @{$x->children} ) eq '' );
+          $t == C4::TmplTokenType::TEXT() ? string_negligible_p( $x->string )
+        : $t == C4::TmplTokenType::DIRECTIVE() ? 1
+        : $t == C4::TmplTokenType::TEXT_PARAMETRIZED()
+        && join(
+            '',
+            map {
+                my $t = $_->type;
+                    $t == C4::TmplTokenType::DIRECTIVE() ? '1'
+                  : $t == C4::TmplTokenType::TAG()       ? ''
+                  : token_negligible_p($_)               ? ''
+                  : '1'
+            } @{ $x->children }
+        ) eq ''
+    );
 }
 
 ###############################################################################
 
-sub remember ($$) {
+sub remember {
     my($token, $string) = @_;
     # If we determine that the string is negligible, don't bother to remember
     unless (string_negligible_p( $string ) || token_negligible_p( $token )) {
-	my $key = TmplTokenizer::string_canon( $string );
-	$text{$key} = [] unless defined $text{$key};
-	push @{$text{$key}}, $token;
+        my $key = TmplTokenizer::string_canon( $string );
+        $text{$key} = [] unless defined $text{$key};
+        push @{$text{$key}}, $token;
     }
 }
 
 ###############################################################################
 
-sub string_list () {
+sub string_list {
     my @t = keys %text;
     # The real gettext tools seems to sort case sensitively; I don't know why
     @t = sort { $a cmp $b } @t if $sort eq 's';
@@ -83,52 +96,57 @@ sub string_list () {
     return @t;
 }
 
-###############################################################################
+  ###############################################################################
 
-sub text_extract (*) {
+sub text_extract {
     my($h) = @_;
     for (;;) {
-	my $s = TmplTokenizer::next_token $h;
-    last unless defined $s;
-	my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
-	if ($kind eq TmplTokenType::TEXT) {
-	    remember( $s, $t ) if $t =~ /\S/s;
-	} elsif ($kind eq TmplTokenType::TEXT_PARAMETRIZED) {
-	    remember( $s, $s->form ) if $s->form =~ /\S/s;
-	} elsif ($kind eq TmplTokenType::TAG && %$attr) {
-	    # value [tag=input], meta
-	    my $tag = lc($1) if $t =~ /^<(\S+)/s;
-	    for my $a ('alt', 'content', 'title', 'value','label') {
-		if ($attr->{$a}) {
-            next if $a eq 'label' && $tag ne 'optgroup';
-		    next if $a eq 'content' && $tag ne 'meta';
-		    next if $a eq 'value' && ($tag ne 'input'
-			|| (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:hidden|radio|checkbox)$/)); # FIXME
-		    my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
-		    $val = TmplTokenizer::trim $val;
-		    remember( $s, $val ) if $val =~ /\S/s;
-		}
+        my $s = TmplTokenizer::next_token $h;
+        last unless defined $s;
+        my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
+        if ($kind eq C4::TmplTokenType::TEXT) {
+	    if ($t =~ /\S/s && $t !~ /<!/){
+		remember( $s, $t );
 	    }
-	} elsif ($s->has_js_data) {
-	    for my $t (@{$s->js_data}) {
-		remember( $s, $t->[3] ) if $t->[0]; # FIXME
+        } elsif ($kind eq C4::TmplTokenType::TEXT_PARAMETRIZED) {
+	    if ($s->form =~ /\S/s && $s->form !~ /<!/){
+		remember( $s, $s->form );
 	    }
-	}
+        } elsif ($kind eq C4::TmplTokenType::TAG && %$attr) {
+            # value [tag=input], meta
+            my $tag;
+            $tag = lc($1) if $t =~ /^<(\S+)/s;
+            for my $a ('alt', 'content', 'title', 'value', 'label', 'placeholder') {
+                if ($attr->{$a}) {
+                    next if $a eq 'label' && $tag ne 'optgroup';
+                    next if $a eq 'content' && $tag ne 'meta';
+                    next if $a eq 'value' && ($tag ne 'input'
+                        || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:hidden|radio|checkbox)$/)); # FIXME
+                    my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
+                    $val = TmplTokenizer::trim $val;
+                    remember( $s, $val ) if $val =~ /\S/s;
+                }
+            }
+        } elsif ($s->has_js_data) {
+            for my $t (@{$s->js_data}) {
+              remember( $s, $t->[3] ) if $t->[0]; # FIXME
+            }
+        }
     }
 }
 
 ###############################################################################
 
-sub generate_strings_list () {
+sub generate_strings_list {
     # Emit all extracted strings.
     for my $t (string_list) {
-	printf OUTPUT "%s\n", $t;
+        printf $OUTPUT "%s\n", $t;
     }
 }
 
 ###############################################################################
 
-sub generate_po_file () {
+sub generate_po_file {
     # We don't emit the Plural-Forms header; it's meaningless for us
     my $pot_charset = (defined $charset_out? $charset_out: 'CHARSET');
     $pot_charset = TmplTokenizer::charset_canon $pot_charset;
@@ -136,17 +154,17 @@ sub generate_po_file () {
     my $time = POSIX::strftime('%Y-%m-%d %H:%M%z', localtime(time));
     my $time_pot = $time;
     my $time_po  = $po_mode_p? $time: 'YEAR-MO-DA HO:MI+ZONE';
-    print OUTPUT <<EOF;
+    print $OUTPUT <<EOF;
 # SOME DESCRIPTIVE TITLE.
 # Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
 # This file is distributed under the same license as the PACKAGE package.
 # FIRST AUTHOR <EMAIL\@ADDRESS>, YEAR.
 #
 EOF
-    print OUTPUT <<EOF unless $disable_fuzzy_p;
+    print $OUTPUT <<EOF unless $disable_fuzzy_p;
 #, fuzzy
 EOF
-    print OUTPUT <<EOF;
+    print $OUTPUT <<EOF;
 msgid ""
 msgstr ""
 "Project-Id-Version: PACKAGE VERSION\\n"
@@ -161,77 +179,79 @@ msgstr ""
 EOF
     my $directory_re = quotemeta("$directory/");
     for my $t (string_list) {
-	if ($text{$t}->[0]->type == TmplTokenType::TEXT_PARAMETRIZED) {
+	if ($text{$t}->[0]->type == C4::TmplTokenType::TEXT_PARAMETRIZED) {
 	    my($token, $n) = ($text{$t}->[0], 0);
-	    printf OUTPUT "#. For the first occurrence,\n"
+        printf $OUTPUT "#. For the first occurrence,\n"
 		    if @{$text{$t}} > 1 && $token->parameters_and_fields > 0;
 	    for my $param ($token->parameters_and_fields) {
 		$n += 1;
 		my $type = $param->type;
-		my $subtype = ($type == TmplTokenType::TAG
+		my $subtype = ($type == C4::TmplTokenType::TAG
 			&& $param->string =~ /^<input\b/is?
 				$param->attributes->{'type'}->[1]: undef);
 		my $fmt = TmplTokenizer::_formalize( $param );
 		$fmt =~ s/^%/%$n\$/;
-		if ($type == TmplTokenType::DIRECTIVE) {
-		    $type = $param->string =~ /(TMPL_[A-Z]+)+/is? $1: 'ERROR';
+		if ($type == C4::TmplTokenType::DIRECTIVE) {
+#		    $type = "Template::Toolkit Directive";
+		    $type = $param->string =~ /\[%(.*?)%\]/is? $1: 'ERROR';
 		    my $name = $param->string =~ /\bname=(["']?)([^\s"']+)\1/is?
 			    $2: undef;
-		    printf OUTPUT "#. %s: %s\n", $fmt,
+            printf $OUTPUT "#. %s: %s\n", $fmt,
 			"$type" . (defined $name? " name=$name": '');
 		} else {
 		    my $name = $param->attributes->{'name'};
-		    my $value = $param->attributes->{'value'}
+            my $value;
+            $value = $param->attributes->{'value'}
 			    unless $subtype =~ /^(?:text)$/;
-		    printf OUTPUT "#. %s: %s\n", $fmt, "type=$subtype"
+            printf $OUTPUT "#. %s: %s\n", $fmt, "type=$subtype"
 			    . (defined $name?  " name=$name->[1]": '')
 			    . (defined $value? " value=$value->[1]": '');
 		}
 	    }
-	} elsif ($text{$t}->[0]->type == TmplTokenType::TAG) {
+	} elsif ($text{$t}->[0]->type == C4::TmplTokenType::TAG) {
 	    my($token) = ($text{$t}->[0]);
-	    printf OUTPUT "#. For the first occurrence,\n"
+        printf $OUTPUT "#. For the first occurrence,\n"
 		    if @{$text{$t}} > 1 && $token->parameters_and_fields > 0;
 	    if ($token->string =~ /^<meta\b/is) {
 		my $type = $token->attributes->{'http-equiv'}->[1];
-		print OUTPUT "#. META http-equiv=$type\n" if defined $type;
+        print $OUTPUT "#. META http-equiv=$type\n" if defined $type;
 	    } elsif ($token->string =~ /^<([a-z0-9]+)/is) {
 		my $tag = uc($1);
 		my $type = (lc($tag) eq 'input'?
 			$token->attributes->{'type'}: undef);
 		my $name = $token->attributes->{'name'};
-		printf OUTPUT "#. %s\n", $tag
+        printf $OUTPUT "#. %s\n", $tag
 		    . (defined $type? " type=$type->[1]": '')
 		    . (defined $name? " name=$name->[1]": '');
 	    }
 	} elsif ($text{$t}->[0]->has_js_data) {
-	    printf OUTPUT "#. For the first occurrence,\n" if @{$text{$t}} > 1;
-	    printf OUTPUT "#. SCRIPT\n";
+        printf $OUTPUT "#. For the first occurrence,\n" if @{$text{$t}} > 1;
+        printf $OUTPUT "#. SCRIPT\n";
 	}
 	my $cformat_p;
 	for my $token (@{$text{$t}}) {
 	    my $pathname = $token->pathname;
 	    $pathname =~ s/^$directory_re//os;
         $pathname =~ s/^.*\/koha-tmpl\/(.*)$/$1/;
-	    printf OUTPUT "#: %s:%d\n", $pathname, $token->line_number
+        printf $OUTPUT "#: %s:%d\n", $pathname, $token->line_number
 		    if defined $pathname && defined $token->line_number;
-	    $cformat_p = 1 if $token->type == TmplTokenType::TEXT_PARAMETRIZED;
+	    $cformat_p = 1 if $token->type == C4::TmplTokenType::TEXT_PARAMETRIZED;
 	}
-	printf OUTPUT "#, c-format\n" if $cformat_p;
-	printf OUTPUT "msgid %s\n", TmplTokenizer::quote_po
+        printf $OUTPUT "#, c-format\n" if $cformat_p;
+        printf $OUTPUT "msgid %s\n", TmplTokenizer::quote_po
 		TmplTokenizer::string_canon
 		TmplTokenizer::charset_convert $t, $charset_in, $charset_out;
-	printf OUTPUT "msgstr %s\n\n", (defined $translation{$t}?
+        printf $OUTPUT "msgstr %s\n\n", (defined $translation{$t}?
 		TmplTokenizer::quote_po( $translation{$t} ): "\"\"");
     }
 }
 
 ###############################################################################
 
-sub convert_translation_file () {
-    open(INPUT, "<$convert_from") || die "$convert_from: $!\n";
+sub convert_translation_file {
+    open(my $INPUT, '<', $convert_from) || die "$convert_from: $!\n";
     VerboseWarnings::set_input_file_name $convert_from;
-    while (<INPUT>) {
+    while (<$INPUT>) {
 	chomp;
 	my($msgid, $msgstr) = split(/\t/);
 	die "$convert_from: $.: Malformed tmpl_process input (no tab)\n"
@@ -241,7 +261,7 @@ sub convert_translation_file () {
 	$msgid =~ s/^SELECTED>//;
 
 	# Create dummy token
-	my $token = TmplToken->new( $msgid, TmplTokenType::UNKNOWN, undef, undef );
+	my $token = TmplToken->new( $msgid, C4::TmplTokenType::UNKNOWN, undef, undef );
 	remember( $token, $msgid );
 	$msgstr =~ s/^(?:LIMIT;|LIMITED;)//g; # unneeded for tmpl_process3
 	$translation{$msgid} = $msgstr unless $msgstr eq '*****';
@@ -268,7 +288,7 @@ sub convert_translation_file () {
 
 ###############################################################################
 
-sub usage ($) {
+sub usage {
     my($exitcode) = @_;
     my $h = $exitcode? *STDERR: *STDOUT;
     print $h <<EOF;
@@ -302,7 +322,7 @@ EOF
 
 ###############################################################################
 
-sub usage_error (;$) {
+sub usage_error {
     print STDERR "$_[0]\n" if @_;
     print STDERR "Try `$0 --help' for more information.\n";
     exit(-1);
@@ -341,16 +361,16 @@ usage_error('You cannot specify both --convert-from and --files-from')
 
 if (defined $output && $output ne '-') {
     print STDERR "$0: Opening output file \"$output\"\n" if $verbose_p;
-    open(OUTPUT, ">$output") || die "$output: $!\n";
+        open($OUTPUT, '>', $output) || die "$output: $!\n";
 } else {
     print STDERR "$0: Outputting to STDOUT...\n" if $verbose_p;
-    open(OUTPUT, ">&STDOUT");
+    open($OUTPUT, ">&STDOUT");
 }
 
 if (defined $files_from) {
     print STDERR "$0: Opening input file list \"$files_from\"\n" if $verbose_p;
-    open(INPUT, "<$files_from") || die "$files_from: $!\n";
-    while (<INPUT>) {
+    open(my $INPUT, '<', $files_from) || die "$files_from: $!\n";
+    while (<$INPUT>) {
 	chomp;
 	my $input = /^\//? $_: "$directory/$_";
 	my $h = TmplTokenizer->new( $input );
@@ -359,7 +379,7 @@ if (defined $files_from) {
 	print STDERR "$0: Processing file \"$input\"\n" if $verbose_p;
 	text_extract( $h );
     }
-    close INPUT;
+    close $INPUT;
 } else {
     print STDERR "$0: Converting \"$convert_from\"\n" if $verbose_p;
     convert_translation_file;
@@ -376,8 +396,7 @@ exit(-1) if TmplTokenizer::fatal_p;
 
 =head1 DESCRIPTION
 
-This is an experimental script based on the modularized
-text-extract2.pl script.  It has behaviour similar to
+This script has behaviour similar to
 xgettext(1), and generates gettext-compatible output files.
 
 A gettext-like format provides the following advantages:
@@ -388,7 +407,7 @@ A gettext-like format provides the following advantages:
 
 Translation to non-English-like languages with different word
 order:  gettext's c-format strings can theoretically be
-emulated if we are able to do some analysis on the .tmpl input
+emulated if we are able to do some analysis on the .tt input
 and treat <TMPL_VAR> in a way similar to %s.
 
 =item - 
@@ -419,10 +438,10 @@ details.
 If you want to generate GNOME-style POTFILES.in files, such
 files (passed to -f) can be generated thus:
 
-	(cd ../.. && find koha-tmpl/opac-tmpl/default/en \
-		-name \*.inc -o -name \*.tmpl) > opac/POTFILES.in
-	(cd ../.. && find koha-tmpl/intranet-tmpl/default/en \
-		-name \*.inc -o -name \*.tmpl) > intranet/POTFILES.in
+    (cd ../.. && find koha-tmpl/opac-tmpl/default/en \
+        -name \*.inc -o -name \*.tt) > opac/POTFILES.in
+    (cd ../.. && find koha-tmpl/intranet-tmpl/default/en \
+        -name \*.inc -o -name \*.tt) > intranet/POTFILES.in
 
 This is, however, quite pointless, because the "create" and
 "update" actions have already been implemented in tmpl_process3.pl.