X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=misc%2Ftranslator%2Ftext-extract2.pl;h=52dc9909a8f223afee79720a24e8668bb325b9bb;hb=8451615142377edb4ead75a705c39b1ece22d833;hp=db45f6f8c6c5efec1063f8125d30483d617d0c9e;hpb=59d2e3518078002046cfd3046d48360c96dfed1a;p=koha.git diff --git a/misc/translator/text-extract2.pl b/misc/translator/text-extract2.pl index db45f6f8c6..52dc9909a8 100755 --- a/misc/translator/text-extract2.pl +++ b/misc/translator/text-extract2.pl @@ -22,63 +22,69 @@ use strict; use vars qw( $input ); use vars qw( $debug_dump_only_p ); use vars qw( $pedantic_p ); +use vars qw( $allow_cformat_p ); # FOR TESTING PURPOSES ONLY!! ############################################################################### -sub debug_dump (*) { # for testing only +sub underline ($) { # for testing only + my($s) = @_; + join('', map {/[\0-\37]/? $_: "$_\b$_"} split(//, $s)); +} + +sub debug_dump ($) { # for testing only my($h) = @_; - print "re_tag_compat is /$TmplTokenizer::re_tag_compat/\n"; + print "re_tag_compat is /", TmplTokenizer::re_tag(1), "/\n"; for (;;) { my $s = TmplTokenizer::next_token $h; last unless defined $s; printf "%s\n", ('-' x 79); - my($kind, $t, $attr) = @$s; # FIXME - printf "%s:\n", $kind; - printf "%4dH%s\n", length($t), - join('', map {/[\0-\37]/? $_: "$_\b$_"} split(//, $t)); - if ($kind eq TmplTokenizer::KIND_TAG && %$attr) { + my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes); + printf "%s [line %d]:\n", $kind->to_string, $s->line_number; + printf "%4dH%s\n", length($t), underline($t); + if ($kind == TmplTokenType::TAG && %$attr) { printf "Attributes:\n"; for my $a (keys %$attr) { my($key, $val, $val_orig, $order) = @{$attr->{$a}}; - printf "%s = %dH%s -- %s\n", $a, length $val, - join('', map {/[\0-\37]/? $_: "$_\b$_"} split(//, $val)), + printf "%s = %dH%s -- %s\n", $a, length $val, underline $val, $val_orig; } } + if ($kind == TmplTokenType::TEXT_PARAMETRIZED) { + printf "Form (c-format string):\n"; + printf "%dH%s\n", length $s->form, underline $s->form; + printf "Parameters:\n"; + my $i = 1; + for my $a ($s->parameters) { + my $t = $a->string; + printf "%%%d\$s = %dH%s\n", $i, length $t, underline $t; + $i += 1; + } + } } } ############################################################################### -sub trim ($) { - my($s) = @_; - $s =~ s/^(?:\s|\ $TmplTokenizer::re_end_entity)+//os; - $s =~ s/(?:\s|\ $TmplTokenizer::re_end_entity)+$//os; - return $s; -} - -############################################################################### - -sub text_extract (*) { +sub text_extract ($) { my($h) = @_; my %text = (); for (;;) { my $s = TmplTokenizer::next_token $h; last unless defined $s; - my($kind, $t, $attr) = @$s; # FIXME - if ($kind eq TmplTokenizer::KIND_TEXT) { - $t = trim $t; + my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes); + if ($kind == TmplTokenType::TEXT) { + $t = TmplTokenizer::trim $t; $text{$t} = 1 if $t =~ /\S/s; - } elsif ($kind eq TmplTokenizer::KIND_TAG && %$attr) { + } elsif ($kind == TmplTokenType::TAG && %$attr) { # value [tag=input], meta my $tag = lc($1) if $t =~ /^<(\S+)/s; for my $a ('alt', 'content', 'title', 'value') { if ($attr->{$a}) { next if $a eq 'content' && $tag ne 'meta'; next if $a eq 'value' && ($tag ne 'input' - || (ref $attr->{'type'} && $attr->{'type'}->[1] eq 'hidden')); # FIXME + || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:hidden|radio)$/)); # FIXME my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME - $val = trim $val; + $val = TmplTokenizer::trim $val; $text{$val} = 1 if $val =~ /\S/s; } } @@ -88,7 +94,7 @@ sub text_extract (*) { # Don't emit pure whitespace, pure numbers, or TMPL_VAR's. for my $t (keys %text) { printf "%s\n", $t - unless $t =~ /^(?:\s|\ $TmplTokenizer::re_end_entity|$TmplTokenizer::re_tmpl_var)*$/os || $t =~ /^\d+$/; + unless TmplTokenizer::blank_p($t) || $t =~ /^\d+$/; } } @@ -121,6 +127,7 @@ sub usage_error (;$) { ############################################################################### GetOptions( + 'enable-cformat' => \$allow_cformat_p, 'f|file=s' => \$input, 'debug-dump-only' => \$debug_dump_only_p, 'pedantic-warnings' => sub { $pedantic_p = 1 }, @@ -133,11 +140,12 @@ VerboseWarnings::set_pedantic_mode $pedantic_p; usage_error('Missing mandatory option -f') unless defined $input; -open(INPUT, "<$input") || die "$0: $input: $!\n"; +my $h = TmplTokenizer->new( $input ); +$h->set_allow_cformat( 1 ) if $allow_cformat_p; if ($debug_dump_only_p) { - debug_dump(*INPUT); + debug_dump( $h ); } else { - text_extract(*INPUT); + text_extract( $h ); } warn "This input will not work with Mozilla standards-compliant mode\n", undef