use vars qw( $input );
use vars qw( $debug_dump_only_p );
use vars qw( $pedantic_p );
+use vars qw( $allow_cformat_p ); # FOR TESTING PURPOSES ONLY!!
###############################################################################
-sub debug_dump (*) { # for testing only
+sub underline ($) { # for testing only
+ my($s) = @_;
+ join('', map {/[\0-\37]/? $_: "$_\b$_"} split(//, $s));
+}
+
+sub debug_dump ($) { # for testing only
my($h) = @_;
- print "re_tag_compat is /$TmplTokenizer::re_tag_compat/\n";
+ print "re_tag_compat is /", TmplTokenizer::re_tag(1), "/\n";
for (;;) {
my $s = TmplTokenizer::next_token $h;
last unless defined $s;
printf "%s\n", ('-' x 79);
- my($kind, $t, $attr) = @$s; # FIXME
- printf "%s:\n", $kind;
- printf "%4dH%s\n", length($t),
- join('', map {/[\0-\37]/? $_: "$_\b$_"} split(//, $t));
- if ($kind eq TmplTokenizer::KIND_TAG && %$attr) {
+ my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
+ printf "%s [line %d]:\n", $kind->to_string, $s->line_number;
+ printf "%4dH%s\n", length($t), underline($t);
+ if ($kind == TmplTokenType::TAG && %$attr) {
printf "Attributes:\n";
for my $a (keys %$attr) {
my($key, $val, $val_orig, $order) = @{$attr->{$a}};
- printf "%s = %dH%s -- %s\n", $a, length $val,
- join('', map {/[\0-\37]/? $_: "$_\b$_"} split(//, $val)),
+ printf "%s = %dH%s -- %s\n", $a, length $val, underline $val,
$val_orig;
}
}
+ if ($kind == TmplTokenType::TEXT_PARAMETRIZED) {
+ printf "Form (c-format string):\n";
+ printf "%dH%s\n", length $s->form, underline $s->form;
+ printf "Parameters:\n";
+ my $i = 1;
+ for my $a ($s->parameters) {
+ my $t = $a->string;
+ printf "%%%d\$s = %dH%s\n", $i, length $t, underline $t;
+ $i += 1;
+ }
+ }
}
}
###############################################################################
-sub trim ($) {
- my($s) = @_;
- $s =~ s/^(?:\s|\ $TmplTokenizer::re_end_entity)+//os;
- $s =~ s/(?:\s|\ $TmplTokenizer::re_end_entity)+$//os;
- return $s;
-}
-
-###############################################################################
-
-sub text_extract (*) {
+sub text_extract ($) {
my($h) = @_;
my %text = ();
for (;;) {
my $s = TmplTokenizer::next_token $h;
last unless defined $s;
- my($kind, $t, $attr) = @$s; # FIXME
- if ($kind eq TmplTokenizer::KIND_TEXT) {
- $t = trim $t;
+ my($kind, $t, $attr) = ($s->type, $s->string, $s->attributes);
+ if ($kind == TmplTokenType::TEXT) {
+ $t = TmplTokenizer::trim $t;
$text{$t} = 1 if $t =~ /\S/s;
- } elsif ($kind eq TmplTokenizer::KIND_TAG && %$attr) {
+ } elsif ($kind == TmplTokenType::TAG && %$attr) {
# value [tag=input], meta
my $tag = lc($1) if $t =~ /^<(\S+)/s;
for my $a ('alt', 'content', 'title', 'value') {
if ($attr->{$a}) {
next if $a eq 'content' && $tag ne 'meta';
next if $a eq 'value' && ($tag ne 'input'
- || (ref $attr->{'type'} && $attr->{'type'}->[1] eq 'hidden')); # FIXME
+ || (ref $attr->{'type'} && $attr->{'type'}->[1] =~ /^(?:hidden|radio)$/)); # FIXME
my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
- $val = trim $val;
+ $val = TmplTokenizer::trim $val;
$text{$val} = 1 if $val =~ /\S/s;
}
}
# Don't emit pure whitespace, pure numbers, or TMPL_VAR's.
for my $t (keys %text) {
printf "%s\n", $t
- unless $t =~ /^(?:\s|\ $TmplTokenizer::re_end_entity|$TmplTokenizer::re_tmpl_var)*$/os || $t =~ /^\d+$/;
+ unless TmplTokenizer::blank_p($t) || $t =~ /^\d+$/;
}
}
###############################################################################
GetOptions(
+ 'enable-cformat' => \$allow_cformat_p,
'f|file=s' => \$input,
'debug-dump-only' => \$debug_dump_only_p,
'pedantic-warnings' => sub { $pedantic_p = 1 },
usage_error('Missing mandatory option -f') unless defined $input;
-open(INPUT, "<$input") || die "$0: $input: $!\n";
+my $h = TmplTokenizer->new( $input );
+$h->set_allow_cformat( 1 ) if $allow_cformat_p;
if ($debug_dump_only_p) {
- debug_dump(*INPUT);
+ debug_dump( $h );
} else {
- text_extract(*INPUT);
+ text_extract( $h );
}
warn "This input will not work with Mozilla standards-compliant mode\n", undef