###############################################################################
+sub trim ($) { # Return a copy of the string argument with leading and trailing padding removed.
+ my($s) = @_; # Copy the argument so the caller's variable is left untouched.
+ $s =~ s/^(?:\s|\ $re_end_entity)+//os; # Strip leading padding; /o interpolates $re_end_entity (defined outside this sub) only once. NOTE(review): the escaped character before $re_end_entity looks like a garbled HTML non-breaking-space entity -- confirm against the upstream file.
+ $s =~ s/(?:\s|\ $re_end_entity)+$//os; # Strip trailing padding using the same alternation as above.
+ return $s; # The trimmed copy.
+}
+
+###############################################################################
+
sub text_extract (*) {
my($h) = @_;
my %text = ();
last unless defined $s;
my($kind, $t, $attr) = @$s; # FIXME
if ($kind eq KIND_TEXT) {
- $t =~ s/\s+$//s;
+ $t = trim $t;
$text{$t} = 1 if $t =~ /\S/s;
} elsif ($kind eq KIND_TAG && %$attr) {
# value [tag=input], meta
next if $a eq 'value' && ($tag ne 'input'
|| (ref $attr->{'type'} && $attr->{'type'}->[1] eq 'hidden')); # FIXME
my($key, $val, $val_orig, $order) = @{$attr->{$a}}; #FIXME
- $val =~ s/\s+$//s;
+ $val = trim $val;
$text{$val} = 1 if $val =~ /\S/s;
}
}
}
# Emit all extracted strings. Don't emit pure whitespace or pure numbers.
for my $t (keys %text) {
- printf "%s\n", $t unless $t =~ /^(?:\s|\ )*$/s || $t =~ /^\d+$/;
+ printf "%s\n", $t
+ unless $t =~ /^(?:\s|\ $re_end_entity)*$/os || $t =~ /^\d+$/;
}
}