print out available commands
author Dobrica Pavlinusic <dpavlin@rot13.org>
Mon, 16 Aug 2010 15:19:37 +0000 (17:19 +0200)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Mon, 16 Aug 2010 15:19:37 +0000 (17:19 +0200)
docs/parse-html.pl [new file with mode: 0755]

diff --git a/docs/parse-html.pl b/docs/parse-html.pl
new file mode 100755 (executable)
index 0000000..7de3758
--- /dev/null
@@ -0,0 +1,25 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+
+# Open the HTML input for reading.  The path is taken from the first
+# command-line argument when one is given, otherwise the original fixed
+# file name in the current directory is used.  The file is presumably the
+# output of pdftohtml (that is what the error hint suggests) - confirm.
+my $path = @ARGV ? $ARGV[0] : 'Programming_Guide_A5013_RevEs.html';
+open(my $html, '<', $path) or die "can't open $path - run pdftohtml: $!";
+
+# strip_html($text) - reduce one line of HTML to plain text.
+#
+# Replaces every "&nbsp;" entity with a space, deletes <br> tags (the
+# <br/> and <BR> spellings are accepted too) together with newlines, and
+# finally trims trailing whitespace.
+#
+# Returns the cleaned copy; the caller's variable is not modified.  An
+# undefined argument (e.g. the result of reading past end of file) yields
+# an empty string instead of "uninitialized value" warnings.
+sub strip_html {
+       my ($text) = @_;
+       return '' unless defined $text;
+
+       $text =~ s{&nbsp;}{ }g;
+       $text =~ s{(?:<br\s*/?>|\n)+}{}gi;
+       # All newlines are gone by now, so anchoring at the very end is enough.
+       $text =~ s{\s+\z}{};
+       return $text;
+}
+
+# Scan the HTML line by line and print one tab-separated record per
+# command heading: command name, parameter text, description text.
+while (defined(my $line = <$html>)) {
+       # Skip page markers.  The optional ";" lets the pattern match a
+       # complete "&nbsp;" entity in front of "Page" as well as a bare "&nbsp".
+       next if $line =~ m{^(?:&nbsp;?)?Page \d+};
+
+       # A command heading is a bold word followed by &nbsp;,
+       # i.e. "<b>NAME&nbsp;</b><br>".
+       next unless $line =~ m{<b>(\w+)&nbsp;</b><br>};
+       my $command = $1;
+
+       # The two lines following the heading are taken as the parameter
+       # and description text.  Fall back to '' at end of file so that a
+       # truncated input does not raise "uninitialized value" warnings.
+       my $param = <$html>;
+       $param = '' unless defined $param;
+       my $description = <$html>;
+       $description = '' unless defined $description;
+
+       print join("\t", $command, strip_html($param), strip_html($description)), "\n";
+}
+