#!/usr/bin/perl
# read files for fulltext index
# filelist is on stdin (e.g. by find)
# index entries go to stdout
# usage:
#   find /foo -name \*.html | ./fulltext >/tmp/idx 2>/tmp/mst
#   find /usr/share/doc -type f -a \! -name \*.htm\* | ./fulltext >/tmp/idx 2>/tmp/mst
#   sort -o /tmp/idx /tmp/idx
#   time ./openisis -write db/test/ft -stream -fmt mfn ...
#     NOTE(review): the tail of this last command line was lost from the
#     header; presumably it feeds /tmp/mst and /tmp/idx to openisis -- confirm.
#
# Output written by this script:
#   stdout: one line per indexed word,
#             WORD <tab> file-number <tab> 800 <tab> line-number <tab> word-number
#           WORD is upper-cased; file numbers count successfully opened
#           files starting at 1; line numbers count non-blank lines only;
#           word numbers restart at 1 on every line.
#   stderr: per opened file, "100 <tab> file-name", then a form feed line.

use strict;
use warnings;

# Only the first MAX_LINES non-blank lines of each file are indexed.
use constant MAX_LINES => 255;

# Build the index entries for one (already chomped) line of text.
#
#   $text    - the line to split into words
#   $file_no - sequence number of the file the line came from
#   $line_no - number of the line among the file's non-blank lines
#
# Returns a list of newline-terminated, tab-separated entry strings, one
# per word, in the order the words appear on the line.
sub index_entries {
    my ($text, $file_no, $line_no) = @_;

    my @entries;
    my $word_no = 0;
    for my $word (split /\W+/, $text) {
        # split yields an empty leading field when the line starts with a
        # non-word character.  The truthiness test drops it -- and, exactly
        # as the original did, it also drops the token "0".
        next unless $word;

        # The third column is the fixed value 800; its meaning is defined
        # by the consumer of the index, not by this script.
        push @entries, sprintf "%s\t%d\t%d\t%d\t%d\n",
            uc($word), $file_no, 800, $line_no, ++$word_no;
    }
    return @entries;
}

my $file_no = 0;

FILE:
while (my $path = <STDIN>) {
    chomp $path;

    # Three-argument open: a name beginning with '>' or ending with '|'
    # is treated as a plain file name, never as a mode or a command.
    # Files that cannot be opened are skipped silently and get no number.
    open my $fh, '<', $path or next FILE;

    $file_no++;
    print STDERR "100\t$path\n";

    my $line_no = 0;

    LINE:
    while (my $text = <$fh>) {
        next LINE if $text =~ /^\s*$/;           # blank lines are not counted
        last LINE if ++$line_no > MAX_LINES;     # stop after MAX_LINES lines
        chomp $text;
        print index_entries($text, $file_no, $line_no);
    }
    close $fh;

    # A form feed line closes this file's block on stderr.
    print STDERR "\f\n";
}