Fixed parsing of ScienceDirect HTML with more than one <a href=> per <tr>
[webpac] / openisis / openisis.c
1 /*
2         openisis - an open implementation of the CDS/ISIS database
3         Version 0.8.x (patchlevel see file Version)
4         Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5
6         This library is free software; you can redistribute it and/or
7         modify it under the terms of the GNU Lesser General Public
8         License as published by the Free Software Foundation; either
9         version 2.1 of the License, or (at your option) any later version.
10
11         This library is distributed in the hope that it will be useful,
12         but WITHOUT ANY WARRANTY; without even the implied warranty of
13         MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14         Lesser General Public License for more details.
15
16         You should have received a copy of the GNU Lesser General Public
17         License along with this library; if not, write to the Free Software
18         Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19
20         see README for more information
21 EOH */
22
23 /*
24         $Id: openisis.c,v 1.67 2003/06/03 11:25:02 kripke Exp $
25         main file of openisis executable.
26 */
27
28 #ifdef HAVE_PTHREAD
29 #define HAVE_THREADS
30 #endif
31
32 #include <assert.h>
33 #include <stdlib.h> /* free */
34 #include <string.h> /* strcmp */
35 #ifdef WIN32
36 #       include <sys/timeb.h>
37 #       include <sys/types.h>
38 #       define timeval _timeb
39 #else
40 #       include <unistd.h> /* gettimeofday */
41 #endif
42 #include <sys/time.h> /* gettimeofday */
43 #ifdef  HAVE_PTHREAD
44 #include <pthread.h> /* threaded crashtest */
45 #endif
46
47 #include "openisis.h"
48
49 /*
50         temporary library includes for testing of functions
51         that should later be accessible via openisis.h
52 */
53 #include "lio.h"
54 extern Db* ldb_getdb (int dbid);
55
56
57 /* ************************************************************
58         private types
59 */
/*
	action selected on the command line.
	DO_DUMP is the initial value of main's selector, i.e. the default.
*/
typedef enum {
	DO_DUMP = 0,	/* print every record from rowid 1 up to the max rowid */
	DO_SCAN,	/* simple full scan */
	DO_SEARCH,	/* basic index based searching */
	DO_TERMS,	/* list terms */
	DO_CHECK,	/* check db */
	DO_PERF,	/* do random reads for performance checking */
	DO_CRASH,	/* do multi-threaded crashtest */
	DO_SPLIT,	/* split a field value */
	DO_STREAM,	/* stream in records */
	DO_MFNLIST,	/* fetch records by mfn list */
	DO_IFLOAD,	/* read a lk2-style file from stdin */
	DO_SWLOAD,	/* load stopwords */
	DO_IFDUMP,	/* dump a lk2-style file to stdout */
	DO_FDT,		/* print fdt */
	DO_VUTF		/* validate UTF-8 input */
} todo;
77
/*
	record output format.
	The order is significant: print() treats every value >= FMT_TXT
	as a plaintext masterfile variant.
*/
typedef enum {
	FMT_MFN = 0,	/* rowid only */
	FMT_MFNF,	/* rowid, 1st field */
	FMT_PROP,	/* property style */
	FMT_TXT,	/* plaintext masterfile style */
	FMT_TXTW	/* plaintext masterfile style with W lines */
} format;
85
/* index file dump mode, chosen by the -if... / -noxi options in main */
typedef enum {
	IFM_DUMP = 0,	/* initial mode (plain -ifdump) */
	IFM_TAB,	/* set by -iftab */
	IFM_OLD,	/* dump old index */
	IFM_COPY,	/* copy old index */
	IFM_CHK		/* check (new oxi) index */
} ifmode;
93
94
95 /* ************************************************************
96         private data
97 */
98 static const char *pft;
99 static const char **term, **val;
100 static OpenIsisSet *post;
101 int nterm, nval;
102 static int db = -1, wdb = -1, append = 0, idxall = 0;
103
104 /* ************************************************************
105         private functions
106 */
107 static int argchk ( const char *param, const char *n, const char *v )
108 {
109         return strcmp( param, n ) ? 0 : v ? 1 :
110                 (openIsisSMsg( OPENISIS_ERR_INVAL, "no value for param '%s'", param ), 0);
111 }       /* argchk */
112
113
114 static int print ( OpenIsisRec *r, int freeit, format f )
115 {
116         union { OpenIsisRec r; char buf[10000]; } x;
117         int i, ret;
118         if ( ! r ) {
119                 if ( FMT_TXT == f )
120                         openIsisSMsg( 1, "\n" );
121                 return -1;
122         }
123         ret = r->rowid;
124         if ( pft ) {
125                 OpenIsisRec *q;
126                 OPENISIS_INITBUF(x);
127                 q = openIsisFmt( &x.r, pft, r );
128                 if ( freeit )
129                         free( r );
130                 freeit = q != &x.r;
131                 r = q;
132         }
133         if ( FMT_MFN == f )
134                 openIsisSMsg( 1, "%d\n", r->rowid );
135         else if ( FMT_MFNF == f )
136                 openIsisSMsg( 1, "%d %.*s\n", r->rowid,
137                         0 == r->len ? 1 : (int)r->field[0].len,
138                         0 == r->len ? "-" : r->field[0].val );
139         else if ( FMT_TXT <= f ) {
140                 openIsisSMsg( 1, "\n" ); /* blank line */
141                 if ( FMT_TXTW == f )
142                         openIsisSMsg( 1, "W\t%d\n", r->rowid );
143                 for ( i=0; i<r->len; i++ ) {
144                         if ( r->field[i].val )
145                                 openIsisSMsg( 1, "%d\t%.*s\n", r->field[i].tag,
146                                         (int)r->field[i].len, r->field[i].val );
147                         else
148                                 openIsisSMsg( 1, "%d\t%d\n",  r->field[i].tag, r->field[i].len );
149                 }
150         } else for ( i=0; i<r->len; i++ ) {
151                 if ( ! r->field[i].val ) { /* shouldn't happen -- numeric ? */
152                         openIsisSMsg( 1, "%d.?=%d\n", r->rowid, r->field[i].len );
153                         continue;
154                 }
155                 openIsisSMsg( 1, "%d.%d=%.*s\n", r->rowid, r->field[i].tag,
156                         (int)r->field[i].len, r->field[i].val );
157                 if ( r->field[i].len && '^' == *r->field[i].val ) { /* split subfields */
158                         OpenIsisRec *rf = openIsisReadField( 0, r->field+i );
159                         if ( rf ) {
160                                 int j;
161                                 for ( j=0; j<rf->len; j++ )
162                                         openIsisSMsg( 1, "%d.%d.%c=%.*s\n",
163                                                 r->rowid, r->field[i].tag,
164                                                 (0x60 & (int)rf->field[j].tag ) ?
165                                                         (int)rf->field[j].tag : ' ',
166                                                 (int)rf->field[j].len, rf->field[j].val );
167                                 free( rf );
168                         }
169                 }
170         }
171         if ( 0 <= wdb ) {
172                 OpenIsisRec *q = 0;
173                 int ok;
174                 if ( append )
175                         r->rowid = 0;
176                 if ( idxall && r != &x.r ) { /* add index entries for all fields */
177                         OPENISIS_INITBUF(x);
178                         q = &x.r;
179                         for ( i=0; i<r->len; i++ ) {
180                                 char hit[64];
181                                 OpenIsisField *fld = r->field + i;
182                                 sprintf( hit, "%d.%d.%d.1       ", r->rowid, fld->tag, i );
183                                 OPENISIS_RADDS( q, OPENISIS_XHIT, hit, q != &x.r );
184                                 OPENISIS_RCAT( q, fld->val, fld->len, q != &x.r );
185                         }
186                 }
187                 ok = openIsisDWritex( wdb, r, q );
188                 openIsisSMsg( 1, "wrote mfn %d (%d)\n", r->rowid, ok );
189         }
190         if ( freeit )
191                 free( r );
192         return ret;
193 }       /* print */
194
195
196 static void printid ( int id, format f )
197 {
198         if ( FMT_MFN == f && 0 > wdb )
199                 openIsisSMsg( 1, "%d\n", id );
200         else 
201                 print( openIsisReadRow( db, id ), !0, f );
202 }       /* printid */
203
204
205 static int printlk2 ( void *me, OpenIsisKey *key, OpenIsisHit *hit )
206 {
207         (void)me;
208         if ( key && hit )
209                 /* 30 key BLANK 7 mfn BLANK 5 tag BLANK 4 occ BLANK 4 pos*/
210                 openIsisSMsg( 1, "%-30.*s %7u %5u %4u %4u\n",
211                         key->len, key->byt, hit->mfn, hit->tag, hit->occ, hit->pos );
212         return 0;
213 }       /* printlk2 */
214
215
216 static int printtab ( void *me, OpenIsisKey *key, OpenIsisHit *hit )
217 {
218         (void)me;
219         if ( key && hit )
220                 /* 30 key BLANK 7 mfn BLANK 5 tag BLANK 4 occ BLANK 4 pos*/
221                 openIsisSMsg( 1, "%.*s\t%u\t%u\t%u\t%u\n",
222                         key->len, key->byt, hit->mfn, hit->tag, hit->occ, hit->pos );
223         return 0;
224 }       /* printtab */
225
226
/*
	timing utility.
	Stores the current time in the given struct and returns the
	milliseconds elapsed since the time it held before the call.
	The value returned by the very first call on a struct is
	therefore meaningless unless the caller has set it up.
*/
#ifdef WIN32
static int millis ( struct _timeb *tb )
{
	struct _timeb prev;

	prev = *tb; /* remember the previous stamp */
	_ftime( tb );
	return (tb->time - prev.time)*1000
		+ (tb->millitm - prev.millitm);
}	/* millis */
#else
static int millis ( struct timeval *tv )
{
	struct timeval prev;

	prev = *tv; /* remember the previous stamp */
	gettimeofday( tv, 0 );
	return (tv->tv_sec - prev.tv_sec)*1000
		+ (tv->tv_usec - prev.tv_usec)/1000;
}	/* millis */
#endif
243
#ifdef  HAVE_PTHREAD
/*
	lock callback built on one process wide mutex and condition.
	The operation is taken from lock masked with OPENISIS_WAIT:
	release or acquire the mutex, wake all waiters, or wait on the
	condition (using the same mutex).
	Returns the result of the pthread call, -1 for an unknown operation.
*/
int myOpenIsisLockFunc ( int lock )
{
	static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER; /* the "fast" kind */
	static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
	int op = OPENISIS_WAIT & lock;
	/*
	LOG_DBG( LOG_ERROR, "thread %d op 0x%08x", (int)pthread_self(), lock );
	*/
	if ( OPENISIS_RELE == op )
		return pthread_mutex_unlock( &mut );
	if ( OPENISIS_LOCK == op )
		return pthread_mutex_lock( &mut );
	if ( OPENISIS_WAKE == op )
		return pthread_cond_broadcast( &cond );
	if ( OPENISIS_WAIT == op )
		return pthread_cond_wait( &cond, &mut );
	return -1;
}
#else
/* no threads: no lock function */
#	define myOpenIsisLockFunc 0
#endif
263
/* argument handed to the thread routine run() */
typedef struct {
	int start;	/* index of the term the thread's round trip is based on */
}	threadarg;
267
268 /* (p)thread routine */
269 static void * run ( void *arg )
270 {
271         struct timeval tv;
272         int m;
273         threadarg *my = (threadarg *)arg;
274         int start = my->start;
275         int i;
276
277         millis( &tv );
278         for ( i=start+1; i++!=start; ) {
279                 int j;
280                 if ( i >= nterm )
281                         i = 0;
282                 for ( j=0; j<post[i].len; j++ ) {
283                         int mfn = post[i].id[j];
284                         OpenIsisRec *r = openIsisReadRow( db, mfn );
285                         if ( ! r ) {
286                                 openIsisSMsg( 2, "no rec %d\n", mfn );
287                                 continue;
288                         }
289                         if ( 0 >= r->len )
290                                 openIsisSMsg( 2, "no fields for %d\n", mfn );
291                         else if ( r->field[0].len != (int)strlen(val[mfn])
292                                 || memcmp( r->field[0].val, val[mfn], r->field[0].len )
293                         )
294                                 openIsisSMsg( 2, "mismatch on %d\n", mfn );
295                         free( r );
296                 }
297         }
298         m = millis(&tv);
299         openIsisSMsg( 1, "thread %d@%d terminated after %.3f seconds\n",
300 #ifdef HAVE_THREADS
301                 (int)pthread_self()
302 #else
303                 0L
304 #endif
305                 , start, m/1000. );
306         return (void *)m;
307 }       /* run */
308
309
310 /* multithreaded crashtest */
311 static int crash ( const char *pre )
312 {
313         struct timeval tv;
314         union { OpenIsisRec r; char buf[10000]; } x;
315         int l;
316         int p = 0;
317         int i, pass;
318         int fd = 2;
319         threadarg def;
320
321         millis( &tv );
322         x.r.len   = 0;
323         x.r.bytes = sizeof(x);
324         nterm = 0;
325         while ( openIsisTerm( &x.r, db, pre ) && x.r.len )
326                 nterm += x.r.len;
327         openIsisSMsg( fd, "%d terms\n", nterm );
328         openIsisSMsg( fd, "%.3f sec\n", millis(&tv)/1000. );
329
330         term = (const char **)malloc( nterm * sizeof(*term) );
331         post = (OpenIsisSet *)malloc( nterm * sizeof(*post) );
332         nterm = 0;
333         nval = 0;
334         while ( openIsisTerm( &x.r, db, pre ) && x.r.len ) {
335                 for ( i=0; i<x.r.len; i++, nterm++ ) {
336                         int cnt;
337                         OpenIsisSet *set = post+nterm;
338                         char *c = malloc( x.r.field[i].len + 1 );
339
340                         memcpy( c, x.r.field[i].val, x.r.field[i].len );
341                         c[ x.r.field[i].len ] = 0;
342                         term[ nterm ] = c;
343
344                         set->len = 0;
345                         openIsisQuery( set, db, c, OPENISIS_QRY_KEYEQ, 0 );
346                         if ( 0 >= set->len ) {
347                                 openIsisSMsg( 2, "no results for '%s'\n", c );
348                                 return 1;
349                         }
350                         for ( cnt = 0; cnt < set->len; cnt++ )
351                                 if ( nval < set->id[cnt] )
352                                         nval = set->id[cnt];
353                         p += set->len;
354                 }
355         }
356         openIsisSMsg( fd, "%d postings max mfn %d\n", p, nval );
357         openIsisSMsg( fd, "%.3f sec\n", millis(&tv)/1000. );
358
359         val = (const char **)malloc( (1+nval) * sizeof(*val) );
360         for ( i=1; i<=nval; i++ ) {
361                 OpenIsisRec *r = openIsisReadRow( db, i );
362                 val[i] = 0;
363                 if ( ! r ) {
364                         openIsisSMsg( 2, "no rec %d\n", i );
365                         continue;
366                 }
367                 if ( 0 < r->len ) {
368                         char *c = malloc( r->field[0].len + 1 );
369
370                         memcpy( c, r->field[0].val, r->field[0].len );
371                         c[ r->field[0].len ] = 0;
372                         val[i] = c;
373                 }
374                 free( r );
375         }
376         l = millis(&tv);
377         openIsisSMsg( fd, "sequential read %d rows in %.3f seconds %d rows per sec\n",
378                 nval, l/1000., nval*1000/(l?l:1) );
379
380         def.start = 0;
381         i = 0;
382         run( &def );
383         l = millis(&tv);
384         run( &def );
385         l = millis(&tv);
386         run( &def );
387         l = millis(&tv);
388         openIsisSMsg( fd, "in-thread run in %.3f seconds %d rows per sec\n",
389                 l/1000., p*1000/(l?l:1) );
390 #ifdef HAVE_THREADS
391 #define passes 3
392         {
393         static int nThreads[] = { 8, 2, 1, 4 };
394         int res[ sizeof(nThreads)/sizeof(nThreads[0]) ];
395         pthread_t th[ 8 /* max. nThreads */ ];
396         threadarg arg[ sizeof(th)/sizeof(th[0]) ];
397         int j;
398
399
400         for ( i=0; i<(int)(sizeof(nThreads)/sizeof(nThreads[0])); i++ )
401                 res[i] = 0;
402         for ( j=0; j<(int)(sizeof(th)/sizeof(th[0])); j++ )
403 #if 0
404                 if ( ! (arg[j].ses = openIsisSesGet( -1, 0 )) ) {
405                         openIsisSMsg( fd, "could not get %dth session\n", j );
406                         return 1;
407                 }
408 #endif
409         for ( pass=0; pass<passes; pass++ )
410                 for ( i=0; i<(int)(sizeof(nThreads)/sizeof(nThreads[0])); i++ ) {
411                         int rps, avg;
412                         millis(&tv);
413                         assert( nThreads[i] <= (int)(sizeof(th)/sizeof(th[0])) );
414                         for ( j=0; j<nThreads[i]; j++ ) {
415                                 arg[j].start = j*nterm/nThreads[i];
416                                 if ( pthread_create( th+j, 0, run, arg+j ) )
417                                         th[j] = (pthread_t)0;
418                         }
419                         openIsisSMsg( fd, "started %d threads\n", nThreads[i] );
420                         avg = 0;
421                         for ( j=0; j<nThreads[i]; j++ ) {
422                                 int t;
423                                 pthread_join( th[j], (void**)&t );
424                                 avg += t;
425                         }
426                         avg /= nThreads[i];
427                         l = millis(&tv);
428                         rps = (int)(nThreads[i] * p * 1000 / avg);
429                         openIsisSMsg( fd, "joined %d threads avg %.3f after %.3f seconds %d rows per sec\n",
430                                 nThreads[i], avg/1000., l/1000., rps );
431                         res[i] += rps;
432                 }
433         for ( i=0; i<(int)(sizeof(nThreads)/sizeof(nThreads[0])); i++ )
434                 openIsisSMsg( fd, "%d threads  %d rows per sec\n", nThreads[i], res[i]/passes );
435         }
436 #else
437         (void)pass; /* avoid compiler warning */
438 #endif  /* HAVE_THREADS */
439         return 0;
440 }       /* crash */
441
442
443 /* ************************************************************
444         package functions
445 */
446 /* ************************************************************
447         public functions
448 */
449 int main ( int argc, const char **argv )
450 {
451         int ret = 0;
452         int i,intarg=0;
453         todo what = DO_DUMP;
454         format fmt = FMT_TXT;
455         int check = OPENISIS_CHK_FIX;
456         int searchmode = OPENISIS_QRY_KEYAT;
457         int idxmode = OPENISIS_IDXPF;
458         const char *search = 0;
459         const char *idxto = 0;
460         int needdb = !0;
461         const char *dowrite = 0;
462         ifmode ifm = IFM_DUMP;
463         OpenIsisRec *argr;
464         OpenIsisDb *odb;
465
466         /* initialize minimal env */
467         openIsisCOpen(0);
468         argr = openIsisRSet( 0,
469                 OPENISIS_RFDT|OPENISIS_RARGV|OPENISIS_RIGN | (argc-1),
470                 openIsisFdtSyspar, argv+1 );
471
472         if ( 2 == argc && ! strcmp("-version",argv[1]) ) {
473                 openIsisSMsg( 0, "%s\n", OPENISIS_VERSION );
474                 goto bye;
475         }
476         /* check options ... */
477         for ( i=0; i < argc; ) {
478                 const char *n = argv[i], *v = 0;
479                 assert( n );
480                 if ( '-' == n[0] )
481                         n++;
482                 if ( 1 == argc - i || '-' == argv[i+1][0] ) { /* no value */
483                         i++;
484                 } else {
485                         v = argv[i+1];
486                         assert( v );
487                         i += 2;
488                 }
489
490                 if ( argchk("logfile",n,v) )
491                         openIsisLog( '=', v );
492                 else if ( argchk("v",n,v) )
493                         openIsisLog( *v, 0 );
494                 else if ( argchk("scan",n,v) ) {
495                         what = DO_SCAN;
496                         search = v;
497                 }
498                 else if ( argchk("search",n,v) ) {
499                         what = DO_SEARCH;
500                         search = v;
501                 }
502                 else if ( argchk("upto",n,v) ) {
503                         idxmode = OPENISIS_IDXUPTO;
504                         idxto = v;
505                 }
506                 else if ( argchk("incl",n,v) ) {
507                         idxmode = OPENISIS_IDXINCL;
508                         idxto = v;
509                 }
510                 else if ( argchk("query",n,v) ) {
511                         what = DO_SEARCH;
512                         searchmode = OPENISIS_QRY_SIMPLE;
513                         search = v;
514                 }
515                 else if ( argchk("terms",n,v) ) {
516                         what = DO_TERMS;
517                         search = v;
518                 }
519                 else if ( argchk("perf",n,v) ) {
520                         what = DO_PERF;
521                         intarg = atoi(v);
522                 }
523                 else if ( argchk("crash",n,v) ) {
524                         what = DO_CRASH;
525                         search = v;
526                 }
527                 else if ( argchk("split",n,v) ) {
528                         what = DO_SPLIT;
529                         search = v;
530                         needdb = 0;
531                 }
532                 else if ( argchk("fmt",n,v) ) {
533                         if ( ! strcmp("mfn",v) )
534                                 fmt =FMT_MFN;
535                         else if ( ! strcmp("mfnf",v) )
536                                 fmt =FMT_MFNF;
537                         else if ( ! strcmp("prop",v) )
538                                 fmt =FMT_PROP;
539                         else if ( ! strcmp("txt",v) )
540                                 fmt =FMT_TXT;
541                         else if ( ! strcmp("txtw",v) )
542                                 fmt =FMT_TXTW;
543                 }
544                 else if ( ! strcmp("check",n) )
545                         what = DO_CHECK;
546                 else if ( ! strcmp("vutf",n) ) {
547                         what = DO_VUTF;
548                         needdb = 0;
549                 } else if ( argchk("pft",n,v) )
550                         pft = v;
551                 else if ( ! strcmp("stream",n) ) {
552                         what = DO_STREAM;
553                         needdb = 0;
554                 } else if ( argchk("write",n,v) )
555                         dowrite = v;
556                 else if ( argchk("append",n,v) ) {
557                         dowrite = v;
558                         append = !0;
559                 } else if ( ! strcmp("idxall",n) )
560                         idxall = !0;
561                 else if ( ! strcmp("mfnlist",n) )
562                         what = DO_MFNLIST;
563                 else if ( argchk("ifload",n,v) ) {
564                         what = DO_IFLOAD;
565                         intarg = atoi(v);
566                 } else if ( ! strcmp("swload",n) ) {
567                         what = DO_SWLOAD;
568                 } else if ( ! strcmp("ifadd",n) ) {
569                         what = DO_IFLOAD;
570                         intarg = -1;
571                 } else if ( ! strcmp("ifdel",n) ) {
572                         what = DO_IFLOAD;
573                         intarg = -2;
574                 } else if ( ! strcmp("ifcopy",n) ) {
575                         what = DO_IFDUMP;
576                         ifm = IFM_COPY;
577                 } else if ( ! strcmp("ifchk",n) ) {
578                         what = DO_IFDUMP;
579                         ifm = IFM_CHK;
580                 } else if ( ! strcmp("iftab",n) ) {
581                         what = DO_IFDUMP;
582                         ifm = IFM_TAB;
583                 } else if ( ! strcmp("ifdump",n) )
584                         what = DO_IFDUMP;
585                 else if ( ! strcmp("noxi",n) )
586                         ifm = IFM_OLD;
587                 else if ( argchk("out",n,v) ) {
588                         char buf[256] = ">";
589                         int l = strlen(v);
590                         if ( l < 254 ) {
591                                 memcpy( buf+1, v, l );
592                                 buf[l+2] = 0;
593                                 openIsisSOpen( buf, 0, 0 );
594                         }
595                 }
596                 else if ( argchk("fdtdump",n,v) ) {
597                         what = DO_FDT;
598                         intarg = atoi(v);
599                 }
600         }       /* while argc */
601
602
603         if ( needdb && 0 > (db = openIsisOpen( 0, argv + 1, argc - 1 )) ) {
604                 openIsisSMsg( 2,
605                         "openisis " OPENISIS_VERSION "\n\n"
606                         "please specify a valid database with -db, e.g.\n"
607                         "-db /winisis/data/cds\n"
608                         "\n"
609                         "other options are:\n"
610                         "-search term      search for term\n"
611                         "-query \"query\"    run a query like \"water * plant\"\n"
612                         "-terms term       list terms matching term (e.g. plant$)\n"
613                 );
614                 /* warning: string length `580' is greater than the minimum length
615                  * `509' ISO C89 is required to support
616                 */
617                 openIsisSMsg( 2,
618                         "-fmt mfn          for a search or query, list only the mfn\n"
619                         "-fmt mfnf         for a search or query, list the mfn and 1st field\n"
620                         "-pft \"pft\"        use printformat (currently very limited)\n"
621                         "-write dbpath     specify a db where records are written to\n"
622                         "-mfnlist          read mfns from stdin\n"
623                         "-ifload pctfree   read .lk2-index from stdin\n"
624                         "\n"
625                         "default output format is one field per line like tag<TAB>value\n"
626                 );
627                 ret = 1;
628                 goto bye;
629         }
630
631         if ( dowrite && 0 > (wdb = openIsisOpen( dowrite, 0, 0 )) ) {
632                 openIsisSMsg( 2, "could not open write target db '%s'\n", dowrite );
633                 ret = 2;
634                 goto bye;
635         }
636                 
637
638         switch ( what ) {
639         case DO_DUMP: {
640                 int max = openIsisMaxRowid( db );
641                 int rowid;
642                 for ( rowid = 1; rowid <= max; rowid++ )
643                         printid( rowid, fmt );
644         } break; /* DO_DUMP */
645         case DO_MFNLIST: {
646                 char *buf = 0;
647                 int l;
648                 while ( 0 <= (l = openIsisSReadln( &buf )) ) {
649                         int id = 0;
650                         while ( l-- )
651                                 id = 10*id + *buf++ - '0';
652                         if ( id )
653                                 printid( id, fmt );
654                 }
655         } break; /* DO_MFNLIST */
656         case DO_SCAN: {
657                 int max = openIsisMaxRowid( db );
658                 int rowid;
659                 for ( rowid = 1; 0 < rowid && rowid <= max; rowid++ )
660                         rowid = print( openIsisScan( db, rowid, 0, search ), !0, fmt );
661         } break; /* DO_SCAN */
662         case DO_SEARCH: {
663                 int cnt;
664                 OpenIsisSet set;
665                 set.len = 0;
666                 openIsisQuery( &set, db, search, searchmode, 0 );
667                 if ( 0 >= set.len ) {
668                         openIsisSMsg( 2, "no results for '%s'\n", search );
669                         ret = 1;
670                         goto bye;
671                 }
672                 /* openIsisSMsg( 2, "%d\trows for\t%s\n", set.len, search ); */
673                 for ( cnt = 0; cnt < set.len; cnt++ )
674                         printid( set.id[cnt], fmt );
675         } break; /* DO_SEARCH */
676         case DO_TERMS: {
677                 union { OpenIsisRec r; char buf[10000]; } x;
678                 x.r.len   = 0;
679                 x.r.bytes = sizeof(x);
680                 while ( openIsisTerm( &x.r, db, search ) && x.r.len ) {
681                         /* openIsisSMsg( 1, "%d terms\n", x.r.len ); */
682                         for ( i=0; i<x.r.len; i++ )
683                                 openIsisSMsg( 1, "%.*s\n", (int)x.r.field[i].len, x.r.field[i].val );
684                 }
685         } break; /* DO_TERMS */
686         case DO_PERF: {
687                 int max = openIsisMaxRowid( db );
688                 while ( 0 < intarg-- ) {
689                         OpenIsisRec *r = openIsisReadRow( db, 1+((int)rand() % max) );
690                         free( r );
691                 }
692         } break; /* DO_PERF */
693         case DO_CHECK:  
694                 ret = openIsisCheck( db, check );
695                 goto bye;
696         case DO_CRASH:  
697                 ret = crash( search );
698                 goto bye;
699         case DO_SPLIT: {
700                 OpenIsisField f;
701                 OpenIsisRec *r;
702                 f.tag = 24; f.val = search; f.len = strlen(search);
703                 r = openIsisReadField( 0, &f );
704                 if ( r )
705                         for ( i=0; i<r->len; i++ )
706                                 openIsisSMsg( 1, "%c=%.*s\n", (int)r->field[i].tag,
707                                         (int)r->field[i].len, r->field[i].val );
708         }       break;
709         case DO_STREAM: {
710                 OpenIsisIos ios;
711                 OpenIsisRecStream rs = { 0, OPENISIS_STOPONEMPTY, 0, 0, 0 };
712                 LIO_SINIT( &ios, lio_stdio, "stdin", LIO_IN );
713                 rs.in = &ios; /* some gcc versions need it this way */
714                 while ( 0 < (i = openIsisSGetr( &rs )) )
715                         print( rs.rec, 0, fmt );
716         }       break;
717         case DO_IFLOAD:
718         case DO_SWLOAD: {
719                 OpenIsisKey key;
720                 OpenIsisHit hit;
721                 OpenIsisIndex idx = openIsisIdxOpen( db, intarg );
722                 char *buf = 0;
723                 int l, lines = 0;
724
725                 memset( &hit, 0, sizeof(hit) );
726                 hit.dbn = (-2 == intarg) ? 0xffff : 0; /* secret key for ifdel */
727                 while ( 0 <= (l = openIsisSReadln( &buf )) && buf ) {
728                         char *t = memchr( buf, '\t', l );
729                         if ( DO_SWLOAD == what ) {
730                                 memcpy( key.byt, buf, key.len = (unsigned char)l );
731                         } else if ( t ) { /* tab delimited */
732                                 key.len = (unsigned char)(t - buf);
733                                 memcpy( key.byt, buf, key.len );
734                                 if ( 0 >= (l -= t-buf+1) || !(t = memchr( buf=t+1, '\t', l )) )
735                                         continue;
736                                 hit.mfn = (unsigned) openIsisA2i( buf, t-buf );
737                                 if ( 0 >= (l -= t-buf+1) || !(t = memchr( buf=t+1, '\t', l )) )
738                                         continue;
739                                 hit.tag = (unsigned short) openIsisA2i( buf, t-buf );
740                                 if ( 0 >= (l -= t-buf+1) || !(t = memchr( buf=t+1, '\t', l )) )
741                                         continue;
742                                 hit.occ = (unsigned short) openIsisA2i( buf, t-buf );
743                                 if ( 0 >= (l -= t-buf+1) )
744                                         continue;
745                                 hit.pos = (unsigned short) openIsisA2i( t+1, l );
746                         } else {
747                                 /* 10/30 key BLANK 7 mfn BLANK 5 tag BLANK 4 occ BLANK 4 pos*/
748                                 int eok = l - 24; /* pos of blank after key, 10 or 30 */
749                                 if ( 54 != l && 34 != l ) {
750                                         openIsisSMsg( OPENISIS_ERR_INVAL,
751                                                 "bad ifload input len %d, want 34 or54 bytes + newline\n", l );
752                                         break;
753                                 }
754                                 for ( i=eok-1; ' ' == buf[i] && i--; )
755                                         ;
756                                 key.len = (unsigned char) (++i);
757                                 memcpy( key.byt, buf, key.len );
758                                 log_msg( LOG_VERBOSE, "'%.*s'", 7, buf+eok+1 );
759                                 hit.mfn = (unsigned) openIsisA2i( buf+eok+1, 7 );
760                                 hit.tag = (unsigned short)openIsisA2i( buf+eok+9, 5 );
761                                 hit.occ = (unsigned short)openIsisA2i( buf+eok+15, 4 );
762                                 hit.pos = (unsigned short)openIsisA2i( buf+eok+20, 4 );
763                         }
764                         log_msg( LOG_VERBOSE, "'%.*s' %d %d %d %d",
765                                 key.len, key.byt, hit.mfn, hit.tag, hit.occ, hit.pos );
766                         if ( openIsisIdxAdd( idx, &key, &hit ) )
767                                 break;
768                         if ( !(++lines & 0x3ff) )
769                                 log_msg( LOG_INFO, "%dK lines", lines >> 10 );
770                 }
771                 openIsisIdxDone( idx );
772         }       break;
773         case DO_IFDUMP: {
774                 OpenIsisIdxLoop l;
775                 memset( &l, 0, sizeof(l) );
776                 l.flg = idxmode;
777                 switch ( ifm ) {
778                 case IFM_OLD:
779                         l.flg |= OPENISIS_IDXTRAD;
780                 case IFM_DUMP:
781                         l.cb = (OpenIsisIdxCb*)printlk2;
782                         break;
783                 case IFM_TAB:
784                         l.cb = (OpenIsisIdxCb*)printtab;
785                         break;
786                 case IFM_COPY:
787                         l.flg |= OPENISIS_IDXTRAD;
788                         l.me = openIsisIdxOpen( 0 <= wdb ? wdb : db, 0 );
789                         l.cb = (OpenIsisIdxCb*)openIsisIdxAdd;
790                         break;
791                 case IFM_CHK:
792                         /* nuttin */
793                         break;
794                 }
795                 if ( search )
796                         memcpy( l.key.byt, search,
797                                 l.key.len = (unsigned char)strlen( search ) );
798                 if ( idxto )
799                         memcpy( l.to.byt, idxto,
800                                 l.to.len = (unsigned char)strlen( idxto ) );
801                 openIsisIdxLoop( db, &l );
802                 if ( IFM_COPY == ifm )
803                         openIsisIdxDone( (OpenIsisIndex)l.me );
804         }       break;
805         case DO_FDT:
806                 odb = ldb_getdb( db );
807                 if ( odb && odb->fdt )
808                         print( openIsisFFdt2Rec( odb->fdt, 0, intarg ), 0, fmt );
809                 break;
810         case DO_VUTF: {
811                 char buf[1024];
812                 int t = 0, f = 0, g;
813                 while ( 0 < (g = lio_read( &lio_in, buf, sizeof(buf) )) ) {
814                         int l = openIsisValidUTF8( buf, g, &f );
815                         if ( l ) {
816                                 openIsisSMsg( OPENISIS_ERR_INVAL,
817                                         "at total %d = %d+%d\n", l-1+t, l-1, t );
818                                 ret = 1;
819                                 goto bye;
820                         }
821                         t += g;
822                 }
823         }
824         }       /* switch ( what ) */
825 bye:
826         if ( 0 <= db )
827                 openIsisClose( db );
828         if ( 0 <= wdb )
829                 openIsisClose( wdb );
830
831         /* at least with WINE,
832                 atexit cleanup is not performed
833                 unless we explicitly call exit :(
834         */
835         exit( ret );
836         return ret;
837 }       /* openisis main */