2 openisis - an open implementation of the CDS/ISIS database
3 Version 0.8.x (patchlevel see file Version)
4 Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 see README for more information
24 $Id: ldb.c,v 1.95 2003/06/10 11:00:34 kripke Exp $
25 implementation of general db access functions.
30 #include <limits.h> /* PATH_MAX */
35 #if defined( __GNUC__ ) && defined ( alloca )
39 /* gcc always defines a cpu type - we use this for byte order checking */
40 #if defined( sparc ) || defined( __ppc__ )
41 # define LDB_BIG_ENDIAN
42 /* TODO: figure out fastest "htonl" on those boxes that usually don't swap */
43 static int rvi ( int i ) {
45 ((char*)&r)[0] = ((char*)&i)[3];
46 ((char*)&r)[1] = ((char*)&i)[2];
47 ((char*)&r)[2] = ((char*)&i)[1];
48 ((char*)&r)[3] = ((char*)&i)[0];
51 static short rvs ( short i ) {
53 ((char*)&r)[0] = ((char*)&i)[1];
54 ((char*)&r)[1] = ((char*)&i)[0];
57 #define SWI( i ) i = rvi( i )
58 #define SWS( s ) s = rvs( s )
66 # define LDB_NEEDALIGN
68 #if defined( LDB_NEEDALIGN )
69 static unsigned GETINT ( const void *m )
75 static unsigned short GETSHORT ( const void *m )
82 #define GETINT( m ) (*(unsigned*)(m))
83 #define GETSHORT( m ) (*(unsigned short*)(m))
95 #define IsAbsPath(p) \
97 '/' == *(p) || '\\' == *(p) || ( \
99 '/' == (p)[2] || '\\' == (p)[2] \
102 #define IsAbsPath(p) \
107 #define LF 10 /* LineFeed a.k.a. newline - '\n' isn't really well defined */
108 #define TAB 9 /* horizontal, that is */
109 #define VT 11 /* vertical, used as newline replacement */
111 /* ************************************************************
115 /** extension of master file proper. */
116 static const char EXT_MST_MST[] = ".mst";
117 /** extension of master file xref. */
118 static const char EXT_MST_XRF[] = ".xrf";
126 static const char * const EXT_MST[MST_FILES] = {
131 /** extension of inverted file short term nodes. */
132 static const char EXT_INV_N01[] = ".n01";
133 /** extension of inverted file short term leaves. */
134 static const char EXT_INV_L01[] = ".l01";
135 /** extension of inverted file long term nodes. */
136 static const char EXT_INV_N02[] = ".n02";
137 /** extension of inverted file long term leaves. */
138 static const char EXT_INV_L02[] = ".l02";
139 /** extension of inverted file control records. */
140 static const char EXT_INV_CNT[] = ".cnt";
141 /** extension of inverted file postings. */
142 static const char EXT_INV_IFP[] = ".ifp";
154 static const char * const EXT_INV[INV_FILES] = {
163 /** extension of lbt B-Link-Tree.
164 It's named oxi because that is nicer than oix for OpenIsis indeX.
165 however, see http://www.oxicenter.com.br/
167 static const char EXT_LBT_OXI[] = ".oxi";
168 static const char * const EXT_LBT[] = {
173 /** plaintext master file
175 static const char EXT_TXT_TXT[] = ".txt";
176 static const char EXT_TXT_PTR[] = ".ptr";
177 static const char EXT_TXT_OPT[] = ".opt";
183 static const char * const EXT_TXT[] = {
188 static const char ISIX[] = "ISIX"; /* ptr magic */
191 /** extension of supporting file alpha character table. */
192 static const char EXT_SUP_ACT[] = ".act";
193 /** extension of supporting file uppercase table. */
194 static const char EXT_SUP_UCT[] = ".uct";
202 static const char * const EXT_SUP[SUP_FILES] = {
208 typedef int lblk[128];
215 int mst[MST_FILES]; /* master file */
216 int inv[INV_FILES]; /* primary inverted file */
217 int mfc[LMFC__FL]; /* master file control record */
218 unsigned mflen; /* master file length */
219 int xrf[129]; /* last used xrf block : THREAD THREAT */
220 int xrlen; /* length of xrf (in blocks) */
221 unsigned short ptr; /* type of pointer file (new style xrf) */
222 unsigned short ptrl; /* pointer bytes, 512 for old xrf */
223 char *mmap; /* memory map of xrf/ptr */
224 int mmlen; /* length of map (in ptrl) */
225 int cnt[LDB_INDEXES][LCNT__FL]; /* two cnt records */
226 short tlen[LDB_INDEXES]; /* max term length for each index */
227 LcsTab ctab[LCS__TABS];
239 #define DB_OPEN 0x010000
240 #define DB_INVOPEN 0x020000
241 #define DB_LBTOPEN 0x040000
242 #define DB_TXTOPEN 0x080000
243 #define DB_WRITABLE 0x100000
244 #define DB_MODIFIED 0x200000
246 #define DB_TXTMODE 0x20
248 #define DB_VARI 0xf /* mask for variant */
251 /* get xstr for record rec in set */
252 #define DB_XSTR( db, set, rec ) \
253 lstrlib[ set ].desc[ DB_VARI & (db)->flags ][ rec ]
254 /* get record names for record rec in set */
255 #define DB_RNAM( db, set, rec ) \
256 lstrlib[ set ].name[ rec ]
259 /** packed little endian masterfile control structure.
274 /** packed little endian masterfile record.
276 typedef struct Dict {
282 /** packed little endian masterfile record.
286 short recl; /* a.k.a. mfrl */
287 short bwbl; /* low part of int */
288 short bwbh; /* high part of int */
290 /* it is believed that these first five fields up to here (12 bytes packed)
291 are to be in one 512-byte block; the manual even mentions 14 bytes ... ???
301 /* ************************************************************
305 static LDb defdbspace[32];
306 /* array of open dbs. should expand dynamically. */
307 static LDb *dbs = defdbspace;
308 static int dbs_len = sizeof(defdbspace)/sizeof(defdbspace[0]);
313 /* ************************************************************
316 static LDb *getDb ( int id )
318 if ( 0 <= id && id < dbs_len && dbs[id].flags ) {
321 log_msg( LOG_ERROR, "attempt to access bad db id %d", id );
326 /* ************************************************************
330 /* additional flags in the LIO_FD range */
331 OPEN_TRY = 1, /* try writable, open readonly else */
332 OPEN_UC = 2, /* use uppercase ext */
333 /* commonly used combinations */
334 /* 1) open as is, do not complain about any failure, do not create */
335 OPEN_ASIS = LIO_SEEK|LIO_RDWR|OPEN_TRY,
336 /* 2) open readonly, do not complain about any failure, do not create */
337 OPEN_RDIF = LIO_SEEK|LIO_RD|LIO_TRY,
338 /* 3) open readonly, complain about any failure */
339 OPEN_RD = LIO_SEEK|LIO_RD,
340 /* 4) open or create writable, complain on failure */
341 OPEN_NEW = LIO_SEEK|LIO_RDWR|LIO_CREAT,
342 OPEN_BLANK = LIO_SEEK|LIO_RDWR|LIO_CREAT|LIO_TRUNC
345 /* figure out whether to use uppercase extension on path.
346 if the last path component (everything after the last / or \)
347 contains an uppercase ASCII letter and no lowercase ASCII letter,
348 return OPEN_UC, else 0.
350 static int autocase ( const char *path )
353 const char *e = path + strlen( path );
355 if ( 'A'<=*e && *e<= 'Z' )
357 else if ( 'a'<=*e && *e<= 'z' )
359 else if ( '/'==*e || '\\' == *e )
364 /* set extension. fname MUST already end with .xxx.
365 if how has OPEN_UC set, use uppercase extension
367 static char *setext ( char *fname, const char *ext, int how )
369 int l = strlen( fname ) - 4;
370 memcpy( fname+l, ext, 4 );
371 if ( OPEN_UC & how ) {
373 for ( ;*p; p++ ) /* use uppercase extensions */
374 if ( 'a' <= *p && *p <= 'z' )
381 try to open all files according to how.
382 ldb is only interested in seekable readable true files.
384 1 if all files could be opened writable
385 0 if all files could be opened readonly,
386 and that was requested by a RD mode or try write
387 something negative else
389 static int openfiles ( int *fid, char *path,
390 const char * const *ext, int nfiles, int how )
393 int wr = LIO_WR&how ? 1 : 0, mode = LIO_WANT & how;
395 for ( i=0; i<nfiles; i++ ) {
396 setext( path, ext[i], how );
397 fid[i] = lio_open( path, mode & LIO_WANT );
398 log_msg( LOG_INFO, "opening file '%s' %c 0x%x",
399 path, wr ? 'w' : 'r', fid[i] );
400 if ( 0 < fid[i] ) { /* ok */
401 mode &= ~LIO_FLOCK; /* lock only leading file */
405 while ( i-- ) /* close others */
406 lio_close( &fid[i], LIO_INOUT );
407 if ( OPEN_TRY & how )
408 return openfiles( fid, path, ext, nfiles,
409 (how & ~(OPEN_TRY|LIO_WR)) | LIO_TRY );
410 return LIO_TRY&how ? -ERR_BADF /* silent */
411 : log_msg( LOG_SYSERR, "could not open file '%s' for %sing",
412 path, wr ? "writ" : "read" );
414 return wr; /* good */
418 static int closefiles ( int *fid, int nfiles )
421 for ( i=0; i<nfiles; i++ )
422 if ( 0 < fid[i] && LIO_INOUT & fid[i] )
423 lio_close( &fid[i], LIO_INOUT );
428 static int readblk ( void *dst, int siz, int fid, int where )
431 got = lio_pread( &fid, dst, siz, where );
435 if ( LOG_DO( LOG_ALL ) )
438 if ( siz == (int)got )
440 log_msg( LOG_WARN, "got %u bytes wanted %d at %d in 0x%x",
441 got, siz, where, fid );
446 /* ************************************************************
450 static int *nrec ( int *xstr )
452 int *dst = (int*)mAlloc( xstr[LSTR_ILEN] );
460 typedef struct b8 { char x[8]; } b8;
461 typedef struct b4 { char x[4]; } b4;
462 typedef struct b2 { char x[2]; } b2;
465 static int convert ( int *dst, char *src, int *xstr )
467 static int pow2[] = { 1, 2, 4, 8 };
469 int *xmbrs = xstr+LSTR_XMBR;
470 int nmbrs = LSTRFIX(*xstr);
472 char *buf, *part = src, *srcend = src + xstr[LSTR_XLEN];
475 return log_msg( ERR_NOMEM, "out of memory (no dst) in convert" );
476 if ( LSTRLEN(*xstr) > xstr[LSTR_ILEN] )
477 return log_msg( ERR_INVAL, "bad ilen %d need %d",
478 xstr[LSTR_ILEN], (int)LSTRLEN(*xstr) );
480 return log_msg( ERR_INVAL, "bad *dst 0x%08x need 0x%08x", *dst, *xstr );
481 /* clean and re-init */
482 memset( dst, 0, xstr[LSTR_ILEN] );
485 buf = ((char*)dst) + LSTRLEN(*dst);
487 /* cvt the fixed part (occ==-1) and each occurrence of repeated part. */
488 for ( ;/* occ < LSTROCC(*dst) */; ) { /* cvt one part */
490 for ( i=0; i<nmbrs; i++, mbr++ ) { /* assign one xmbr */
492 char *s = part + LONG2OFF(xmbr);
493 int sbytes = srcend - s;
504 if ( LMBRISNUM( xmbr ) ) {
506 #ifdef LDB_BIG_ENDIAN
507 # define NEEDSWAP(xmbr) ( ! ( LMBR_FHIE & (xmbr) ) )
509 # define NEEDSWAP(xmbr) (LMBR_FHIE & (xmbr))
511 int ld = LMBRLD(xmbr);
512 #ifndef LDB_NEEDALIGN
513 if ( ! NEEDSWAP( xmbr ) ) {
514 /* much faster this way ... */
515 /* TODO: len and bit checks; actually not needed yet ... */
517 case 3: *mbr = *(lll*)s; break;
518 case 2: *mbr = *(int*)s; break;
519 case 1: *mbr = *(short*)s; break;
520 case 0: *mbr = *s; break;
525 len = pow2[ LMBRLD(xmbr) ];
527 return log_msg( ERR_INVAL,
528 "srcbuf too short %d have %d need %d occ %d mbr %d",
529 xstr[LSTR_XLEN], sbytes, len, occ, i );
530 if ( !NEEDSWAP( xmbr ) )
531 /* for ( j = len; j--; ) num.buf[j] = s[j]; */
533 case 3: num.x8 = *(b8*)s; break;
534 case 2: num.x4 = *(b4*)s; break;
535 case 1: num.x2 = *(b2*)s; break;
536 case 0: num.buf[0] = *s; break;
538 else /* swap bytes */
539 for ( j = len; j--; )
540 num.buf[j] = s[len - 1 - j];
542 case 8: *mbr = num.ll; break;
543 /* TODO: defines for 16 and 64 bit compilers */
544 case 4: *mbr = num.i; break;
545 case 2: *mbr = num.s; break;
546 case 1: *mbr = num.buf[0]; break;
548 if ( LMBRISBITS( xmbr ) ) { /* apply bit shift and mask */
549 *mbr >>= LMBRBITOFF( xmbr );
550 *mbr &= ~(-1L << LMBRBITLEN( xmbr ));
554 /* else raw data -- that's easy :) */
556 int offset = buf - (char*)dst;
558 len = LONG2LEN(xmbr);
560 if ( need > xstr[LSTR_ILEN] )
561 return log_msg( ERR_INVAL,
562 "bad buflen %d need %d+%d in occ %d mbr %d",
563 xstr[LSTR_ILEN], offset, len, occ, i );
565 return log_msg( ERR_INVAL,
566 "srcbuf too short %d have %d need %d occ %d mbr %d",
567 xstr[LSTR_XLEN], sbytes, len, occ, i );
568 memcpy( buf, s, len );
569 *mbr = buf - (char*)dst;
574 if ( ++occ >= LSTROCC(*dst) )
577 part += (short)xstr[LSTR_XRLO]; /* adv. rep. part len */
578 else { /* was the fixed part, setup for repeated */
579 nmbrs = LSTRREP(*xstr);
580 part += (short)(xstr[LSTR_XRLO]>>16); /* adv. rep. part off */
588 static int readrec ( int *dst, int fid, int where, int *xstr )
592 alloca( xstr[LSTR_XLEN] )
594 mAlloc( xstr[LSTR_XLEN] )
603 where = -where * xstr[LSTR_XLEN];
604 got = readblk( buf, xstr[LSTR_XLEN], fid, where );
605 ret = got ? got : convert( dst, buf, xstr );
615 static int readlog ( int *dst, int fid, int where, LDb *db, int set, int rec )
617 int ret = readrec( dst, fid, where, DB_XSTR( db, set, rec ) );
618 if ( !ret && LOG_DO( LOG_TRACE ) )
619 LOG_STR( dst, lstrlib[ set ].name[ rec ] );
625 /* ************************************************************
626 access functions for the record types
629 static int getOff ( LDb *db, int rowid, int xr )
631 int rowix = rowid - 1; /* mfns count from 1 */
632 int xrf_block = rowix / 127;
633 int blkix = xrf_block + 1; /* ... so do xrf blocks */
635 if ( xrf_block < db->mmlen ) {
637 ((lblk*)db->mmap)[xrf_block][1+(int)(rowix % 127)] = rvi( xr );
639 xr = rvi( ((lblk*)db->mmap)[xrf_block][1+(int)(rowix % 127)] );
642 /* if ( LIO_LOCK() ) return -3; */
643 if ( xr ) { /* write */
644 if ( blkix <= db->xrlen ) {
646 if ( 4 != lio_pwrite( &db->mst[MST_XRF], &xr, 4,
647 xrf_block*512 + 4*(1 + (rowix % 127))) )
649 } else { /* extent */
651 memset( extend, 0, sizeof(extend) );
652 while ( db->xrlen < blkix ) { /* extend */
653 extend[0] = db->xrlen + 1; /* set blk id */
654 if ( blkix == extend[0] )
655 extend[1+(int)(rowix % 127)] = rvi( xr );
657 if ( 512 != lio_pwrite( &db->mst[MST_XRF], extend, 512,
663 if ( blkix == db->xrf[LXRF_XPOS] )
664 db->xrf[ LXRF_XREC + (int)(rowix % 127) ] = xr;
665 /* and go on to read it back, just to check ... */
667 if ( blkix != db->xrf[LXRF_XPOS] ) {
669 LOG_DBG( LOG_VERBOSE, "fetching xrf block %d had %d",
670 blkix, ! db->xrf ? -1 : db->xrf[LXRF_XPOS] );
671 ret = readlog( db->xrf, db->mst[MST_XRF],
672 -xrf_block, db, LSET_MST, LSTR_XRF );
674 log_msg( LOG_ERROR, "\twhen fetching xrf block %d", blkix );
676 } else if ( blkix == -db->xrf[LXRF_XPOS] ) {
677 LOG_DBG( LOG_DEBUG, "hmmm ... negative" );
678 db->xrf[LXRF_XPOS] = blkix;
679 } else if ( blkix != db->xrf[LXRF_XPOS] ) {
680 log_msg( LOG_WARN, "bad xrf %d wanted %d",
681 db->xrf[LXRF_XPOS], blkix );
685 xr = db->xrf[ LXRF_XREC + (int)(rowix % 127) ];
691 21 bits (<<11) signed for the (512 byte) block ("xrmfb")
692 1 for the first block (offset 0)
693 0 means, never had such a record
694 -1 and xrmfp=0: record removed from MST
695 (there is no record at pos 0 in 1st block,
696 since there resides the MST header)
697 other negative value -x or pos!=0:
698 record logically deleted, was at +x
699 1 bit (1<<10): this record is new and not yet inverted
700 1 bit (1<<9): this record is changed and not yet re-inverted
701 9 bits for the block-relative position ("xrmfp")
703 off = (((xr & 0xfffff800) >> 2) - 0x200) | (0x1ff & xr);
704 if ( 0 < (xr & ~0x600) ) {
706 "offset for rowid %d is %d (blk %d pos %d) flg 0x%08x at %d[%d]",
707 rowid, off, (xr>>11)&0xfffff, xr&0x1ff, xr&0x80000600, blkix, rowix%127 );
712 "offset for rowid %d is %d (blk %d pos %d) flg 0x%08x at %d[%d]",
713 rowid, off, (xr>>11)&0xfffff, xr&0x1ff, xr&0x80000600, blkix, rowix%127 );
718 static int* getMfr ( LDb *db, int off, int *nxtoff )
721 int xstr[LSTR_LONGS(7+3)];
722 } my = *(struct mfcxstr *)DB_XSTR( db, LSET_MST, LSTR_MFR );
731 LOG_DBG( LOG_VERBOSE, "getting MFR at off %d", off );
732 if ( 498 < off % 512 )
733 log_msg( LOG_WARN, "blk pos > 498 in offset 0x%08x", off );
735 if ( readrec( head, db->mst[MST_MST], off, my.xstr ) ) {
736 log_msg( LOG_ERROR, "\twhen reading MFR head at %d", off );
739 /* log_str( LOG_VERBOSE, head, lstrlib[LSET_MST].name[LSTR_MFR] ); */
740 len = head[LMFR_RECL];
741 LOG_DBG( LOG_VERBOSE, "got MFR %d reclen %d", head[LMFR_MFN], len );
743 *nxtoff = off + (0 < len ? len : -len); /* an odditiy */
744 if ( 1 & *nxtoff ) /* an odditiy */
745 (*nxtoff)++; /* round up to even */
746 if ( 498 < *nxtoff % 512 ) {
750 if ( *nxtoff > db->mfc[LMFC_NMFB]*512 + db->mfc[LMFC_NMFP] ) {
751 LOG_DBG( LOG_VERBOSE, "at end of db: %d > %d*512+%hd",
752 *nxtoff, db->mfc[LMFC_NMFB], db->mfc[LMFC_NMFP] );
757 log_msg( LOG_INFO, "found deleted rec len %hd at offset %d", len, off );
761 /* check external base length */
762 base = LONG2OFF(my.xstr[LSTR_XRLO])
763 + head[LMFR_NVF]*LONG2LEN(my.xstr[LSTR_XRLO]);
764 if ( 0 > head[LMFR_BASE] || 0 > head[LMFR_NVF]
765 || 0x8fff < head[LMFR_NVF]
766 || len < head[LMFR_BASE] || len < base
767 || (head[LMFR_NVF] && head[LMFR_BASE] < base)
770 "bad len %d base %d nvf %d need base %d at offset %d",
771 len, head[LMFR_BASE], head[LMFR_NVF], base, off );
772 /* check alignment problem */
773 base = LONG2OFF(my.xstr[LSTR_XRLO])
774 + head[LMFR_STAT]*LONG2LEN(my.xstr[LSTR_XRLO]);
775 if ( 0 > head[LMFR_NVF] || 0 > head[LMFR_STAT]
776 || 0x8fff < head[LMFR_STAT]
777 || len < head[LMFR_NVF] || len < base
778 || (head[LMFR_STAT] && head[LMFR_NVF] < base)
782 log_msg( LOG_ERROR, "probably alignment problem, try -format aligned" );
786 /* set up external structure for this rec */
787 my.xstr[LSTR_SIZE] |= head[LMFR_NVF] << 16; /* occ of rep. part */
788 my.xstr[LSTR_XLEN] = len;
789 /* internal base length */
790 base = LSTRLEN( *my.xstr );
791 /* internal len adjusted for the slightly longer base */
792 my.xstr[LSTR_ILEN] = len + base - head[LMFR_BASE]; /* the buffer */
794 rec = nrec( my.xstr );
796 log_msg( LOG_SYSERR, "could not alloc MFR of len %hd", my.xstr[LSTR_ILEN] );
802 /* first try faster alloca, but stack may be too limited for large records */
810 log_msg( LOG_SYSERR, "could not alloc MFR of len %hd", len );
813 if ( readblk( buf, len, db->mst[MST_MST], off ) ) {
814 log_msg( LOG_ERROR, "\twhen reading MFR" );
817 #ifndef LDB_BIG_ENDIAN
818 if ( LVAR_PAC != (DB_VARI & db->flags) ) {
820 if ( convert( rec, buf, my.xstr ) ) {
821 log_msg( LOG_ERROR, "\twhen converting MFR" );
824 #ifndef LDB_BIG_ENDIAN
825 } else { /* 10% faster */
826 Mfr *mfr = (Mfr*)buf;
827 short *s = &mfr->dict->tag;
828 int *f = rec + LMFR__FL;
829 int *e = f + 3*head[LMFR_NVF];
830 rec[LMFR_MFN] = mfr->mfn;
831 rec[LMFR_RECL] = mfr->recl;
833 rec[LMFR_BWB] = mfr->bwbh<<16 | mfr->bwbl;
834 rec[LMFR_BWP] = mfr->bwp;
836 rec[LMFR_BASE] = mfr->base;
837 rec[LMFR_NVF] = mfr->nvf;
838 rec[LMFR_STAT] = mfr->stat;
847 if ( rec[LMFR_STAT] ) {
848 log_msg( LOG_WARN, "found status %hd", rec[LMFR_STAT] );
852 /* do a consistency check */
853 if ( rec[LMFR_NVF] < 0 || rec[LMFR_BASE] < 0 ) {
854 log_msg( LOG_ERROR, "found neg. field nvf %hd base %hd",
855 rec[LMFR_NVF], rec[LMFR_BASE] );
859 /* now care for the field values */
861 char *valsrc = buf+rec[LMFR_BASE];
862 char *recsta = ((char*)rec);
863 char *valdst = recsta + base;
864 int xbufl = rec[LMFR_RECL] - rec[LMFR_BASE];
867 for ( i=0; i < rec[LMFR_NVF]; i++ ) {
868 int *d = &rec[LMFR__FL + i*LMFR__RL];
869 if ( d[LMFR_POS] < 0 || d[LMFR_LEN] < 0 ) {
871 "bad field %d at off %d: negativ pos %hd or len %hd",
872 i, off, d[LMFR_POS], d[LMFR_LEN] );
875 if ( d[LMFR_POS] + d[LMFR_LEN] > xbufl ) {
877 "bad field %d at off %d: pos %hd + len %hd > buf %d",
878 i, off, d[LMFR_POS], d[LMFR_LEN], xbufl );
881 sumlens += d[LMFR_LEN];
882 if ( sumlens > xbufl ) {
884 "bad fields at off %d: sum of lengths %d > buf %d",
885 off, sumlens, xbufl );
888 memcpy( valdst, valsrc+d[LMFR_POS], d[LMFR_LEN] );
889 d[LMFR_POS] = valdst - recsta;
890 valdst += d[LMFR_LEN];
892 } /* consistency check */
893 rec[LMFR_BWB] = /* "used" bytes */
894 rec[LMFR_RECL] = my.xstr[LSTR_ILEN];
895 rec[LMFR_BWP] = rec[LMFR_NVF]; /* avail fields = used fields */
896 rec[LMFR_BASE] = base;
898 if ( LOG_TRACE <= log_lev )
899 LOG_STR( rec, lstrlib[LSET_MST].name[LSTR_MFR] );
918 *rec = db->head.dbid;
923 /** write the record.
924 If it doesn't yet have a mfn, assign one.
925 NOTE: on a BIG_ENDIAN machine, every field except mfn and recl is left byte-swapped
928 static int putMfr ( LDb *db, Mfr *mfr )
933 if ( !(db->flags & DB_WRITABLE) ) {
934 log_msg( LOG_ERROR, "db is not writable" );
937 /* minimalist sanity check */
939 || mfr->base != 18 + 6*mfr->nvf
940 || mfr->base > mfr->recl
942 log_msg( LOG_ERROR, "bad nvf/base/recl %d/%d/%d ",
943 mfr->nvf, mfr->base, mfr->recl );
946 db->flags |= DB_MODIFIED;
949 if ( db->mfc[LMFC_NMFN] > mfr->mfn )
950 oldpos = getOff( db, mfr->mfn, 0 );
952 db->mfc[LMFC_NMFN] = mfr->mfn+1;
955 mfr->bwp = 511 & oldpos;
956 block = 1 + (oldpos >> 9); /* blockno counting from 1 */
957 mfr->bwbl = 0xffff & block;
958 mfr->bwbh = block >> 16;
959 if ( db->mfc[LMFC_NMFN] <= mfr->mfn )
960 db->mfc[LMFC_NMFN] = mfr->mfn+1;
962 mfr->mfn = db->mfc[LMFC_NMFN]++; /* assign new mfn */
964 mfr->bwbl = mfr->bwbh = mfr->bwp = 0;
968 if ( 498 < (newpos & 511) ) /* round up to next block boundary */
969 newpos = ~511 & (newpos + 14);
972 db->mflen = newpos + mfr->recl;
973 #ifdef LDB_BIG_ENDIAN
974 { /* swap swap swap */
976 short nvf = mfr->nvf;
977 SWI( mfr->mfn ); SWS( mfr->recl ); SWS( mfr->bwbl ); SWS( mfr->bwbh );
978 SWS( mfr->bwp ); SWS( mfr->base ); SWS( mfr->nvf ); SWS( mfr->stat );
979 for ( ; nvf--; d++ ) {
986 ret = lio_pwrite( &db->mst[MST_MST], (char*)mfr, rvs(mfr->recl), newpos );
987 #ifdef LDB_BIG_ENDIAN
988 /* restore mfn, recl */
992 if ( ret != mfr->recl )
993 return log_msg( ERR_TRASH, "could not write Mfr %d bytes got %d",
995 getOff( db, mfr->mfn, (1 << (oldpos ? 9 : 10))
996 | (((newpos & 0xfffffe00) + 0x200) << 2) | (0x1ff & newpos) );
1002 static int putRec ( LDb *db, Rec *rec )
1012 const char *rbase = ((char *)rec) + rec->base;
1014 Field *f = rec->field;
1015 /* TODO: if not rec->len, delete ? */
1016 for ( i = rec->len; i--; f++ ) {
1020 LOG_OTO( cleanup, ( ERR_FAULT, "bad rec NULL val" ) );
1021 contig = contig && (f->val == rbase + buflen);
1024 reclen = 18 + 6*rec->len + buflen;
1037 ( ERR_NOMEM, "could not alloc MFR of len %hd", reclen ) );
1038 mfr->mfn = rec->rowid;
1040 mfr->bwbl = mfr->bwbh = mfr->bwp = 0;
1041 mfr->base = 18 + 6*rec->len;
1042 mfr->nvf = rec->len;
1047 for ( i = rec->len; i--; d++, f++ ) {
1050 buflen += (d->len = f->len);
1053 memcpy( ((char*)mfr)+mfr->base, rbase, buflen );
1055 char *mbase = ((char*)mfr)+mfr->base;
1058 for ( i = rec->len; i--; d++, f++ )
1060 memcpy( mbase + d->pos, f->val, d->len );
1062 ret = putMfr( db, mfr );
1063 if ( !ret && !rec->rowid )
1064 rec->rowid = mfr->mfn;
1078 /* ************************************************************
1079 access functions for plaintext db
1083 create a pointer from the least significant bytes of pos, len, fld
1084 buf must have db->ptrl bytes (up to 16 = 8+4+4)
1085 and the most strict alignment (i.e. 4 or 8) possible for db->ptrl
1088 static char *mkptr ( char *buf, LDb *db,
1089 unsigned pos, unsigned len, unsigned fld )
1091 switch ( db->ptr ) {
1092 case 0x0134: /* '4' */
1093 ((unsigned*)buf)[0] = pos;
1094 if ( ~0xff & fld ) fld = 0;
1095 #ifdef LDB_BIG_ENDIAN /* the first = high order 3 bytes are len */
1096 ((unsigned*)buf)[1] = (0xff&fld) | len<<8;
1097 #else /* the first = low order 3 bytes are len */
1098 ((unsigned*)buf)[1] = (0xffffff&len) | fld<<24;
1101 case 0x0044: /* 'D' */
1102 ((unsigned*)buf)[0] = pos;
1103 ((unsigned*)buf)[1] = len;
1105 case 0x0035: /* '5' */
1106 #ifdef LDB_BIG_ENDIAN /* the first = high order 5 bytes are pos */
1107 *(lll*)buf = (0xffffff&len) | ((lll)pos)<<24;
1108 #else /* the first = low order 5 bytes are pos */
1109 *(lll*)buf = pos | ((lll)len)<<40;
1121 read pointer, return len
1122 if 0x0f00 & db->ptr, fld must not be 0, else *fld is untouched
1124 static unsigned rdptr ( unsigned *pos, unsigned *fld, LDb *db, char *buf )
1126 switch ( db->ptr ) {
1128 *pos = *(unsigned*)buf;
1129 *fld = ((unsigned char *)buf)[7];
1130 #ifdef LDB_BIG_ENDIAN
1131 return ((unsigned*)buf)[1] >> 8;
1133 return 0xffffff & ((unsigned*)buf)[1];
1136 *pos = *(unsigned*)buf;
1137 return ((unsigned*)buf)[1];
1139 #ifdef LDB_BIG_ENDIAN
1140 /* *pos = (*(unsigned*)(buf+1)); would bus error on sparc */
1141 *pos = (unsigned) (*(lll*)buf >> 24);
1142 return 0xffffff & (unsigned)*(lll*)buf; /* last 3 bytes */
1143 #else /* guess there is no little endian that needs alignment ? */
1144 *pos = *(unsigned*)buf; /* use low order = first 4 of first 5 bytes */
1145 return 0xffffff & (*(unsigned*)(buf+5));
1155 static void setPtr ( LDb *db, int mfn,
1156 unsigned pos, unsigned len, unsigned fld )
1159 if ( mfn < db->mmlen ) {
1160 mkptr( db->mmap + mfn*db->ptrl, db, pos, len, fld );
1163 lio_pwrite( &db->mst[MST_XRF],
1164 mkptr( pt.r, db, pos, len, fld), db->ptrl, mfn*db->ptrl );
1168 static unsigned getPtr ( unsigned *pos, unsigned *fld, LDb *db, int mfn )
1171 return mfn < db->mmlen
1172 ? rdptr( pos, fld, db, db->mmap + mfn*db->ptrl )
1173 : db->ptrl == lio_pread( &db->mst[MST_XRF],
1174 pt.r, db->ptrl, mfn*db->ptrl )
1175 ? rdptr( pos, fld, db, pt.r )
1181 static int putPlain ( LDb *db, Rec *rec )
1189 the original text is read contiguously at base.
1190 the record is then cooked as requested:
1191 0 well done: do full fixup,
1192 apply conversions and create fields.
1193 1 english: do not create fields (rec->fields is 0), apply no conversions,
1194 but set rec->len to actual number of fields (counting if necessary).
1196 set len only if it's known from the pointer
1198 static Rec *dText ( LDb *db, int mfn, int raw )
1200 unsigned base, sz, pos, len, fld; /* #fields actually used */
1201 unsigned n = 0; /* known #fields */
1206 len = getPtr( &pos, &n, db, mfn );
1207 LOG_DBG( LOG_TRACE, "dText %d pos %d len %d fld %d", mfn, pos, len, n );
1212 else if ( !(fld = n) ) {
1213 fld = len / 36; /* assume one (costing 12 bytes) per 36 bytes data */
1214 if ( fld < 8 ) /* small record is likely to have some short fields */
1219 r = (Rec*)mAlloc( sz );
1220 p = ((char*)r) + base;
1221 if ( (int)len != lio_pread( &db->mst[MST_MST], p, len, pos ) ) {
1225 LOG_DBG( LOG_TRACE, "'%.*s'", len, p );
1226 r->dbid = db->head.dbid;
1228 r->used = r->bytes = sz;
1232 if ( raw && (n || 1 != raw) )
1235 f = r->field; /* next field to assign */
1236 fe = f + fld; /* end of assignable fields */
1238 loop through buffer lines to count a/o assign
1240 while f < fe, also fix and assign them
1242 for ( n=0;;) { /* possibly 2 passes needed */
1243 for ( ;p < e; p = q+1 ) {
1244 if ( !(q = memchr( p, LF, e-p )) )
1246 if ( TAB != *p || !n ) {
1248 p += a2il( p, q-p, &f->tag );
1249 if ( p < q && TAB == *p )
1251 f->len = q - (f->val = p);
1252 f++; /* f == r->field+n, as long as we don't hit fe */
1257 /* continuation line */
1258 if ( f != r->field+n )
1260 /* we ARE assigning & didn't lose sync at fe */
1261 /* append to previous */ {
1262 char *dest = (char*)f[-1].val + f[-1].len;
1263 int dist = p - dest, l = q-p;
1265 memmove( dest, p, l );
1266 memset( q-dist, ' ', dist ); /* cleanup */
1270 /* now n != 0, since initially p < e, since len != 0 */
1271 if ( r->len && r->len != (int)n ) {
1272 log_msg( LOG_WARN, "rec %d len %d != ptr %d", mfn, n, r->len );
1275 if ( raw || (int)n <= r->fields ) /* all counted/assigned */
1277 /* extend the record to n fields */
1278 log_msg( LOG_INFO, "extending rec %d %d -> %d fields", mfn, fld, n );
1282 x = (Rec*)mAlloc( sz );
1283 x->dbid = db->head.dbid;
1285 x->used = x->bytes = sz;
1289 p = ((char*)x) + base;
1291 q = ((char*)r) + r->base;
1292 memcpy( p, q, len );
1293 memcpy( x->field, r->field, r->fields*sizeof(Field) );
1294 for ( f=x->field, n=r->fields; n--; )
1300 fe = r->field + fld;
1301 p = (char*)f[-1].val + f[-1].len;
1302 /* seek behind the LF that delimited the last field */
1307 if ( !raw && (DB_TXTMODE & db->flags) )
1308 for ( f = r->field, fe = r->field + r->fields; f < fe; f++ )
1309 for ( p = (char*)f->val, e = p+f->len; (p = memchr(p,VT,e-p)); )
1315 static int pText ( LDb *db, Rec *r, const char *mark )
1317 char buf[128 + 65536];
1318 unsigned pos = 0, len = 0, fld, off;
1322 if ( !(db->flags & DB_WRITABLE) ) {
1323 log_msg( LOG_ERROR, "db is not writable" );
1327 r->rowid = db->mfc[LMFC_NMFN]++; /* assign new mfn */
1328 else if ( db->mfc[LMFC_NMFN] <= r->rowid )
1329 db->mfc[LMFC_NMFN] = r->rowid + 1;
1332 len = getPtr( &pos, &fld, db, r->rowid );
1334 p = b = 32768 >= r->used ? buf : mAlloc(128+2*r->used);
1337 p += u2a( p, r->rowid );
1350 int l = strlen(mark);
1352 log_msg( LOG_WARN, "mark '%.48s'%s has length %d",
1353 mark, l<48 ? "" : "...", l );
1356 memcpy( p, mark, l );
1364 p += len = rSerB( p, r );
1365 if ( len > 1 ) /* don't count 2 trailing LFs */
1367 db->flags |= DB_MODIFIED;
1370 ret = lio_pwrite( &db->mst[MST_MST], b, p - b, pos );
1371 if ( ret == p - b ) {
1372 setPtr( db, r->rowid, pos+off, len, r->len );
1381 /* ************************************************************
1385 static int search ( LDb *db, const char *key, LdbPost *post,
1386 Rec *rec, DXLoop *lp )
1388 int i, j, prefix, idx, ret, ock;
1393 struct { /* terms cursor */
1394 char key[LDB_MAX_KEYLEN+1]; /* key or key prefix */
1395 short klen; /* key length to compare */
1396 char imin; /* minimum index to search */
1397 char imax; /* maximum index to search */
1398 int leaf[LDB_INDEXES][LDB_TERMBUF]; /* one leaf buffer per index */
1399 short lpos[LDB_INDEXES]; /* next position in leaf, -1 if done */
1401 short klen; /* length for initial locate */
1402 int block[128]; /* buffer to read one block */
1408 /* prepare cursor struct */
1409 memset( &crs, 0, sizeof(crs) ); /* tabula rasa */
1410 crs.klen = strlen( key );
1411 /* check for prefix match */
1413 prefix = LDB_PFX & post->mode;
1414 else if ( (prefix = crs.klen && '$' == key[crs.klen - 1]) )
1416 /* check out minimum index to search */
1417 for ( crs.imin=0; crs.klen > db->tlen[(int)crs.imin]; )
1418 if ( LDB_INDEXES == ++(crs.imin) )
1419 return log_msg( ERR_INVAL, "bad keylen %d key '%.64s'", crs.klen, key );
1421 memset( crs.key, ' ', sizeof(crs.key)-1 );
1423 unsigned char *uc = (unsigned char*)crs.key;
1424 unsigned char *uk = (unsigned char*)key;
1425 for ( i=crs.klen; i--; )
1426 uc[i] = db->ctab[LCS_UCASE].c[ uk[i] ];
1429 crs.imax = LDB_INDEXES-1;
1431 crs.imax = crs.imin;
1432 crs.klen = db->tlen[(int)crs.imin];
1434 log_msg( LOG_INFO, "search for '%.*s'%c", crs.klen, crs.key, prefix?'$':' ' );
1437 if ( rec && rec->len ) {
1438 /* use last key from record to locate starting position */
1439 key = rec->field[rec->len-1].val;
1440 klen = rec->field[rec->len-1].len;
1444 for ( i=crs.imin; i<=crs.imax; i++ ) { /* find leaf positions */
1445 int nFile = INV_N01 + 2*i; /* node file index */
1446 int nStr = LSTR_N01 + 2*i; /* node struct index */
1447 int *nstr = DB_XSTR( db, LSET_INV, nStr );
1449 short cmplen = klen <= db->tlen[i] ? klen : db->tlen[i];
1450 pos = db->cnt[i][LCNT_POSR]; /* pos of root record */
1452 for ( lvl = 0; 0<pos; lvl++ ) { /* traverse node levels */
1454 LOG_DBG( LOG_DEBUG, "node %d at %d lvl %d", pos, j, lvl );
1455 assert( (int)sizeof(node) >= nstr[LSTR_ILEN] );
1457 ret = readlog( node, db->inv[nFile], 1-pos, db, LSET_INV, nStr );
1458 if ( pos != node[LN0X_POS] /* wrong address */
1459 || i+1 != node[LN0X_TYPE] /* wrong type */
1460 || node[LN0X_OCK] < 1 /* no keys */
1461 || 2*db->cnt[i][LCNT_ORDN] < node[LN0X_OCK] /* too many keys */
1463 return log_msg( ERR_TRASH, "bad node pos %d type %d keys %d",
1464 node[LN0X_POS], node[LN0X_TYPE], node[LN0X_OCK]
1466 ock = node[LN0X_OCK];
1468 j<ock && 0 < (ret = memcmp( key,
1469 ((char*)node)+node[j*LN0X__RL+LN0X__FL+LN0X_KEY], cmplen ));
1472 /* now j is at end or on next index not less */
1473 if ( j==ock /* end */
1474 || ret /* index is greater than key */
1475 || prefix /* backtrack even on exact match */
1477 j--; /* step into last ock with lower key */
1478 pos = node[LN0X__FL + j*LN0X__RL + LN0X_REF];
1480 /* got some negative ref to leaf; set leaf pos */
1481 crs.leaf[i][LL0X_PS] = -pos;
1483 since the lpos and LL0X_OCK are both 0 by the memset above,
1484 we will initially load the leaves
1487 /* done preparing cursor */
1489 if ( post ) /* prepare for postings */
1491 xstr = DB_XSTR( db, LSET_INV, LSTR_IFP );
1493 for (;;) { /* loop terms in prefix mode */
1494 /* vars for postings: */
1495 int infb, infp; /* block and pos where to read postings */
1496 int added; /* postings added or marked per term */
1497 int blkno; /* postings block number */
1498 int remain = 0; /* postings to fetch from next block of segment */
1499 int ifp[LIFP__FL]; /* postings header */
1501 idx = -1; /* index to use */
1502 /* compare index terms, load leafes if needed */
1503 for ( i = crs.imin; i <= crs.imax; i++ ) {
1504 short cmplen = klen <= db->tlen[i] ? klen : db->tlen[i];
1506 if ( leaf[LL0X_OCK] <= crs.lpos[i] ) { /* load */
1507 int lFile = INV_L01 + 2*i; /* leaf file index */
1508 int lStr = LSTR_L01 + 2*i; /* leaf struct index */
1509 int *lstr = DB_XSTR( db, LSET_INV, lStr );
1513 if ( ! (pos = leaf[LL0X_PS]) )
1515 LOG_DBG( LOG_DEBUG, "leaf %d", pos );
1516 assert( (int)sizeof(crs.leaf[i]) >= lstr[LSTR_ILEN] );
1518 ret = readlog( leaf, db->inv[lFile], 1-pos, db, LSET_INV, lStr );
1519 if ( pos != leaf[LL0X_POS] /* wrong address */
1520 || i+1 != leaf[LL0X_TYPE] /* wrong type */
1521 || leaf[LL0X_OCK] < 1 /* no keys */
1522 || 2*db->cnt[i][LCNT_ORDN] < leaf[LL0X_OCK] /* too many keys */
1524 return log_msg( ERR_TRASH, "bad leaf pos %d type %d keys %d",
1525 leaf[LL0X_POS], leaf[LL0X_TYPE], leaf[LL0X_OCK] );
1526 ock = leaf[LL0X_OCK];
1527 /* advance to first term which is not too small
1528 (should be needed only for first leaf of an index)
1531 j<ock && (0 < (ret = memcmp( key,
1532 ((char*)leaf)+leaf[LL0X__FL + j*LL0X__RL + LL0X_KEY], cmplen ))
1533 || (!ret && key!=crs.key) ); /* skip exact while locating */
1537 goto reread; /* start over w/ next leaf of same index */
1539 || (key!=crs.key && !memcmp( crs.key,
1540 ((char*)leaf)+leaf[LL0X__FL + j*LL0X__RL + LL0X_KEY], crs.klen ))
1545 if ( 0 > crs.lpos[i] )
1551 /* compare this index next term to that of index idx */
1552 /* assume that index w/ lower number has shorter keys */
1554 ((char*)leaf)+leaf[LL0X__FL + crs.lpos[i]*LL0X__RL + LL0X_KEY],
1555 ((char*)crs.leaf[idx])+
1556 crs.leaf[idx][LL0X__FL + crs.lpos[idx]*LL0X__RL + LL0X_KEY],
1564 leaf = crs.leaf[idx];
1565 entry = leaf + LL0X__FL + j*LL0X__RL;
1566 term = ((char*)leaf) + entry[LL0X_KEY];
1567 if ( memcmp( crs.key, term, crs.klen ) )
1571 if ( rec ) { /* record the term */
1572 /* field to assign */
1573 Field *f = rec->field + rec->len;
1574 short tlen = db->tlen[idx];
1575 /* end of available buffer */
1577 ? (char*)f[-1].val /* before previously assigned field */
1578 : ((char*)rec + rec->bytes); /* end of record */
1579 while ( tlen && ' ' == term[tlen-1] )
1582 if ( b < (char*)(f+1) ) /* no space left on device */
1584 /* probably we're nuking the locator now: */
1585 memcpy( b, term, tlen );
1590 /* reset key from locator to prefix */
1595 if ( ! post && ! lp )
1597 /* collect postings */
1598 infb = entry[LL0X_INFB];
1599 infp = entry[LL0X_INFP];
1600 /* the IFP file is organized in blocks of 128 longs.
1601 1st int is block number followed by 127 data.
1602 postings are organized in chained segments so that each segment
1603 fits within one such block. a segment has five longs header,
1604 giving number of postings and pointer to next segment.
1607 for ( blkno=0; infb; blkno++ ) { /* segments */
1608 LdbP merge[127/2]; /* buffer to collect new postings */
1609 int *base; /* start of data */
1610 int *b; /* start of postings */
1611 int n; /* max postings in this seg's 1st block */
1612 int xlen; /* external length to read */
1613 int f = post ? post->fil - 1 : 0; /* highest pos to consider in given postings */
1614 int m = 0; /* fill merge buffer */
1615 int k; /* loop segment */
1617 if ( infp > 127-2-5 ) {
1618 return log_msg( ERR_TRASH, "found bad IFP pos %d blk %d for %.*s",
1619 infp, blkno, klen, key );
1621 if ( remain ) { /* consecutive block of same segment */
1627 n = (127 - 5 - infp)/2;
1630 pos = (infb - 1) * 512 + (infp + 1) * 4;
1632 && !((pos-blockpos) >> 9) /* 0 <= (pos-blockpos) < 512 */
1633 && pos+xlen <= blockpos+ 1 + (0x1ff & ~blockpos)
1635 base = block + (pos - blockpos)/sizeof(int);
1637 int blklen = 1 + (0x1ff & ~pos);
1638 assert( xlen <= blklen );
1639 assert( blklen <= (int)sizeof(block) );
1640 assert( 0 == (0x1ff & (pos + blklen)) );
1641 ret = readblk( block, blklen, db->inv[INV_IFP], pos );
1643 return log_msg( ERR_IO, "\twhen reading IFP" );
1647 if ( remain ) { /* no header to convert */
1649 b = base; /* no header */
1651 assert( (int)sizeof(ifp) >= xstr[LSTR_ILEN] );
1653 ret = convert( ifp, (char *)base, xstr );
1655 return log_msg( ERR_TRASH, "\twhen converting IFP header" );
1656 if ( n > ifp[LIFP_SEGP] )
1658 remain = ifp[LIFP_SEGP] - n;
1659 b = base+5; /* after header */
1661 LOG_DBG( LOG_VERBOSE,
1662 "key %d.%d '%.*s' blk %d post %d/%d r %d xlen %d at b/p %d.%d=%d",
1663 leaf[LL0X_PS], j, db->tlen[idx], term, blkno,
1664 n, ifp[LIFP_TOTP], remain, xlen, infb, infp, pos );
1665 if ( LOG_DO( LOG_TRACE ) )
1666 LOG_STR( ifp, lstrlib[ LSET_INV ].name[ LSTR_IFP ] );
1667 assert( (size_t)n <= sizeof(merge)/sizeof(merge[0]) );
1671 unsigned char tlen = (unsigned char) db->tlen[idx];
1672 while ( tlen && ' ' == term[tlen-1] )
1674 memcpy( kbf.byt, term, kbf.len = tlen );
1675 for ( k=0; k<n; k++ ) { /* callback needs 'em sorted */
1677 unsigned char *c = (unsigned char *)&b[k*2];
1678 LdbP e; /* the entry */
1679 #ifdef LDB_BIG_ENDIAN
1680 memcpy(e.bytes,c,8);
1682 e.bytes[0] = c[7]; e.bytes[1] = c[6];
1683 e.bytes[2] = c[5]; e.bytes[3] = c[4];
1684 e.bytes[4] = c[3]; e.bytes[5] = c[2];
1685 e.bytes[6] = c[1]; e.bytes[7] = c[0];
1687 ppos = LDBP_POS( &e );
1688 hit.mfn = (unsigned)LDBP_ROW( &e );
1689 hit.tag = (unsigned short)LDBP_TAG( &e );
1690 hit.occ = (unsigned short)(ppos >> 16);
1691 hit.pos = (unsigned short)ppos;
1692 if ( lp->cb( lp->me, &kbf, &hit ) )
1696 if ( post ) for ( k=n; k--; ) {
1697 /* loop backwards (for the fun of it) postings in segment */
1698 int prow, ptag, ppos;
1699 unsigned char *c = (unsigned char *)&b[k*2];
1700 LdbP e; /* the entry */
1701 LdbP samerow; /* highest possible entry w/ same row as e */
1702 #ifdef LDB_BIG_ENDIAN
1703 /* the 8 bytes of a posting are BIG ENDIAN ! */
1704 memcpy(e.bytes,c,8);
1706 e.bytes[0] = c[7]; e.bytes[1] = c[6];
1707 e.bytes[2] = c[5]; e.bytes[3] = c[4];
1708 e.bytes[4] = c[3]; e.bytes[5] = c[2];
1709 e.bytes[6] = c[1]; e.bytes[7] = c[0];
1711 prow = LDBP_ROW( &e );
1712 ptag = LDBP_TAG( &e );
1713 ppos = LDBP_POS( &e );
1714 LOG_DBG( LOG_VERBOSE, "post %d.%hd pos %06x key '%.*s'",
1715 prow, ptag, ppos, db->tlen[idx], term );
1716 if ( 0 >= ptag /* bad tag */
1717 || !prow || prow >= db->mfc[LMFC_NMFN] /* bad mfn */
1721 || (post->cut && prow >= post->cut)
1722 || (post->tag && post->tag != ptag)
1725 if ( prow < post->skp ) /* quickly bail out on skip condition */
1727 LDBP_SETROWTOP( &samerow, &e ); /* for mfn comparison */
1728 /* sweep down to postings for the same row as e ... */
1729 while ( f >= 0 && LDBP_GT( p+f, &samerow ) )
1731 if ( LDB_AND & post->mode ) {
1733 /* loop postings for same row, mark all (that are near enough) */
1734 LDBP_SETROWBOT( &samerow, &e ); /* for mfn comparison */
1735 /* NOTE: postings for row are GT than bottom even if marked */
1736 for ( l = f; l>=0 && LDBP_GT( p+l, &samerow ); l-- ) {
1739 if ( ptag != LDBP_TAG( p+l ) ) continue;
1740 if ( LDB_NEAR_G != post->near ) {
1741 dist = LDBP_POS( p+l ) - LDBP_POS( &e );
1742 if ( dist < 0 ) dist = -dist;
1745 : -post->near != dist /* exact $$$$ */
1749 LDBP_SETMARK( p+l );
1752 } else { /* OR mode */
1754 if ( ! post->near ) /* add if row not found: ignore details */
1755 add = 0 > f || prow > LDBP_ROW( p+f );
1756 else { /* add if no exact match */
1758 /* NOTE: we don't use mark bit in OR mode, do we ? */
1759 for ( l = f; l>=0 && LDBP_GT( p+l, &e ); l-- )
1761 add = 0 > l || LDBP_GT( &e, p+l );
1766 } /* for postings in segment */
1767 if ( m ) { /* merge in the merge buffer */
1770 for ( k = post->fil += m; m && k--; ) {
1772 if ( k < m || LDBP_GT( mm, &p[k-m] ) ) {
1775 LOG_DBG( LOG_DEBUG, "merging %d at %d", LDBP_ROW(&src), k );
1778 if ( k < post->len )
1780 else { /* set cut */
1781 int row = LDBP_ROW( &src );
1782 if ( row < post->cut || !post->cut )
1786 if ( post->fil > post->len )
1787 post->fil = post->len;
1788 if ( post->cut ) /* postings for cut row are unreliable */
1789 while ( post->fil && post->cut <= LDBP_ROW(p+post->fil-1) )
1792 if ( remain ) { /* advance to start of next block */
1796 infb = ifp[LIFP_NXTB];
1797 infp = ifp[LIFP_NXTP];
1799 } /* for segments */
1800 LOG_DBG( LOG_VERBOSE, "added %d postings for key '%.*s'",
1801 added, db->tlen[idx], term );
1802 } /* for terms in prefix/postings mode */
1804 if ( post /* fixup */
1805 && LDB_AND & post->mode && !(LDB_KEEPMARKS & post->mode)
1807 int mark = LDB_NOT & post->mode ? 0 : 0x8000;
1809 for ( i=0; i<post->fil; i++ )
1810 if ( mark == LDBP_MARK(p+i) ) {
1816 return ! rec ? 0 : rec->len;
/*
	Locate the last path separator in path.
	The visible code looks up the last '/' and the last backslash with
	strrchr and turns each hit into an offset from the start of path,
	using -1 when the character does not occur.
	NOTE(review): the lines that combine the offsets into the return value
	are not visible in this view -- presumably the largest offset (or -1)
	is returned; confirm. Callers in this file treat a result >= 0 as
	"separator found at this index".
*/
1820 static int ldb_last_path_sep (const char *path) {
1830 p2 = strrchr (path, '/');
1831 i2 = p2 ? p2 - path : -1;
1833 p3 = strrchr (path, '\\');
1834 i3 = p3 ? p3 - path : -1;
1840 } /* ldb_last_path_sep */
/*
	Open the database dbname and register it in the dbs table.

	dbname  name or path of the database; a trailing ".mst" or ".MST" is
	        recognized. The visible code also fetches OPENISIS_DNAME from
	        dbpar via rString (presumably when dbname is not given).
	dbpar   per-database parameter record, consulted via rInt2/rString2
	        (log verbosity, path, read-only flag, format type, dump flag,
	        fdt, encoding). It is duplicated with rDup and extended with
	        the contents of the options file when one can be read.
	syspar  system wide parameter record used as fallback for dbpar.
	fdt     field definition table; the visible code also derives one from
	        an fdt database, from the options or from a file.

	Main steps, in order: build name and path, find a slot in dbs (a slot
	with the same name is reopened), open the options file, the inverted
	file, the master file or the .txt file and the lbt index, build or
	validate the pointer file for .txt data, load the character tables,
	set up the fdt and the config record, and finally flag the db open.

	Returns a log_msg() result for bad name parameters. The visible
	cleanup path at the end closes all files and returns a negative value.
	NOTE(review): the success return is not visible in this view --
	presumably dbid, since cDOpen indexes dbs with the result; confirm.
*/
1843 static int ldb_open (const char *dbname, Rec *dbpar, Rec *syspar, Fdt *fdt)
1846 int i, plen, sz, dbid, lck = LIO_TLOCK; /* WLOCK only on special demand */
1847 int ret = 0, invret = -1, lbtret = 0, autoformat = 1, writable = -1;
1848 int uc = -1, gotopt = 0, txtfd = 0, copyidx = 0;
1851 char buf[65536+1]; /* need 64K buf for copying DO NOT SHRINK !!! */
1852 char path[ PATH_MAX ];
1854 memset( &ndb, 0, sizeof(ndb) );
1855 /* these should be 0 by memsetting to 0 anyway ... */
1856 ndb.path = 0; ndb.mmap = 0;
1857 ndb.flags |= DB_MMAP; /* it means: we'll try */
1860 if ( 0 <= (i = rInt2(dbpar, syspar, OPENISIS_SLOGV, -1)) )
1863 /* prepare name ... */
1866 return log_msg( ERR_FAULT, "ldb_open: dbname not given");
1867 dbname = rString (dbpar, OPENISIS_DNAME, 0, buf, sizeof(buf));
1869 return log_msg( ERR_FAULT, "ldb_open: no dbname parameter");
1871 plen = strlen (dbname);
1873 return log_msg( ERR_FAULT, "ldb_open: empty dbname");
1875 if ( !memcmp( ".mst", dbname+plen-4, 4 ) ) {
1878 } else if ( !memcmp( ".MST", dbname+plen-4, 4 ) ) {
1883 if ( sizeof(buf) <= (unsigned)plen
1884 || sizeof(path) <= (unsigned)(plen + 4 + 1)
1886 return log_msg( ERR_FAULT, "ldb_open: dbname too long '%s'", dbname);
1887 if ('/' == dbname[plen - 1]
1889 || '\\' == dbname[plen - 1]
1892 return log_msg( ERR_FAULT,
1893 "ldb_open: must not specify directory as dbname '%s'", dbname);
1895 strcpy(ndb.head.name, dbname);
1897 int i1 = 1 + plen - DBNLEN ;
1898 int i2 = ldb_last_path_sep (dbname);
1899 if (0 <= i2 && plen > ++i2 && i2 > i1) {
1902 strncpy(ndb.head.name, dbname + i1, DBNLEN - 1) [DBNLEN - 1] = 0;
1903 log_msg( LOG_WARN, "ldb_open: truncating dbname '%s' to '%s'",
1904 dbname, ndb.head.name);
1907 strcpy(path, dbname);
1908 if (! IsAbsPath (path)) {
1910 if ( (dbpar || syspar)
1911 && (p = rString2 (dbpar, syspar, OPENISIS_DPATH, buf, sizeof(buf)))
1914 if (sizeof(path) <= (unsigned)(plen + plen2 + 4 + 1 + 1))
1915 return log_msg( ERR_FAULT,
1916 "ldb_open: dbname or dbpath too long: %d %d '%s'",
1918 memmove (path + 1 + plen2, path, 1 + plen);
1920 memcpy (path, p, plen2);
1923 if ( !IsAbsPath(path)
1925 && (p = rString(syspar, OPENISIS_SPATH, 0, buf, sizeof(buf)))
1928 if (sizeof(path) <= (unsigned)(plen + plen2 + 4 + 1 + 1))
1929 return log_msg( ERR_FAULT,
1930 "ldb_open: dbname or syspath too long: %d %d '%s'",
1932 memmove(path + 1 + plen2, path, 1 + plen);
1934 memcpy(path, p, plen2);
1937 } /* name and path */
1939 /* more init AFTER honoring verbosity */
/* a used slot (flags != 0) carrying the same name is reopened */
1945 for ( dbid=0; dbid<dbs_len; dbid++ ) {
1946 if ( dbs[dbid].flags &&
1947 !strcmp( ndb.head.name, dbs[dbid].head.name ) ) {
1948 log_msg( LOG_INFO, "reopening %d '%s'", dbid, ndb.head.name );
1953 if ( dbid == dbs_len )
1954 for ( dbid=0; dbid<dbs_len && dbs[dbid].flags; dbid++ )
1956 if ( dbid == dbs_len )
1961 db->head.dbid = dbid;
1963 /* preset record sizes */
1964 db->mfc[0] = *DB_XSTR( db, LSET_MST, LSTR_MFC );
1965 db->xrf[0] = *DB_XSTR( db, LSET_MST, LSTR_XRF );
1967 db->cnt[1][0] = *DB_XSTR( db, LSET_INV, LSTR_CNT );
1968 /* isis-1 index term lengths */
1972 /* only the packed little endian ("DOS") format is writable
1974 if ( LVAR_PAC != (DB_VARI & db->flags) )
1978 db->path = mDup( path, plen+1 ); /* save path */
1979 memcpy( path+plen, ".???", 5 );
1982 dbpar = rDup(dbpar, 0, 0);
1983 /* check options file and extension case */
1984 if ( 0 <= uc ) /* use case from dbname */
1985 i = lio_open( setext(path,EXT_TXT_OPT,uc), OPEN_RDIF );
1986 else if ( 0 > (i = lio_open( setext(path,EXT_TXT_OPT,uc=0), OPEN_RDIF ))
1987 && 0 > (i = lio_open( setext(path,EXT_TXT_OPT,uc=OPEN_UC), OPEN_RDIF ))
1989 uc = autocase( db->path );
1991 if ( 0 < (sz = lio_size(i)) ) {
1992 p = sz < (int)sizeof(buf) ? buf : mAlloc(sz);
1993 if ( (gotopt = (sz == lio_read( &i, p, sz ))) )
1994 rDeser( &dbpar, p, sz, 0 );
1995 log_msg( LOG_INFO, "reading %d bytes options from '%s' %s",
1996 sz, path, gotopt ? "ok" : "nok" );
2000 lio_close( &i, LIO_INOUT );
2004 if ( (dbpar || syspar) && 0 <= (i = rInt2(dbpar, syspar, OPENISIS_DRO, -1)))
2005 writable = !i; /* explicit 0/1 */
2008 /* trad. index is never opened writable. */
2009 invret = openfiles( db->inv, path, EXT_INV, INV_FILES, uc|OPEN_RDIF );
2010 if (dbpar || syspar) {
2012 if (rString2 (dbpar, syspar, OPENISIS_DTYPE, fmtstr, sizeof(fmtstr))) {
2013 if (! strcmp ("aligned", fmtstr)) {
2014 db->flags |= LVAR_ALI;
2016 } else if (! strcmp ("naligned", fmtstr))
2022 log_msg( LOG_WARN, "cannot guess format -- no inverted file" );
2024 unsigned len = lio_size( db->inv[INV_CNT] );
2026 db->flags |= LVAR_ALI;
2027 autoenc = "iso8859-1";
2028 /* writable = 0; we do not write aligned format */
2029 } else if ( 52L == len )
2032 log_msg( LOG_WARN, "cannot guess format -- bad .cnt len %d", len );
2033 log_msg( LOG_INFO, "using autoformat %saligned for .cnt len %d",
2034 (db->flags & LVAR_ALI) ? "":"un", len );
2040 if ( !(ret = openfiles( db->mst, path, EXT_MST, MST_FILES,
2041 lck|OPEN_ASIS|LIO_CREAT ))
2046 if ( 0 <= (ret = openfiles( &txtfd, path, EXT_TXT, 1,
2047 lck|LIO_SYNC|(writable?OPEN_ASIS:OPEN_RDIF) ))
2048 ) { /* .txt exists: use it */
2051 else if (1 == writable) {
2052 log_msg( LOG_ERROR, "file '%s' is readonly", path );
2057 0 <= (ret = openfiles( db->mst, path, EXT_MST, MST_FILES,
2058 lck|((writable && !(db->flags & LVAR_ALI))?OPEN_ASIS:OPEN_RDIF) ))
2059 && (ret || 1!=writable)
2060 ) { /* trad. files are ok */
2063 } else if ( 1 != (ret = openfiles( &txtfd, path, EXT_TXT, 1,
2064 lck|(ret ? LIO_SYNC : 0)|OPEN_NEW )) ) /* don't sync on autoconv */
2068 /* MW: creation mode? KR: ugo+rw & ~umask */
2069 if ( 1 == (lbtret = openfiles( &db->oxi.fd, path, EXT_LBT, 1,
2070 lck|(writable?OPEN_ASIS:OPEN_RDIF) ))
2073 else if ( !writable )
2075 else if ( !lbtret ) { /* exists ro */
2076 log_msg( LOG_ERROR, "file '%s' is readonly", path );
2078 } else { /* create and copy to oxi */
2079 if ( 1 != openfiles( &db->oxi.fd, path, EXT_LBT, 1, lck|OPEN_NEW ) )
2085 if ( db->mst[MST_MST] ) { /* care for the traditionals */
2086 if ( (ret = readlog(
2087 db->mfc, db->mst[MST_MST], 0, db, LSET_MST, LSTR_MFC ))
2089 /* NEW goto cleanup; */
2090 memset( db->mfc, 0, sizeof(db->mfc) );
2091 db->mfc[LMFC_NMFN] = 1;
2092 db->mfc[LMFC_NMFB] = 1;
2093 db->mfc[LMFC_NMFP] = 64;
2097 int lastblock = (db->mflen = lio_size( db->mst[MST_MST] ))/512;
2098 if ( 511 & db->mflen ) lastblock++;
2100 the next record's block should be either the last one we have
2101 or the next one to follow
2102 if ( db->mfc[LMFC_NMFB] != lastblock
2103 && db->mfc[LMFC_NMFB] != lastblock+1
2105 log_msg( LOG_VERBOSE, "NMFB mismatch: NMFB %d ~ %d",
2106 db->mfc[LMFC_NMFB], lastblock );
2108 /* set LOGICAL mf length */
2109 db->mflen = (db->mfc[LMFC_NMFB]-1)*512 + db->mfc[LMFC_NMFP];
2112 db->xrlen = lio_size( db->mst[MST_XRF] ) / 512;
2113 if ( (DB_MMAP & db->flags)
2116 == lio_mmap( &db->mst[MST_XRF], (void**)&db->mmap, db->xrlen*512 )
2118 db->mmlen = db->xrlen;
2123 /* TODO: make on-demand preparation even faster using buffered IO */
2124 if ( !lio_size(txtfd) ) {
2125 const char newline = LF;
2127 && 0 < (i = lio_open( setext(path,EXT_TXT_OPT,uc), LIO_RD ))
2128 ) { /* copy the options file */
2129 log_msg( LOG_INFO, "copying %d bytes options", lio_size(i) );
2130 while ( 0 < (sz = lio_read( &i, buf, sizeof(buf)-1 )) )
2131 lio_write( &txtfd, buf, sz );
2132 if ( LIO_INOUT & i ) { /* is supposed to autoclose */
2133 log_msg( LOG_WARN, "tss tss tss ..." );
2134 lio_close( &i, LIO_INOUT );
2137 lio_write( &txtfd, &newline, 1 );
2140 if ( db->mst[MST_MST] ) { /* copy to new empty txt */
2141 int end = db->mfc[LMFC_NMFN];
2143 max recsize for traditionals is 32K.
2144 field values may double, if consisting entirely of newlines.
2145 rec->used may be more than 32K, since we 12 bytes per field.
2146 However, we know there are only sign+5digits+tab+newline used per tag,
2147 fitting within 2* the original 6 bytes per field.
2150 log_msg( LOG_INFO, "copying traditional data" );
2151 db->flags |= DB_OPEN; /* pretend */
2152 for ( i=1; i<end; i++ ) {
2153 Rec *r = dRead( dbid, i );
2156 else if ( (int)sizeof(buf) <= (sz = rSerB( buf, r )) ) {
/* the %d needs its argument: report the size that overflowed buf */
2157 log_msg( ERR_IDIOT, "serialized %d bytes", sz );
2160 lio_write( &txtfd, buf, sz );
2162 db->flags &= ~DB_OPEN; /* pret end */
2165 lio_mmap( 0, (void**)&db->mmap, db->mmlen*512 );
2167 closefiles( db->mst, MST_FILES );
2169 db->mst[MST_MST] = txtfd;
2170 db->mflen = lio_size( db->mst[MST_MST] );
2172 db->ptr = 0x0134; /* should be config opt */
2173 if ( !remake ) { /* other reasons why we should remake */
2175 unsigned isix = GETINT(ISIX);
2179 if ( 0 > (db->mst[MST_XRF] = lio_open( setext(path,EXT_TXT_PTR,uc),
2180 LIO_SEEK|(writable?LIO_RDWR:LIO_RD) ))
2182 log_msg( LOG_INFO, "'%s' not found", path );
2183 else if ( 6 != lio_read(&db->mst[MST_XRF],buf,6) )
2184 log_msg( LOG_WARN, "'%s' too short", path );
2185 else if ( isix != (magic = GETINT(buf)) ) /* FOO! */
2186 log_msg( LOG_WARN, "'%s' has black magic 0x%08x", path, magic );
2187 /* TODO: save that foo if it doesn't read ISIX ? */
2188 else if ( 0xf000 & (ptr = GETSHORT(buf+4)) ) /* bad endianess */
2189 log_msg( LOG_WARN, "'%s' has bad endianess type 0x%04x", path, ptr );
2190 else if ( (db->ptr && db->ptr != ptr) ) /* other type configured */
2191 log_msg( LOG_WARN, "'%s' type 0x%04x != cfg 0x%04x", path, ptr, db->ptr );
2192 else if ( lio_time(db->mst[MST_XRF]) < lio_time(db->mst[MST_MST]) )
2193 log_msg( LOG_WARN, "'%s' older than data", path );
2200 db->ptr = 0x0134; /* m*256 + l*16 + k, doc/Serialized */
2201 /* BTW: 0x34 is ASCII digit '4', so it's ISIX4^A on little endian */
2203 } else { /* fix unsupported type */
2204 unsigned m = 0xf&(db->ptr>>8);
2205 unsigned l = 0xf&(db->ptr>>4);
2206 unsigned k = 0xf&db->ptr;
2208 if ( m > 4 ) { m = 4; mod = 1; }
2209 if ( l > 4 ) { l = 4; mod = 1; }
2210 if ( k > 4 ) { k = 4; mod = 1; } /* TODO: allow 8 with large files */
2211 /* total ptr bytes = sum(nibbles) <= 45, but won't use more than 8+4+4 */
2213 log_msg( LOG_WARN, "fixing unsupported ptr type 0x%04x", db->ptr );
2214 db->ptr = (unsigned short)(m<<8 | l<<4 | k);
2221 unsigned base = 0; /* of current block */
2222 unsigned pos = 0; /* of last record */
2223 unsigned fld = 0; /* of last record */
2224 unsigned nmfn = 0; /* next mfn = maxmfn+1 */
2225 unsigned xmfn = 0; /* explicitly given */
2227 int more; /* buf not empty flag */
2228 char *last; /* of current block */
2230 lio_close( &db->mst[MST_XRF], LIO_INOUT );
2231 if ( 0 > (db->mst[MST_XRF] = lio_open(
2232 setext(path,EXT_TXT_PTR,uc), OPEN_BLANK ))
2235 /* write signature */
2236 memcpy( pt.r, "ISIX", 4 );
2237 memcpy( pt.r+4, &db->ptr, 2 );
2238 memcpy( pt.r+6, ":)", 2 );
2240 memset( pt.r+8, ')', db->ptrl - 8 );
2241 lio_pwrite( &db->mst[MST_XRF], pt.r, db->ptrl, 0 );
2242 /* loop the masterfile */
2243 lio_seek( &db->mst[MST_MST], 0 );
2244 last = (p = buf) + lio_read( &db->mst[MST_MST], buf, 8192 ) - 1;
2245 more = last > buf; /* one byte is no byte ;) */
2246 if ( more && LF == *p ) { /* no options: no \n\n */
2250 for (;;) { /* records */
2252 for (;;) { /* lines and stuff to end of record */
2253 if ( p < last ) { /* have one lookahead */
2255 continue; /* the tight loop ... or use memchr ? */
2256 if ( LF != *p ) { /* now p <= last */
2257 if ( fld || !(0xc0 & *p) ) { /* < '@', 'A', ... */
2258 if ( TAB != *p ) /* no continuation */
2262 fld++; /* count field, unless we really recognize a opline */
2265 /* now we have '@'...'Z' at start of 1st line */
2266 sz = last - p; /* avail after p */
2267 if ( sz && TAB != p[1] ) /* no opline */
2273 break; /* give it a try */
2275 log_msg( LOG_WARN, "unknown opline %c at mfn %d", *p, nmfn );
2278 if ( sz > 127 ) /* longer -> no opline */
2280 if ( ! sz || ! (q = memchr(p+1, LF, sz)) ) {
2281 if ( sz >= 127 || ! more )
2282 continue; /* too long or undelimited last */
2283 p--; /* back to \n, so we come here again */
2286 if ( q < p+3 || p[2] < '0' || '9' < p[2] )
2289 take a closer look at whether the whole line makes sense
2291 if ( op ) { /* yeah, two metas in sequence! weird stuff! */
2292 p--; /* step back to newline */
2293 pos = base+(p-buf); /* fake pos as if we had no line at all */
2294 break; /* go handle the PREVIOUS opline */
2297 xmfn = a2i( p+2, q-p-2 );
2298 fld--; /* uncount this line */
2299 pos = base + (q-buf) + 1; /* start after q */
2305 LOG_DBG( LOG_DEBUG, "MORE %d at pos %d base %d p +%d last +%d",
2306 more, pos, base, p-buf, last-buf );
2308 goto schicht; /* german: done */
2309 base += p - buf; /* shift out bytes before p */
2310 len = last-p; /* bytes to keep after p; < 128 */
2311 if ( len ) /* we're probing for more lookahead */
2312 memmove( buf, p, 1+last-p );
2313 else /* typically */
2314 *buf = *p; /* but save the last dance */
2318 if ( 0 < (sz = lio_read( &db->mst[MST_MST], buf+1+len, 8192 )) ) {
2322 more = 0; /* but yet, finish this up */
2323 /* since *buf = *last was the files last character,
2324 we'd expect a newline
2327 p = buf+(LF==*buf ? 1 : 2); /* pretend buf started \n */
2330 /* else try again opline */
2331 } /* lines and stuff */
2332 /* now p is on a delimiting blank lines \n -- or such ... */
2333 len = base + (p-buf) - pos; /* >= 0 */
2334 mfn = xmfn ? xmfn : nmfn;
2335 log_msg( LOG_INFO, "ptr %c %d(%d/%d) pos %d len %d",
2336 op?op:'>', mfn, xmfn, nmfn, pos, len );
2337 if ( base + (p-buf) < pos ) /* FOO !!! */
2339 if ( len ) /* could have been completely empty */
2340 len--; /* mute last \n */
2341 if ( 'D' == op && len ) /* FOO !!! */
2343 if ( mfn && (len || op) )
2344 lio_pwrite( &db->mst[MST_XRF],
2345 mkptr( pt.r, db, pos, len, fld), db->ptrl, mfn*db->ptrl );
2346 pos = base + (p-buf) + 1; /* next starts after p */
2347 if ( 'D' != op ) { /* 'D'elete does not lead to implicit reuse */
2348 if ( op && nmfn < xmfn )
2350 nmfn++; /* continue after this */
2352 xmfn = fld = op = 0;
2356 db->mfc[LMFC_NMFN] =
2357 db->xrlen = lio_size( db->mst[MST_XRF] ) / db->ptrl;
2358 if ( (DB_MMAP & db->flags)
2360 && db->xrlen*db->ptrl
2361 == lio_mmap( &db->mst[MST_XRF], (void**)&db->mmap, db->xrlen*db->ptrl )
2363 db->mmlen = db->xrlen;
2364 log_msg( LOG_INFO, "mapped %d*%d = %d",
2365 db->xrlen, db->ptrl, db->xrlen*db->ptrl );
2366 db->flags |= DB_TXTOPEN;
2367 db->flags &= ~DB_VARI; /* clear alignment and such */
2368 } /* if ( txtfd ) */
2370 /* supporting files, ctables */
2372 if ( 0 >= (sz = lio_slurp( &p, sizeof(buf), setext(path,EXT_SUP_ACT,uc), 1 ))
2373 || lcs_mktab( db->ctab+LCS_CTYPE, p, sz, LCS_A )
2375 memcpy( db->ctab+LCS_CTYPE, lcs_latin1_ct, sizeof(db->ctab[0]) );
2376 if ( 0 >= (sz = lio_slurp( &p, sizeof(buf), setext(path,EXT_SUP_UCT,uc), 1 ))
2377 || lcs_mktab( db->ctab+LCS_UCASE, p, sz, 0 )
2379 memcpy( db->ctab+LCS_UCASE, lcs_latin1_uc, sizeof(db->ctab[0]) );
2383 if ( (p = rString (dbpar, OPENISIS_DFDT, 0, buf, sizeof(buf))) ) {
2385 Db *dbfdt = nDbByName (openisis_stub0, p);
2387 recfdt = dRead (dbfdt->dbid, 1);
2389 int idfdt = ldb_open (p, 0, syspar, 0);
2391 recfdt = dRead (idfdt, 1); /*MMM*/
2396 fdt = fRec2Fdt (recfdt);
2397 } else if ( gotopt )
2398 fdt = fRec2Fdt(dbpar);
2400 fdt = fFromFile (path);
2404 log_msg( LOG_INFO, "have %d fdt entries for %s",
2405 fdt->len, db->head.name);
2407 log_msg( LOG_INFO, "have no fdt for %s", db->head.name);
2409 db->head.tms = timeUpd(0); /* what watch? */
2410 log_msg( LOG_INFO, "tms %d for %s", db->head.tms, db->head.name);
2413 /* set path and name */
2414 if (0 <= (i = ldb_last_path_sep (db->path))) {
2416 strncpy(path, db->path, i)[i] = 0;
2419 dbpar = rSet (dbpar, RCHG | RDIS, OPENISIS_DPATH, path, 0);
2421 dbpar = rSet (dbpar, RCHG | RDIS, OPENISIS_DNAME, db->head.name, 0);
2424 if (!(p = rString (dbpar, OPENISIS_DENC, 0, buf, sizeof(buf))))
2426 && (p = rString (syspar, OPENISIS_DENC, 0, buf, sizeof(buf))))
2429 dbpar = rSet(dbpar, RDIS, OPENISIS_DENC, p, 0);
2431 log_msg( LOG_INFO, "using encoding %s for %s", p, db->head.name);
2433 db->head.cfg = dbpar;
2436 db->flags |= DB_OPEN;
2438 if ( writable && LVAR_PAC == (DB_VARI & db->flags) )
2439 db->flags |= DB_WRITABLE;
2442 if ( (dbpar || syspar) && 0 < rInt2(dbpar, syspar, OPENISIS_DDUMP, -1) ) {
2446 if ( (r = ldb_readRecAtOff(dbid,off,&off)) )
2448 } while ( 0 < off );
2455 db->oxi.flg |= LBT_WRITE;
2456 if ( (p = getenv("OXITYP")) && 0 < (i = atoi(p)) && 4 > i )
2457 db->oxi.typ = i << 4;
2458 if ( !lbtret && !lbt_init( &db->oxi ) )
2459 db->flags |= DB_LBTOPEN;
2462 && ! (ret = readlog( db->cnt[0], db->inv[INV_CNT],
2463 0, db, LSET_INV, LSTR_CNT ))
2464 && ! (ret = readlog( db->cnt[1], db->inv[INV_CNT],
2465 -1, db, LSET_INV, LSTR_CNT ))
2468 db->flags |= DB_INVOPEN;
2472 log_msg( LOG_INFO, "copying traditional index" );
2474 memset( &l, 0, sizeof(l) );
2476 l.cb = (DXCb*)cXAdd;
2477 lbt_batch( & db->oxi, 5 );
2478 search( db, 0, 0, 0, &l );
2479 cXAdd( & db->oxi, 0, 0 );
2481 closefiles( db->inv, INV_FILES );
2490 closefiles( &db->oxi.fd, 1 );
2491 closefiles( &txtfd, 1 );
2492 closefiles( db->inv, INV_FILES );
2493 closefiles( db->mst, MST_FILES );
2494 return 0 > ret ? ret : ret ? -ret : -1;
2498 /* ************************************************************
2504 /* ************************************************************
/*
	Read the raw master file record stored at offset off of database dbid.
	The db is resolved with getDb, the record is fetched with getMfr, which
	also receives nxtoff (ldb_open uses that value as the offset of the
	following record when dumping). The record length rec[LMFR_RECL] is
	logged, or -1 when no record was obtained.
	NOTE(review): the error branch and the return statement are not visible
	in this view -- presumably 0 for an unknown dbid and rec otherwise.
*/
2508 int *ldb_readRecAtOff ( int dbid, lxref off, int *nxtoff )
2511 LDb *db = getDb( dbid );
2513 log_msg( LOG_ERROR, "\tat ldb_readRecAtOff" );
2518 rec = getMfr( db, off, nxtoff );
2521 LOG_DBG( LOG_VERBOSE, "db %d off %d: got %hd bytes",
2522 dbid, off, !rec ? -1 : rec[LMFR_RECL] );
2524 } /* ldb_readRecAtOff */
/*
	Search the index of database dbid for key.
	post  optional postings buffer; its len is reset to the capacity of
	      post->p, and LDB_NOT in post->mode implies LDB_AND.
	rec   optional record, handed on to the index search.
	With a traditional inverted file open (DB_INVOPEN) the work is done by
	search(); otherwise the key is translated byte by byte through the
	db's LCS_UCASE table into a local key k and passed to lbt_search.
	NOTE(review): the branches for an unknown dbid and for a db without an
	open lbt index, and any clamping of the key length before it is stored
	in the one-byte k.len, are not visible in this view.
*/
2528 int ldb_search ( int dbid, const char *key, LdbPost *post, Rec *rec )
2530 LDb *db = getDb( dbid );
2535 if ( post ) { /* prepare for postings */
2537 post->len = sizeof(post->p)/sizeof(post->p[0]); /* standard length */
2538 if ( LDB_NOT & post->mode )
2539 post->mode |= LDB_AND;
2541 if ( DB_INVOPEN & db->flags )
2542 return search( db, key, post, rec, 0 );
2543 if ( !(DB_LBTOPEN & db->flags) )
2547 memset( &k, 0, sizeof(k) );
2552 unsigned char *uk = (unsigned char*)key;
2553 int l = strlen( key );
2556 k.len = (unsigned char)l;
2558 k.byt[l] = db->ctab[LCS_UCASE].c[ uk[l] ];
2560 return lbt_search( &db->oxi, &k, post, rec );
/*
	Convert the postings in post to a set of row ids in set.
	The row of the first posting is stored first; the loop then walks the
	remaining post->fil postings and compares each row with the last one
	stored, stopping once last reaches max. max starts as set->len and may
	be replaced by OPENISIS_SETLEN (condition not visible in this view).
	Returns the new set->len, which is last+1.
	NOTE(review): the statement executed for a differing row and the
	handling of an empty postings list are not visible here.
*/
2564 int ldb_p2s ( Set *set, LdbPost *post )
2567 int last=0, max = set->len;
2571 max = OPENISIS_SETLEN;
2575 s[0] = LDBP_ROW(post->p);
2576 for ( i=1; i<post->fil && last < max; i++ ) {
2577 int row = LDBP_ROW(post->p+i);
2578 if ( s[last] != row )
2581 return set->len = last+1;
/*
	Return the character tables (db->ctab) of database dbid,
	or 0 when getDb finds no db for that id.
*/
2586 LcsTab *ldb_tabs( int dbid )
2588 LDb *db = getDb( dbid );
2589 return ! db ? 0 : db->ctab;
/*
	Return the public Db header (&db->head) of database dbid,
	or 0 when getDb finds no db for that id.
*/
2594 Db *ldb_getdb (int dbid) {
2595 LDb *db = getDb (dbid);
2596 return db ? &db->head : 0;
2599 /* ************************************************************
/*
	Return the highest row id of database dbid, computed as the master
	file control's next mfn (db->mfc[LMFC_NMFN]) minus one.
	NOTE(review): the branch taken when getDb finds no db is not visible
	in this view.
*/
2602 int dMaxId ( int dbid )
2604 LDb *db = getDb( dbid );
2607 return db->mfc[LMFC_NMFN] - 1;
/*
	Fetch the raw master file record with row id rowid from database dbid.
	Returns 0 right away when LIO_LOCK() reports failure. Otherwise the
	xref offset of the row is looked up with getOff, the record is read
	with getMfr, and its stored mfn (rec[LMFR_MFN]) is checked against
	rowid; a mismatch is logged as an error.
	NOTE(review): the conditions guarding the "deleted xref" and
	"no record" messages, the lock release and the return statement are
	not visible in this view.
*/
2611 Raw *dRaw ( int dbid, int rowid )
2617 if ( LIO_LOCK() ) return 0;
2620 log_msg( LOG_ERROR, "\tat openIsisReadRaw %d", rowid );
2623 off = getOff( db, rowid, 0 );
2624 log_msg( LOG_INFO, "found xref 0x%08x for %d", off, rowid );
2626 log_msg( LOG_INFO, "found deleted xref 0x%08x for %d", off, rowid );
2629 rec = getMfr( db, off, 0 );
2631 log_msg( LOG_WARN, "\tno record at %d rowid %d", off, rowid );
2634 LOG_DBG( LOG_VERBOSE, "db %d row %d: got %hd bytes",
2635 dbid, rowid, !rec ? -1 : rec[LMFR_RECL] );
2636 if ( rec[LMFR_MFN] != rowid ) {
2637 log_msg( LOG_ERROR, "got mfn %d expected %d", rec[LMFR_MFN], rowid );
/*
	Read row rowid of database dbid as a Rec.
	Text format databases (DB_TXTOPEN) are served by dText. Otherwise the
	raw record from dRaw is used in place as a Rec: the value of a field
	is turned from an offset into a pointer by adding the record's base
	address, and the result is checked with RECOK.
	NOTE(review): the loop over the fields, the handling of an unknown
	dbid and the return statement are not visible in this view.
*/
2648 Rec *dRead ( int dbid, int rowid )
2650 LDb *db = getDb( dbid );
2652 if ( DB_TXTOPEN & db->flags )
2653 return dText( db, rowid, 0 );
2654 if ( (r = (Rec *) dRaw( dbid, rowid )) ) {
2655 char * base = (char*)r;
2656 Field *f = r->field;
/* val holds an offset relative to the record start; make it a pointer */
2659 f->val = base + (int)f->val;
2660 assert( RECOK( r ) );
/*
	Write record rec to database dbid and apply the index entries
	described by the fields of idx.

	Fails with ERR_INVAL when the db is not flagged DB_WRITABLE. The record
	is stored with pText for text format databases (DB_TXTOPEN) and with
	putRec otherwise. Each field of idx is then dispatched on its tag:
	  XCTL  index control command: leading letters, optionally followed by
	        a TAB or blank and a numeric option
	  XHIT  up to five numbers describing a hit; a leading '+' or '-'
	        selects add or delete
	  XFST  rejected as not implemented
	  XADD  binary key value, sized to db->oxi.vsz
	Unknown tags are rejected with ERR_INVAL. Unless a binary value was
	given, the key text is mapped through the db's LCS_UCASE table and the
	hit is packed with cXMkVal; the key is then removed with lbt_del or
	inserted with lbt_add.
	NOTE(review): several lines of this function are not visible in this
	view, including how the result of the record write is used and the
	final return.
*/
2666 int dWritex ( int dbid, Rec *rec, Rec *idx )
2668 LDb *db = getDb( dbid );
2673 if ( !(DB_WRITABLE & db->flags) )
2674 return log_msg( ERR_INVAL, "db %d not writable", dbid );
2676 DB_TXTOPEN & db->flags ? pText( db, rec, 0 ) : putRec( db, rec )
2680 const unsigned char *const uc = db->ctab[LCS_UCASE].c;
2683 int mode = 'f'; /* 'w', 's' */
2687 int mfn = rec ? rec->rowid : 0;
2690 Field *f = idx->field, *last = f + idx->len - 1;
2692 for ( ; f <= last; f++ ) {
2693 const char *val = f->val;
2699 case XCTL: { /* index cmd [opt] */
2700 const char *cmd = val, *e = val + len;
2701 int cmdlen, opt = 0, haveopt;
2702 while ( val < e && 64 < *val ) /* eat ASCII letters */
2705 if ( val < e && (TAB == *val || ' ' == *val) )
2707 haveopt = val < e && a2il( val, e-val, &opt );
2709 cut = haveopt ? opt : 30;
2713 case 'f': /* fields */
2718 case 'w': /* words */
2722 case 's': /* split */
2739 return log_msg( ERR_INVAL, "bad index control '%.*s'", cmdlen, cmd );
2742 int i = 0, v[5], *pv = v;
2743 const char *e = val + len;
2746 case '+': del = 0; val++; break;
2747 case '-': del = 1; val++; break;
2749 for ( ; val < e && i<5; i++ ) {
2750 int dig = a2il( val, e-val, v+i );
2752 if ( val >= e || TAB == *val )
2755 return log_msg( ERR_INVAL,
2756 "bad HIT '%.*s' after %d", e-val, val, v[i] );
2765 case 5: h.dbn = (unsigned short)*pv++;
2766 case 4: h.mfn = (unsigned)*pv++;
2767 case 3: h.pos = (unsigned short)pv[2];
2768 case 2: h.occ = (unsigned short)pv[1];
2769 case 1: h.tag = (unsigned short)pv[0];
2770 /* case 0: ! f->len */
2772 if ( val < e && TAB == *val )
2775 } break; /* case XHIT */
2777 return log_msg( ERR_IDIOT, "sorry, XFST not implemented" );
2779 case XADD: /* binary key */
2780 /* if ( f->len < db->oxi.vsz )
2781 memset( k.val.byt, 0, db->oxi.vsz - f->len );
2784 + (f->len < (int)db->oxi.vsz ? (int)db->oxi.vsz - f->len : 0),
2785 f->val, f->len > (int)db->oxi.vsz ? (int)db->oxi.vsz : f->len );
2786 k.val.len = db->oxi.vsz;
2791 return log_msg( ERR_INVAL, "bad index control tag %d", f->tag );
2792 switch ( mode ) { /* check for tag change */
2794 if ( tag == f->tag )
2800 if ( tag == f->tag )
2813 if ( ! k.val.len ) { /* not ADD/DEL: use hit, val */
2814 unsigned char *dst = k.byt;
2815 const unsigned char *src = (const unsigned char *)val;
2818 k.len = (unsigned char)len;
2820 *dst++ = uc[ *src++ ];
2821 cXMkVal( &db->oxi, &k.val, &h );
2822 LOG_DBG( LOG_DEBUG, "#%d %c key '%.*s' hit %d.%d.%d.%d.%d",
2823 f - idx->field, del ? '-' : '+', k.len, k.byt,
2824 h.dbn, h.mfn, h.tag, h.occ, h.pos );
2826 ret = del ? lbt_del( &db->oxi, &k ) : lbt_add( &db->oxi, &k );
/*
	Write record rec to database dbid without any index entries:
	forwards to dWritex with a null idx and returns its result.
*/
2833 int dWrite ( int dbid, Rec *rec )
2835 /* TODO: use FST lines as idx */
2836 return dWritex( dbid, rec, 0 );
/*
	Run ldb_search for key on database dbid with rec as the record
	argument and no postings buffer.
	Returns rec, or 0 when ldb_search returned a negative value.
*/
2840 Rec* dTerm ( Rec *rec, int dbid, const char *key )
2842 return 0 > ldb_search( dbid, key, 0, rec ) ? 0 : rec;
/*
	Run the index loop described by l over database dbid.
	With OPENISIS_IDXTRAD set in l->flg the traditional inverted file is
	walked by search(), which requires DB_INVOPEN. Otherwise lbt_loop is
	used on db->oxi, which requires DB_LBTOPEN and db->oxi.bat to be unset.
	NOTE(review): the values returned when these requirements are not met,
	or when dbid is unknown, are not visible in this view.
*/
2846 int dXLoop ( int dbid, DXLoop *l )
2848 LDb *db = getDb( dbid );
2852 if ( OPENISIS_IDXTRAD & l->flg ) {
2853 if ( !(db->flags & DB_INVOPEN) )
2855 return search( db, 0, 0, 0, l );
2857 if ( !(db->flags & DB_LBTOPEN) || db->oxi.bat )
2859 return lbt_loop( & db->oxi, l );
/*
	cInit: library initialisation entry point.
	argc and argv are accepted but not used by the lines shown here;
	lockfunc is stored in lio_lock.
	NOTE(review): the return statement is not part of this listing.
*/
2864 int cInit ( int argc, const char **argv, CLockFunc lockfunc )
/* explicitly discard the unused arguments */
2866 (void)argc; (void)argv;
2869 lio_lock = lockfunc;
/*
	cDOpen: open a database through ldb_open, handing dbname, dbpar,
	syspar and fdt through unchanged, and return a pointer to the head
	member of the dbs[] entry selected by the returned id.
	NOTE(review): the declaration of dbid and any handling of a failed
	ldb_open are not part of this listing -- confirm that dbid is
	validated before it is used as an index into dbs[].
*/
2874 Db* cDOpen (const char *dbname, Rec *dbpar, Rec *syspar, Fdt *fdt) {
2878 dbid = ldb_open (dbname, dbpar, syspar, fdt);
2880 return &dbs[dbid].head;
/*
	cDOpenv: open a database from an argument vector.
	Builds dbpar with rSet from argv, using openIsisFdtDbpar and the flags
	RARGV | RFDT | RNOC | RIGN with argc or'ed into the same word, then
	calls ldb_open with that dbpar and 0 for its last two arguments.
	The ldb_open result is stored in rt.
	NOTE(review): declarations, release of dbpar and the return statement
	are not part of this listing.
*/
2885 int cDOpenv ( const char *dbname, const char **argv, int argc )
2892 dbpar = rSet (0, RARGV | RFDT | RNOC | RIGN | argc,
2893 openIsisFdtDbpar, argv);
2895 rt = ldb_open (dbname, dbpar, 0, 0);
/*
	cDClose: close database dbid.
	Steps visible in this listing:
	- return -ERR_BUSY if LIO_LOCK() yields non-zero
	- if DB_MODIFIED is set and DB_TXTOPEN is not, write the master file
	  control fields back at offset 0 of the MST file
	- if both DB_MODIFIED and DB_TXTOPEN are set, copy the 4 bytes of ISIX
	  to the start of db->mmap
	- call lio_mmap with a 0 first argument on db->mmap
	  (presumably this unmaps the region -- confirm against lio_mmap)
	- close the MST files, the INV files if DB_INVOPEN is set, and the
	  lbt index if DB_LBTOPEN is set
	- free path, head.cfg and head.fdt when non-null
	- zero the whole LDb structure
	NOTE(review): lines between those shown (null check on db, the
	declaration of mfc, the final return) are not part of this listing.
*/
2903 int cDClose ( int dbid )
2905 LDb *db = getDb( dbid );
2908 if ( LIO_LOCK() ) return -ERR_BUSY;
/* true only when DB_MODIFIED is set while DB_TXTOPEN is clear */
2909 if ( DB_MODIFIED == ((DB_MODIFIED|DB_TXTOPEN) & db->flags) ) {
2910 /* write back the MF control */
2912 /* if ( 498 < (db->mflen & 511) ) db->mflen = ~511 & (db->mflen + 14); */
/* every field is passed through rvi (int) or rvs (short) before writing */
2913 mfc.ctlm = rvi( db->mfc[LMFC_CTLM] );
2914 mfc.nmfn = rvi( db->mfc[LMFC_NMFN] );
/* mflen is split into a 1-based 512 byte block number and the offset in it */
2915 mfc.nmfb = rvi( 1 + (db->mflen >> 9) );
2916 mfc.nmfp = rvs( 511 & db->mflen );
2917 mfc.type = rvs( db->mfc[LMFC_TYPE] );
2918 mfc.rcnt = rvi( db->mfc[LMFC_RCNT] );
2919 mfc.mfx1 = rvi( db->mfc[LMFC_MFX1] );
2920 mfc.mfx2 = rvi( db->mfc[LMFC_MFX2] );
2921 mfc.mfx3 = rvi( db->mfc[LMFC_MFX3] );
/* a short write is only logged; closing continues regardless */
2922 if ( sizeof(mfc) != lio_pwrite( &db->mst[MST_MST], &mfc, sizeof(mfc), 0) )
2923 log_msg( ERR_TRASH, "could not write MST header" );
/* true only when both DB_MODIFIED and DB_TXTOPEN are set */
2926 if ( (DB_MODIFIED|DB_TXTOPEN) == ((DB_MODIFIED|DB_TXTOPEN) & db->flags) )
2927 memcpy( db->mmap, ISIX, 4 ); /* force newer mtime on proper close */
2928 lio_mmap( 0, (void**)&db->mmap, db->mmlen*db->ptrl );
2931 closefiles( db->mst, MST_FILES );
2932 if ( DB_INVOPEN & db->flags )
2933 closefiles( db->inv, INV_FILES );
2934 if ( DB_LBTOPEN & db->flags )
2935 lbt_close( &db->oxi );
2937 if ( db->path ) mFree( (char*)db->path );
2938 if (db->head.cfg) mFree (db->head.cfg);
2939 if (db->head.fdt) fFree (db->head.fdt);
/*
	clear the complete LDb, not just sizeof(db) bytes: db is a pointer, so
	sizeof(db) is the pointer size and would leave flags and the pointers
	freed above behind in the structure
*/
2940 memset( db, 0, sizeof(*db) );
/*
	cDCheck: walk the records of database dbid by file offset and compare
	each record's position with the offset getOff reports for its mfn.
	The walk starts at offset 64; ldb_readRecAtOff reads the record at the
	current offset and stores the following offset in nxtoff. The loop
	runs while nxtoff stays greater than 0.
	One '.' is written to lio_out per iteration.
	NOTE(review): the loop opener, the declaration of r, the comparison
	that guards the warning, any use of flags and the return statement
	are not part of this listing.
*/
2946 int cDCheck ( int dbid, int flags )
2948 static char dot = '.';
2950 LDb *db = getDb( dbid );
/* off keeps the offset of the record being read, nxtoff the one after it */
2951 int nxtoff = 64, off;
2957 lio_write( &lio_out, &dot, 1 );
2958 if ( (r = ldb_readRecAtOff(dbid,off=nxtoff,&nxtoff)) ) {
/* offset that getOff yields for this record's mfn */
2959 int o = getOff( db, r[LMFR_MFN], 0 );
2961 log_msg( LOG_WARN, "mfn %d xrf %d != real %d\n",
2962 r[LMFR_MFN], o, off );
2966 } while ( 0 < nxtoff );
2971 OpenIsisIdx *cXOpen ( int dbid, int mode )
2973 LDb *db = getDb( dbid );
2975 || !(db->flags & DB_LBTOPEN)
2976 || !(db->oxi.flg & LBT_WRITE) /* may be writable if db is not */
2979 preliminary undocumented feature:
2980 mode -1 gives direct access in non-batch mode
2982 || (0 <= mode && lbt_batch( & db->oxi, (unsigned char)mode ))