2 openisis - an open implementation of the CDS/ISIS database
3 Version 0.8.x (patchlevel see file Version)
4 Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 see README for more information
24 $Id: lfmt.c,v 1.14 2003/04/08 00:20:52 kripke Exp $
25 implementation of record cooking.
28 #include <string.h> /* memset et al */
/* Log level handed to LOG_DBG() for the formatter's debug traces in this file. */
33 #define LFMT_DBG LOG_WARN /* level of debugging */
35 /* ************************************************************
38 typedef struct { /* function definition */
39 const char *name; /* hmmm ... fixed char[] probably faster */
46 LFMT_MH, /* heading */
50 typedef struct LfmtIter_ { /* value iterator */
51 const unsigned char *def; /* position in format where iter was opened. */
52 const unsigned char *end; /* position in format where iter was closed. */
53 int pos; /* next field pos */
54 int off; /* offset to start search for next subfield */
55 int occ; /* next occ */
59 char submode; /* primary loop (counted by occ) is over subfields. */
60 int from; /* primary range. */
62 int sfrom; /* additional subfield range. */
68 typedef struct LfmtRec_ { /* record context -- pushed by the r in REF */
69 struct LfmtRec_ *prev;
71 int frame; /* stack frame associated with this record */
72 const unsigned char *loop; /* position in format where loop was opened. */
73 LfmtIter iter[LFMT_NITER]; /* iterators in loop */
74 int occ; /* loop occ */
75 unsigned char i; /* iterator count */
76 char more; /* more flag */
77 char had; /* had flag */
78 char sub; /* database's subfield separator, typically ^ or $ */
81 typedef struct LfmtFmt_ { /* format context -- pushed by @ */
82 struct LfmtFmt_ *prev;
83 const unsigned char *f;
84 const unsigned char *e; /* end -- just for paranoia check */
86 int upcase; /* uppercase flag */
90 typedef struct { /* other type */
91 const LfmtToken *tok; /* recognized token */
92 int data; /* start of params in output */
93 int used; /* used len of output up to our params */
94 char pos; /* position in signature */
95 char want; /* currently expected type */
96 char expl; /* explicit flag */
97 char rec; /* record flag */
102 list of special token tags after the format tags 1 .. 999.
103 unlike formatting tokens, which just emit a field with their negative tag,
104 the literal's tag is reset to 0,
105 and all other tokens emit fields with tags based on their parameters.
107 tokens are organized in groups of (up to) 16.
108 the group of an operator determines its precedence.
/*
	Token tags are numbered in groups of 16.
	LFMT_DEFGROUP(i) yields the first tag of group i (i shifted left by 4),
	LFMT_GROUP(t) yields the group a tag t falls into (t shifted right by 4).
	Each macro evaluates its argument exactly once.
*/
110 #define LFMT_DEFGROUP(i) ((i) << 4) /* * 16 */
111 #define LFMT_GROUP(i) ((i) >> 4)
115 LFMT_LITERAL = 63, /* literals group starting from 63*16 = 1008 */
116 /* syntactical groups starting from 64*16 = 0x400 */
118 LFMT_VALUES = 96, /* starting from 96*16 = 0x600 */
120 LFMT_OPITER, /* iterator operators */
121 LFMT_OPSTR, /* string operators */
122 LFMT_OPMUL, /* multiplicative operators */
123 LFMT_OPADD, /* additive operators */
124 LFMT_OPREL, /* relational operators */
125 LFMT_OPBOOL, /* boolean operators */
126 LFMT_OPASSIGN, /* assignment */
127 LFMT_STOPPER = 128, /* starting from 128*16 = 0x800 */
133 LFMT_LIT = LFMT_DEFGROUP( LFMT_LITERAL ),
137 LFMT_BARE, /* field of bareword C&A literal */
138 LFMT_NUMBER, /* standalone numeric literal */
140 LFMT_OPEN = LFMT_DEFGROUP( LFMT_FUNCTIONS ),
155 LFMT_SI = LFMT_DEFGROUP( LFMT_VALUES ),
158 LFMT_V = LFMT_DEFGROUP( LFMT_ITERATORS ),
161 /* operator groups by decreasing precedence */
162 LFMT_INDEX = LFMT_DEFGROUP( LFMT_OPITER ),
164 LFMT_STAR = LFMT_DEFGROUP( LFMT_OPSTR ),
165 LFMT_DOT, LFMT_INDENT,
166 LFMT_MUL = LFMT_DEFGROUP( LFMT_OPMUL ),
168 LFMT_PLUS = LFMT_DEFGROUP( LFMT_OPADD ),
170 LFMT_EQ = LFMT_DEFGROUP( LFMT_OPREL ),
171 LFMT_NE, LFMT_LT, LFMT_LE, LFMT_GT, LFMT_GE, LFMT_CT,
172 LFMT_AND = LFMT_DEFGROUP( LFMT_OPBOOL ),
174 LFMT_ASSIGN = LFMT_DEFGROUP( LFMT_OPASSIGN ),
176 LFMT_COMMA = LFMT_DEFGROUP( LFMT_STOPPER ),
180 LFMT_BLANK = LFMT_DEFGROUP( LFMT_SYNTAX ),
184 LFMT_IF, LFMT_THEN, LFMT_ELSE, LFMT_FI,
185 LFMT_SELECT, LFMT_CASE, LFMT_ELSECASE, LFMT_ENDSEL,
190 while fields emitted by formatting and literal tokens
191 have tags between -1 and -1023 (0xffffffff to 0xfffffc01)
192 with all high bits set, numerics have lowest negative values
193 ranging from 0x80000000 to 0xbfffffff (-2147483648 to -1073741825).
194 That means the highest two bits are 1 and 0.
195 All numeric fields have a val of NULL and use len to hold a number.
196 Besides pure int or fixed-point numbers,
197 there are other numerical types:
198 next two bits may indicate a range or field selector.
199 The lower 3 bytes may give fractional part or range end.
/*
	Bit layout of numeric field tags.
	LFMT_NMASK selects the top four bits of a 32 bit tag value:
	the pattern 0x8 there is a basic number, 0xa a number with the iterator bit.
	LFMT_VAL selects the low three bytes holding additional info.
	NOTE(review): all constants are of a long type. If tags are 32 bit ints
	and long is wider, a negative tag is sign-extended before the comparison:
	the masked tests (ISNUM, ISITR) still match, but the plain == in
	LFMT_ISINT could then never be true -- confirm the tag type and platform.
*/
201 #define LFMT_NMASK 0xf0000000L /* highest bits 1 and 0 mark a number */
202 #define LFMT_NUM 0x80000000L /* basic number */
203 #define LFMT_ITR 0xa0000000L /* with iterator bit */
204 #define LFMT_VAL 0x00ffffffL /* mask for 3 bytes additional info */
/* non-zero if the top four bits of t are exactly the basic number pattern */
206 #define LFMT_ISNUM(t) (LFMT_NUM == (LFMT_NMASK & (t)))
/* non-zero if t equals LFMT_NUM exactly, i.e. no other bit is set */
208 #define LFMT_ISINT(t) (LFMT_NUM == (t))
/* non-zero if the top four bits of t are exactly the iterator pattern */
210 #define LFMT_ISITR(t) (LFMT_ITR == (LFMT_NMASK & (t)))
212 /* ************************************************************
/*
	Anonymous token used when the format text starts with a digit.
	It has an empty name; the 'i' in its signature makes the tokenizer
	eat the digits as a numeric literal.
*/
215 static const LfmtToken number = { "", "_1i", LFMT_NUMBER };
216 /* while a numeric *field* has a large LFMT_NUM tag,
217 the anonymous number *token* has a literal tag,
218 so we don't take it as a high-precedence operator.
/*
	Two internal tokens that share the signature "_0o_".
	Neither is listed in the dictionary entries visible in this file,
	so they are presumably only pushed by the formatter itself as
	frames (blank and loop are named as record mode frames further down)
	-- TODO confirm at the places where they are pushed.
*/
220 static const LfmtToken blank = { "_", "_0o_", LFMT_BLANK };
221 static const LfmtToken loop = { "loop", "_0o_", LFMT_LOOP };
224 32 = 33 = ! 34 = " 35 = # 36 = $ 37 = % 38 = & 39 = '
225 40 = ( 41 = ) 42 = * 43 = + 44 = , 45 = - 46 = . 47 = /
226 48 = 0 49 = 1 50 = 2 51 = 3 52 = 4 53 = 5 54 = 6 55 = 7
227 56 = 8 57 = 9 58 = : 59 = ; 60 = < 61 = = 62 = > 63 = ?
228 64 = @ 65 = A 66 = B 67 = C 68 = D 69 = E 70 = F 71 = G
229 72 = H 73 = I 74 = J 75 = K 76 = L 77 = M 78 = N 79 = O
230 80 = P 81 = Q 82 = R 83 = S 84 = T 85 = U 86 = V 87 = W
231 88 = X 89 = Y 90 = Z 91 = [ 92 = \ 93 = ] 94 = ^ 95 = _
232 96 = ` 97 = a 98 = b 99 = c 100 = d 101 = e 102 = f 103 = g
233 104 = h 105 = i 106 = j 107 = k 108 = l 109 = m 110 = n 111 = o
234 112 = p 113 = q 114 = r 115 = s 116 = t 117 = u 118 = v 119 = w
235 120 = x 121 = y 122 = z 123 = { 124 = | 125 = } 126 = ~
/*
	Terminator for the token dictionaries.
	ENDDICT is the closing entry of a dictionary: its name is the sentinel
	array itself (a single 0x7f byte), its signature is NULL and its tag 0.
	The lookup recognizes an empty dictionary by comparing the name
	pointer with sentinel, not by comparing string contents.
*/
237 /* stops dictionary search */
238 static const char sentinel[] = "\x7f";
239 #define ENDDICT { sentinel, NULL, 0 }
241 static const LfmtToken dictSpecial[] = {
242 { "", "_0", 0 }, /* eof */
243 { "!", "_1x", OPENISIS_FMT_ESC },
244 { "\"", "_1x", LFMT_COND },
245 { "#", "_0", OPENISIS_FMT_SHARP },
246 /* { "$", "_0", LFMT_LIT }, */
247 { "%", "_0", OPENISIS_FMT_PERCENT },
248 { "&", "_1as_", LFMT_AMP },
249 { "'", "_1x", LFMT_LIT },
250 { "(", "_0", LFMT_OPEN },
251 { ")", "_0", LFMT_CLOSE },
252 { "*", "n1n", LFMT_MUL },
253 { "+", "n1n", LFMT_PLUS },
254 { ",", "_0", LFMT_COMMA },
255 { "-", "n1n", LFMT_MINUS },
256 { ".", "s1i", LFMT_DOT },
257 { "..","_0", LFMT_RANGE },
258 { "/", "n1n", LFMT_DIV }, /* alias { "/", "_0", OPENISIS_FMT_SLASH }, */
259 { "/*","_1x", LFMT_COMM },
261 { ":", "s1s", LFMT_CT },
262 { ":=","v1f", LFMT_ASSIGN },
263 /* { ";", "b_", LFMT_LIT }, */
264 { "<", "n1n", LFMT_LT },
265 { "<=","n1n", LFMT_LE },
266 { "<>","n1n", LFMT_NE },
267 { "=", "n1n", LFMT_EQ },
268 { ">", "n1n", LFMT_GT },
269 { ">=","n1n", LFMT_GE },
270 /* { "?", "b_", LFMT_LIT }, */
271 { "@", "_1a", LFMT_AT },
273 { "[", "v1nn", LFMT_INDEX },
274 /* { "\\", "_0", LFMT_LIT }, */
275 { "]", "_0", LFMT_CLOSE },
276 { "^", "v1c", LFMT_SUBFIELD },
277 /* { "_", "_0", LFMT_LIT }, */
278 /* { "`", "_0", LFMT_LIT }, */
280 { "{", "_0", OPENISIS_FMT_OPEN },
281 { "|", "_1x", LFMT_REP },
282 { "}", "_0", OPENISIS_FMT_CLOSE },
283 { "~", "_1n", LFMT_NOT },
/* no token starts with 'A': the dictionary holds only the terminator */
285 static const LfmtToken dictA[] = { ENDDICT };
286 static const LfmtToken dictB[] = {
287 { "B", "_0", OPENISIS_FMT_B },
289 static const LfmtToken dictC[] = {
290 { "C", "_1i", OPENISIS_FMT_C },
292 static const LfmtToken dictD[] = {
293 { "D", "_1i", LFMT_D },
295 static const LfmtToken dictE[] = {
296 { "E", "_1i", LFMT_EI },
297 { "ELSE", "_1s_", LFMT_ELSE },
299 static const LfmtToken dictF[] = {
300 /* { "F", "_1nii", LFMT_F }, */
301 { "F", "_1i", OPENISIS_FMT_F },
/* no token starts with 'G' or 'H': these dictionaries hold only the terminator */
303 static const LfmtToken dictG[] = { ENDDICT };
304 static const LfmtToken dictH[] = { ENDDICT };
305 static const LfmtToken dictI[] = {
306 { "I", "_0", OPENISIS_FMT_I },
307 { "IF", "_1n", LFMT_IF },
/* no token starts with 'J' or 'K': these dictionaries hold only the terminator */
309 static const LfmtToken dictJ[] = { ENDDICT };
310 static const LfmtToken dictK[] = { ENDDICT };
311 static const LfmtToken dictL[] = {
312 { "LINK", "_1s_", OPENISIS_FMT_LINK },
314 static const LfmtToken dictM[] = {
315 { "M", "_1ii", OPENISIS_FMT_M },
316 { "MDL", "_0", LFMT_MDL },
317 { "MDU", "_0", LFMT_MDU },
318 { "MHL", "_0", LFMT_MHL },
319 { "MHU", "_0", LFMT_MHU },
320 { "MPL", "_0", LFMT_MPL },
321 { "MPU", "_0", LFMT_MPU },
323 static const LfmtToken dictN[] = {
324 { "N", "_1i", LFMT_N },
325 { "NC", "_0i", OPENISIS_FMT_NC },
/* no token starts with 'O': the dictionary holds only the terminator */
327 static const LfmtToken dictO[] = { ENDDICT };
328 static const LfmtToken dictP[] = {
329 { "PICT", "_1s", OPENISIS_FMT_PICT },
331 static const LfmtToken dictQ[] = {
332 { "QC", "_0", OPENISIS_FMT_QC },
334 static const LfmtToken dictR[] = {
335 { "REF", "_2rs_", LFMT_REF },
337 static const LfmtToken dictS[] = {
338 { "S", "_1s_", LFMT_S },
340 static const LfmtToken dictT[] = {
341 { "TAB", "_0i", OPENISIS_FMT_TAB },
343 static const LfmtToken dictU[] = {
344 { "UL", "_0", OPENISIS_FMT_UL },
346 static const LfmtToken dictV[] = {
347 { "V", "_1i", LFMT_V },
/* no token starts with 'W': the dictionary holds only the terminator */
349 static const LfmtToken dictW[] = { ENDDICT };
350 static const LfmtToken dictX[] = {
351 { "X", "_0i", OPENISIS_FMT_X },
/* no token starts with 'Y': the dictionary holds only the terminator */
353 static const LfmtToken dictY[] = { ENDDICT };
354 static const LfmtToken dictZ[] = {
355 { "Z", "_1is", LFMT_Z },
358 static const LfmtToken * const dictAZ[26] = {
359 dictA, dictB, dictC, dictD, dictE, dictF, dictG, dictH, dictI,
360 dictJ, dictK, dictL, dictM, dictN, dictO, dictP, dictQ, dictR,
361 dictS, dictT, dictU, dictV, dictW, dictX, dictY, dictZ
365 /* character conversion */
366 static const unsigned char upcase[256] = {
367 0,' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',
368 ' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',
369 ' ','!','"','#','$','%','&','\'', '(',')','*','+',',','-','.','/',
370 '0','1','2','3','4','5','6','7', '8','9',':',';','<','=','>','?',
371 '@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
372 'P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_',
373 '`','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
374 'P','Q','R','S','T','U','V','W','X','Y','Z','{','|','}','~',' ',
375 '?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
376 '?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
377 '?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
378 '?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
379 '?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
380 '?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
381 '?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?',
382 '?','?','?','?','?','?','?','?','?','?','?','?','?','?','?','?'
386 DIG=0x01, /* digit */
387 ALP=0x02, /* ascii alphabetic */
388 IDE=0x04, /* other identifier character */
389 LIT=0x10, /* as signature type, eat literal (a,c,i,x) */
393 static const unsigned char ctype[256] = {
394 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
395 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
396 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
397 DIG,DIG,DIG,DIG,DIG,DIG,DIG,DIG,DIG,DIG, 0, 0, 0, 0, 0, 0,
398 /*'@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',*/
399 0,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,
400 /*'P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_',*/
401 ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP, 0, 0, 0, 0,IDE,
402 0,ALT,ALP,ALT,ALP,ALP,ALP,ALP,ALP,ALT,ALP,ALP,ALP,ALP,ALP,ALP,
403 ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALP,ALT,ALP,ALP, 0, 0, 0, 0, 0,
404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
408 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
409 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
410 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
411 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
415 /* ************************************************************
418 /* don't rely on braindead ctype.h, it may use locale */
/*
	Character class tests.
	Each one looks its argument up in this file's own ctype[] table,
	casting it to unsigned char first so that any char value is a valid index.
	The result is the matching class bits (non-zero) or 0, not necessarily 1.
	ISLITER tests the LIT bit, which marks signature types that eat a literal.
*/
419 #define ISALPHA(u) ( ALP & ctype[(unsigned char)(u)])
420 #define ISDIGIT(u) ( DIG & ctype[(unsigned char)(u)])
421 #define ISALNUM(u) ((ALP|DIG) & ctype[(unsigned char)(u)])
422 #define ISIDENT(u) ((ALP|DIG|IDE) & ctype[(unsigned char)(u)])
423 #define ISLITER(u) ( LIT & ctype[(unsigned char)(u)])
425 /* ************************************************************
428 /* ************************************************************
432 Rec* rFmt ( Rec *buf, const char *fmt, Rec *irec )
436 #define ADD( ntag, s, n ) do { \
437 RADD( o, ntag, s, n, o!=buf ); \
438 if ( NULL == o ) goto outofmem; \
440 #define CAT( s, n ) do { \
441 RCAT( o, s, n, o!=buf ); \
442 if ( NULL == o ) goto outofmem; \
445 #define FADD( f ) ADD( f.tag, f.val, f.len )
447 #define SADD( ntag, s ) ADD( ntag, s, strlen(s) )
448 #define SCAT( s ) CAT( s, strlen(s) )
449 #define SERR( err ) do { \
450 o = rMsg( o, o!=buf, -LOG_ERROR, \
451 "@%d: %s", fp - f->f, err ); \
452 if ( NULL == o ) goto outofmem; \
454 #define SERR1( fmt, a ) do { \
455 o = rMsg( o, o!=buf, -LOG_ERROR, \
456 "@%d: " fmt, fp - f->f, a ); \
457 if ( NULL == o ) goto outofmem; \
459 #define SERR2( fmt, a, b ) do { \
460 o = rMsg( o, o!=buf, -LOG_ERROR, \
461 "@%d: " fmt, fp - f->f, a, b ); \
462 if ( NULL == o ) goto outofmem; \
465 #define NADD( i, d ) do { \
466 ADD( LFMT_NUM | (d), NULL, 0 ); \
467 o->field[o->len - 1].val = NULL; \
468 o->field[o->len - 1].len = i; \
471 LfmtFrame stack[128], *s = stack; /* current frame (always == stack+sp) */
473 /* push new frame for token t */
474 #define PUSHFRAME( t ) do { \
475 if ( ++sp == sizeof(stack)/sizeof(stack[0]) ) goto overflow; \
477 memset( s, 0, sizeof(*s) ); \
481 s->want = (t)->sign[2]; \
484 LfmtFmt fmt0, *f = NULL; /* default and current format */
485 const unsigned char *fp = NULL; /* format pointer */
486 #define PUSHFORMAT( pfmt, fstr ) do { \
487 memset( pfmt, 0, sizeof(*(pfmt)) ); \
490 fp = f->f = (const unsigned char *)fstr; \
491 f->e = fp + strlen(fstr); \
494 LfmtRec rec0, *r = NULL; /* default and current record */
495 #define PUSHRECORD( prec, nrec ) do { \
496 memset( prec, 0, sizeof(*(prec)) ); \
504 int tlen = 0; /* token length */
509 PUSHFORMAT( &fmt0, fmt );
510 PUSHRECORD( &rec0, irec );
512 o->dbid = irec->dbid ;
513 o->rowid = irec->rowid;
516 for ( ;; fp += tlen ) {
517 const LfmtToken *tok = NULL;
520 int start = s->data + s->pos; /* start of current param in output */
521 int flds = o->len - start; /* # fields for current param */
522 int vpos = 0; /* pos in dict where we had match of tlen chars */
523 int dpos = 0; /* pos in dict */
525 unsigned char eatlit = 0;
526 const LfmtToken *dict;
528 CLOSE_OP, /* don't close anything, operator wants field */
529 CLOSE_VAR, /* close variables only */
530 CLOSE_FIELD, /* coerce field only */
531 CLOSE_PARAM, /* compact param only (eat comma) */
532 CLOSE_EXPL, /* close frame explicitly (eat closing token) */
533 CLOSE_IMPL /* close frame implicitly, continue on token */
535 static const char *closename[] = {
536 "", "var", "field", "param", "expl", "impl" };
539 tlen = 0; /* verified length */
543 "frame %s%c%s id %d pos %d got %d want %c%s",
544 s->tok->name, s->expl ? '(' : ' ', s->tok->sign, s->tok->tag,
545 s->pos, o->len - s->data, s->want, flds ? " (have)" : "" );
549 while ( ' ' == (u = upcase[*fp]) ) /* eat whitespace */
553 dict = dictAZ[u-'A'];
554 if ( sentinel != dict->name )
555 u = upcase[ fp[tlen = 1] ];
556 } else if ( ISDIGIT( *fp ) ) {
557 tok = &number; /* read numeric literal */
562 /* tok is the last candidate that matched up to ITS length */
565 LOG_DBG( LOG_TRACE, "trying token '%s'", dict[dpos].name );
566 if ( tlen && vpos != dpos
567 && strncmp( dict[dpos].name, dict[vpos].name, tlen ) )
568 break; /* ran out of verified length */
569 if ( u == dict[dpos].name[tlen] )
571 while ( (test = dict[dpos].name[tlen]) && u == test )
572 /* test this entry -- same pos, next char */
573 u = upcase[ fp[++tlen] ];
574 if ( !test ) /* dict entry ends here -- a hit so far */
577 LOG_DBG( LFMT_DBG, "\thit on token %s(%s) len %d",
578 tok->name, tok->sign, tlen );
580 if ( u < test || !u ) /* too large - bail out*/
584 if ( NULL != tok ) { /* had some match */
585 if ( tok < &dict[vpos] ) /* had later == longer match */
587 else if ( dict != dictSpecial ) {
588 /* alpha-token must not be followed by alpha */
589 assert( tlen == (int)strlen(tok->name) );
590 if ( ISALPHA( u ) && ISALPHA( tok->name[tlen-1] ) )
595 if ( NULL == tok /* no hit in alphadict */
596 || (dictSpecial == tok && *fp) /* false eof hit in special dict */
598 SERR2( "unrecognized token '%.*s'", tlen+1, fp );
602 if ( ISLITER( tok->sign[2] ) ) /* eat literals */
603 switch ( tok->sign[2] ) { /* c, a, i, x */
605 if ( (lit.len = (eatlit = fp[tlen]) ? 1 : 0 ) ) {
606 lit.tag = -LFMT_BARE;
607 lit.val = (char *)fp + tlen;
612 if ( (eatlit = ISIDENT( fp[tlen] )) ) {
613 lit.tag = -LFMT_BARE;
614 lit.val = (char *)fp + tlen;
616 while ( ISIDENT( fp[tlen] ) ) {
626 if ( (eatlit = ISDIGIT( u = fp[tlen] )) ) {
628 while ( u && 10 > (u -= '0') ) {
629 lit.len = 10*lit.len + u;
633 /* decimal digits on standalone numeric literal */
635 && (unsigned char)('.'-'0') == u
636 && '.' != fp[tlen+1] /* avoid eating 1..2 */
640 while ( (u = fp[++tlen]) && 10 > (u -= '0') )
645 while ( 6 < dec++ ) v *= 10;
646 lit.tag = LFMT_NUM | (v & LFMT_VAL);
650 switch ( tok->tag ) {
654 case OPENISIS_FMT_ESC:
661 lit.tag = - tok->tag;
662 lit.val = (char *)fp+1;
663 while ( (u = fp[tlen++])
664 && (eatlit != u || (LFMT_COMM==tok->tag && '/' != fp[tlen]))
667 SERR1( "unterminated %s-literal", tok->name );
671 if ( LFMT_COMM == tok->tag ) {
677 if ( tok->sign[2] ) { /* token wants params */
678 if ( (!eatlit || tok->sign[3]) && (expl = '(' == fp[tlen]) )
684 "at %d: '%.*s' %s(%s) id %d (%x) group %d pos %d%c of %s%c%s",
685 fp - f->f, tlen > 10 ? 10 : tlen, fp,
686 tok->name, tok->sign, tok->tag, tok->tag, LFMT_GROUP(tok->tag),
687 s->pos, s->want, s->tok->name, s->expl?'(':' ', s->tok->sign
690 /* close what needs to be closed */
692 int group = LFMT_GROUP( tok->tag );
693 int dflt, wantnum, gotnum;
697 if ( '_' != tok->sign[0] /* token is operator ... */
698 && (s->expl /* ... within explicit frame or ... */
699 || group < LFMT_GROUP( s->tok->tag )) /* ... of higher prec */
700 ) /* we take it -- even as a variable ? */
701 close = 'v' == tok->sign[0] ? CLOSE_OP : CLOSE_VAR;
702 else switch ( tok->tag ) {
712 if ( CLOSE_VAR > close ) /* leave variable to operator */
713 break; /* close loop */
715 if ( flds && LFMT_ISITR( o->field[ o->len-1 ].tag ) ) {
716 /* dereference iterator variable */
717 LfmtIter * iter = r->iter + r->i;
720 iter->from = 1; /* field defaults to 1..0 (all) */
722 iter->sfrom = iter->sto = 1; /* subfield defaults to 1..1 (1st) */
725 LOG_DBG( LFMT_DBG, "iterator %d/%d V%d[%d..%d]^%c[%d..%d]%s",
726 o->field[ o->len-1 ].len, r->occ,
727 iter->tag, iter->from, iter->to,
728 iter->sub ? iter->sub : ' ', iter->sfrom, iter->sto,
729 iter->submode ? " subfield mode" : ""
732 o->len--; /* kill the variable. */
733 /* move to next legal position.
734 In standard mode, where we advance one field occurrence at a time,
735 this is just the occ >= from.
736 A legal position may still emit no field,
737 if a selected subfield is not available in the current field occ.
738 In subfield mode, we may have to advance 0, 1 or several field
739 occurrences to find next occ of subfield.
744 if ( 0 > iter->occ ) /* we were already done */
746 /* if have legal occurence, ADD it */
747 if ( iter->occ >= iter->from ) for ( socc=1;; socc++ ) {
748 /* not initialization pass. */
749 Field *v = r->r->field + iter->pos;
750 const char *src = v->val;
752 if ( iter->sub ) { /* find subfield */
753 const char *p = src + iter->off, *e = src+len;
757 || (iter->sub != *p++ && iter->sub != '*')
763 if ( iter->sfrom && socc < iter->sfrom )
765 if ( iter->sto && socc > iter->sto )
768 while ( p < e && r->sub != *p )
770 iter->off += len = p - src;
772 /* make sure there's enough room, +3 for DATA mode */
773 ADD( v->tag, NULL, len+3 );
777 const char *e = src + len;
778 char *dst = (char *)o->field[ o->len - 1 ].val;
781 for ( src++; src < e; *dst++ = *src++ )
782 if ( '=' == *src || '>' == *src ) {
783 while ( '>' != *src++ && src < e )
785 if ( src < e && '<' == *src ) { /* have >< */
791 } else if ( r->sub == *src ) {
794 if ( dst == o->field[ o->len - 1 ].val )
796 else if ( 'a' == *src )
798 else if ( 'j' > *src )
807 o->field[ o->len - 1 ].len =
808 dst - o->field[ o->len - 1 ].val;
809 if ( LFMT_MD == f->mode )
813 char *p = (char *)o->field[ o->len - 1 ].val;
814 char *e = p + o->field[ o->len - 1 ].len;
816 *p = upcase[(unsigned char)*p];
819 if ( iter->submode || ! iter->sub )
822 if ( iter->submode ) {
823 if ( iter->occ && iter->occ < iter->from ) /* was skipped */
825 for ( ; iter->pos < r->r->len; iter->pos++, iter->off = 0 )
826 if ( iter->tag == r->r->field[ iter->pos ].tag || ! iter->tag ) {
827 Field *v = r->r->field + iter->pos;
828 const char *p = v->val + iter->off, *e = v->val + v->len;
832 || (iter->sub != *p++ && iter->sub != '*')
835 if ( p < e ) { /* hit */
836 iter->off = (p - v->val) - 2;
843 while ( iter->pos < r->r->len
845 && iter->tag != r->r->field[ iter->pos ].tag
851 if ( iter->pos >= r->r->len /* end of record */
852 || (iter->to && iter->to < iter->occ) /* end of selected occ */
857 } while ( iter->occ <= iter->from );
859 if ( 0 < iter->occ ) /* we have a next occurence */
861 if ( ! r->had && flds && -LFMT_REP == o->field[ o->len - 1 ].tag ) {
864 o->used -= o->field[o->len].len;
866 r->i++; /* advance to next iterator */
867 } /* dereference iterator variable */
869 if ( CLOSE_FIELD > close ) /* leave field to operator */
870 break; /* close loop */
872 if ( ! flds ) { /* had no field */
873 if ( CLOSE_PARAM > close ) /* nothing to do */
874 break; /* close loop */
875 } else if ( ! s->expl /* frame is implicit ... */
876 && ( LFMT_LOOP != s->tok->tag /* ... and not a loop ... */
877 || CLOSE_EXPL == close /* ... or we saw a hard closer ... */
878 || (r->i && CLOSE_PARAM == close)
879 /* ... or a loop after 1st iterator on param closer */
881 ) /** upgrade the closing mode to IMPL */
884 /* close the field, i.e. coerce it */
885 dflt = !flds /* param was not given ... */
886 && (s->pos /* but was started explicitly by a comma like F( 3, ) */
887 || CLOSE_PARAM == close /* or is closed explicitly like F( ,3 ) */
889 wantnum = 'i' == s->want || 'n' == s->want;
890 gotnum = flds && LFMT_ISNUM( o->field[ o->len-1 ].tag );
893 "\tclose %s %d%c pos %d%c of %s%c%s id %x",
895 flds, flds ? gotnum ? 'n' : 's' : dflt ? 'd' : '-', s->pos, s->want,
896 s->tok->name, s->expl?'(':' ', s->tok->sign, s->tok->tag );
898 /* close and fix last field */
899 if ( gotnum && !wantnum ) {
900 /* coerce number to string, so it may get concatenated */
902 int ll = lprint( b, o->field[ o->len-1 ].len );
909 if ( CLOSE_PARAM > close )
910 break; /* close loop */
912 /* close and fix param */
914 if ( dflt ) { /* default 0 */
917 } else if ( 1 < flds ) {
918 SERR( "multiple fields for numerical param" );
920 } else if ( flds && !gotnum ) {
921 SERR( "expected number" );
924 if ( 'i' == s->want ) /* kill decimal */
925 o->field[s->data].tag &= ~LFMT_VAL;
930 } else if ( gotnum ) {
931 SERR( "expected string" ); /* shouldn't happen !? */
933 } else if ( 1 < flds /* concat strings ... */
934 && ! s->rec /* ... unless in record mode frame (blank,loop,ref) */
937 for ( i = start+1; i < o->len; i++ )
938 if ( o->field[i].val )
939 o->field[ start ].len += o->field[ i ].len;
944 /* done fixing param */
946 if ( CLOSE_IMPL > close ) {
947 tok = NULL; /* eat token */
948 if ( CLOSE_EXPL > close ) { /* i.e. CLOSE_PARAM */
949 if ( flds ) { /* increment param pos, set next wanted type */
950 int sl = strlen( s->tok->sign ); /* frame takes sl-2 params */
951 if ( ++(s->pos) < sl-2 ) {
952 if ( '_' != s->tok->sign[ 2 + s->pos ] ) /* repeated param */
953 s->want = s->tok->sign[ 2 + s->pos ]; /* else keep last val */
954 } else if ( '_' != s->tok->sign[sl-1] ) { /* too much */
955 SERR2( "expected at most %d params for '%s'",
956 sl-2, s->tok->name );
960 break; /* close loop */
964 /* done with token -- close and fix frame */
966 #define KILLARGS() do { \
971 #define KILLOP() do { \
972 o->len = s->data - 1; \
973 o->used = s->used - (o->field[o->len].val ? o->field[o->len].len : 0); \
976 switch ( s->tok->tag ) {
977 case LFMT_PLUS: /* arithmetic */
981 lll a = o->field[s->data-1].len;
982 lll b = o->field[s->data].len;
983 /* LOG_DBG( LFMT_DBG, "\tarith on %Ld %Ld", a, b ); */
984 switch ( s->tok->tag ) {
985 case LFMT_PLUS: a += b; break;
986 case LFMT_MINUS: a -= b; break;
987 case LFMT_MUL: a *= b; break;
988 case LFMT_DIV: if ( 0 != b ) a /= b; break;
995 int ll = lprint( b, o->field[ s->data ].len );
1000 r->iter[ r->i ].tag = o->field[ s->data ].len;
1001 /* set iterator field */
1002 o->field[ s->data ].tag = LFMT_ITR | r->iter[ r->i ].tag;
1003 o->field[ s->data ].len = r->i;
1006 LfmtIter * iter = r->iter + r->i;
1007 if ( 1 != r->occ ) {
1008 SERR( "OOPS! index not within loop" );
1011 if ( ! iter->sub || (iter->submode = !iter->from) ) {
1013 if ( !(iter->from = (int) o->field[ s->data ].len) )
1015 iter->to = s->pos ? (int) o->field[ s->data+1 ].len : iter->from;
1017 /* additional subfield loop */
1018 if ( !(iter->sfrom = (int) o->field[ s->data ].len) )
1020 iter->sto = s->pos ? (int) o->field[ s->data+1 ].len : iter->sfrom;
1022 iter->end = fp + tlen;
1024 } break; /* case LFMT_INDEX: */
1026 if ( 1 != r->occ ) {
1027 SERR( "OOPS! subfield not within loop" );
1030 r->iter[ r->i ].sub = o->field[ s->data ].val[0];
1031 r->iter[ r->i ].end = fp + tlen;
1036 fp = r->loop; /* start over */
1037 tlen = 0; /* care for += tlen */
1048 /* close frame, unless it's the outmost */
1052 /* TODO: close loop ? */
1055 start = s->data + s->pos; /* start of current param in output */
1056 flds = o->len - start; /* # fields for current param */
1058 } while ( CLOSE_IMPL == close );
1067 if ( NULL == r->loop ) /* open loop ? */
1068 switch ( LFMT_GROUP(tok->tag) ) {
1069 case LFMT_FUNCTIONS:
1070 if ( LFMT_OPEN != tok->tag )
1072 goto openloop; /* else should do, but gcc doesn't grok it right */
1074 if ( LFMT_COND != tok->tag && LFMT_REP != tok->tag )
1076 case LFMT_ITERATORS:
1078 LOG_DBG( LFMT_DBG, "\topening loop on %s", tok->name );
1080 s->rec = stack[sp-1].rec; /* inherit record mode */
1086 if ( (s->expl = (LFMT_OPEN == tok->tag)) ) {
1092 if ( tok->sign[2] && LFMT_LITERAL != LFMT_GROUP(tok->tag) ) {
1093 /* token opens standard frame */
1094 if ( LFMT_ITERATORS == LFMT_GROUP(tok->tag) ) {
1095 if ( 1 != r->occ ) { /* not first run */
1096 if ( r->iter[r->i].def != fp ) {
1097 SERR1( "internal error at %d. iterator", r->i + 1 );
1100 NADD( r->i, LFMT_ITR | r->iter[r->i].tag ); /* push iterator */
1101 fp = r->iter[r->i].end; /* skip parsing */
1102 tlen = 0; /* care for += tlen */
1103 continue; /* next token */
1105 if ( LFMT_NITER-1 == r->i ) {
1106 SERR1( "maximum number of iterators %d exceeded", LFMT_NITER );
1109 memset( r->iter + r->i, 0, sizeof(r->iter[0]) );
1110 r->iter[r->i].def = fp;
1111 r->iter[r->i].end = fp + tlen;
1114 switch ( tok->tag ) { /* special treatment */
1122 LOG_DBG( LFMT_DBG, "\topen %s%c%s id %x",
1123 s->tok->name, s->expl?'(':' ', s->tok->sign, s->tok->tag );
1126 if ( lit.tag ) { /* a literal */
1127 if ( NULL != lit.val ) { /* string literal */
1128 switch ( lit.tag ) {
1130 if ( r->i ? !r->more : r->occ > 1 )
1134 if ( r->i && !r->had )
1138 ADD( lit.tag, lit.val, lit.len );
1139 LOG_DBG( LFMT_DBG, "\tlit %s%.*s id %x",
1140 tok->name, lit.len, lit.val, tok->tag );
1141 } else { /* numeric literal */
1142 NADD( lit.len, lit.tag );
1143 LOG_DBG( LFMT_DBG, "\tlit %d", lit.len );
1145 /* new implicit frame immediately closed by literal ?
1146 problem are implicit loops
1147 if ( o->len == s->data + 1 && ! s->expl ) {
1152 } else switch ( tok->tag ) { /* other special tokens */
1153 case LFMT_MDL: f->mode = LFMT_MD; f->upcase = 0; break;
1154 case LFMT_MDU: f->mode = LFMT_MD; f->upcase = 1; break;
1155 case LFMT_MHL: f->mode = LFMT_MH; f->upcase = 0; break;
1156 case LFMT_MHU: f->mode = LFMT_MH; f->upcase = 1; break;
1157 case LFMT_MPL: f->mode = LFMT_MP; f->upcase = 0; break;
1158 case LFMT_MPU: f->mode = LFMT_MP; f->upcase = 1; break;
1171 if ( NULL == o->field[i].val ) {
1173 } else if ( -LFMT_LIT >= o->field[i].tag )
1174 o->field[i].tag = 0;