/*
	openisis - an open implementation of the CDS/ISIS database
	Version 0.8.x (patchlevel see file Version)
	Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.

	This library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	Lesser General Public License for more details.

	You should have received a copy of the GNU Lesser General Public
	License along with this library; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

	see README for more information

	$Id: lcs.h,v 1.3 2003/04/08 00:20:52 kripke Exp $
*/
/* Encoding kinds.
   NOTE(review): the line that should open this enumerator list is missing
   here - confirm the declaration against upstream. */
30 LCS_SINGLE, /* some ASCII-compatible single-byte charset */
31 LCS_UTF8, /* Unicode in 8-bit transfer encoding */
32 LCS_UCS2 /* the Unicode Basic Multilingual Plane (BMP, first 64K chars)
   in 2-byte encoding, native (platform) byte order */
/* Known single-byte charsets.
   NOTE(review): the line that should open this enumerator list is missing
   here - confirm the declaration against upstream. */
37 LCS_LATIN1, /* the ISO-8859-1 (Latin 1) charset */
38 LCS_CP850, /* MS-DOS Latin 1, known as the "OEM" charset */
39 LCS__SBCS /* number of single-byte charsets */
/* Table kinds (presumably selectors for the per-charset tables - TODO confirm).
   NOTE(review): the line that should open this enumerator list is missing
   here - confirm the declaration against upstream. */
44 LCS_CTYPE, /* character class flags, see the LCS_R .. LCS_N values below */
45 LCS_UNICO, /* 128 unicodes */
46 LCS_UCASE, /* each character's uppercase code */
47 LCS_INDEX, /* single-byte recoding for index ("strxfrm") */
48 LCS_XEDNI, /* inverse of LCS_INDEX */
49 LCS_RECOD, /* single-byte recoding to extern (typically Latin1) */
50 LCS_DOCER, /* inverse of LCS_RECOD */
/* Some conversion table on a single-byte charset.
   c may hold an actab (!0 for any character deemed "alpha"),
   a uctab ("uppercase" replacement code, typically removing diacriticals),
   a code-to-code conversion, or other.
   u may hold Unicode values for the upper half of an
   ASCII-compatible single-byte charset.
*/
/* May hold the Unicode values for the upper half of the charset (128 entries).
   NOTE(review): the struct header and the c member are missing around this
   line - confirm against upstream. */
65 unsigned short u[128];
/* Character class flags. The LCS_IS* macros test these values by bit mask:
   0x80 set        -> "word" character (LCS_ISWORD), clear -> separator (LCS_ISSEP)
   0xc0 both set   -> identifier character (LCS_ISIDENT), both clear -> control (LCS_ISCONTR)
   0xe0 all set    -> alphabetic (LCS_ISALPHA), all clear -> field/record separator (LCS_ISFR)
   0x08 set        -> "other" separator (LCS_ISCST)
   NOTE(review): listing lines 77 and 79 are absent, so enumerators may be
   missing between LCS_T and LCS_I; the line opening the list is missing
   too - confirm against upstream. */
69 /* controls (0..31,127) */
70 LCS_R = 0x00, /* record separator (FF,GS) */
71 LCS_F = 0x10, /* field separator (NUL,CR,LF,RS) */
72 LCS_C = 0x28, /* other control */
73 /* other separators */
74 LCS_S = 0x48, /* space (blank,nbsp) */
75 LCS_T = 0x68, /* other token separator (,:;=) */
76 /* other punctuation, assumed to be "word" characters */
78 LCS_Y = 0xb0, /* symbol */
80 LCS_I = 0xc0, /* C-ident (_) */
81 LCS_D = 0xd0, /* digits ('0'..'9') */
82 LCS_A = 0xe0, /* ASCII alpha */
83 LCS_L = 0xe4, /* ... among those, the formatting literals a,c,i,x */
84 LCS_N = 0xf0 /* other alpha ("national"/non-ASCII) */
/*
 * Character class predicates on a ctype flag value t (one of the
 * LCS_R .. LCS_N codes). Each macro evaluates t exactly once.
 * LCS_ISPRINT yields the masked bits (nonzero, not necessarily 1) when
 * true; the others yield 0 or 1.
 */
/* control: neither of the two top bits is set (LCS_R, LCS_F, LCS_C) */
#define LCS_ISCONTR( t ) (!(0xc0 & (t)))
/* any class code up to and including LCS_S, i.e. controls and space */
#define LCS_ISSPACE( t ) (LCS_S >= (t))
/* printable: at least one of the two top bits is set;
   mask form of the comparison (LCS_S <= (t)) */
#define LCS_ISPRINT( t ) (0xc0 & (t))
/* identifier character: both top bits set (LCS_I, LCS_D, LCS_A, LCS_L, LCS_N) */
#define LCS_ISIDENT( t ) (0xc0 == (0xc0 & (t)))
/* alphabetic: top three bits set (LCS_A, LCS_L, LCS_N) */
#define LCS_ISALPHA( t ) (0xe0 == (0xe0 & (t)))
/*
 * Predicates for record parsing, applied to a ctype flag value t.
 * Each macro evaluates t exactly once. LCS_ISWORD and LCS_ISCST yield the
 * masked bit (0x80 / 0x08) when true; the others yield 0 or 1.
 */
/* separator: top bit clear (controls, LCS_S, LCS_T) */
#define LCS_ISSEP( t ) (!(0x80 & (t)))
/* word character: top bit set */
#define LCS_ISWORD( t ) (0x80 & (t))
/* field or record separator: top three bits clear (LCS_R, LCS_F) */
#define LCS_ISFR( t ) (!(0xe0 & (t)))
/* other separator: bit 0x08 set (LCS_C, LCS_S, LCS_T) */
#define LCS_ISCST( t ) (0x08 & (t))
101 LCS_MKUNI = -1 /* pass as the "bits" argument of lcs_mktab to have it fill the unicode table (dst->u) */
104 /** create the table dst from string containing
105 a free style sequence of decimal numbers (ignoring any non-digit).
106 if bits is 0, numbers are assigned sequentially to dst->c,
107 else if bits < 0, they are assigned to dst->u,
108 else for every number, bits are set in the corresponding array element
109 else something strange may happen.
111 extern int lcs_mktab ( LcsTab *dst, char *numbers, int len, int bits );
/**
	Create a single-byte conversion table in dst from two unicode tables.
	If to is NULL, the trivial (Latin1) table is used.

	NOTE(review): the required sizes of dst, from and to, and the meaning
	of the return value, are not stated here - confirm against the
	implementation.
*/
extern int lcs_mkrecod ( unsigned char *dst, unsigned short *from,
	unsigned short *to );
/* Predefined 256-entry byte tables for Latin 1, defined elsewhere.
   NOTE(review): judging by the names, presumably the uppercase (uc) and
   character class (ct) tables - confirm against the definitions. */
extern unsigned char lcs_latin1_uc[256];
extern unsigned char lcs_latin1_ct[256];