1 /***************************************************************************
2 * Copyright (c) 2004, Broadcom Corporation
4 * Confidential Property of Broadcom Corporation
6 * THIS SOFTWARE MAY ONLY BE USED SUBJECT TO AN EXECUTED SOFTWARE LICENSE
7 * AGREEMENT BETWEEN THE USER AND BROADCOM. YOU HAVE NO RIGHT TO USE OR
8 * EXPLOIT THIS MATERIAL EXCEPT SUBJECT TO THE TERMS OF SUCH AN AGREEMENT.
10 * $brcm_Workfile: nanoxml.h $
11 * $brcm_Revision: Irvine_BSEAVSW_Devel/1 $
12 * $brcm_Date: 1/6/04 11:15p $
18 * $brcm_Log: /vobs/BSEAV/linux/lib/nanoxml/nanoxml.h $
20 * Irvine_BSEAVSW_Devel/1 1/6/04 11:15p erickson
21 * PR9211: created nanoxml library and testapp
23 ****************************************************************/
27 /*=*******************
28 NanoXML is a minimal streaming SAX XML parser. As a SAX parser, it parses XML and fires events
29 back to the caller using callbacks. It does not build data structures like a DOM parser. As
30 a streaming parser, it can process XML in incremental buffers, even 1 byte at a time. It makes
31 no assumptions as to where memory boundaries are.
34 1) Tiny. It compiles to 1650 bytes on i386 with gcc (-O2, stripping symbols).
35 2) Portable. It is written in cross-platform ANSI C.
36 3) Fast. It limits memory copying to one case: when a tag or attribute name
37 spans an nxml_write() call boundary. Otherwise there are no memory copies.
38 Using 4K disk reads, I measured 80 MB parsed in 1.3 seconds on stb-irva-01.broadcom.com.
41 <tag attribute_name="attribute_value">data</tag>
44 tag_begin - called when a new tag is encountered. Passes the complete tag name.
45 attribute_begin - called when a new attribute is encountered. Passes the complete attribute name.
46 attribute_value - called with attribute value data. It could require multiple callbacks
47 to complete the attribute value.
48 data - called with element data. It could require multiple callbacks to complete the
50 tag_end - called when a tag is closed. Even self-closing tags (e.g. <tag/>) receive
51 both a tag_begin and a tag_end.
54 1) Tag and attribute names cannot be greater than NXML_MAX_NAME_SIZE. Larger
55 names will be truncated without any warning.
56 2) You will receive one tag_begin callback at the beginning of each tag, but before any attribute
57 callbacks for that tag.
58 3) You may receive zero or more attribute_begin callbacks after the tag_begin but before
59 the first data callback. After you receive a data callback, you cannot receive
60 an attribute_begin callback.
61 4) You can receive many data or attribute_value callbacks for each tag or attribute.
62 This is because the size of the data or attribute_value is not bounded.
63 5) If you have a tag that has attributes and that tag ends with a "/" (e.g. <tag attr="value" />),
64 the tag_end callback will not be given the tag name:
65 tag_name will be a non-NULL pointer but len == 0.
66 This could be changed by increasing the storage to two name buffers instead of one.
69 1) support attribute values with single quotes or no quotes
70 2) support double double quotes in attribute values
71 3) use separate buffers for tag and attribute names
72 4) support the <?xml version="1.0"?> tag
77 Opaque handle returned by nxml_open.
/* nxml_t is a pointer type: nxml_open() fills one in through an nxml_t *,
 * while nxml_close() and nxml_write() take it by value. */
79 typedef struct nxml *nxml_t;
83 Maximum size of tag or attribute name that can be guaranteed to
/* Also the size of the namecache buffer declared further down
 * (char namecache[NXML_MAX_NAME_SIZE]); tag and attribute names longer than
 * this are truncated without any warning.
 * NOTE(review): whether this limit leaves room for a terminating NUL is not
 * visible here -- callbacks receive an explicit len; confirm in the
 * implementation. */
86 #define NXML_MAX_NAME_SIZE 128
90 Settings structure which must be passed to nxml_open.
93 Every callback MUST be specified.
96 void (*tag_begin)(nxml_t handle, const char *tag_name, unsigned len);
97 void (*attribute_begin)(nxml_t handle, const char *attr_name, unsigned len);
98 void (*attribute_value)(nxml_t handle, const char *attr_value, unsigned len);
99 void (*data)(nxml_t handle, const char *data, unsigned len);
100 void (*tag_end)(nxml_t handle, const char *tag_name, unsigned len);
/* Yields the smaller of A and B (B when they compare equal).
 * Function-like macro: the selected argument is expanded twice, so do not
 * pass expressions with side effects (e.g. min(i++, n)).
 * NOTE(review): the unprefixed lowercase name can clash with other
 * definitions of min if this is visible outside the parser -- confirm scope. */
103 #define min(A,B) ((A)<(B)?(A):(B))
/* Debug trace helper: expands to `printf X`, so X must be a complete
 * parenthesized argument list, e.g. DEBUG(("state=%d\n", state));
 * A declaration of printf (<stdio.h>) must be in scope wherever it is used. */
105 #define DEBUG(X) printf X
111 state_begin_tag, /* look for < or data */
112 state_tag_name, /* found <, looking for whole name */
113 state_end_tag_name, /* found </, looking for whole name */
114 state_attr_name, /* tag begun, looking for attr */
115 state_attr_value_equals, /* attr_name found, looking for = */
116 state_attr_value_quote, /* attr_name and = found, looking for quote */
117 state_attr_value, /* attr name found, sending attr_value */
118 state_finish_tag /* look for the >, ignoring everything else */
122 nxml_settings settings;
124 char namecache[NXML_MAX_NAME_SIZE];
129 #if defined(__cplusplus)
135 Open a nanoxml parsing handle.
138 The handle is required to maintain state between nxml_write calls.
139 You can open multiple handles and use them concurrently (there are no global variables).
/* handle:   out-parameter that receives the new parser handle.
 * settings: the callbacks the parser will invoke; every callback must be set.
 * NOTE(review): the meaning of the int return value is not documented in
 * this header -- presumably 0 on success; confirm against the implementation. */
141 int nxml_open(nxml_t *handle, const nxml_settings *settings);
145 Close a nanoxml parsing handle when you are done.
148 The handle becomes invalid.
/* handle: a handle previously obtained from nxml_open(); it is invalid after
 * this call and must not be passed to nxml_write() or nxml_close() again. */
150 void nxml_close(nxml_t handle);
157 You can write data in any amount that you want.
158 You will get 0 or more callbacks in response to a write call.
/* handle: parser handle obtained from nxml_open().
 * data:   the next chunk of XML input; chunks may be split at any point.
 * len:    number of chars available at data.
 * Zero or more of the registered callbacks are fired in response to each call.
 * NOTE(review): the meaning of the int return value is not documented in
 * this header -- presumably 0 on success; confirm against the implementation. */
160 int nxml_write(nxml_t handle, const char *data, unsigned len);
162 #if defined(__cplusplus)
166 #endif /* NANOXML_H__ */