3 * Copyright (C) Igor Sysoev
7 #include <ngx_config.h>
/*
 * Charset slot value meaning "no charset": it is stored for "charset off"
 * and returned by ngx_http_charset_get_charset() for an unknown name.
 */
12 #define NGX_HTTP_NO_CHARSET -2
/*
 * Added to a variable index when a charset directive is written as
 * "$variable"; slot values at or above it are resolved at request time
 * through ngx_http_get_indexed_variable().
 */
13 #define NGX_HTTP_CHARSET_VAR 0x10000
15 /* 1 byte length and up to 3 bytes for the UTF-8 encoding of the UCS-2 */
/*
 * Space to reserve for one numeric character reference written with the
 * "&#%uD;" format.  Code points above 0x10ffff are rejected, so the longest
 * possible entity is "&#1114111;" (10 bytes).  The macro must not be 0:
 * the output-buffer size checks rely on it before an entity is written.
 */
#define NGX_HTML_ENTITY_LEN (sizeof("&#1114111;") - 1)
33 } ngx_http_charset_recode_t;
41 } ngx_http_charset_tables_t;
/*
 * Module-wide configuration.
 *   charsets - every charset name registered by a directive
 *   tables   - one entry per "charset_map" block
 *   recodes  - (source, destination) charset pairs collected while merging
 *              location configurations; checked in postconfiguration
 */
45 ngx_array_t charsets; /* ngx_http_charset_t */
46 ngx_array_t tables; /* ngx_http_charset_tables_t */
47 ngx_array_t recodes; /* ngx_http_charset_recode_t */
48 } ngx_http_charset_main_conf_t;
/*
 * Per-location configuration (visible fields only).
 * source_charset holds NGX_CONF_UNSET, an index into the main charsets
 * array, or NGX_HTTP_CHARSET_VAR plus a variable index.
 */
53 ngx_int_t source_charset;
/* flag set by the "override_charset" directive; merged with default 0 */
54 ngx_flag_t override_charset;
/* keys collected by the "charset_types" directive, merged via ngx_http_merge_types() */
57 ngx_array_t *types_keys;
58 } ngx_http_charset_loc_conf_t;
/* Per-request recoding state (visible fields only). */
/* recycled chain links handed out again by ngx_http_charset_get_buf() */
66 ngx_chain_t *free_bufs;
/* recycled links with data buffers, searched by ngx_http_charset_get_buffer() */
67 ngx_chain_t *free_buffers;
/* bytes of an incomplete UTF-8 sequence kept until the next input buffer */
70 u_char saved[NGX_UTF_LEN];
75 } ngx_http_charset_ctx_t;
/*
 * State used while a "charset_map" block is parsed: the table being filled,
 * the destination charset, and a counter that ngx_http_charset_map_block()
 * uses to average the accumulated charset length.
 */
79 ngx_http_charset_tables_t *table;
80 ngx_http_charset_t *charset;
81 ngx_uint_t characters;
82 } ngx_http_charset_conf_ctx_t;
/* Forward declarations of the file-local helpers: charset lookup and setup, */
/* body recoding, buffer recycling, directive handlers and configuration hooks. */
85 static ngx_int_t ngx_http_charset_get_charset(ngx_http_charset_t *charsets,
86 ngx_uint_t n, ngx_str_t *charset);
87 static ngx_int_t ngx_http_charset_set_charset(ngx_http_request_t *r,
88 ngx_http_charset_t *charsets, ngx_int_t charset, ngx_int_t source_charset);
89 static ngx_uint_t ngx_http_charset_recode(ngx_buf_t *b, u_char *table);
90 static ngx_chain_t *ngx_http_charset_recode_from_utf8(ngx_pool_t *pool,
91 ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx);
92 static ngx_chain_t *ngx_http_charset_recode_to_utf8(ngx_pool_t *pool,
93 ngx_buf_t *buf, ngx_http_charset_ctx_t *ctx);
95 static ngx_chain_t *ngx_http_charset_get_buf(ngx_pool_t *pool,
96 ngx_http_charset_ctx_t *ctx);
97 static ngx_chain_t *ngx_http_charset_get_buffer(ngx_pool_t *pool,
98 ngx_http_charset_ctx_t *ctx, size_t size);
100 static char *ngx_http_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd,
102 static char *ngx_http_charset_map(ngx_conf_t *cf, ngx_command_t *dummy,
105 static char *ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd,
107 static ngx_int_t ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name);
109 static void *ngx_http_charset_create_main_conf(ngx_conf_t *cf);
110 static void *ngx_http_charset_create_loc_conf(ngx_conf_t *cf);
111 static char *ngx_http_charset_merge_loc_conf(ngx_conf_t *cf,
112 void *parent, void *child);
113 static ngx_int_t ngx_http_charset_postconfiguration(ngx_conf_t *cf);
/*
 * Default MIME type list: it is the post data of the "charset_types"
 * directive and the default passed to ngx_http_merge_types().
 */
116 ngx_str_t ngx_http_charset_default_types[] = {
117 ngx_string("text/html"),
118 ngx_string("text/xml"),
119 ngx_string("text/plain"),
120 ngx_string("text/vnd.wap.wml"),
121 ngx_string("application/x-javascript"),
122 ngx_string("application/rss+xml"),
/* Configuration directives provided by the charset filter module. */
127 static ngx_command_t ngx_http_charset_filter_commands[] = {
/* "charset" and "source_charset" take one argument and share one handler */
129 { ngx_string("charset"),
130 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
131 |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1,
132 ngx_http_set_charset_slot,
133 NGX_HTTP_LOC_CONF_OFFSET,
134 offsetof(ngx_http_charset_loc_conf_t, charset),
137 { ngx_string("source_charset"),
138 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
139 |NGX_HTTP_LIF_CONF|NGX_CONF_TAKE1,
140 ngx_http_set_charset_slot,
141 NGX_HTTP_LOC_CONF_OFFSET,
142 offsetof(ngx_http_charset_loc_conf_t, source_charset),
/* "override_charset" is a plain on/off flag */
145 { ngx_string("override_charset"),
146 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF
147 |NGX_HTTP_LIF_CONF|NGX_CONF_FLAG,
148 ngx_conf_set_flag_slot,
149 NGX_HTTP_LOC_CONF_OFFSET,
150 offsetof(ngx_http_charset_loc_conf_t, override_charset),
/* "charset_types" takes one or more arguments; its post data is the default type list */
153 { ngx_string("charset_types"),
154 NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_1MORE,
156 NGX_HTTP_LOC_CONF_OFFSET,
157 offsetof(ngx_http_charset_loc_conf_t, types_keys),
158 &ngx_http_charset_default_types[0] },
/* "charset_map" is a block with two arguments, allowed at the main http level only */
160 { ngx_string("charset_map"),
161 NGX_HTTP_MAIN_CONF|NGX_CONF_BLOCK|NGX_CONF_TAKE2,
162 ngx_http_charset_map_block,
163 NGX_HTTP_MAIN_CONF_OFFSET,
/*
 * HTTP module hooks: the module installs its filters in postconfiguration
 * and provides main-level and location-level configuration only.
 */
171 static ngx_http_module_t ngx_http_charset_filter_module_ctx = {
172 NULL, /* preconfiguration */
173 ngx_http_charset_postconfiguration, /* postconfiguration */
175 ngx_http_charset_create_main_conf, /* create main configuration */
176 NULL, /* init main configuration */
178 NULL, /* create server configuration */
179 NULL, /* merge server configuration */
181 ngx_http_charset_create_loc_conf, /* create location configuration */
182 ngx_http_charset_merge_loc_conf /* merge location configuration */
/* Module definition: an HTTP module with no process or thread lifecycle callbacks. */
186 ngx_module_t ngx_http_charset_filter_module = {
188 &ngx_http_charset_filter_module_ctx, /* module context */
189 ngx_http_charset_filter_commands, /* module directives */
190 NGX_HTTP_MODULE, /* module type */
191 NULL, /* init master */
192 NULL, /* init module */
193 NULL, /* init process */
194 NULL, /* init thread */
195 NULL, /* exit thread */
196 NULL, /* exit process */
197 NULL, /* exit master */
198 NGX_MODULE_V1_PADDING
/*
 * Previous heads of the header and body filter chains, saved by
 * ngx_http_charset_postconfiguration(); this module's filters call them to
 * pass a request further down the chain.
 */
202 static ngx_http_output_header_filter_pt ngx_http_next_header_filter;
203 static ngx_http_output_body_filter_pt ngx_http_next_body_filter;
/*
 * Header filter: chooses the charset to set on the response and, when a
 * source charset is known and differs from it, hands over to
 * ngx_http_charset_set_charset() so that the body can be recoded.
 * NOTE(review): only part of this function is visible here, so the exact
 * nesting of the branches below is not documented; confirm against the
 * complete file.
 */
207 ngx_http_charset_header_filter(ngx_http_request_t *r)
209 ngx_int_t charset, source_charset;
210 ngx_str_t *mc, *from, *to, s;
212 ngx_http_charset_t *charsets;
213 ngx_http_charset_ctx_t *ctx;
214 ngx_http_variable_value_t *vv;
215 ngx_http_charset_loc_conf_t *lcf, *mlcf;
216 ngx_http_charset_main_conf_t *mcf;
218 mcf = ngx_http_get_module_main_conf(r, ngx_http_charset_filter_module);
220 charsets = mcf->charsets.elts;
221 n = mcf->charsets.nelts;
223 /* destination charset */
/* a response carrying a non-empty Content-Encoding is passed on */
227 if (r->headers_out.content_encoding
228 && r->headers_out.content_encoding->value.len)
230 return ngx_http_next_header_filter(r);
/* so is a response without a content type */
233 if (r->headers_out.content_type.len == 0) {
234 return ngx_http_next_header_filter(r);
/* a charset requested through headers_out.override_charset must be a known one */
237 if (r->headers_out.override_charset
238 && r->headers_out.override_charset->len)
240 charset = ngx_http_charset_get_charset(charsets, n,
241 r->headers_out.override_charset);
243 if (charset == NGX_HTTP_NO_CHARSET) {
244 ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
245 "unknown charset \"%V\" to override",
246 r->headers_out.override_charset);
248 return ngx_http_next_header_filter(r);
/* charset configured for the location */
252 mlcf = ngx_http_get_module_loc_conf(r,
253 ngx_http_charset_filter_module);
254 charset = mlcf->charset;
256 if (charset == NGX_HTTP_NO_CHARSET) {
257 return ngx_http_next_header_filter(r);
/* a charset already present on the response is kept unless override_charset is on */
260 if (r->headers_out.charset.len) {
261 if (mlcf->override_charset == 0) {
262 return ngx_http_next_header_filter(r);
/* only the configured content types are processed */
266 if (ngx_http_test_content_type(r, &mlcf->types) == NULL) {
267 return ngx_http_next_header_filter(r);
/* the charset is given by a variable: fetch it by index, then look a name up */
271 if (charset >= NGX_HTTP_CHARSET_VAR) {
272 vv = ngx_http_get_indexed_variable(r,
273 charset - NGX_HTTP_CHARSET_VAR);
275 if (vv == NULL || vv->not_found) {
282 charset = ngx_http_charset_get_charset(charsets, n, &s);
/* the filter context and the charset are taken from the main request */
287 ctx = ngx_http_get_module_ctx(r->main, ngx_http_charset_filter_module);
291 mc = &r->main->headers_out.charset;
294 return ngx_http_next_header_filter(r);
297 ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
302 ngx_http_set_ctx(r->main, ctx, ngx_http_charset_filter_module);
304 charset = ngx_http_charset_get_charset(charsets, n, mc);
306 ctx->charset = charset;
309 charset = ctx->charset;
/* no charset on the response: the source charset comes from the location configuration */
315 if (r->headers_out.charset.len == 0) {
316 lcf = ngx_http_get_module_loc_conf(r, ngx_http_charset_filter_module);
318 source_charset = lcf->source_charset;
320 if (source_charset >= NGX_HTTP_CHARSET_VAR) {
321 vv = ngx_http_get_indexed_variable(r,
322 source_charset - NGX_HTTP_CHARSET_VAR);
324 if (vv == NULL || vv->not_found) {
331 source_charset = ngx_http_charset_get_charset(charsets, n, &s);
334 if (charset != NGX_HTTP_NO_CHARSET) {
335 return ngx_http_charset_set_charset(r, mcf->charsets.elts, charset,
339 if (source_charset == NGX_CONF_UNSET) {
340 return ngx_http_next_header_filter(r);
343 from = &charsets[source_charset].name;
344 to = &r->main->headers_out.charset;
/* the charset already set on the response is looked up and used as the source */
349 source_charset = ngx_http_charset_get_charset(charsets, n,
350 &r->headers_out.charset);
352 if (charset == NGX_HTTP_NO_CHARSET
353 || source_charset == NGX_HTTP_NO_CHARSET)
355 if (charset != source_charset
356 || ngx_strcasecmp(r->main->headers_out.charset.data,
357 r->headers_out.charset.data)
360 from = &r->headers_out.charset;
361 to = (charset == NGX_HTTP_NO_CHARSET) ?
362 &r->main->headers_out.charset:
363 &charsets[charset].name;
368 return ngx_http_next_header_filter(r);
/* recoding needs a table from the source charset to the destination charset */
371 if (source_charset != charset
372 && (charsets[source_charset].tables == NULL
373 || charsets[source_charset].tables[charset] == NULL))
375 from = &charsets[source_charset].name;
376 to = &charsets[charset].name;
381 r->headers_out.content_type.len = r->headers_out.content_type_len;
383 return ngx_http_charset_set_charset(r, mcf->charsets.elts, charset,
/* reports the charset pair collected in "from" and "to" above */
388 ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
389 "no \"charset_map\" between the charsets \"%V\" and \"%V\"",
392 return ngx_http_next_header_filter(r);
/*
 * Searches the first n entries of charsets for a name of the same length
 * that matches *charset case-insensitively.
 * Returns NGX_HTTP_NO_CHARSET when the loop finds nothing.
 */
397 ngx_http_charset_get_charset(ngx_http_charset_t *charsets, ngx_uint_t n,
402 for (i = 0; i < n; i++) {
/* length test before the byte comparison */
403 if (charsets[i].name.len != charset->len) {
407 if (ngx_strncasecmp(charsets[i].name.data, charset->data, charset->len)
414 return NGX_HTTP_NO_CHARSET;
/*
 * Stores the chosen charset on the response and, when the body has to be
 * recoded, creates the per-request recoding context.
 *
 * charset and source_charset index into charsets; source_charset may also
 * be NGX_CONF_UNSET.  The visible return paths all hand the request to the
 * next header filter.
 */
419 ngx_http_charset_set_charset(ngx_http_request_t *r,
420 ngx_http_charset_t *charsets, ngx_int_t charset, ngx_int_t source_charset)
422 ngx_http_charset_ctx_t *ctx;
424 if (r->headers_out.status == NGX_HTTP_MOVED_PERMANENTLY
425 || r->headers_out.status == NGX_HTTP_MOVED_TEMPORARILY)
428 * do not set charset for the redirect because NN 4.x
429 * use this charset instead of the next page charset
432 r->headers_out.charset.len = 0;
434 return ngx_http_next_header_filter(r);
437 r->headers_out.charset = charsets[charset].name;
438 r->utf8 = charsets[charset].utf8;
/* no source charset, or the same one: the body needs no recoding */
440 if (source_charset == NGX_CONF_UNSET || source_charset == charset) {
441 return ngx_http_next_header_filter(r);
444 ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_charset_ctx_t));
449 ngx_http_set_ctx(r, ctx, ngx_http_charset_filter_module);
/* conversion table from the source charset to the destination charset */
451 ctx->table = charsets[source_charset].tables[charset];
452 ctx->charset = charset;
453 ctx->length = charsets[charset].length;
454 ctx->from_utf8 = charsets[source_charset].utf8;
455 ctx->to_utf8 = charsets[charset].utf8;
457 r->filter_need_in_memory = 1;
/* with UTF-8 on either side the content length of the main request is cleared */
/* (presumably because the body size changes -- confirm) */
459 if ((ctx->to_utf8 || ctx->from_utf8) && r == r->main) {
460 ngx_http_clear_content_length(r);
463 r->filter_need_temporary = 1;
466 return ngx_http_next_header_filter(r);
/*
 * Body filter.  Without a context or a conversion table the chain is passed
 * on untouched.  When UTF-8 is involved (or busy buffers are still
 * outstanding) each input buffer is converted through
 * ngx_http_charset_recode_to_utf8() / ngx_http_charset_recode_from_utf8()
 * and the result is sent on; otherwise the buffers are recoded by
 * ngx_http_charset_recode() and the input chain itself is passed on.
 * NOTE(review): parts of the function are not visible here.
 */
471 ngx_http_charset_body_filter(ngx_http_request_t *r, ngx_chain_t *in)
475 ngx_chain_t *cl, *out, **ll;
476 ngx_http_charset_ctx_t *ctx;
478 ctx = ngx_http_get_module_ctx(r, ngx_http_charset_filter_module);
480 if (ctx == NULL || ctx->table == NULL) {
481 return ngx_http_next_body_filter(r, in);
484 if ((ctx->to_utf8 || ctx->from_utf8) || ctx->busy) {
489 for (cl = in; cl; cl = cl->next) {
/* zero-size buffers get a chain link of their own instead of being recoded */
492 if (ngx_buf_size(b) == 0) {
494 *ll = ngx_alloc_chain_link(r->pool);
508 *ll = ngx_http_charset_recode_to_utf8(r->pool, b, ctx);
511 *ll = ngx_http_charset_recode_from_utf8(r->pool, b, ctx);
523 rc = ngx_http_next_body_filter(r, out);
526 if (ctx->busy == NULL) {
/* walk to the last link of the busy chain */
530 for (cl = ctx->busy; cl->next; cl = cl->next) { /* void */ }
/* a buffer that still has unsent data stops the recycling */
540 if (ngx_buf_size(b) != 0) {
544 ctx->busy = cl->next;
546 if (b->tag != (ngx_buf_tag_t) &ngx_http_charset_filter_module) {
/* mark the shadowed input buffer as fully consumed */
551 b->shadow->pos = b->shadow->last;
/* return the link to one of the two free lists for reuse */
555 cl->next = ctx->free_buffers;
556 ctx->free_buffers = cl;
560 cl->next = ctx->free_bufs;
/* recoding path without UTF-8: recode each buffer, pass the same chain on */
567 for (cl = in; cl; cl = cl->next) {
568 (void) ngx_http_charset_recode(cl->buf, ctx->table);
571 return ngx_http_next_body_filter(r, in);
/*
 * Recoding of buffer b through a table indexed by the byte value.
 * The visible tests compare each byte, starting at b->pos, with its table
 * entry to find bytes that the table maps to a different value.
 * NOTE(review): the statements that write the buffer and compute the return
 * value are not visible here.
 */
576 ngx_http_charset_recode(ngx_buf_t *b, u_char *table)
582 for (p = b->pos; p < last; p++) {
584 if (*p != table[*p]) {
594 if (*p != table[*p]) {
/*
 * Converts a buffer of UTF-8 text into the destination charset and returns
 * the resulting chain.  ctx->table is used as an array of pointers.
 * Characters are decoded with ngx_utf8_decode(); "&#%uD;" numeric
 * references are written for some decoded values.  A UTF-8 sequence cut
 * off by the end of the buffer is kept in ctx->saved and completed from
 * the next buffer.
 * NOTE(review): much of the function is not visible here; the comments
 * below describe the visible statements only.
 */
609 ngx_http_charset_recode_from_utf8(ngx_pool_t *pool, ngx_buf_t *buf,
610 ngx_http_charset_ctx_t *ctx)
613 u_char c, *p, *src, *dst, *saved, **table;
617 ngx_chain_t *out, *cl, **ll;
/* nothing carried over from the previous buffer */
621 if (ctx->saved_len == 0) {
623 for ( /* void */ ; src < buf->last; src++) {
/* number of leading bytes skipped by the scan above */
629 len = src - buf->pos;
632 out = ngx_http_charset_get_buf(pool, ctx);
/* the new buffer inherits the flags of the input buffer */
639 b->temporary = buf->temporary;
640 b->memory = buf->memory;
642 b->flush = buf->flush;
650 size = buf->last - src;
653 n = ngx_utf8_decode(&saved, size);
655 if (n == 0xfffffffe) {
656 /* incomplete UTF-8 symbol */
658 ngx_memcpy(ctx->saved, src, size);
659 ctx->saved_len = size;
668 size = len + buf->last - src;
/* make sure the output buffer can take at least one numeric entity */
672 if (size < NGX_HTML_ENTITY_LEN) {
673 size += NGX_HTML_ENTITY_LEN;
676 cl = ngx_http_charset_get_buffer(pool, ctx, size);
694 out = ngx_alloc_chain_link(pool);
705 /* process incomplete UTF sequence from previous buffer */
707 ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pool->log, 0,
708 "http charset utf saved: %z", ctx->saved_len);
/* append bytes of this buffer to the saved ones, up to NGX_UTF_LEN in total */
712 for (i = ctx->saved_len; i < NGX_UTF_LEN; i++) {
713 ctx->saved[i] = *p++;
715 if (p == buf->last) {
721 n = ngx_utf8_decode(&saved, i);
726 table = (u_char **) ctx->table;
733 } else if (n == 0xfffffffe) {
735 /* incomplete UTF-8 symbol */
737 if (i < NGX_UTF_LEN) {
738 out = ngx_http_charset_get_buf(pool, ctx);
750 ngx_memcpy(&ctx->saved[ctx->saved_len], src, i);
757 size = buf->last - buf->pos;
759 if (size < NGX_HTML_ENTITY_LEN) {
760 size += NGX_HTML_ENTITY_LEN;
763 cl = ngx_http_charset_get_buffer(pool, ctx, size);
776 } else if (n == 0xfffffffe) {
779 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
780 "http charset invalid utf 0");
782 saved = &ctx->saved[NGX_UTF_LEN];
/* values above 0x10ffff are logged as invalid */
784 } else if (n > 0x10ffff) {
787 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
788 "http charset invalid utf 1");
791 dst = ngx_sprintf(dst, "&#%uD;", n);
/* advance src by the bytes of this buffer used to complete the saved sequence */
794 src += (saved - ctx->saved) - ctx->saved_len;
801 table = (u_char **) ctx->table;
803 while (src < buf->last) {
/* get another output buffer when the current one cannot hold one more entity */
805 if ((size_t) (b->end - dst) < NGX_HTML_ENTITY_LEN) {
808 size = buf->last - src + NGX_HTML_ENTITY_LEN;
810 cl = ngx_http_charset_get_buffer(pool, ctx, size);
827 len = buf->last - src;
829 n = ngx_utf8_decode(&src, len);
844 dst = ngx_sprintf(dst, "&#%uD;", n);
849 if (n == 0xfffffffe) {
850 /* incomplete UTF-8 symbol */
852 ngx_memcpy(ctx->saved, src, len);
853 ctx->saved_len = len;
866 ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pool->log, 0,
867 "http charset invalid utf 2");
874 dst = ngx_sprintf(dst, "&#%uD;", n);
/* the last output buffer takes over the end-of-stream and flush flags */
879 b->last_buf = buf->last_buf;
880 b->last_in_chain = buf->last_in_chain;
881 b->flush = buf->flush;
/*
 * Converts a buffer of single-byte text to UTF-8 and returns the resulting
 * chain.  The table is indexed with a stride of NGX_UTF_LEN bytes per
 * source byte; the first byte of an entry is compared with '\1' in the
 * scan below.
 * NOTE(review): parts of the function are not visible here.
 */
890 ngx_http_charset_recode_to_utf8(ngx_pool_t *pool, ngx_buf_t *buf,
891 ngx_http_charset_ctx_t *ctx)
894 u_char *p, *src, *dst, *table;
896 ngx_chain_t *out, *cl, **ll;
/* scan the input, testing the first byte of each table entry */
900 for (src = buf->pos; src < buf->last; src++) {
901 if (table[*src * NGX_UTF_LEN] == '\1') {
908 out = ngx_alloc_chain_link(pool);
921 * we assume that there are about half of characters to be recoded,
922 * so we preallocate "size / 2 + size / 2 * ctx->length"
/* number of leading bytes passed by the scan above */
925 len = src - buf->pos;
928 out = ngx_http_charset_get_buf(pool, ctx);
/* the new buffer inherits the flags of the input buffer */
935 b->temporary = buf->temporary;
936 b->memory = buf->memory;
938 b->flush = buf->flush;
946 size = buf->last - src;
947 size = size / 2 + size / 2 * ctx->length;
952 size = buf->last - src;
953 size = len + size / 2 + size / 2 * ctx->length;
958 cl = ngx_http_charset_get_buffer(pool, ctx, size);
975 while (src < buf->last) {
/* table entry for the next source byte */
977 p = &table[*src++ * NGX_UTF_LEN];
/* get another output buffer when fewer than len bytes are left */
980 if ((size_t) (b->end - dst) < len) {
983 size = buf->last - src;
984 size = len + size / 2 + size / 2 * ctx->length;
986 cl = ngx_http_charset_get_buffer(pool, ctx, size);
/* the last output buffer takes over the end-of-stream and flush flags */
1006 b->last_buf = buf->last_buf;
1007 b->last_in_chain = buf->last_in_chain;
1008 b->flush = buf->flush;
/*
 * Returns a chain link with a buffer header.  A link from ctx->free_bufs
 * is reused when one is available; otherwise a link and a zeroed buffer
 * header are allocated from pool and tagged with this module.
 */
1016 static ngx_chain_t *
1017 ngx_http_charset_get_buf(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx)
1021 cl = ctx->free_bufs;
/* reuse path: unlink the first free entry and reset its shadow pointer */
1024 ctx->free_bufs = cl->next;
1026 cl->buf->shadow = NULL;
1032 cl = ngx_alloc_chain_link(pool);
1037 cl->buf = ngx_calloc_buf(pool);
1038 if (cl->buf == NULL) {
/* the tag lets the body filter recognise buffers created by this module */
1044 cl->buf->tag = (ngx_buf_tag_t) &ngx_http_charset_filter_module;
/*
 * Returns a chain link whose buffer has room for at least size bytes.
 * ctx->free_buffers is searched for a buffer that is large enough;
 * otherwise a new temporary buffer of size bytes is created from pool and
 * tagged with this module.
 */
1050 static ngx_chain_t *
1051 ngx_http_charset_get_buffer(ngx_pool_t *pool, ngx_http_charset_ctx_t *ctx,
1055 ngx_chain_t *cl, **ll;
/* ll follows the link that points at cl while the free list is walked */
1057 for (ll = &ctx->free_buffers, cl = ctx->free_buffers;
1059 ll = &cl->next, cl = cl->next)
/* the capacity of the buffer is compared, not its current contents */
1063 if ((size_t) (b->end - b->start) >= size) {
1075 cl = ngx_alloc_chain_link(pool);
1080 cl->buf = ngx_create_temp_buf(pool, size);
1081 if (cl->buf == NULL) {
1087 cl->buf->temporary = 1;
1088 cl->buf->tag = (ngx_buf_tag_t) &ngx_http_charset_filter_module;
/*
 * Handler of the "charset_map" block directive.  Registers both charset
 * names, rejects a duplicate map, allocates and pre-fills the two
 * conversion tables, then parses the block body with
 * ngx_http_charset_map() as the handler.
 * Returns NGX_CONF_ERROR on the visible failure paths.
 */
1095 ngx_http_charset_map_block(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
1097 ngx_http_charset_main_conf_t *mcf = conf;
1100 u_char *p, *dst2src, **pp;
1105 ngx_http_charset_t *charset;
1106 ngx_http_charset_tables_t *table;
1107 ngx_http_charset_conf_ctx_t ctx;
1109 value = cf->args->elts;
/* value[1] is the source charset name, value[2] the destination one */
1111 src = ngx_http_add_charset(&mcf->charsets, &value[1]);
1112 if (src == NGX_ERROR) {
1113 return NGX_CONF_ERROR;
1116 dst = ngx_http_add_charset(&mcf->charsets, &value[2]);
1117 if (dst == NGX_ERROR) {
1118 return NGX_CONF_ERROR;
1122 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
1123 "\"charset_map\" between the same charsets "
1124 "\"%V\" and \"%V\"", &value[1], &value[2]);
1125 return NGX_CONF_ERROR;
/* a map between the same two charsets, in either direction, may be given once */
/* NOTE(review): the test reads table->src and table->dst without using i; */
/* unless table is advanced on a line not visible here, only the first */
/* table is ever compared -- confirm. */
1128 table = mcf->tables.elts;
1129 for (i = 0; i < mcf->tables.nelts; i++) {
1130 if ((src == table->src && dst == table->dst)
1131 || (src == table->dst && dst == table->src))
1133 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
1134 "duplicate \"charset_map\" between "
1135 "\"%V\" and \"%V\"", &value[1], &value[2]);
1136 return NGX_CONF_ERROR;
1140 table = ngx_array_push(&mcf->tables);
1141 if (table == NULL) {
1142 return NGX_CONF_ERROR;
/* destination "utf-8": src2dst has 256 entries of NGX_UTF_LEN bytes and */
/* dst2src is an array of 256 pointers to 256-byte pages */
1148 if (ngx_strcasecmp(value[2].data, (u_char *) "utf-8") == 0) {
1149 table->src2dst = ngx_pcalloc(cf->pool, 256 * NGX_UTF_LEN);
1150 if (table->src2dst == NULL) {
1151 return NGX_CONF_ERROR;
1154 table->dst2src = ngx_pcalloc(cf->pool, 256 * sizeof(void *));
1155 if (table->dst2src == NULL) {
1156 return NGX_CONF_ERROR;
1159 dst2src = ngx_pcalloc(cf->pool, 256);
1160 if (dst2src == NULL) {
1161 return NGX_CONF_ERROR;
1164 pp = (u_char **) &table->dst2src[0];
/* bytes below 128 map back to themselves */
1167 for (i = 0; i < 128; i++) {
1168 p = &table->src2dst[i * NGX_UTF_LEN];
1171 dst2src[i] = (u_char) i;
1174 for (/* void */; i < 256; i++) {
1175 p = &table->src2dst[i * NGX_UTF_LEN];
/* other destinations: two flat 256-byte tables */
1181 table->src2dst = ngx_palloc(cf->pool, 256);
1182 if (table->src2dst == NULL) {
1183 return NGX_CONF_ERROR;
1186 table->dst2src = ngx_palloc(cf->pool, 256);
1187 if (table->dst2src == NULL) {
1188 return NGX_CONF_ERROR;
/* identity below 128, '?' as the default for the upper half */
1191 for (i = 0; i < 128; i++) {
1192 table->src2dst[i] = (u_char) i;
1193 table->dst2src[i] = (u_char) i;
1196 for (/* void */; i < 256; i++) {
1197 table->src2dst[i] = '?';
1198 table->dst2src[i] = '?';
1202 charset = mcf->charsets.elts;
1205 ctx.charset = &charset[dst];
/* parse the block body, one mapping per call of the handler */
1210 cf->handler = ngx_http_charset_map;
1211 cf->handler_conf = conf;
1213 rv = ngx_conf_parse(cf, NULL);
/* turn the accumulated length into an average, rounded up when the */
/* first decimal digit of the quotient is above 4 */
1217 if (ctx.characters) {
1218 n = ctx.charset->length;
1219 ctx.charset->length /= ctx.characters;
1221 if (((n * 10) / ctx.characters) % 10 > 4) {
1222 ctx.charset->length++;
/*
 * Handler for one entry inside a "charset_map" block.  It takes exactly
 * two hexadecimal values: value[0] is the source byte (0..255) and
 * value[1] its representation in the destination charset.
 * Returns NGX_CONF_ERROR for a wrong argument count or an invalid value.
 */
1231 ngx_http_charset_map(ngx_conf_t *cf, ngx_command_t *dummy, void *conf)
1233 u_char *p, *dst2src, **pp;
1238 ngx_http_charset_tables_t *table;
1239 ngx_http_charset_conf_ctx_t *ctx;
1241 if (cf->args->nelts != 2) {
1242 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "invalid parameters number");
1243 return NGX_CONF_ERROR;
1246 value = cf->args->elts;
1248 src = ngx_hextoi(value[0].data, value[0].len);
1249 if (src == NGX_ERROR || src > 255) {
1250 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
1251 "invalid value \"%V\"", &value[0]);
1252 return NGX_CONF_ERROR;
/* UTF-8 destination: the entry is a length byte followed by the bytes */
/* given as pairs of hex digits */
/* NOTE(review): no visible check that value[1].len / 2 fits into the */
/* NGX_UTF_LEN - 1 bytes that follow the length byte -- confirm. */
1258 if (ctx->charset->utf8) {
1259 p = &table->src2dst[src * NGX_UTF_LEN];
1261 *p++ = (u_char) (value[1].len / 2);
1263 for (i = 0; i < value[1].len; i += 2) {
1264 dst = ngx_hextoi(&value[1].data[i], 2);
1265 if (dst == NGX_ERROR || dst > 255) {
1266 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
1267 "invalid value \"%V\"", &value[1]);
1268 return NGX_CONF_ERROR;
1271 *p++ = (u_char) dst;
/* accumulated here, averaged later in ngx_http_charset_map_block() */
1276 ctx->charset->length += i;
/* decode the bytes just stored, skipping the length byte */
1279 p = &table->src2dst[src * NGX_UTF_LEN] + 1;
1281 n = ngx_utf8_decode(&p, i);
1284 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
1285 "invalid value \"%V\"", &value[1]);
1286 return NGX_CONF_ERROR;
/* reverse table: a page selected by n >> 8, indexed by the low byte of n; */
/* pages are allocated on first use */
1289 pp = (u_char **) &table->dst2src[0];
1291 dst2src = pp[n >> 8];
1293 if (dst2src == NULL) {
1294 dst2src = ngx_pcalloc(cf->pool, 256);
1295 if (dst2src == NULL) {
1296 return NGX_CONF_ERROR;
1299 pp[n >> 8] = dst2src;
1302 dst2src[n & 0xff] = (u_char) src;
/* single-byte destination: one hex value, stored in both directions */
1305 dst = ngx_hextoi(value[1].data, value[1].len);
1306 if (dst == NGX_ERROR || dst > 255) {
1307 ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
1308 "invalid value \"%V\"", &value[1]);
1309 return NGX_CONF_ERROR;
1312 table->src2dst[src] = (u_char) dst;
1313 table->dst2src[dst] = (u_char) src;
/*
 * Handler shared by the "charset" and "source_charset" directives; the
 * target slot is found through cmd->offset.  The slot receives
 * NGX_HTTP_NO_CHARSET for "charset off", NGX_HTTP_CHARSET_VAR plus the
 * variable index for a "$variable" argument, and otherwise the index
 * returned by ngx_http_add_charset().
 * Returns "is duplicate" when the slot is already set.
 */
1321 ngx_http_set_charset_slot(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
1326 ngx_str_t *value, var;
1327 ngx_http_charset_main_conf_t *mcf;
1329 cp = (ngx_int_t *) (p + cmd->offset);
1331 if (*cp != NGX_CONF_UNSET) {
1332 return "is duplicate";
1335 value = cf->args->elts;
/* "off" is special only for the "charset" directive */
1337 if (cmd->offset == offsetof(ngx_http_charset_loc_conf_t, charset)
1338 && ngx_strcmp(value[1].data, "off") == 0)
1340 *cp = NGX_HTTP_NO_CHARSET;
/* "$name": strip the '$' and store the variable index with the marker offset */
1345 if (value[1].data[0] == '$') {
1346 var.len = value[1].len - 1;
1347 var.data = value[1].data + 1;
1349 *cp = ngx_http_get_variable_index(cf, &var);
1351 if (*cp == NGX_ERROR) {
1352 return NGX_CONF_ERROR;
1355 *cp += NGX_HTTP_CHARSET_VAR;
/* a literal charset name is registered in the main configuration */
1360 mcf = ngx_http_conf_get_module_main_conf(cf,
1361 ngx_http_charset_filter_module);
1363 *cp = ngx_http_add_charset(&mcf->charsets, &value[1]);
1364 if (*cp == NGX_ERROR) {
1365 return NGX_CONF_ERROR;
/*
 * Looks name up in the charsets array (same length, case-insensitive) and
 * pushes a new element when the search does not stop early.  The name is
 * also compared with "utf-8".
 * NOTE(review): the return statements are not visible here; callers treat
 * the result as an index and compare it with NGX_ERROR.
 */
1373 ngx_http_add_charset(ngx_array_t *charsets, ngx_str_t *name)
1376 ngx_http_charset_t *c;
1379 for (i = 0; i < charsets->nelts; i++) {
1380 if (name->len != c[i].name.len) {
1384 if (ngx_strcasecmp(name->data, c[i].name.data) == 0) {
/* the loop stopped before the end of the array */
1389 if (i < charsets->nelts) {
1393 c = ngx_array_push(charsets);
1402 if (ngx_strcasecmp(name->data, (u_char *) "utf-8") == 0) {
/*
 * Allocates the zeroed main configuration and initialises its three arrays
 * (charsets, tables, recodes) with initial capacities of 2, 1 and 2.
 * The visible failure paths return NGX_CONF_ERROR.
 */
1414 ngx_http_charset_create_main_conf(ngx_conf_t *cf)
1416 ngx_http_charset_main_conf_t *mcf;
1418 mcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_main_conf_t));
1420 return NGX_CONF_ERROR;
1423 if (ngx_array_init(&mcf->charsets, cf->pool, 2, sizeof(ngx_http_charset_t))
1426 return NGX_CONF_ERROR;
1429 if (ngx_array_init(&mcf->tables, cf->pool, 1,
1430 sizeof(ngx_http_charset_tables_t))
1433 return NGX_CONF_ERROR;
1436 if (ngx_array_init(&mcf->recodes, cf->pool, 2,
1437 sizeof(ngx_http_charset_recode_t))
1440 return NGX_CONF_ERROR;
/*
 * Allocates a zeroed location configuration and marks charset,
 * source_charset and override_charset as unset so that the merge step can
 * inherit them from the parent level.
 */
1448 ngx_http_charset_create_loc_conf(ngx_conf_t *cf)
1450 ngx_http_charset_loc_conf_t *lcf;
1452 lcf = ngx_pcalloc(cf->pool, sizeof(ngx_http_charset_loc_conf_t));
1454 return NGX_CONF_ERROR;
1458 * set by ngx_pcalloc():
1460 * lcf->types = { NULL };
1461 * lcf->types_keys = NULL;
1464 lcf->charset = NGX_CONF_UNSET;
1465 lcf->source_charset = NGX_CONF_UNSET;
1466 lcf->override_charset = NGX_CONF_UNSET;
/*
 * Merges a location configuration with its parent: the content types,
 * override_charset (default 0), charset (default NGX_HTTP_NO_CHARSET) and
 * source_charset (inherited when unset).  A (source, destination) pair of
 * literal charsets that is not yet in mcf->recodes is appended to it.
 */
1473 ngx_http_charset_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child)
1475 ngx_http_charset_loc_conf_t *prev = parent;
1476 ngx_http_charset_loc_conf_t *conf = child;
1479 ngx_http_charset_recode_t *recode;
1480 ngx_http_charset_main_conf_t *mcf;
1482 if (ngx_http_merge_types(cf, conf->types_keys, &conf->types,
1483 prev->types_keys, &prev->types,
1484 ngx_http_charset_default_types)
1487 return NGX_CONF_ERROR;
1490 ngx_conf_merge_value(conf->override_charset, prev->override_charset, 0);
1491 ngx_conf_merge_value(conf->charset, prev->charset, NGX_HTTP_NO_CHARSET);
1493 if (conf->source_charset == NGX_CONF_UNSET) {
1494 conf->source_charset = prev->source_charset;
/* tested: charset off, no source charset, or both charsets the same */
1497 if (conf->charset == NGX_HTTP_NO_CHARSET
1498 || conf->source_charset == NGX_CONF_UNSET
1499 || conf->charset == conf->source_charset)
/* tested: either charset given as a variable */
1504 if (conf->source_charset >= NGX_HTTP_CHARSET_VAR
1505 || conf->charset >= NGX_HTTP_CHARSET_VAR)
/* look for the pair among the recodes recorded so far */
1510 mcf = ngx_http_conf_get_module_main_conf(cf,
1511 ngx_http_charset_filter_module);
1512 recode = mcf->recodes.elts;
1513 for (i = 0; i < mcf->recodes.nelts; i++) {
1514 if (conf->source_charset == recode[i].src
1515 && conf->charset == recode[i].dst)
1521 recode = ngx_array_push(&mcf->recodes);
1522 if (recode == NULL) {
1523 return NGX_CONF_ERROR;
1526 recode->src = conf->source_charset;
1527 recode->dst = conf->charset;
/*
 * Postconfiguration step.  Every recorded recode pair is matched against
 * the "charset_map" tables in both directions, and a missing map is
 * reported.  Each charset then gets an array, indexed by the other
 * charset, of pointers to the conversion tables.  Finally the module's
 * header and body filters are put at the head of the filter chains.
 */
1534 ngx_http_charset_postconfiguration(ngx_conf_t *cf)
1536 u_char **src, **dst;
1539 ngx_http_charset_t *charset;
1540 ngx_http_charset_recode_t *recode;
1541 ngx_http_charset_tables_t *tables;
1542 ngx_http_charset_main_conf_t *mcf;
1544 mcf = ngx_http_conf_get_module_main_conf(cf,
1545 ngx_http_charset_filter_module);
1547 recode = mcf->recodes.elts;
1548 tables = mcf->tables.elts;
1549 charset = mcf->charsets.elts;
1551 for (i = 0; i < mcf->recodes.nelts; i++) {
1555 for (t = 0; t < mcf->tables.nelts; t++) {
/* a table defined in either direction matches the pair */
1557 if (c == tables[t].src && recode[i].dst == tables[t].dst) {
1561 if (c == tables[t].dst && recode[i].dst == tables[t].src) {
1566 ngx_log_error(NGX_LOG_EMERG, cf->log, 0,
1567 "no \"charset_map\" between the charsets \"%V\" and \"%V\"",
1568 &charset[c].name, &charset[recode[i].dst].name);
/* link every map into the tables arrays of both of its charsets */
1576 for (t = 0; t < mcf->tables.nelts; t++) {
1578 src = charset[tables[t].src].tables;
/* the array has one slot per known charset */
1581 src = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts);
1586 charset[tables[t].src].tables = src;
1589 dst = charset[tables[t].dst].tables;
1592 dst = ngx_pcalloc(cf->pool, sizeof(u_char *) * mcf->charsets.nelts);
1597 charset[tables[t].dst].tables = dst;
1600 src[tables[t].dst] = tables[t].src2dst;
1601 dst[tables[t].src] = tables[t].dst2src;
/* remember the current chain heads, then install this module's filters */
1604 ngx_http_next_header_filter = ngx_http_top_header_filter;
1605 ngx_http_top_header_filter = ngx_http_charset_header_filter;
1607 ngx_http_next_body_filter = ngx_http_top_body_filter;
1608 ngx_http_top_body_filter = ngx_http_charset_body_filter;