diff options
Diffstat (limited to 'data/lighttpd/lighttpd-1.4.53/src/keyvalue.c')
-rw-r--r-- | data/lighttpd/lighttpd-1.4.53/src/keyvalue.c | 460 |
1 files changed, 460 insertions, 0 deletions
diff --git a/data/lighttpd/lighttpd-1.4.53/src/keyvalue.c b/data/lighttpd/lighttpd-1.4.53/src/keyvalue.c new file mode 100644 index 000000000..c1b6b019c --- /dev/null +++ b/data/lighttpd/lighttpd-1.4.53/src/keyvalue.c @@ -0,0 +1,460 @@ +#include "first.h" + +#include "keyvalue.h" +#include "base.h" +#include "burl.h" +#include "log.h" + +#include <stdlib.h> +#include <string.h> + +#ifdef HAVE_PCRE_H +#include <pcre.h> +#endif + +typedef struct pcre_keyvalue { +#ifdef HAVE_PCRE_H + pcre *key; + pcre_extra *key_extra; +#endif + buffer *value; +} pcre_keyvalue; + +pcre_keyvalue_buffer *pcre_keyvalue_buffer_init(void) { + pcre_keyvalue_buffer *kvb; + + kvb = calloc(1, sizeof(*kvb)); + force_assert(NULL != kvb); + + return kvb; +} + +int pcre_keyvalue_buffer_append(server *srv, pcre_keyvalue_buffer *kvb, buffer *key, buffer *value) { +#ifdef HAVE_PCRE_H + size_t i; + const char *errptr; + int erroff; + pcre_keyvalue *kv; + + if (!key) return -1; + + if (kvb->size == 0) { + kvb->size = 4; + kvb->used = 0; + + kvb->kv = malloc(kvb->size * sizeof(*kvb->kv)); + force_assert(NULL != kvb->kv); + + for(i = 0; i < kvb->size; i++) { + kvb->kv[i] = calloc(1, sizeof(**kvb->kv)); + force_assert(NULL != kvb->kv[i]); + } + } else if (kvb->used == kvb->size) { + kvb->size += 4; + + kvb->kv = realloc(kvb->kv, kvb->size * sizeof(*kvb->kv)); + force_assert(NULL != kvb->kv); + + for(i = kvb->used; i < kvb->size; i++) { + kvb->kv[i] = calloc(1, sizeof(**kvb->kv)); + force_assert(NULL != kvb->kv[i]); + } + } + + kv = kvb->kv[kvb->used]; + if (NULL == (kv->key = pcre_compile(key->ptr, + 0, &errptr, &erroff, NULL))) { + + log_error_write(srv, __FILE__, __LINE__, "SS", + "rexexp compilation error at ", errptr); + return -1; + } + + if (NULL == (kv->key_extra = pcre_study(kv->key, 0, &errptr)) && + errptr != NULL) { + return -1; + } + + kv->value = buffer_init_buffer(value); + + kvb->used++; + +#else + static int logged_message = 0; + if (logged_message) return 0; + logged_message = 1; + log_error_write(srv, __FILE__, __LINE__, "s", + "pcre support is missing, please install libpcre and the headers"); + UNUSED(kvb); + UNUSED(key); + UNUSED(value); +#endif + + return 0; +} + +void pcre_keyvalue_buffer_free(pcre_keyvalue_buffer *kvb) { +#ifdef HAVE_PCRE_H + size_t i; + pcre_keyvalue *kv; + + for (i = 0; i < kvb->size; i++) { + kv = kvb->kv[i]; + if (kv->key) pcre_free(kv->key); + if (kv->key_extra) pcre_free(kv->key_extra); + if (kv->value) buffer_free(kv->value); + free(kv); + } + + if (kvb->kv) free(kvb->kv); +#endif + + free(kvb); +} + +#ifdef HAVE_PCRE_H +static void pcre_keyvalue_buffer_append_match(buffer *b, const char **list, int n, unsigned int num, int flags) { + if (num < (unsigned int)n) { /* n is always > 0 */ + burl_append(b, list[num], strlen(list[num]), flags); + } +} + +static void pcre_keyvalue_buffer_append_ctxmatch(buffer *b, pcre_keyvalue_ctx *ctx, unsigned int num, int flags) { + const struct cond_cache_t * const cache = ctx->cache; + if (!cache) return; /* no enclosing match context */ + if ((int)num < cache->patterncount) { + const int off = cache->matches[(num <<= 1)]; /*(num *= 2)*/ + const int len = cache->matches[num+1] - off; + burl_append(b, cache->comp_value->ptr + off, (size_t)len, flags); + } +} + +static int pcre_keyvalue_buffer_subst_ext(buffer *b, const char *pattern, const char **list, int n, pcre_keyvalue_ctx *ctx) { + const unsigned char *p = (unsigned char *)pattern+2;/* +2 past ${} or %{} */ + int flags = 0; + while (!light_isdigit(*p) && *p != '}' && *p != '\0') { + if (0) { + } + else if (p[0] == 'e' && p[1] == 's' && p[2] == 'c') { + p+=3; + if (p[0] == ':') { + flags |= BURL_ENCODE_ALL; + p+=1; + } + else if (0 == strncmp((const char *)p, "ape:", 4)) { + flags |= BURL_ENCODE_ALL; + p+=4; + } + else if (0 == strncmp((const char *)p, "nde:", 4)) { + flags |= BURL_ENCODE_NDE; + p+=4; + } + else if (0 == strncmp((const char *)p, "psnde:", 6)) { + flags |= BURL_ENCODE_PSNDE; + p+=6; + } + else { /* skip unrecognized esc... */ + p = (const unsigned char *)strchr((const char *)p, ':'); + if (NULL == p) return -1; + ++p; + } + } + else if (p[0] == 'n' && p[1] == 'o') { + p+=2; + if (0 == strncmp((const char *)p, "esc:", 4)) { + flags |= BURL_ENCODE_NONE; + p+=4; + } + else if (0 == strncmp((const char *)p, "escape:", 7)) { + flags |= BURL_ENCODE_NONE; + p+=7; + } + else { /* skip unrecognized no... */ + p = (const unsigned char *)strchr((const char *)p, ':'); + if (NULL == p) return -1; + ++p; + } + } + else if (p[0] == 't' && p[1] == 'o') { + p+=2; + if (0 == strncmp((const char *)p, "lower:", 6)) { + flags |= BURL_TOLOWER; + p+=6; + } + else if (0 == strncmp((const char *)p, "upper:", 6)) { + flags |= BURL_TOLOWER; + p+=6; + } + else { /* skip unrecognized to... */ + p = (const unsigned char *)strchr((const char *)p, ':'); + if (NULL == p) return -1; + ++p; + } + } + else if (p[0] == 'u' && p[1] == 'r' && p[2] == 'l' && p[3] == '.') { + p+=4; + if (0 == strncmp((const char *)p, "scheme}", 7)) { + burl_append(b, CONST_BUF_LEN(ctx->burl->scheme), flags); + p+=6; + } + else if (0 == strncmp((const char *)p, "authority}", 10)) { + burl_append(b, CONST_BUF_LEN(ctx->burl->authority), flags); + p+=9; + } + else if (0 == strncmp((const char *)p, "port}", 5)) { + buffer_append_int(b, (int)ctx->burl->port); + p+=4; + } + else if (0 == strncmp((const char *)p, "path}", 5)) { + burl_append(b, CONST_BUF_LEN(ctx->burl->path), flags); + p+=4; + } + else if (0 == strncmp((const char *)p, "query}", 6)) { + burl_append(b, CONST_BUF_LEN(ctx->burl->query), flags); + p+=5; + } + else { /* skip unrecognized url.* */ + p = (const unsigned char *)strchr((const char *)p, '}'); + if (NULL == p) return -1; + } + break; + } + else if (p[0] == 'q' && p[1] == 's' && p[2] == 'a' && p[3] == '}') { + const buffer *qs = ctx->burl->query; + if (!buffer_is_empty(qs)) { + if (NULL != strchr(b->ptr, '?')) { + if (!buffer_string_is_empty(qs)) + buffer_append_string_len(b, CONST_STR_LEN("&")); + } + else { + buffer_append_string_len(b, CONST_STR_LEN("?")); + } + burl_append(b, CONST_BUF_LEN(qs), flags); + } + p+=3; + break; + } + else if (p[0] == 'e' && p[1] == 'n' && p[2] == 'c' + && 0 == strncmp((const char *)p+3, "b64u:", 5)) { + flags |= BURL_ENCODE_B64U; + p+=8; + } + else if (p[0] == 'd' && p[1] == 'e' && p[2] == 'c' + && 0 == strncmp((const char *)p+3, "b64u:", 5)) { + flags |= BURL_DECODE_B64U; + p+=8; + } + else ++p; /* skip unrecognized char */ + } + if (*p == '\0') return -1; + if (*p != '}') { /* light_isdigit(*p) */ + unsigned int num = *p - '0'; + ++p; + if (light_isdigit(*p)) num = num * 10 + (*p++ - '0'); + if (*p != '}') { + p = (const unsigned char *)strchr((const char *)p, '}'); + if (NULL == p) return -1; + } + if (0 == flags) flags = BURL_ENCODE_PSNDE; /* default */ + pattern[0] == '$' /*(else '%')*/ + ? pcre_keyvalue_buffer_append_match(b, list, n, num, flags) + : pcre_keyvalue_buffer_append_ctxmatch(b, ctx, num, flags); + } + return (int)(p + 1 - (unsigned char *)pattern - 2); +} + +static void pcre_keyvalue_buffer_subst(buffer *b, const buffer *patternb, const char **list, int n, pcre_keyvalue_ctx *ctx) { + const char *pattern = patternb->ptr; + const size_t pattern_len = buffer_string_length(patternb); + size_t start = 0; + + /* search for $... or %... pattern substitutions */ + + buffer_clear(b); + + for (size_t k = 0; k + 1 < pattern_len; ++k) { + if (pattern[k] == '$' || pattern[k] == '%') { + + buffer_append_string_len(b, pattern + start, k - start); + + if (pattern[k + 1] == '{') { + int num = pcre_keyvalue_buffer_subst_ext(b, pattern+k, list, n, ctx); + if (num < 0) return; /* error; truncate result */ + k += (size_t)num; + } else if (light_isdigit(((unsigned char *)pattern)[k + 1])) { + unsigned int num = (unsigned int)pattern[k + 1] - '0'; + pattern[k] == '$' /*(else '%')*/ + ? pcre_keyvalue_buffer_append_match(b, list, n, num, 0) + : pcre_keyvalue_buffer_append_ctxmatch(b, ctx, num, 0); + } else { + /* enable escape: "%%" => "%", "%a" => "%a", "$$" => "$" */ + buffer_append_string_len(b, pattern+k, pattern[k] == pattern[k+1] ? 1 : 2); + } + + k++; + start = k + 1; + } + } + + buffer_append_string_len(b, pattern + start, pattern_len - start); +} + +handler_t pcre_keyvalue_buffer_process(pcre_keyvalue_buffer *kvb, pcre_keyvalue_ctx *ctx, buffer *input, buffer *result) { + for (int i = 0, used = (int)kvb->used; i < used; ++i) { + pcre_keyvalue * const kv = kvb->kv[i]; + #define N 20 + int ovec[N * 3]; + #undef N + int n = pcre_exec(kv->key, kv->key_extra, CONST_BUF_LEN(input), + 0, 0, ovec, sizeof(ovec)/sizeof(int)); + if (n < 0) { + if (n != PCRE_ERROR_NOMATCH) { + return HANDLER_ERROR; + } + } + else if (buffer_string_is_empty(kv->value)) { + /* short-circuit if blank replacement pattern + * (do not attempt to match against remaining kvb rules) */ + ctx->m = i; + return HANDLER_GO_ON; + } + else { /* it matched */ + const char **list; + ctx->m = i; + pcre_get_substring_list(input->ptr, ovec, n, &list); + pcre_keyvalue_buffer_subst(result, kv->value, list, n, ctx); + pcre_free(list); + return HANDLER_FINISHED; + } + } + + return HANDLER_GO_ON; +} +#else +handler_t pcre_keyvalue_buffer_process(pcre_keyvalue_buffer *kvb, pcre_keyvalue_ctx *ctx, buffer *input, buffer *result) { + UNUSED(kvb); + UNUSED(ctx); + UNUSED(input); + UNUSED(result); + return HANDLER_GO_ON; +} +#endif + + +/* modified from burl_normalize_basic() to handle %% extra encoding layer */ + +/* c (char) and n (nibble) MUST be unsigned integer types */ +#define li_cton(c,n) \ + (((n) = (c) - '0') <= 9 || (((n) = ((c)&0xdf) - 'A') <= 5 ? ((n) += 10) : 0)) + +static void pcre_keyvalue_burl_percent_toupper (buffer *b) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + const int used = (int)buffer_string_length(b); + unsigned int n1, n2; + for (int i = 0; i < used; ++i) { + if (s[i]=='%' && li_cton(s[i+1],n1) && li_cton(s[i+2],n2)) { + if (s[i+1] >= 'a') b->ptr[i+1] &= 0xdf; /* uppercase hex */ + if (s[i+2] >= 'a') b->ptr[i+2] &= 0xdf; /* uppercase hex */ + i+=2; + } + } +} + +static void pcre_keyvalue_burl_percent_percent_toupper (buffer *b) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + const int used = (int)buffer_string_length(b); + unsigned int n1, n2; + for (int i = 0; i < used; ++i) { + if (s[i] == '%' && s[i+1]=='%' + && li_cton(s[i+2],n1) && li_cton(s[i+3],n2)) { + if (s[i+2] >= 'a') b->ptr[i+2] &= 0xdf; /* uppercase hex */ + if (s[i+3] >= 'a') b->ptr[i+3] &= 0xdf; /* uppercase hex */ + i+=3; + } + } +} + +static const char hex_chars_uc[] = "0123456789ABCDEF"; + +static void pcre_keyvalue_burl_percent_high_UTF8 (buffer *b, buffer *t) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + unsigned char *p; + const int used = (int)buffer_string_length(b); + unsigned int count = 0, j = 0; + for (int i = 0; i < used; ++i) { + if (s[i] > 0x7F) ++count; + } + if (0 == count) return; + + p = (unsigned char *)buffer_string_prepare_copy(t, used+(count*2)); + for (int i = 0; i < used; ++i, ++j) { + if (s[i] <= 0x7F) + p[j] = s[i]; + else { + p[j] = '%'; + p[++j] = hex_chars_uc[(s[i] >> 4) & 0xF]; + p[++j] = hex_chars_uc[s[i] & 0xF]; + } + } + buffer_commit(t, j); + buffer_copy_buffer(b, t); +} + +static void pcre_keyvalue_burl_percent_percent_high_UTF8 (buffer *b, buffer *t) +{ + const unsigned char * const s = (unsigned char *)b->ptr; + unsigned char *p; + const int used = (int)buffer_string_length(b); + unsigned int count = 0, j = 0; + for (int i = 0; i < used; ++i) { + if (s[i] > 0x7F) ++count; + } + if (0 == count) return; + + p = (unsigned char *)buffer_string_prepare_copy(t, used+(count*3)); + for (int i = 0; i < used; ++i, ++j) { + if (s[i] <= 0x7F) + p[j] = s[i]; + else { + p[j] = '%'; + p[++j] = '%'; + p[++j] = hex_chars_uc[(s[i] >> 4) & 0xF]; + p[++j] = hex_chars_uc[s[i] & 0xF]; + } + } + buffer_commit(t, j); + buffer_copy_buffer(b, t); +} + +/* Basic normalization of regex and regex replacement to mirror some of + * the normalizations performed on request URI (for better compatibility). + * Note: not currently attempting to replace unnecessary percent-encoding + * (would need to know if regex was intended to match url-path or + * query-string or both, and then would have to regex-escape if those + * chars where special regex chars such as . * + ? ( ) [ ] | and more) + * Not attempting to percent-encode chars which should be encoded, again + * since regex might target url-path, query-string, or both, and we would + * have to avoid percent-encoding special regex chars. + * Also not attempting to detect unnecessarily regex-escape in, e.g. %\x\x + * Preserve improper %-encoded sequences which are not %XX (using hex chars) + * Intentionally not performing path simplification (e.g. ./ ../) + * If regex-specific normalizations begin to be made to k here, + * must revisit callers, e.g. one configfile.c use on non-regex string. + * "%%" (percent_percent) is used in regex replacement strings since + * otherwise "%n" is used to indicate regex backreference where n is number. + */ + +void pcre_keyvalue_burl_normalize_key (buffer *k, buffer *t) +{ + pcre_keyvalue_burl_percent_toupper(k); + pcre_keyvalue_burl_percent_high_UTF8(k, t); +} + +void pcre_keyvalue_burl_normalize_value (buffer *v, buffer *t) +{ + pcre_keyvalue_burl_percent_percent_toupper(v); + pcre_keyvalue_burl_percent_percent_high_UTF8(v, t); +} |