From fb43360f1de24044306ccb910a0cddae6f61e136 Mon Sep 17 00:00:00 2001 From: Roman Bataev Date: Fri, 10 Apr 2015 16:56:12 -0700 Subject: [PATCH] Use uint32_t instead of uchar_t to avoid conflicts on SmartOS --- json.cpp | 267 +++++++++++++++++++++++++++---------------------------- 1 file changed, 132 insertions(+), 135 deletions(-) diff --git a/json.cpp b/json.cpp index 533646e60..f92f096a2 100644 --- a/json.cpp +++ b/json.cpp @@ -102,11 +102,11 @@ static void sb_grow(SB *sb, int need) { size_t length = sb->cur - sb->start; size_t alloc = sb->end - sb->start; - + do { alloc *= 2; } while (alloc < length + need); - + sb->start = (char*) realloc(sb->start, alloc + 1); if (sb->start == NULL) out_of_memory(); @@ -150,13 +150,10 @@ static void sb_free(SB *sb) * These are taken from the ccan/charset module and customized a bit. * Putting them here means the compiler can (choose to) inline them, * and it keeps ccan/json from having a dependency. - */ - -/* - * Type for Unicode codepoints. + * + * We use uint32_t Type for Unicode codepoints. * We need our own because wchar_t might be 16 bits. */ -typedef uint32_t uchar_t; /* * Validate a single UTF-8 character starting at @s. @@ -181,7 +178,7 @@ typedef uint32_t uchar_t; static int utf8_validate_cz(const char *s) { unsigned char c = *s++; - + if (c <= 0x7F) { /* 00..7F */ return 1; } else if (c <= 0xC1) { /* 80..C1 */ @@ -191,33 +188,33 @@ static int utf8_validate_cz(const char *s) /* Make sure subsequent byte is in the range 0x80..0xBF. */ if (((unsigned char)*s++ & 0xC0) != 0x80) return 0; - + return 2; } else if (c <= 0xEF) { /* E0..EF */ /* Disallow overlong 3-byte sequence. */ if (c == 0xE0 && (unsigned char)*s < 0xA0) return 0; - + /* Disallow U+D800..U+DFFF. */ if (c == 0xED && (unsigned char)*s > 0x9F) return 0; - + /* Make sure subsequent bytes are in the range 0x80..0xBF. */ if (((unsigned char)*s++ & 0xC0) != 0x80) return 0; if (((unsigned char)*s++ & 0xC0) != 0x80) return 0; - + return 3; } else if (c <= 0xF4) { /* F0..F4 */ /* Disallow overlong 4-byte sequence. */ if (c == 0xF0 && (unsigned char)*s < 0x90) return 0; - + /* Disallow codepoints beyond U+10FFFF. */ if (c == 0xF4 && (unsigned char)*s > 0x8F) return 0; - + /* Make sure subsequent bytes are in the range 0x80..0xBF. */ if (((unsigned char)*s++ & 0xC0) != 0x80) return 0; @@ -225,7 +222,7 @@ static int utf8_validate_cz(const char *s) return 0; if (((unsigned char)*s++ & 0xC0) != 0x80) return 0; - + return 4; } else { /* F5..FF */ return 0; @@ -236,13 +233,13 @@ static int utf8_validate_cz(const char *s) static bool utf8_validate(const char *s) { int len; - + for (; *s != 0; s += len) { len = utf8_validate_cz(s); if (len == 0) return false; } - + return true; } @@ -253,10 +250,10 @@ static bool utf8_validate(const char *s) * This function assumes input is valid UTF-8, * and that there are enough characters in front of @s. */ -static int utf8_read_char(const char *s, uchar_t *out) +static int utf8_read_char(const char *s, uint32_t *out) { const unsigned char *c = (const unsigned char*) s; - + assert(utf8_validate_cz(s)); if (c[0] <= 0x7F) { @@ -265,21 +262,21 @@ static int utf8_read_char(const char *s, uchar_t *out) return 1; } else if (c[0] <= 0xDF) { /* C2..DF (unless input is invalid) */ - *out = ((uchar_t)c[0] & 0x1F) << 6 | - ((uchar_t)c[1] & 0x3F); + *out = ((uint32_t)c[0] & 0x1F) << 6 | + ((uint32_t)c[1] & 0x3F); return 2; } else if (c[0] <= 0xEF) { /* E0..EF */ - *out = ((uchar_t)c[0] & 0xF) << 12 | - ((uchar_t)c[1] & 0x3F) << 6 | - ((uchar_t)c[2] & 0x3F); + *out = ((uint32_t)c[0] & 0xF) << 12 | + ((uint32_t)c[1] & 0x3F) << 6 | + ((uint32_t)c[2] & 0x3F); return 3; } else { /* F0..F4 (unless input is invalid) */ - *out = ((uchar_t)c[0] & 0x7) << 18 | - ((uchar_t)c[1] & 0x3F) << 12 | - ((uchar_t)c[2] & 0x3F) << 6 | - ((uchar_t)c[3] & 0x3F); + *out = ((uint32_t)c[0] & 0x7) << 18 | + ((uint32_t)c[1] & 0x3F) << 12 | + ((uint32_t)c[2] & 0x3F) << 6 | + ((uint32_t)c[3] & 0x3F); return 4; } } @@ -292,10 +289,10 @@ static int utf8_read_char(const char *s, uchar_t *out) * * This function will write up to 4 bytes to @out. */ -static int utf8_write_char(uchar_t unicode, char *out) +static int utf8_write_char(uint32_t unicode, char *out) { unsigned char *o = (unsigned char*) out; - + assert(unicode <= 0x10FFFF && !(unicode >= 0xD800 && unicode <= 0xDFFF)); if (unicode <= 0x7F) { @@ -329,10 +326,10 @@ static int utf8_write_char(uchar_t unicode, char *out) * @uc should be 0xD800..0xDBFF, and @lc should be 0xDC00..0xDFFF. * If they aren't, this function returns false. */ -static bool from_surrogate_pair(uint16_t uc, uint16_t lc, uchar_t *unicode) +static bool from_surrogate_pair(uint16_t uc, uint16_t lc, uint32_t *unicode) { if (uc >= 0xD800 && uc <= 0xDBFF && lc >= 0xDC00 && lc <= 0xDFFF) { - *unicode = 0x10000 + ((((uchar_t)uc & 0x3FF) << 10) | (lc & 0x3FF)); + *unicode = 0x10000 + ((((uint32_t)uc & 0x3FF) << 10) | (lc & 0x3FF)); return true; } else { return false; @@ -344,12 +341,12 @@ static bool from_surrogate_pair(uint16_t uc, uint16_t lc, uchar_t *unicode) * * @unicode must be U+10000..U+10FFFF. */ -static void to_surrogate_pair(uchar_t unicode, uint16_t *uc, uint16_t *lc) +static void to_surrogate_pair(uint32_t unicode, uint16_t *uc, uint16_t *lc) { - uchar_t n; - + uint32_t n; + assert(unicode >= 0x10000 && unicode <= 0x10FFFF); - + n = unicode - 0x10000; *uc = ((n >> 10) & 0x3FF) | 0xD800; *lc = (n & 0x3FF) | 0xDC00; @@ -392,17 +389,17 @@ JsonNode *json_decode(const char *json) { const char *s = json; JsonNode *ret; - + skip_space(&s); if (!parse_value(&s, &ret)) return NULL; - + skip_space(&s); if (*s != 0) { json_delete(ret); return NULL; } - + return ret; } @@ -415,9 +412,9 @@ char *json_encode_string(const char *str) { SB sb; sb_init(&sb); - + emit_string(&sb, str); - + return sb_finish(&sb); } @@ -425,12 +422,12 @@ char *json_stringify(const JsonNode *node, const char *space) { SB sb; sb_init(&sb); - + if (space != NULL) emit_value_indented(&sb, node, space, 0); else emit_value(&sb, node); - + return sb_finish(&sb); } @@ -438,7 +435,7 @@ void json_delete(JsonNode *node) { if (node != NULL) { json_remove_from_parent(node); - + switch (node->tag) { case JSON_STRING: free(node->string_); @@ -455,7 +452,7 @@ void json_delete(JsonNode *node) } default:; } - + free(node); } } @@ -463,15 +460,15 @@ void json_delete(JsonNode *node) bool json_validate(const char *json) { const char *s = json; - + skip_space(&s); if (!parse_value(&s, NULL)) return false; - + skip_space(&s); if (*s != 0) return false; - + return true; } @@ -479,30 +476,30 @@ JsonNode *json_find_element(JsonNode *array, int index) { JsonNode *element; int i = 0; - + if (array == NULL || array->tag != JSON_ARRAY) return NULL; - + json_foreach(element, array) { if (i == index) return element; i++; } - + return NULL; } JsonNode *json_find_member(JsonNode *object, const char *name) { JsonNode *member; - + if (object == NULL || object->tag != JSON_OBJECT) return NULL; - + json_foreach(member, object) if (strcmp(member->key, name) == 0) return member; - + return NULL; } @@ -568,7 +565,7 @@ static void append_node(JsonNode *parent, JsonNode *child) child->parent = parent; child->prev = parent->children.tail; child->next = NULL; - + if (parent->children.tail != NULL) parent->children.tail->next = child; else @@ -581,7 +578,7 @@ static void prepend_node(JsonNode *parent, JsonNode *child) child->parent = parent; child->prev = NULL; child->next = parent->children.head; - + if (parent->children.head != NULL) parent->children.head->prev = child; else @@ -599,7 +596,7 @@ void json_append_element(JsonNode *array, JsonNode *element) { assert(array->tag == JSON_ARRAY); assert(element->parent == NULL); - + append_node(array, element); } @@ -607,7 +604,7 @@ void json_prepend_element(JsonNode *array, JsonNode *element) { assert(array->tag == JSON_ARRAY); assert(element->parent == NULL); - + prepend_node(array, element); } @@ -615,7 +612,7 @@ void json_append_member(JsonNode *object, const char *key, JsonNode *value) { assert(object->tag == JSON_OBJECT); assert(value->parent == NULL); - + append_member(object, json_strdup(key), value); } @@ -623,7 +620,7 @@ void json_prepend_member(JsonNode *object, const char *key, JsonNode *value) { assert(object->tag == JSON_OBJECT); assert(value->parent == NULL); - + value->key = json_strdup(key); prepend_node(object, value); } @@ -631,7 +628,7 @@ void json_prepend_member(JsonNode *object, const char *key, JsonNode *value) void json_remove_from_parent(JsonNode *node) { JsonNode *parent = node->parent; - + if (parent != NULL) { if (node->prev != NULL) node->prev->next = node->next; @@ -641,9 +638,9 @@ void json_remove_from_parent(JsonNode *node) node->next->prev = node->prev; else parent->children.tail = node->prev; - + free(node->key); - + node->parent = NULL; node->prev = node->next = NULL; node->key = NULL; @@ -653,7 +650,7 @@ void json_remove_from_parent(JsonNode *node) static bool parse_value(const char **sp, JsonNode **out) { const char *s = *sp; - + switch (*s) { case 'n': if (expect_literal(&s, "null")) { @@ -663,7 +660,7 @@ static bool parse_value(const char **sp, JsonNode **out) return true; } return false; - + case 'f': if (expect_literal(&s, "false")) { if (out) @@ -672,7 +669,7 @@ static bool parse_value(const char **sp, JsonNode **out) return true; } return false; - + case 't': if (expect_literal(&s, "true")) { if (out) @@ -681,7 +678,7 @@ static bool parse_value(const char **sp, JsonNode **out) return true; } return false; - + case '"': { char *str; if (parse_string(&s, out ? &str : NULL)) { @@ -692,21 +689,21 @@ static bool parse_value(const char **sp, JsonNode **out) } return false; } - + case '[': if (parse_array(&s, out)) { *sp = s; return true; } return false; - + case '{': if (parse_object(&s, out)) { *sp = s; return true; } return false; - + default: { double num; if (parse_number(&s, out ? &num : NULL)) { @@ -725,34 +722,34 @@ static bool parse_array(const char **sp, JsonNode **out) const char *s = *sp; JsonNode *ret = out ? json_mkarray() : NULL; JsonNode *element; - + if (*s++ != '[') goto failure; skip_space(&s); - + if (*s == ']') { s++; goto success; } - + for (;;) { if (!parse_value(&s, out ? &element : NULL)) goto failure; skip_space(&s); - + if (out) json_append_element(ret, element); - + if (*s == ']') { s++; goto success; } - + if (*s++ != ',') goto failure; skip_space(&s); } - + success: *sp = s; if (out) @@ -770,42 +767,42 @@ static bool parse_object(const char **sp, JsonNode **out) JsonNode *ret = out ? json_mkobject() : NULL; char *key; JsonNode *value; - + if (*s++ != '{') goto failure; skip_space(&s); - + if (*s == '}') { s++; goto success; } - + for (;;) { if (!parse_string(&s, out ? &key : NULL)) goto failure; skip_space(&s); - + if (*s++ != ':') goto failure_free_key; skip_space(&s); - + if (!parse_value(&s, out ? &value : NULL)) goto failure_free_key; skip_space(&s); - + if (out) append_member(ret, key, value); - + if (*s == '}') { s++; goto success; } - + if (*s++ != ',') goto failure; skip_space(&s); } - + success: *sp = s; if (out) @@ -827,10 +824,10 @@ bool parse_string(const char **sp, char **out) char throwaway_buffer[4]; /* enough space for a UTF-8 character */ char *b; - + if (*s++ != '"') return false; - + if (out) { sb_init(&sb); sb_need(&sb, 4); @@ -838,10 +835,10 @@ bool parse_string(const char **sp, char **out) } else { b = throwaway_buffer; } - + while (*s != '"') { unsigned char c = *s++; - + /* Parse next character, and write it to b. */ if (c == '\\') { c = *s++; @@ -869,11 +866,11 @@ bool parse_string(const char **sp, char **out) case 'u': { uint16_t uc, lc; - uchar_t unicode; - + uint32_t unicode; + if (!parse_hex16(&s, &uc)) goto failed; - + if (uc >= 0xD800 && uc <= 0xDFFF) { /* Handle UTF-16 surrogate pair. */ if (*s++ != '\\' || *s++ != 'u' || !parse_hex16(&s, &lc)) @@ -886,7 +883,7 @@ bool parse_string(const char **sp, char **out) } else { unicode = uc; } - + b += utf8_write_char(unicode, b); break; } @@ -900,16 +897,16 @@ bool parse_string(const char **sp, char **out) } else { /* Validate and echo a UTF-8 character. */ int len; - + s--; len = utf8_validate_cz(s); if (len == 0) goto failed; /* Invalid UTF-8 character. */ - + while (len--) *b++ = *s++; } - + /* * Update sb to know about the new bytes, * and set up b to write another character. @@ -923,7 +920,7 @@ bool parse_string(const char **sp, char **out) } } s++; - + if (out) *out = sb_finish(&sb); *sp = s; @@ -1058,7 +1055,7 @@ void emit_value_indented(SB *out, const JsonNode *node, const char *space, int i static void emit_array(SB *out, const JsonNode *array) { const JsonNode *element; - + sb_putc(out, '['); json_foreach(element, array) { emit_value(out, element); @@ -1072,18 +1069,18 @@ static void emit_array_indented(SB *out, const JsonNode *array, const char *spac { const JsonNode *element = array->children.head; int i; - + if (element == NULL) { sb_puts(out, "[]"); return; } - + sb_puts(out, "[\n"); while (element != NULL) { for (i = 0; i < indent_level + 1; i++) sb_puts(out, space); emit_value_indented(out, element, space, indent_level + 1); - + element = element->next; sb_puts(out, element != NULL ? ",\n" : "\n"); } @@ -1095,7 +1092,7 @@ static void emit_array_indented(SB *out, const JsonNode *array, const char *spac static void emit_object(SB *out, const JsonNode *object) { const JsonNode *member; - + sb_putc(out, '{'); json_foreach(member, object) { emit_string(out, member->key); @@ -1111,12 +1108,12 @@ static void emit_object_indented(SB *out, const JsonNode *object, const char *sp { const JsonNode *member = object->children.head; int i; - + if (member == NULL) { sb_puts(out, "{}"); return; } - + sb_puts(out, "{\n"); while (member != NULL) { for (i = 0; i < indent_level + 1; i++) @@ -1124,7 +1121,7 @@ static void emit_object_indented(SB *out, const JsonNode *object, const char *sp emit_string(out, member->key); sb_puts(out, ": "); emit_value_indented(out, member, space, indent_level + 1); - + member = member->next; sb_puts(out, member != NULL ? ",\n" : "\n"); } @@ -1138,20 +1135,20 @@ void emit_string(SB *out, const char *str) bool escape_unicode = false; const char *s = str; char *b; - + assert(utf8_validate(str)); - + /* * 14 bytes is enough space to write up to two * \uXXXX escapes and two quotation marks. */ sb_need(out, 14); b = out->cur; - + *b++ = '"'; while (*s != 0) { unsigned char c = *s++; - + /* Encode the next character, and write it to b. */ switch (c) { case '"': @@ -1184,10 +1181,10 @@ void emit_string(SB *out, const char *str) break; default: { int len; - + s--; len = utf8_validate_cz(s); - + if (len == 0) { /* * Handle invalid UTF-8 character gracefully in production @@ -1210,9 +1207,9 @@ void emit_string(SB *out, const char *str) } else if (c < 0x1F || (c >= 0x80 && escape_unicode)) { /* Encode using \u.... */ uint32_t unicode; - + s += utf8_read_char(s, &unicode); - + if (unicode <= 0xFFFF) { *b++ = '\\'; *b++ = 'u'; @@ -1234,11 +1231,11 @@ void emit_string(SB *out, const char *str) while (len--) *b++ = *s++; } - + break; } } - + /* * Update *out to know about the new bytes, * and set up b to write another encoded character. @@ -1248,7 +1245,7 @@ void emit_string(SB *out, const char *str) b = out->cur; } *b++ = '"'; - + out->cur = b; } @@ -1262,7 +1259,7 @@ static void emit_number(SB *out, double num) */ char buf[64]; sprintf(buf, "%.16g", num); - + if (number_is_valid(buf)) sb_puts(out, buf); else @@ -1282,11 +1279,11 @@ static bool number_is_valid(const char *num) static bool expect_literal(const char **sp, const char *str) { const char *s = *sp; - + while (*str != '\0') if (*s++ != *str++) return false; - + *sp = s; return true; } @@ -1317,7 +1314,7 @@ static bool parse_hex16(const char **sp, uint16_t *out) ret <<= 4; ret += tmp; } - + if (out) *out = ret; *sp = s; @@ -1331,12 +1328,12 @@ static bool parse_hex16(const char **sp, uint16_t *out) static int write_hex16(char *out, uint16_t val) { const char *hex = "0123456789ABCDEF"; - + *out++ = hex[(val >> 12) & 0xF]; *out++ = hex[(val >> 8) & 0xF]; *out++ = hex[(val >> 4) & 0xF]; *out++ = hex[ val & 0xF]; - + return 4; } @@ -1347,13 +1344,13 @@ bool json_check(const JsonNode *node, char errmsg[256]) snprintf(errmsg, 256, __VA_ARGS__); \ return false; \ } while (0) - + if (node->key != NULL && !utf8_validate(node->key)) problem("key contains invalid UTF-8"); - + if (!tag_is_valid(node->tag)) problem("tag is invalid (%u)", node->tag); - + if (node->tag == JSON_BOOL) { if (node->bool_ != false && node->bool_ != true) problem("bool_ is neither false (%d) nor true (%d)", (int)false, (int)true); @@ -1365,7 +1362,7 @@ bool json_check(const JsonNode *node, char errmsg[256]) } else if (node->tag == JSON_ARRAY || node->tag == JSON_OBJECT) { JsonNode *head = node->children.head; JsonNode *tail = node->children.tail; - + if (head == NULL || tail == NULL) { if (head != NULL) problem("tail is NULL, but head is not"); @@ -1374,10 +1371,10 @@ bool json_check(const JsonNode *node, char errmsg[256]) } else { JsonNode *child; JsonNode *last = NULL; - + if (head->prev != NULL) problem("First child's prev pointer is not NULL"); - + for (child = head; child != NULL; last = child, child = child->next) { if (child == node) problem("node is its own child"); @@ -1385,27 +1382,27 @@ bool json_check(const JsonNode *node, char errmsg[256]) problem("child->next == child (cycle)"); if (child->next == head) problem("child->next == head (cycle)"); - + if (child->parent != node) problem("child does not point back to parent"); if (child->next != NULL && child->next->prev != child) problem("child->next does not point back to child"); - + if (node->tag == JSON_ARRAY && child->key != NULL) problem("Array element's key is not NULL"); if (node->tag == JSON_OBJECT && child->key == NULL) problem("Object member's key is NULL"); - + if (!json_check(child, errmsg)) return false; } - + if (last != tail) problem("tail does not match pointer found by starting at head and following next links"); } } - + return true; - + #undef problem }