开源项目--cJSON6--JSON生成器

mac2022-06-30 39

文章目录

头文件 · 测试代码 · 函数实现 · 生成数字 · 生成字符串 · 生成数组和对象 · 最终代码

什么是JSON生成器？

JSON生成器负责把树形数据结构转化为JSON文本，这个过程又称为字符串化（stringify）。

头文件

生成器的API：

/* Serializes the value tree rooted at v into JSON text.
 * Returns a heap-allocated, NUL-terminated buffer; the caller releases it with free().
 * If length is not NULL, *length receives the text length, not counting the terminating NUL. */
char* lept_stringify(const lept_value* v, size_t* length);

在实现JSON解析的时候，我们加入了一个动态堆栈，用于存储临时的解析结果。而在JSON生成器中，也要存储生成的结果，所以最简单的做法是再利用该数据结构，作为输出缓冲区。

因为我们已经写过JSON解析器，所以生成器的写法就照着解析器写就可以。

#ifndef LEPT_PARSE_STRINGIFY_INIT_SIZE #define LEPT_PARSE_STRINGIFY_INIT_SIZE 256 #endif //int lept_stringify(const lept_value* v, char** json, size_t* length){ // lept_context c; // int ret; // assert(v != NULL); // assert(json != NULL); // c.stack = (char*)malloc(c.size = LEPT_PARSE_STRINGIFY_INIT_SIZE); // c.top = 0; // if (LEPT_PARSE_OK != (ret = lept_stringify_value(&c, v))){ // free(c.stack); // *json = NULL; // return ret; // } // if (length) // *length = c.top; // PUTC(&c, '\0'); // *json = c.stack; // return LEPT_PARSE_OK; //} char* lept_stringify(const lept_value* v, size_t* length) { lept_context c; assert(v != NULL); c.stack = (char*)malloc(c.size = LEPT_PARSE_STRINGIFY_INIT_SIZE); c.top = 0; lept_stringify_value(&c, v); if (length) *length = c.top; PUTC(&c, '\0'); return c.stack; }

测试代码

测试代码编写的思路：先把一个JSON文本解析成树形数据结构，再把这个结构生成为字符串JSON2，最后逐字符对比两个JSON文本是否相同，这种测试称为“往返测试”（roundtrip）。

#define TEST_ROUNDTRIP(json)\ do {\ lept_value v; \ char* json2; \ size_t length; \ lept_init(&v); \ EXPECT_EQ_INT(LEPT_PARSE_OK, lept_parse(&v, json)); \ json2 = lept_stringify(&v, &length); \ EXPECT_EQ_STRING(json, json2, length); \ lept_free(&v); \ free(json2); \ } while (0) static void test_stringify_number() { TEST_ROUNDTRIP("0"); TEST_ROUNDTRIP("-0"); TEST_ROUNDTRIP("1"); TEST_ROUNDTRIP("-1"); TEST_ROUNDTRIP("1.5"); TEST_ROUNDTRIP("-1.5"); TEST_ROUNDTRIP("3.25"); TEST_ROUNDTRIP("1e+20"); TEST_ROUNDTRIP("1.234e+20"); TEST_ROUNDTRIP("1.234e-20"); TEST_ROUNDTRIP("1.0000000000000002"); /* the smallest number > 1 */ TEST_ROUNDTRIP("4.9406564584124654e-324"); /* minimum denormal */ TEST_ROUNDTRIP("-4.9406564584124654e-324"); TEST_ROUNDTRIP("2.2250738585072009e-308"); /* Max subnormal double */ TEST_ROUNDTRIP("-2.2250738585072009e-308"); TEST_ROUNDTRIP("2.2250738585072014e-308"); /* Min normal positive double */ TEST_ROUNDTRIP("-2.2250738585072014e-308"); TEST_ROUNDTRIP("1.7976931348623157e+308"); /* Max double */ TEST_ROUNDTRIP("-1.7976931348623157e+308"); } static void test_stringify_string() { TEST_ROUNDTRIP("\"\""); TEST_ROUNDTRIP("\"Hello\""); TEST_ROUNDTRIP("\"Hello\\nWorld\""); TEST_ROUNDTRIP("\"\\\" \\\\ / \\b \\f \\n \\r \\t\""); TEST_ROUNDTRIP("\"Hello\\u0000World\""); } static void test_stringify_array() { TEST_ROUNDTRIP("[]"); TEST_ROUNDTRIP("[null,false,true,123,\"abc\",[1,2,3]]"); } static void test_stringify_object() { TEST_ROUNDTRIP("{}"); TEST_ROUNDTRIP("{\"n\":null,\"f\":false,\"t\":true,\"i\":123,\"s\":\"abc\",\"a\":[1,2,3],\"o\":{\"1\":1,\"2\":2,\"3\":3}}"); } static void test_stringify() { TEST_ROUNDTRIP("null"); TEST_ROUNDTRIP("false"); TEST_ROUNDTRIP("true"); test_stringify_number(); test_stringify_string(); test_stringify_array(); test_stringify_object(); }

函数实现

首先看看lept_parse_value函数的实现。

//解析的代码 static int lept_parse_value(lept_context* c, lept_value* v) { switch (*c->json) { case 't': return lept_parse_literal(c, v, "true", LEPT_TRUE); case 'f': return lept_parse_literal(c, v, "false", LEPT_FALSE); case 'n': return lept_parse_literal(c, v, "null", LEPT_NULL); default: return lept_parse_number(c, v);//对于number的情况，可以这样处理。 case '\0': return LEPT_PARSE_EXPECT_VALUE; } } //仿写生成的函数 #define PUTS(c, s, len) memcpy(lept_context_push(c, len), s, len) static int lept_stringify_value(lept_context* c, const lept_value* v) { size_t i; int ret; switch (v->type) { case LEPT_NULL: PUTS(c, "null", 4); break; case LEPT_FALSE: PUTS(c, "false", 5); break; case LEPT_TRUE: PUTS(c, "true", 4); break; /* ... */ } return LEPT_STRINGIFY_OK; }

生成数字

case LEPT_NUMBER:
    {
        /* Format into a local scratch buffer first, then copy the produced
         * characters (without the NUL) into the output. */
        char text[32];
        int written = sprintf(text, "%.17g", v->u.n);
        PUTS(c, text, written);
    }
    break;

但这样需要在 PUTS() 中做一次 memcpy()，实际上我们可以避免这次复制，只需要生成的时候直接写进 c 里的堆栈，然后再按实际长度调整 c->top：

case LEPT_NUMBER:
    {
        /* Reserve 32 bytes directly in the output, format in place, then
         * hand back whatever part of the reservation was not used. */
        char* dest = lept_context_push(c, 32);
        int used = sprintf(dest, "%.17g", v->u.n);
        int unused = 32 - used;
        c->top -= unused;
    }
    break;

简写成：

case LEPT_NUMBER:
    {
        /* lept_context_push() modifies c->top, so it must not be called inside
         * the right-hand side of "c->top -= ...": the read of c->top and the
         * call would be indeterminately sequenced, and the compiler may read
         * the old top before the push. Doing the push in its own statement
         * makes the order well defined. */
        char* buffer = lept_context_push(c, 32);
        /* Give back the part of the 32-byte reservation sprintf() did not use. */
        c->top -= 32 - sprintf(buffer, "%.17g", v->u.n);
    }
    break;

生成字符串

/* Appends the JSON string form of the len bytes at s to the output in c:
 * an opening quote, each byte escaped where JSON requires it, and a closing
 * quote. s must not be NULL (asserted); len is given explicitly, so s may
 * contain embedded NUL bytes. */
static void lept_stringify_string(lept_context* c, const char* s, size_t len) {
    size_t i;
    assert(s != NULL);
    PUTC(c, '"');
    for (i = 0; i < len; i++) {
        unsigned char ch = (unsigned char)s[i];
        switch (ch) {
            case '\"': PUTS(c, "\\\"", 2); break;
            case '\\': PUTS(c, "\\\\", 2); break;
            case '\b': PUTS(c, "\\b",  2); break;
            case '\f': PUTS(c, "\\f",  2); break;
            case '\n': PUTS(c, "\\n",  2); break;
            case '\r': PUTS(c, "\\r",  2); break;
            case '\t': PUTS(c, "\\t",  2); break;
            default:
                if (ch < 0x20) {
                    /* Remaining control characters become \u00XX:
                     * 6 characters plus the NUL written by sprintf(). */
                    char buffer[7];
                    sprintf(buffer, "\\u%04X", (unsigned)ch);
                    PUTS(c, buffer, 6);
                }
                else
                    PUTC(c, s[i]);
        }
    }
    PUTC(c, '"');
}

/* Fragment: the string case added to the value dispatcher. */
static void lept_stringify_value(lept_context* c, const lept_value* v) {
    switch (v->type) {
        /* ... */
        case LEPT_STRING: lept_stringify_string(c, v->u.s.s, v->u.s.len); break;
    }
}

优化 lept_stringify_string()

上面的 lept_stringify_string() 实现中，每次输出一个字符／字符串，都要调用 lept_context_push()。如果我们使用一些性能剖测工具，也可能会发现这个函数消耗较多 CPU。

/* Reserves size bytes at the top of the context's stack and returns a pointer
 * to the start of the reserved region. The stack grows by a factor of 1.5
 * until the request fits. size must be greater than 0 (asserted).
 *
 * Callers write through the returned pointer immediately and have no error
 * path, so a failed reallocation terminates the program with abort() instead
 * of returning NULL. */
static void* lept_context_push(lept_context* c, size_t size) {
    void* ret;
    assert(size > 0);
    if (c->top + size >= c->size) {                 /* (1) */
        char* grown;
        if (c->size == 0)
            c->size = LEPT_PARSE_STACK_INIT_SIZE;
        while (c->top + size >= c->size)
            c->size += c->size >> 1;                /* c->size * 1.5 */
        /* Use a temporary so the old buffer is not lost if realloc() fails. */
        grown = (char*)realloc(c->stack, c->size);
        if (grown == NULL)
            abort();
        c->stack = grown;
    }
    ret = c->stack + c->top;                        /* (2) */
    c->top += size;                                 /* (3) */
    return ret;                                     /* (4) */
}

中间最花费时间的，应该会是 (1)，需要计算而且作分支检查。即使使用内联函数去减少函数调用的开销，这个分支也无法避免。所以，一个优化的点子是，预先分配足够的内存，每次加入字符就不用做这个检查了。但多大的内存才足够呢？我们可以看到，每个字符可生成最长的形式是 \u00XX，占 6 个字符，再加上前后两个双引号，也就是共 len * 6 + 2 个输出字符。那么，使用 char* p = lept_context_push() 作一次分配后，便可以用 *p++ = c 去输出字符了。最后，再按实际输出量调整堆栈指针。另一个小优化点，是自行编写十六进位输出，避免了 printf() 内解析格式的开销。

/* Appends the JSON string form of the len bytes at s to the output in c.
 *
 * The worst-case output size (6 characters per input byte for "\u00XX", plus
 * the two quotes) is reserved with a single lept_context_push() call, the
 * characters are written through a plain pointer, and the unused tail of the
 * reservation is handed back at the end. s must not be NULL (asserted). */
static void lept_stringify_string(lept_context* c, const char* s, size_t len) {
    static const char hex_table[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
    size_t idx, reserved;
    char *start, *out;

    assert(s != NULL);
    reserved = len * 6 + 2; /* "\u00xx..." */
    start = lept_context_push(c, reserved);
    out = start;

    *out++ = '"';
    for (idx = 0; idx < len; idx++) {
        unsigned char ch = (unsigned char)s[idx];
        char short_escape = 0; /* letter following the backslash, 0 if none */

        switch (ch) {
            case '\"': short_escape = '\"'; break;
            case '\\': short_escape = '\\'; break;
            case '\b': short_escape = 'b';  break;
            case '\f': short_escape = 'f';  break;
            case '\n': short_escape = 'n';  break;
            case '\r': short_escape = 'r';  break;
            case '\t': short_escape = 't';  break;
            default: break;
        }

        if (short_escape != 0) {
            *out++ = '\\';
            *out++ = short_escape;
        }
        else if (ch < 0x20) {
            /* Other control characters: \u00 followed by two hex digits. */
            *out++ = '\\';
            *out++ = 'u';
            *out++ = '0';
            *out++ = '0';
            *out++ = hex_table[ch >> 4];
            *out++ = hex_table[ch & 15];
        }
        else {
            *out++ = s[idx];
        }
    }
    *out++ = '"';

    /* Return the part of the reservation that was not written. */
    c->top -= reserved - (out - start);
}

要注意的是，很多优化都是有代价的。第一个优化采取空间换时间的策略，对于只含一个字符串的 JSON，很可能会分配多 6 倍内存；但对于正常含多个值的 JSON，多分配的内存可在之后的值所利用，不会造成太多浪费。而第二个优化的缺点，就是稍微增加了一点程序体积。也许有人会问，为什么 hex_digits 不用字符串字面量 "0123456789ABCDEF"？其实是可以的，但这会多浪费 1 个字节（实际因数据对齐可能会浪费 4 个或更多）。

生成数组和对象

生成数组也是非常简单，只要输出 [ 和 ]，中间对逐个子值递归调用 lept_stringify_value()。只要注意在第一个元素后才加入 ,。而对象也仅是多了一个键和 :。

/* Fragment: array and object cases of the value dispatcher. Each container
 * writes its opening bracket, its children separated by commas, and its
 * closing bracket; children are generated by recursion. */
static void lept_stringify_value(lept_context* c, const lept_value* v) {
    size_t i;
    switch (v->type) {
        /* ... */
        case LEPT_ARRAY:
            {
                const size_t count = v->u.a.size;
                PUTC(c, '[');
                for (i = 0; i < count; i++) {
                    lept_stringify_value(c, &v->u.a.e[i]);
                    /* A separator follows every element except the last. */
                    if (i + 1 < count) {
                        PUTC(c, ',');
                    }
                }
                PUTC(c, ']');
            }
            break;
        case LEPT_OBJECT:
            {
                const size_t count = v->u.o.size;
                PUTC(c, '{');
                for (i = 0; i < count; i++) {
                    /* Member: key string, colon, then the member's value. */
                    lept_stringify_string(c, v->u.o.m[i].k, v->u.o.m[i].klen);
                    PUTC(c, ':');
                    lept_stringify_value(c, &v->u.o.m[i].v);
                    if (i + 1 < count) {
                        PUTC(c, ',');
                    }
                }
                PUTC(c, '}');
            }
            break;
        /* ... */
    }}

最终代码

#ifndef LEPT_PARSE_STRINGIFY_INIT_SIZE #define LEPT_PARSE_STRINGIFY_INIT_SIZE 256 #endif #define PUTS(c, s, len) memcpy(lept_context_push(c,len),s,len) static void lept_stringify_string(lept_context* c, const char* s, size_t len){ static const char hex_digits[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; size_t i, size; char* head, *p; assert(s != NULL); p = head = lept_context_push(c, size = len * 6 + 2);//每个字符可生成最长的形式是\u00XX，占 6 个字符，再加上前后两个双引号，也就是共 len * 6 + 2 个输出字符 *p++ = '"'; //PUTC(c, '"'); //PUTC 函数每次都会调用lept_context_push函数，这个函数的if(c->top + size >= c->size) 这一句开销比较大，需要计算而且作分支检查。即使使用 C99 的inline 关键字（或使用宏）去减少函数调用的开销，这个分支也无法避免；写成*p++比较好 for (i = 0; i < len; i++){ unsigned char ch = (unsigned char)s[i]; switch (ch) { /*case '\"': PUTC(c, "\\\"", 2); break; case '\\': PUTC(c, "\\\\", 2); break; case '\b': PUTC(c, "\\b", 2); break; case '\f': PUTC(c, "\\f", 2); break; case '\n': PUTC(c, "\\n", 2); break; case '\r': PUTC(c, "\\r", 2); break; case '\t': PUTC(c, "\\t", 2); break;*/ case '\"': *p++ = '\\'; *p++ = '\"'; break; case '\\': *p++ = '\\'; *p++ = '\\'; break; case '\b': *p++ = '\\'; *p++ = 'b'; break; case '\f': *p++ = '\\'; *p++ = 'f'; break; case '\n': *p++ = '\\'; *p++ = 'n'; break; case '\r': *p++ = '\\'; *p++ = 'r'; break; case '\t': *p++ = '\\'; *p++ = 't'; break; default: if (ch < 0x20){ //char buffer[7]; //sprintf(buffer, "\\uX", ch);//其他少于 0x20 的字符需要转义为 \u00xx 形式。 //PUTS(c, buffer, 0); *p++ = '\\'; *p++ = 'u'; *p++ = '0'; *p++ = '0'; *p++ = hex_digits[ch >> 4]; *p++ = hex_digits[ch & 15]; } else //PUTC(c, s[i]); *p++ = s[i]; } } //PUTC(c, '"'); *p++ = '"'; c->top -= size - (p - head); } static int lept_stringify_value(lept_context* c, const lept_value* v){ size_t i; int ret; switch (v->type) { case LEPT_NULL: PUTS(c, "null", 4); break; case LEPT_FALSE: PUTS(c, "false", 5); break; case LEPT_TRUE: PUTS(c, "true", 4); break; case LEPT_NUMBER: //{ // char* buffer = lept_context_push(c, 32); // int length = 
sprintf(buffer, "%.17g", v->u.n);//sprintf把浮点数转换为文本 // c->top -= 32 - length; //} c->top -= 32 - sprintf(lept_context_push(c, 32), "%.17g", v->u.n); break; case LEPT_STRING: lept_stringify_string(c, v->u.s.s, v->u.s.len); break; case LEPT_ARRAY: PUTC(c, '['); for (i = 0; i < v->u.a.size; i++){ if (i > 0) PUTC(c, ','); lept_stringify_value(c, &v->u.a.e[i]); } PUTC(c, ']'); break; case LEPT_OBJECT: PUTC(c, '{'); for (i = 0; i < v->u.o.size; i++){ if (i>0) PUTC(c, ','); lept_stringify_string(c, v->u.o.m[i].k, v->u.o.m[i].klen); PUTC(c, ':'); lept_stringify_value(c, &v->u.o.m[i].v); } PUTC(c, '}'); break; default: assert(0 && "invalid type"); } } char* lept_stringify(const lept_value* v, size_t* length) { lept_context c; assert(v != NULL); c.stack = (char*)malloc(c.size = LEPT_PARSE_STRINGIFY_INIT_SIZE); c.top = 0; lept_stringify_value(&c, v); if (length) *length = c.top; PUTC(&c, '\0'); return c.stack; }

最新回复(0)