From 517ecb85f58ed6ac8b4d5443931612e75e7c7dc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=C5=82omiej=20Mary=C5=84czak?= Date: Fri, 6 Dec 2024 21:43:41 +0100 Subject: [PATCH] feat(stdlib): vim.json.encode(...,{escape_slash:boolean}) #30561 Problem: vim.json.encode escapes every slash in string values (for example in file paths), and is not optional. Use-case is for preventing HTML injections (eg. injecting `` closing tag); in the context of Nvim this is rarely useful. Solution: - Add a `escape_slash` flag to `vim.json.encode`. - Defaults to `false`. (This is a "breaking" change, but more like a bug fix.) --- runtime/doc/lua.txt | 7 +- runtime/doc/news.txt | 2 + runtime/lua/vim/_meta/json.lua | 5 +- src/cjson/lua_cjson.c | 121 +++++++++++++++++++++--------- test/functional/lua/json_spec.lua | 39 ++++++++++ 5 files changed, 136 insertions(+), 38 deletions(-) diff --git a/runtime/doc/lua.txt b/runtime/doc/lua.txt index 48fa595a53..80808abd51 100644 --- a/runtime/doc/lua.txt +++ b/runtime/doc/lua.txt @@ -811,11 +811,14 @@ vim.json.decode({str}, {opts}) *vim.json.decode()* Return: ~ (`any`) -vim.json.encode({obj}) *vim.json.encode()* +vim.json.encode({obj}, {opts}) *vim.json.encode()* Encodes (or "packs") Lua object {obj} as JSON in a Lua string. Parameters: ~ - • {obj} (`any`) + • {obj} (`any`) + • {opts} (`table?`) Options table with keys: + • escape_slash: (boolean) (default false) When true, escapes + `/` character in JSON strings Return: ~ (`string`) diff --git a/runtime/doc/news.txt b/runtime/doc/news.txt index 8b4b396a26..3a3e1a8d64 100644 --- a/runtime/doc/news.txt +++ b/runtime/doc/news.txt @@ -60,6 +60,7 @@ DEFAULTS current buffer, respectively. • 'number', 'relativenumber', 'signcolumn', and 'foldcolumn' are disabled in |terminal| buffers. See |terminal-config| for an example of changing these defaults. +• |vim.json.encode()| no longer escapes the forward slash symbol by default DIAGNOSTICS @@ -169,6 +170,7 @@ The following new features were added. API • |nvim__ns_set()| can set properties for a namespace +• |vim.json.encode()| has an option to enable forward slash escaping DEFAULTS diff --git a/runtime/lua/vim/_meta/json.lua b/runtime/lua/vim/_meta/json.lua index 07d89aafc8..1a7e87db9c 100644 --- a/runtime/lua/vim/_meta/json.lua +++ b/runtime/lua/vim/_meta/json.lua @@ -35,5 +35,8 @@ function vim.json.decode(str, opts) end --- Encodes (or "packs") Lua object {obj} as JSON in a Lua string. ---@param obj any +---@param opts? table Options table with keys: +--- - escape_slash: (boolean) (default false) When true, escapes `/` +--- character in JSON strings ---@return string -function vim.json.encode(obj) end +function vim.json.encode(obj, opts) end diff --git a/src/cjson/lua_cjson.c b/src/cjson/lua_cjson.c index 254355e5a2..e4bd0bcf6b 100644 --- a/src/cjson/lua_cjson.c +++ b/src/cjson/lua_cjson.c @@ -172,6 +172,16 @@ typedef struct { int decode_array_with_array_mt; } json_config_t; +typedef struct { + const char **char2escape[256]; +} json_encode_options_t; + +typedef struct { + json_config_t *cfg; + json_encode_options_t *options; + strbuf_t *json; +} json_encode_t; + typedef struct { /* convert null in json objects to lua nil instead of vim.NIL */ bool luanil_object; @@ -209,7 +219,7 @@ static const char *char2escape[256] = { "\\u0018", "\\u0019", "\\u001a", "\\u001b", "\\u001c", "\\u001d", "\\u001e", "\\u001f", NULL, NULL, "\\\"", NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, "\\/", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -555,11 +565,11 @@ static void json_create_config(lua_State *l) /* ===== ENCODING ===== */ -static void json_encode_exception(lua_State *l, json_config_t *cfg, strbuf_t *json, int lindex, +static void json_encode_exception(lua_State *l, json_encode_t *ctx, int lindex, const char *reason) { - if (!cfg->encode_keep_buffer) - strbuf_free(json); + if (!ctx->cfg->encode_keep_buffer) + strbuf_free(ctx->json); luaL_error(l, "Cannot serialise %s: %s", lua_typename(l, lua_type(l, lindex)), reason); } @@ -570,12 +580,13 @@ static void json_encode_exception(lua_State *l, json_config_t *cfg, strbuf_t *js * - String (Lua stack index) * * Returns nothing. Doesn't remove string from Lua stack */ -static void json_append_string(lua_State *l, strbuf_t *json, int lindex) +static void json_append_string(lua_State *l, json_encode_t *ctx, int lindex) { const char *escstr; unsigned i; const char *str; size_t len; + strbuf_t *json = ctx->json; str = lua_tolstring(l, lindex, &len); @@ -587,7 +598,7 @@ static void json_append_string(lua_State *l, strbuf_t *json, int lindex) strbuf_append_char_unsafe(json, '\"'); for (i = 0; i < len; i++) { - escstr = char2escape[(unsigned char)str[i]]; + escstr = (*ctx->options->char2escape)[(unsigned char)str[i]]; if (escstr) strbuf_append_string(json, escstr); else @@ -600,11 +611,12 @@ static void json_append_string(lua_State *l, strbuf_t *json, int lindex) * -1 object (not a pure array) * >=0 elements in array */ -static int lua_array_length(lua_State *l, json_config_t *cfg, strbuf_t *json) +static int lua_array_length(lua_State *l, json_encode_t *ctx) { double k; int max; int items; + json_config_t *cfg = ctx->cfg; max = 0; items = 0; @@ -635,7 +647,7 @@ static int lua_array_length(lua_State *l, json_config_t *cfg, strbuf_t *json) max > items * cfg->encode_sparse_ratio && max > cfg->encode_sparse_safe) { if (!cfg->encode_sparse_convert) - json_encode_exception(l, cfg, json, -1, "excessively sparse array"); + json_encode_exception(l, ctx, -1, "excessively sparse array"); return -1; } @@ -666,17 +678,18 @@ static void json_check_encode_depth(lua_State *l, json_config_t *cfg, current_depth); } -static void json_append_data(lua_State *l, json_config_t *cfg, - int current_depth, strbuf_t *json); +static void json_append_data(lua_State *l, json_encode_t *cfg, + int current_depth); /* json_append_array args: * - lua_State * - JSON strbuf * - Size of passwd Lua array (top of stack) */ -static void json_append_array(lua_State *l, json_config_t *cfg, int current_depth, - strbuf_t *json, int array_length) +static void json_append_array(lua_State *l, json_encode_t *ctx, int current_depth, + int array_length) { int comma, i; + strbuf_t *json = ctx->json; strbuf_append_char(json, '['); @@ -688,23 +701,25 @@ static void json_append_array(lua_State *l, json_config_t *cfg, int current_dept comma = 1; lua_rawgeti(l, -1, i); - json_append_data(l, cfg, current_depth, json); + json_append_data(l, ctx, current_depth); lua_pop(l, 1); } strbuf_append_char(json, ']'); } -static void json_append_number(lua_State *l, json_config_t *cfg, - strbuf_t *json, int lindex) +static void json_append_number(lua_State *l, json_encode_t *ctx, + int lindex) { double num = lua_tonumber(l, lindex); int len; + json_config_t *cfg = ctx->cfg; + strbuf_t *json = ctx->json; if (cfg->encode_invalid_numbers == 0) { /* Prevent encoding invalid numbers */ if (isinf(num) || isnan(num)) - json_encode_exception(l, cfg, json, lindex, + json_encode_exception(l, ctx, lindex, "must not be NaN or Infinity"); } else if (cfg->encode_invalid_numbers == 1) { /* Encode NaN/Infinity separately to ensure Javascript compatible @@ -733,10 +748,11 @@ static void json_append_number(lua_State *l, json_config_t *cfg, strbuf_extend_length(json, len); } -static void json_append_object(lua_State *l, json_config_t *cfg, - int current_depth, strbuf_t *json) +static void json_append_object(lua_State *l, json_encode_t *ctx, + int current_depth) { int comma, keytype; + strbuf_t *json = ctx->json; /* Object */ strbuf_append_char(json, '{'); @@ -754,19 +770,19 @@ static void json_append_object(lua_State *l, json_config_t *cfg, keytype = lua_type(l, -2); if (keytype == LUA_TNUMBER) { strbuf_append_char(json, '"'); - json_append_number(l, cfg, json, -2); + json_append_number(l, ctx, -2); strbuf_append_mem(json, "\":", 2); } else if (keytype == LUA_TSTRING) { - json_append_string(l, json, -2); + json_append_string(l, ctx, -2); strbuf_append_char(json, ':'); } else { - json_encode_exception(l, cfg, json, -2, + json_encode_exception(l, ctx, -2, "table key must be a number or string"); /* never returns */ } /* table, key, value */ - json_append_data(l, cfg, current_depth, json); + json_append_data(l, ctx, current_depth); lua_pop(l, 1); /* table, key */ } @@ -775,20 +791,22 @@ static void json_append_object(lua_State *l, json_config_t *cfg, } /* Serialise Lua data into JSON string. */ -static void json_append_data(lua_State *l, json_config_t *cfg, - int current_depth, strbuf_t *json) +static void json_append_data(lua_State *l, json_encode_t *ctx, + int current_depth) { int len; int as_array = 0; int as_empty_dict = 0; int has_metatable; + json_config_t *cfg = ctx->cfg; + strbuf_t *json = ctx->json; switch (lua_type(l, -1)) { case LUA_TSTRING: - json_append_string(l, json, -1); + json_append_string(l, ctx, -1); break; case LUA_TNUMBER: - json_append_number(l, cfg, json, -1); + json_append_number(l, ctx, -1); break; case LUA_TBOOLEAN: if (lua_toboolean(l, -1)) @@ -818,12 +836,12 @@ static void json_append_data(lua_State *l, json_config_t *cfg, if (as_array) { len = lua_objlen(l, -1); - json_append_array(l, cfg, current_depth, json, len); + json_append_array(l, ctx, current_depth, len); } else { - len = lua_array_length(l, cfg, json); + len = lua_array_length(l, ctx); if (len > 0 || (len == 0 && !cfg->encode_empty_table_as_object && !as_empty_dict)) { - json_append_array(l, cfg, current_depth, json, len); + json_append_array(l, ctx, current_depth, len); } else { if (has_metatable) { lua_getmetatable(l, -1); @@ -833,11 +851,11 @@ static void json_append_data(lua_State *l, json_config_t *cfg, as_array = lua_rawequal(l, -1, -2); lua_pop(l, 2); /* pop pointer + metatable */ if (as_array) { - json_append_array(l, cfg, current_depth, json, 0); + json_append_array(l, ctx, current_depth, 0); break; } } - json_append_object(l, cfg, current_depth, json); + json_append_object(l, ctx, current_depth); } } break; @@ -846,7 +864,7 @@ static void json_append_data(lua_State *l, json_config_t *cfg, break; case LUA_TLIGHTUSERDATA: if (lua_touserdata(l, -1) == &json_array) { - json_append_array(l, cfg, current_depth, json, 0); + json_append_array(l, ctx, current_depth, 0); } break; case LUA_TUSERDATA: @@ -862,7 +880,7 @@ static void json_append_data(lua_State *l, json_config_t *cfg, default: /* Remaining types (LUA_TFUNCTION, LUA_TUSERDATA, LUA_TTHREAD, * and LUA_TLIGHTUSERDATA) cannot be serialised */ - json_encode_exception(l, cfg, json, -1, "type not supported"); + json_encode_exception(l, ctx, -1, "type not supported"); /* never returns */ } } @@ -870,12 +888,44 @@ static void json_append_data(lua_State *l, json_config_t *cfg, static int json_encode(lua_State *l) { json_config_t *cfg = json_fetch_config(l); + json_encode_options_t options = { .char2escape = { char2escape } }; + json_encode_t ctx = { .options = &options, .cfg = cfg }; strbuf_t local_encode_buf; strbuf_t *encode_buf; char *json; int len; + const char *customChar2escape[256]; - luaL_argcheck(l, lua_gettop(l) == 1, 1, "expected 1 argument"); + switch (lua_gettop(l)) { + case 1: + break; + case 2: + luaL_checktype(l, 2, LUA_TTABLE); + lua_getfield(l, 2, "escape_slash"); + + /* We only handle the escape_slash option for now */ + if (lua_isnil(l, -1)) { + lua_pop(l, 2); + break; + } + + luaL_checktype(l, -1, LUA_TBOOLEAN); + + int escape_slash = lua_toboolean(l, -1); + + if (escape_slash) { + /* This can be optimised by adding a new hard-coded escape table for this case, + * but this path will rarely if ever be used, so let's just memcpy.*/ + memcpy(customChar2escape, char2escape, sizeof(char2escape)); + customChar2escape['/'] = "\\/"; + *ctx.options->char2escape = customChar2escape; + } + + lua_pop(l, 2); + break; + default: + return luaL_error (l, "expected 1 or 2 arguments"); + } if (!cfg->encode_keep_buffer) { /* Use private buffer */ @@ -887,7 +937,8 @@ static int json_encode(lua_State *l) strbuf_reset(encode_buf); } - json_append_data(l, cfg, 0, encode_buf); + ctx.json = encode_buf; + json_append_data(l, &ctx, 0); json = strbuf_string(encode_buf, &len); lua_pushlstring(l, json, len); diff --git a/test/functional/lua/json_spec.lua b/test/functional/lua/json_spec.lua index a6e814d739..8df42fcaa1 100644 --- a/test/functional/lua/json_spec.lua +++ b/test/functional/lua/json_spec.lua @@ -152,6 +152,45 @@ describe('vim.json.encode()', function() clear() end) + it('dumps strings with & without escaped slash', function() + -- With slash + eq('"Test\\/"', exec_lua([[return vim.json.encode('Test/', { escape_slash = true })]])) + eq( + 'Test/', + exec_lua([[return vim.json.decode(vim.json.encode('Test/', { escape_slash = true }))]]) + ) + + -- Without slash + eq('"Test/"', exec_lua([[return vim.json.encode('Test/')]])) + eq('"Test/"', exec_lua([[return vim.json.encode('Test/', {})]])) + eq('"Test/"', exec_lua([[return vim.json.encode('Test/', { _invalid = true })]])) + eq('"Test/"', exec_lua([[return vim.json.encode('Test/', { escape_slash = false })]])) + eq( + '"Test/"', + exec_lua([[return vim.json.encode('Test/', { _invalid = true, escape_slash = false })]]) + ) + eq( + 'Test/', + exec_lua([[return vim.json.decode(vim.json.encode('Test/', { escape_slash = false }))]]) + ) + + -- Checks for for global side-effects + eq( + '"Test/"', + exec_lua([[ + vim.json.encode('Test/', { escape_slash = true }) + return vim.json.encode('Test/') + ]]) + ) + eq( + '"Test\\/"', + exec_lua([[ + vim.json.encode('Test/', { escape_slash = false }) + return vim.json.encode('Test/', { escape_slash = true }) + ]]) + ) + end) + it('dumps strings', function() eq('"Test"', exec_lua([[return vim.json.encode('Test')]])) eq('""', exec_lua([[return vim.json.encode('')]]))