From 21a5ce033c5a853bed3204ea9f0f7a3cfc1d164f Mon Sep 17 00:00:00 2001 From: ZyX Date: Tue, 3 Oct 2017 01:30:02 +0300 Subject: [PATCH] viml/parser/expressions: Add support for the dot operator and numbers --- src/nvim/viml/parser/expressions.c | 108 +++++++- src/nvim/viml/parser/expressions.h | 23 +- test/unit/viml/expressions/parser_spec.lua | 294 +++++++++++++++++++++ 3 files changed, 416 insertions(+), 9 deletions(-) diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 5d892fb8f8..4babf4312c 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -915,7 +915,8 @@ static inline void viml_pexpr_debug_print_token( // // NVimUnaryPlus -> NVimUnaryOperator // NVimBinaryPlus -> NVimBinaryOperator -// NVimConcatOrSubscript -> NVimBinaryOperator +// NVimConcat -> NVimBinaryOperator +// NVimConcatOrSubscript -> NVimConcat // // NVimRegister -> SpecialChar // NVimNumber -> Number @@ -971,6 +972,7 @@ static const ExprOpLvl node_type_to_op_lvl[] = { [kExprNodeUnknownFigure] = kEOpLvlParens, [kExprNodeLambda] = kEOpLvlParens, [kExprNodeDictLiteral] = kEOpLvlParens, + [kExprNodeListLiteral] = kEOpLvlParens, [kExprNodeArrow] = kEOpLvlArrow, @@ -985,17 +987,21 @@ static const ExprOpLvl node_type_to_op_lvl[] = { [kExprNodeComparison] = kEOpLvlComparison, [kExprNodeBinaryPlus] = kEOpLvlAddition, + [kExprNodeConcat] = kEOpLvlAddition, [kExprNodeUnaryPlus] = kEOpLvlUnary, + [kExprNodeConcatOrSubscript] = kEOpLvlSubscript, [kExprNodeSubscript] = kEOpLvlSubscript, [kExprNodeCurlyBracesIdentifier] = kEOpLvlComplexIdentifier, [kExprNodeComplexIdentifier] = kEOpLvlValue, [kExprNodePlainIdentifier] = kEOpLvlValue, + [kExprNodePlainKey] = kEOpLvlValue, [kExprNodeRegister] = kEOpLvlValue, - [kExprNodeListLiteral] = kEOpLvlValue, + [kExprNodeInteger] = kEOpLvlValue, + [kExprNodeFloat] = kEOpLvlValue, }; static const ExprOpAssociativity node_type_to_op_ass[] = { @@ -1008,6 +1014,7 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { [kExprNodeUnknownFigure] = kEOpAssLeft, [kExprNodeLambda] = kEOpAssNo, [kExprNodeDictLiteral] = kEOpAssNo, + [kExprNodeListLiteral] = kEOpAssNo, // Does not really matter. [kExprNodeArrow] = kEOpAssNo, @@ -1030,17 +1037,21 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { [kExprNodeComparison] = kEOpAssRight, [kExprNodeBinaryPlus] = kEOpAssLeft, + [kExprNodeConcat] = kEOpAssLeft, [kExprNodeUnaryPlus] = kEOpAssNo, + [kExprNodeConcatOrSubscript] = kEOpAssLeft, [kExprNodeSubscript] = kEOpAssLeft, [kExprNodeCurlyBracesIdentifier] = kEOpAssLeft, [kExprNodeComplexIdentifier] = kEOpAssLeft, [kExprNodePlainIdentifier] = kEOpAssNo, + [kExprNodePlainKey] = kEOpAssNo, [kExprNodeRegister] = kEOpAssNo, - [kExprNodeListLiteral] = kEOpAssNo, + [kExprNodeInteger] = kEOpAssNo, + [kExprNodeFloat] = kEOpAssNo, }; /// Get AST node priority level @@ -1420,10 +1431,20 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) [kENodeArgument] = kELFlagIsNotCmp, [kENodeArgumentSeparator] = kELFlagForbidScope, }; - // FIXME Determine when (not) to allow floating-point numbers. + const bool is_concat_or_subscript = ( + want_node == kENodeValue + && kv_size(ast_stack) > 1 + && (*kv_Z(ast_stack, 1))->type == kExprNodeConcatOrSubscript); const int lexer_additional_flags = ( kELFlagPeek - | ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0)); + | ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0) + | ((want_node == kENodeValue + && (kv_size(ast_stack) == 1 + || ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat + && ((*kv_Z(ast_stack, 1))->type + != kExprNodeConcatOrSubscript)))) + ? kELFlagAllowFloat + : 0)); LexExprToken cur_token = viml_pexpr_next_token( pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags); if (cur_token.type == kExprLexEOC) { @@ -1456,11 +1477,42 @@ viml_pexpr_parse_process_token: ExprASTNode *cur_node = NULL; assert((want_node == kENodeValue || want_node == kENodeArgument) == (*top_node_p == NULL)); + // Note: in Vim whether expression "cond?d.a:2" is valid depends both on + // "cond" and whether "d" is a dictionary: expression is valid if condition + // is true and "d" is a dictionary (with "a" key or it will complain about + // missing one, but this is not relevant); if any of the requirements is + // broken then this thing is parsed as "d . a:2" yielding missing colon + // error. This parser does not allow such ambiguity, especially because it + // simply can’t: whether "d" is a dictionary is not known at the parsing + // time. + // + // Here example will always contain a concat with "a:2" sucking colon, + // making expression invalid both because there is no longer a spare colon + // for ternary and because concatenating dictionary with anything is not + // valid. There are more cases when this will make a difference though. + const bool node_is_key = ( + is_concat_or_subscript + && (cur_token.type == kExprLexPlainIdentifier + ? (!cur_token.data.var.autoload + && cur_token.data.var.scope == 0) + : (cur_token.type == kExprLexNumber)) + && prev_token.type != kExprLexSpacing); + if (is_concat_or_subscript && !node_is_key) { + // Note: in Vim "d. a" (this is the reason behind `prev_token.type != + // kExprLexSpacing` part of the condition) as well as any other "d.{expr}" + // where "{expr}" does not look like a key is invalid whenever "d" happens + // to be a dictionary. Since parser has no idea whether preceding + // expression is actually a dictionary it can’t outright reject anything, + // so it turns kExprNodeConcatOrSubscript into kExprNodeConcat instead, + // which will yield different errors then Vim does in a number of + // circumstances, and in any case runtime and not parse time errors. + (*kv_Z(ast_stack, 1))->type = kExprNodeConcat; + } if ((want_node == kENodeArgumentSeparator && tok_type != kExprLexComma && tok_type != kExprLexArrow) || (want_node == kENodeArgument - && !(tok_type == kExprLexPlainIdentifier + && !(cur_token.type == kExprLexPlainIdentifier && cur_token.data.var.scope == 0 && !cur_token.data.var.autoload) && tok_type != kExprLexArrow)) { @@ -1844,7 +1896,10 @@ viml_pexpr_parse_figure_brace_closing_error: want_node = (want_node == kENodeArgument ? kENodeArgumentSeparator : kENodeOperator); - NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier); + NEW_NODE_WITH_CUR_POS(cur_node, + (node_is_key + ? kExprNodePlainKey + : kExprNodePlainIdentifier)); cur_node->data.var.scope = cur_token.data.var.scope; const size_t scope_shift = (cur_token.data.var.scope == 0 ? 0 @@ -1854,6 +1909,7 @@ viml_pexpr_parse_figure_brace_closing_error: cur_node->data.var.ident_len = cur_token.len - scope_shift; *top_node_p = cur_node; if (scope_shift) { + assert(!node_is_key); viml_parser_highlight(pstate, cur_token.start, 1, HL(IdentifierScope)); viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1, @@ -1863,7 +1919,9 @@ viml_pexpr_parse_figure_brace_closing_error: viml_parser_highlight(pstate, shifted_pos(cur_token.start, scope_shift), cur_token.len - scope_shift, - HL(Identifier)); + (node_is_key + ? HL(IdentifierKey) + : HL(Identifier))); } } else { if (cur_token.data.var.scope == 0) { @@ -1882,6 +1940,40 @@ viml_pexpr_parse_figure_brace_closing_error: } break; } + case kExprLexNumber: { + if (want_node != kENodeValue) { + OP_MISSING; + } + if (node_is_key) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainKey); + cur_node->data.var.ident = pline.data + cur_token.start.col; + cur_node->data.var.ident_len = cur_token.len; + HL_CUR_TOKEN(IdentifierKey); + } else if (cur_token.data.num.is_float) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeFloat); + cur_node->data.flt.value = cur_token.data.num.val.floating; + HL_CUR_TOKEN(Float); + } else { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeInteger); + cur_node->data.num.value = cur_token.data.num.val.integer; + HL_CUR_TOKEN(Number); + } + want_node = kENodeOperator; + *top_node_p = cur_node; + break; + } + case kExprLexDot: { + ADD_VALUE_IF_MISSING(_("E15: Unexpected dot: %.*s")); + if (prev_token.type == kExprLexSpacing) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcat); + HL_CUR_TOKEN(Concat); + } else { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcatOrSubscript); + HL_CUR_TOKEN(ConcatOrSubscript); + } + ADD_OP_NODE(cur_node); + break; + } case kExprLexParenthesis: { if (cur_token.data.brc.closing) { if (want_node == kENodeValue) { diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index 29903490bb..0d496c87ba 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -166,6 +166,8 @@ typedef enum { /// Looks like "string", "g:Foo", etc: consists from a single /// kExprLexPlainIdentifier token. kExprNodePlainIdentifier = 'i', + /// Plain dictionary key, for use with kExprNodeConcatOrSubscript + kExprNodePlainKey = 'k', /// Complex identifier: variable/function name with curly braces kExprNodeComplexIdentifier = 'I', /// Figure brace expression which is not yet known @@ -180,6 +182,19 @@ typedef enum { kExprNodeColon = ':', ///< Colon “operator”. kExprNodeArrow = '>', ///< Arrow “operator”. kExprNodeComparison = '=', ///< Various comparison operators. + /// Concat operator + /// + /// To be only used in cases when it is known for sure it is not a subscript. + kExprNodeConcat = '.', + /// Concat or subscript operator + /// + /// For cases when it is not obvious whether expression is a concat or + /// a subscript. May only have either number or plain identifier as the second + /// child. To make it easier to avoid curly braces in place of + /// kExprNodePlainIdentifier node kExprNodePlainKey is used. + kExprNodeConcatOrSubscript = 'S', + kExprNodeInteger = '0', ///< Integral number. + kExprNodeFloat = '1', ///< Floating-point number. } ExprASTNodeType; typedef struct expr_ast_node ExprASTNode; @@ -219,7 +234,7 @@ struct expr_ast_node { /// Points to inside parser reader state. const char *ident; size_t ident_len; ///< Actual identifier length. - } var; ///< For kExprNodePlainIdentifier. + } var; ///< For kExprNodePlainIdentifier and kExprNodePlainKey. struct { bool got_colon; ///< True if colon was seen. } ter; ///< For kExprNodeTernaryValue. @@ -228,6 +243,12 @@ struct expr_ast_node { ExprCaseCompareStrategy ccs; ///< Case comparison strategy. bool inv; ///< True if comparison is to be inverted. } cmp; ///< For kExprNodeComparison. + struct { + uvarnumber_T value; + } num; ///< For kExprNodeInteger. + struct { + float_T value; + } flt; ///< For kExprNodeFloat. } data; }; diff --git a/test/unit/viml/expressions/parser_spec.lua b/test/unit/viml/expressions/parser_spec.lua index efa88455e4..8d96c29db7 100644 --- a/test/unit/viml/expressions/parser_spec.lua +++ b/test/unit/viml/expressions/parser_spec.lua @@ -77,6 +77,7 @@ make_enum_conv_tab(lib, { 'kExprNodeNested', 'kExprNodeCall', 'kExprNodePlainIdentifier', + 'kExprNodePlainKey', 'kExprNodeComplexIdentifier', 'kExprNodeUnknownFigure', 'kExprNodeLambda', @@ -86,6 +87,10 @@ make_enum_conv_tab(lib, { 'kExprNodeColon', 'kExprNodeArrow', 'kExprNodeComparison', + 'kExprNodeConcat', + 'kExprNodeConcatOrSubscript', + 'kExprNodeInteger', + 'kExprNodeFloat', }, 'kExprNode', function(ret) east_node_type_tab = ret end) local function conv_east_node_type(typ) @@ -118,6 +123,9 @@ local function eastnode2lua(pstate, eastnode, checked_nodes) typ = typ .. ('(scope=%s,ident=%s)'):format( tostring(intchar2lua(eastnode.data.var.scope)), ffi.string(eastnode.data.var.ident, eastnode.data.var.ident_len)) + elseif typ == 'PlainKey' then + typ = typ .. ('(key=%s)'):format( + ffi.string(eastnode.data.var.ident, eastnode.data.var.ident_len)) elseif (typ == 'UnknownFigure' or typ == 'DictLiteral' or typ == 'CurlyBracesIdentifier' or typ == 'Lambda') then typ = typ .. ('(%s)'):format( @@ -128,6 +136,10 @@ local function eastnode2lua(pstate, eastnode, checked_nodes) typ = typ .. ('(type=%s,inv=%u,ccs=%s)'):format( conv_cmp_type(eastnode.data.cmp.type), eastnode.data.cmp.inv and 1 or 0, conv_ccs(eastnode.data.cmp.ccs)) + elseif typ == 'Integer' then + typ = typ .. ('(val=%u)'):format(tonumber(eastnode.data.num.value)) + elseif typ == 'Float' then + typ = typ .. ('(val=%e)'):format(tonumber(eastnode.data.flt.value)) end ret_str = typ .. ':' .. ret_str local can_simplify = true @@ -190,6 +202,8 @@ end) describe('Expressions parser', function() local function check_parsing(str, flags, exp_ast, exp_highlighting_fs) + flags = flags or 0 + local pstate = new_pstate({str}) local east = lib.viml_pexpr_parse(pstate, flags) local ast = east2lua(pstate, east) @@ -3649,4 +3663,284 @@ describe('Expressions parser', function() hl('Identifier', 'b', 1), }) end) + itp('works with concat/subscript', function() + check_parsing('.', 0, { + -- 0 + ast = { + { + 'ConcatOrSubscript:0:0:.', + children = { + 'Missing:0:0:', + }, + }, + }, + err = { + arg = '.', + msg = 'E15: Unexpected dot: %.*s', + }, + }, { + hl('InvalidConcatOrSubscript', '.'), + }) + + check_parsing('a.', 0, { + -- 01 + ast = { + { + 'ConcatOrSubscript:0:1:.', + children = { + 'PlainIdentifier(scope=0,ident=a):0:0:a', + }, + }, + }, + err = { + arg = '', + msg = 'E15: Expected value, got EOC: %.*s', + }, + }, { + hl('Identifier', 'a'), + hl('ConcatOrSubscript', '.'), + }) + + check_parsing('a.b', 0, { + -- 012 + ast = { + { + 'ConcatOrSubscript:0:1:.', + children = { + 'PlainIdentifier(scope=0,ident=a):0:0:a', + 'PlainKey(key=b):0:2:b', + }, + }, + }, + }, { + hl('Identifier', 'a'), + hl('ConcatOrSubscript', '.'), + hl('IdentifierKey', 'b'), + }) + + check_parsing('1.2', 0, { + -- 012 + ast = { + 'Float(val=1.200000e+00):0:0:1.2', + }, + }, { + hl('Float', '1.2'), + }) + + check_parsing('1.2 + 1.3e-5', 0, { + -- 012345678901 + -- 0 1 + ast = { + { + 'BinaryPlus:0:3: +', + children = { + 'Float(val=1.200000e+00):0:0:1.2', + 'Float(val=1.300000e-05):0:5: 1.3e-5', + }, + }, + }, + }, { + hl('Float', '1.2'), + hl('BinaryPlus', '+', 1), + hl('Float', '1.3e-5', 1), + }) + + check_parsing('a . 1.2 + 1.3e-5', 0, { + -- 0123456789012345 + -- 0 1 + ast = { + { + 'BinaryPlus:0:7: +', + children = { + { + 'Concat:0:1: .', + children = { + 'PlainIdentifier(scope=0,ident=a):0:0:a', + { + 'ConcatOrSubscript:0:5:.', + children = { + 'Integer(val=1):0:3: 1', + 'PlainKey(key=2):0:6:2', + }, + }, + }, + }, + 'Float(val=1.300000e-05):0:9: 1.3e-5', + }, + }, + }, + }, { + hl('Identifier', 'a'), + hl('Concat', '.', 1), + hl('Number', '1', 1), + hl('ConcatOrSubscript', '.'), + hl('IdentifierKey', '2'), + hl('BinaryPlus', '+', 1), + hl('Float', '1.3e-5', 1), + }) + + check_parsing('1.3e-5 + 1.2 . a', 0, { + -- 0123456789012345 + -- 0 1 + ast = { + { + 'Concat:0:12: .', + children = { + { + 'BinaryPlus:0:6: +', + children = { + 'Float(val=1.300000e-05):0:0:1.3e-5', + 'Float(val=1.200000e+00):0:8: 1.2', + }, + }, + 'PlainIdentifier(scope=0,ident=a):0:14: a', + }, + }, + }, + }, { + hl('Float', '1.3e-5'), + hl('BinaryPlus', '+', 1), + hl('Float', '1.2', 1), + hl('Concat', '.', 1), + hl('Identifier', 'a', 1), + }) + + check_parsing('1.3e-5 + a . 1.2', 0, { + -- 0123456789012345 + -- 0 1 + ast = { + { + 'Concat:0:10: .', + children = { + { + 'BinaryPlus:0:6: +', + children = { + 'Float(val=1.300000e-05):0:0:1.3e-5', + 'PlainIdentifier(scope=0,ident=a):0:8: a', + }, + }, + { + 'ConcatOrSubscript:0:14:.', + children = { + 'Integer(val=1):0:12: 1', + 'PlainKey(key=2):0:15:2', + }, + }, + }, + }, + }, + }, { + hl('Float', '1.3e-5'), + hl('BinaryPlus', '+', 1), + hl('Identifier', 'a', 1), + hl('Concat', '.', 1), + hl('Number', '1', 1), + hl('ConcatOrSubscript', '.'), + hl('IdentifierKey', '2'), + }) + + check_parsing('1.2.3', 0, { + -- 01234 + ast = { + { + 'ConcatOrSubscript:0:3:.', + children = { + { + 'ConcatOrSubscript:0:1:.', + children = { + 'Integer(val=1):0:0:1', + 'PlainKey(key=2):0:2:2', + }, + }, + 'PlainKey(key=3):0:4:3', + }, + }, + }, + }, { + hl('Number', '1'), + hl('ConcatOrSubscript', '.'), + hl('IdentifierKey', '2'), + hl('ConcatOrSubscript', '.'), + hl('IdentifierKey', '3'), + }) + + check_parsing('a.1.2', 0, { + -- 01234 + ast = { + { + 'ConcatOrSubscript:0:3:.', + children = { + { + 'ConcatOrSubscript:0:1:.', + children = { + 'PlainIdentifier(scope=0,ident=a):0:0:a', + 'PlainKey(key=1):0:2:1', + }, + }, + 'PlainKey(key=2):0:4:2', + }, + }, + }, + }, { + hl('Identifier', 'a'), + hl('ConcatOrSubscript', '.'), + hl('IdentifierKey', '1'), + hl('ConcatOrSubscript', '.'), + hl('IdentifierKey', '2'), + }) + + check_parsing('a . 1.2', 0, { + -- 0123456 + ast = { + { + 'Concat:0:1: .', + children = { + 'PlainIdentifier(scope=0,ident=a):0:0:a', + { + 'ConcatOrSubscript:0:5:.', + children = { + 'Integer(val=1):0:3: 1', + 'PlainKey(key=2):0:6:2', + }, + }, + }, + }, + }, + }, { + hl('Identifier', 'a'), + hl('Concat', '.', 1), + hl('Number', '1', 1), + hl('ConcatOrSubscript', '.'), + hl('IdentifierKey', '2'), + }) + + check_parsing('+a . +b', 0, { + -- 0123456 + ast = { + { + 'Concat:0:2: .', + children = { + { + 'UnaryPlus:0:0:+', + children = { + 'PlainIdentifier(scope=0,ident=a):0:1:a', + }, + }, + { + 'UnaryPlus:0:4: +', + children = { + 'PlainIdentifier(scope=0,ident=b):0:6:b', + }, + }, + }, + }, + }, + }, { + hl('UnaryPlus', '+'), + hl('Identifier', 'a'), + hl('Concat', '.', 1), + hl('UnaryPlus', '+', 1), + hl('Identifier', 'b'), + }) + end) end)