viml/parser/expressions: Add support for the dot operator and numbers

2024-12-20 03:05:11 -07:00 · 2017-10-03 01:30:02 +03:00 · 2017-10-03 01:30:02 +03:00 · 21a5ce033c
commit 21a5ce033c
parent 163792e9b9
3 changed files with 416 additions and 9 deletions
--- a/src/nvim/viml/parser/expressions.c
+++ b/src/nvim/viml/parser/expressions.c
@ -915,7 +915,8 @@ static inline void viml_pexpr_debug_print_token(
 //
 // NVimUnaryPlus -> NVimUnaryOperator
 // NVimBinaryPlus -> NVimBinaryOperator
-// NVimConcatOrSubscript -> NVimBinaryOperator
+// NVimConcat -> NVimBinaryOperator
+// NVimConcatOrSubscript -> NVimConcat
 //
 // NVimRegister -> SpecialChar
 // NVimNumber -> Number
@ -971,6 +972,7 @@ static const ExprOpLvl node_type_to_op_lvl[] = {
  [kExprNodeUnknownFigure] = kEOpLvlParens,
  [kExprNodeLambda] = kEOpLvlParens,
  [kExprNodeDictLiteral] = kEOpLvlParens,
+  [kExprNodeListLiteral] = kEOpLvlParens,

  [kExprNodeArrow] = kEOpLvlArrow,

@ -985,17 +987,21 @@ static const ExprOpLvl node_type_to_op_lvl[] = {
  [kExprNodeComparison] = kEOpLvlComparison,

  [kExprNodeBinaryPlus] = kEOpLvlAddition,
+  [kExprNodeConcat] = kEOpLvlAddition,

  [kExprNodeUnaryPlus] = kEOpLvlUnary,

+  [kExprNodeConcatOrSubscript] = kEOpLvlSubscript,
  [kExprNodeSubscript] = kEOpLvlSubscript,

  [kExprNodeCurlyBracesIdentifier] = kEOpLvlComplexIdentifier,

  [kExprNodeComplexIdentifier] = kEOpLvlValue,
  [kExprNodePlainIdentifier] = kEOpLvlValue,
+  [kExprNodePlainKey] = kEOpLvlValue,
  [kExprNodeRegister] = kEOpLvlValue,
-  [kExprNodeListLiteral] = kEOpLvlValue,
+  [kExprNodeInteger] = kEOpLvlValue,
+  [kExprNodeFloat] = kEOpLvlValue,
 };

 static const ExprOpAssociativity node_type_to_op_ass[] = {
@ -1008,6 +1014,7 @@ static const ExprOpAssociativity node_type_to_op_ass[] = {
  [kExprNodeUnknownFigure] = kEOpAssLeft,
  [kExprNodeLambda] = kEOpAssNo,
  [kExprNodeDictLiteral] = kEOpAssNo,
+  [kExprNodeListLiteral] = kEOpAssNo,

  // Does not really matter.
  [kExprNodeArrow] = kEOpAssNo,
@ -1030,17 +1037,21 @@ static const ExprOpAssociativity node_type_to_op_ass[] = {
  [kExprNodeComparison] = kEOpAssRight,

  [kExprNodeBinaryPlus] = kEOpAssLeft,
+  [kExprNodeConcat] = kEOpAssLeft,

  [kExprNodeUnaryPlus] = kEOpAssNo,

+  [kExprNodeConcatOrSubscript] = kEOpAssLeft,
  [kExprNodeSubscript] = kEOpAssLeft,

  [kExprNodeCurlyBracesIdentifier] = kEOpAssLeft,

  [kExprNodeComplexIdentifier] = kEOpAssLeft,
  [kExprNodePlainIdentifier] = kEOpAssNo,
+  [kExprNodePlainKey] = kEOpAssNo,
  [kExprNodeRegister] = kEOpAssNo,
-  [kExprNodeListLiteral] = kEOpAssNo,
+  [kExprNodeInteger] = kEOpAssNo,
+  [kExprNodeFloat] = kEOpAssNo,
 };

 /// Get AST node priority level
@ -1420,10 +1431,20 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags)
      [kENodeArgument] = kELFlagIsNotCmp,
      [kENodeArgumentSeparator] = kELFlagForbidScope,
    };
-    // FIXME Determine when (not) to allow floating-point numbers.
+    const bool is_concat_or_subscript = (
+        want_node == kENodeValue
+        && kv_size(ast_stack) > 1
+        && (*kv_Z(ast_stack, 1))->type == kExprNodeConcatOrSubscript);
    const int lexer_additional_flags = (
        kELFlagPeek
-        | ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0));
+        | ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0)
+        | ((want_node == kENodeValue
+            && (kv_size(ast_stack) == 1
+                || ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat
+                    && ((*kv_Z(ast_stack, 1))->type
+                        != kExprNodeConcatOrSubscript))))
+            ? kELFlagAllowFloat
+            : 0));
    LexExprToken cur_token = viml_pexpr_next_token(
        pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags);
    if (cur_token.type == kExprLexEOC) {
@ -1456,11 +1477,42 @@ viml_pexpr_parse_process_token:
    ExprASTNode *cur_node = NULL;
    assert((want_node == kENodeValue || want_node == kENodeArgument)
           == (*top_node_p == NULL));
+    // Note: in Vim whether expression "cond?d.a:2" is valid depends both on
+    // "cond" and whether "d" is a dictionary: expression is valid if condition
+    // is true and "d" is a dictionary (with "a" key or it will complain about
+    // missing one, but this is not relevant); if any of the requirements is
+    // broken then this thing is parsed as "d . a:2" yielding missing colon
+    // error. This parser does not allow such ambiguity, especially because it
+    // simply can’t: whether "d" is a dictionary is not known at the parsing
+    // time.
+    //
+    // Here the example will always contain a concat with "a:2" consuming the
+    // colon, making the expression invalid both because there is no longer a
+    // spare colon for the ternary and because concatenating a dictionary with
+    // anything is not valid. There are more cases where this matters though.
+    const bool node_is_key = (
+        is_concat_or_subscript
+        && (cur_token.type == kExprLexPlainIdentifier
+            ? (!cur_token.data.var.autoload
+               && cur_token.data.var.scope == 0)
+            : (cur_token.type == kExprLexNumber))
+        && prev_token.type != kExprLexSpacing);
+    if (is_concat_or_subscript && !node_is_key) {
+      // Note: in Vim "d. a" (this is the reason behind `prev_token.type !=
+      // kExprLexSpacing` part of the condition) as well as any other "d.{expr}"
+      // where "{expr}" does not look like a key is invalid whenever "d" happens
+      // to be a dictionary. Since parser has no idea whether preceding
+      // expression is actually a dictionary it can’t outright reject anything,
+      // so it turns kExprNodeConcatOrSubscript into kExprNodeConcat instead,
+      // which will yield different errors than Vim does in a number of
+      // circumstances, and in any case runtime and not parse time errors.
+      (*kv_Z(ast_stack, 1))->type = kExprNodeConcat;
+    }
    if ((want_node == kENodeArgumentSeparator
         && tok_type != kExprLexComma
         && tok_type != kExprLexArrow)
        || (want_node == kENodeArgument
-            && !(tok_type == kExprLexPlainIdentifier
+            && !(cur_token.type == kExprLexPlainIdentifier
                 && cur_token.data.var.scope == 0
                 && !cur_token.data.var.autoload)
            && tok_type != kExprLexArrow)) {
@ -1844,7 +1896,10 @@ viml_pexpr_parse_figure_brace_closing_error:
          want_node = (want_node == kENodeArgument
                       ? kENodeArgumentSeparator
                       : kENodeOperator);
-          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier);
+          NEW_NODE_WITH_CUR_POS(cur_node,
+                                (node_is_key
+                                 ? kExprNodePlainKey
+                                 : kExprNodePlainIdentifier));
          cur_node->data.var.scope = cur_token.data.var.scope;
          const size_t scope_shift = (cur_token.data.var.scope == 0
                                      ? 0
@ -1854,6 +1909,7 @@ viml_pexpr_parse_figure_brace_closing_error:
          cur_node->data.var.ident_len = cur_token.len - scope_shift;
          *top_node_p = cur_node;
          if (scope_shift) {
+            assert(!node_is_key);
            viml_parser_highlight(pstate, cur_token.start, 1,
                                  HL(IdentifierScope));
            viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1,
@ -1863,7 +1919,9 @@ viml_pexpr_parse_figure_brace_closing_error:
            viml_parser_highlight(pstate, shifted_pos(cur_token.start,
                                                      scope_shift),
                                  cur_token.len - scope_shift,
-                                  HL(Identifier));
+                                  (node_is_key
+                                   ? HL(IdentifierKey)
+                                   : HL(Identifier)));
          }
        } else {
          if (cur_token.data.var.scope == 0) {
@ -1882,6 +1940,40 @@ viml_pexpr_parse_figure_brace_closing_error:
        }
        break;
      }
+      case kExprLexNumber: {
+        if (want_node != kENodeValue) {
+          OP_MISSING;
+        }
+        if (node_is_key) {
+          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainKey);
+          cur_node->data.var.ident = pline.data + cur_token.start.col;
+          cur_node->data.var.ident_len = cur_token.len;
+          HL_CUR_TOKEN(IdentifierKey);
+        } else if (cur_token.data.num.is_float) {
+          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeFloat);
+          cur_node->data.flt.value = cur_token.data.num.val.floating;
+          HL_CUR_TOKEN(Float);
+        } else {
+          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeInteger);
+          cur_node->data.num.value = cur_token.data.num.val.integer;
+          HL_CUR_TOKEN(Number);
+        }
+        want_node = kENodeOperator;
+        *top_node_p = cur_node;
+        break;
+      }
+      case kExprLexDot: {
+        ADD_VALUE_IF_MISSING(_("E15: Unexpected dot: %.*s"));
+        if (prev_token.type == kExprLexSpacing) {
+          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcat);
+          HL_CUR_TOKEN(Concat);
+        } else {
+          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcatOrSubscript);
+          HL_CUR_TOKEN(ConcatOrSubscript);
+        }
+        ADD_OP_NODE(cur_node);
+        break;
+      }
      case kExprLexParenthesis: {
        if (cur_token.data.brc.closing) {
          if (want_node == kENodeValue) {
--- a/src/nvim/viml/parser/expressions.h
+++ b/src/nvim/viml/parser/expressions.h
@ -166,6 +166,8 @@ typedef enum {
  /// Looks like "string", "g:Foo", etc: consists from a single 
  /// kExprLexPlainIdentifier token.
  kExprNodePlainIdentifier = 'i',
+  /// Plain dictionary key, for use with kExprNodeConcatOrSubscript
+  kExprNodePlainKey = 'k',
  /// Complex identifier: variable/function name with curly braces
  kExprNodeComplexIdentifier = 'I',
  /// Figure brace expression which is not yet known
@ -180,6 +182,19 @@ typedef enum {
  kExprNodeColon = ':',  ///< Colon “operator”.
  kExprNodeArrow = '>',  ///< Arrow “operator”.
  kExprNodeComparison = '=',  ///< Various comparison operators.
+  /// Concat operator
+  ///
+  /// To be only used in cases when it is known for sure it is not a subscript.
+  kExprNodeConcat = '.',
+  /// Concat or subscript operator
+  ///
+  /// For cases when it is not obvious whether the expression is a concat or
+  /// a subscript. May only have either a number or a plain identifier as the
+  /// second child. To make it easier to avoid curly braces, a
+  /// kExprNodePlainKey node is used in place of kExprNodePlainIdentifier.
+  kExprNodeConcatOrSubscript = 'S',
+  kExprNodeInteger = '0',  ///< Integral number.
+  kExprNodeFloat = '1',  ///< Floating-point number.
 } ExprASTNodeType;

 typedef struct expr_ast_node ExprASTNode;
@ -219,7 +234,7 @@ struct expr_ast_node {
      /// Points to inside parser reader state.
      const char *ident;
      size_t ident_len;  ///< Actual identifier length.
-    } var;  ///< For kExprNodePlainIdentifier.
+    } var;  ///< For kExprNodePlainIdentifier and kExprNodePlainKey.
    struct {
      bool got_colon;  ///< True if colon was seen.
    } ter;  ///< For kExprNodeTernaryValue.
@ -228,6 +243,12 @@ struct expr_ast_node {
      ExprCaseCompareStrategy ccs;  ///< Case comparison strategy.
      bool inv;  ///< True if comparison is to be inverted.
    } cmp;  ///< For kExprNodeComparison.
+    struct {
+      uvarnumber_T value;
+    } num;  ///< For kExprNodeInteger.
+    struct {
+      float_T value;
+    } flt;  ///< For kExprNodeFloat.
  } data;
 };

--- a/test/unit/viml/expressions/parser_spec.lua
+++ b/test/unit/viml/expressions/parser_spec.lua
@ -77,6 +77,7 @@ make_enum_conv_tab(lib, {
  'kExprNodeNested',
  'kExprNodeCall',
  'kExprNodePlainIdentifier',
+  'kExprNodePlainKey',
  'kExprNodeComplexIdentifier',
  'kExprNodeUnknownFigure',
  'kExprNodeLambda',
@ -86,6 +87,10 @@ make_enum_conv_tab(lib, {
  'kExprNodeColon',
  'kExprNodeArrow',
  'kExprNodeComparison',
+  'kExprNodeConcat',
+  'kExprNodeConcatOrSubscript',
+  'kExprNodeInteger',
+  'kExprNodeFloat',
 }, 'kExprNode', function(ret) east_node_type_tab = ret end)

 local function conv_east_node_type(typ)
@ -118,6 +123,9 @@ local function eastnode2lua(pstate, eastnode, checked_nodes)
    typ = typ .. ('(scope=%s,ident=%s)'):format(
      tostring(intchar2lua(eastnode.data.var.scope)),
      ffi.string(eastnode.data.var.ident, eastnode.data.var.ident_len))
+  elseif typ == 'PlainKey' then
+    typ = typ .. ('(key=%s)'):format(
+      ffi.string(eastnode.data.var.ident, eastnode.data.var.ident_len))
  elseif (typ == 'UnknownFigure' or typ == 'DictLiteral'
          or typ == 'CurlyBracesIdentifier' or typ == 'Lambda') then
    typ = typ .. ('(%s)'):format(
@ -128,6 +136,10 @@ local function eastnode2lua(pstate, eastnode, checked_nodes)
    typ = typ .. ('(type=%s,inv=%u,ccs=%s)'):format(
      conv_cmp_type(eastnode.data.cmp.type), eastnode.data.cmp.inv and 1 or 0,
      conv_ccs(eastnode.data.cmp.ccs))
+  elseif typ == 'Integer' then
+    typ = typ .. ('(val=%u)'):format(tonumber(eastnode.data.num.value))
+  elseif typ == 'Float' then
+    typ = typ .. ('(val=%e)'):format(tonumber(eastnode.data.flt.value))
  end
  ret_str = typ .. ':' .. ret_str
  local can_simplify = true
@ -190,6 +202,8 @@ end)

 describe('Expressions parser', function()
  local function check_parsing(str, flags, exp_ast, exp_highlighting_fs)
+    flags = flags or 0
+
    local pstate = new_pstate({str})
    local east = lib.viml_pexpr_parse(pstate, flags)
    local ast = east2lua(pstate, east)
@ -3649,4 +3663,284 @@ describe('Expressions parser', function()
      hl('Identifier', 'b', 1),
    })
  end)
+  itp('works with concat/subscript', function()
+    check_parsing('.', 0, {
+      --           0
+      ast = {
+        {
+          'ConcatOrSubscript:0:0:.',
+          children = {
+            'Missing:0:0:',
+          },
+        },
+      },
+      err = {
+        arg = '.',
+        msg = 'E15: Unexpected dot: %.*s',
+      },
+    }, {
+      hl('InvalidConcatOrSubscript', '.'),
+    })
+
+    check_parsing('a.', 0, {
+      --           01
+      ast = {
+        {
+          'ConcatOrSubscript:0:1:.',
+          children = {
+            'PlainIdentifier(scope=0,ident=a):0:0:a',
+          },
+        },
+      },
+      err = {
+        arg = '',
+        msg = 'E15: Expected value, got EOC: %.*s',
+      },
+    }, {
+      hl('Identifier', 'a'),
+      hl('ConcatOrSubscript', '.'),
+    })
+
+    check_parsing('a.b', 0, {
+      --           012
+      ast = {
+        {
+          'ConcatOrSubscript:0:1:.',
+          children = {
+            'PlainIdentifier(scope=0,ident=a):0:0:a',
+            'PlainKey(key=b):0:2:b',
+          },
+        },
+      },
+    }, {
+      hl('Identifier', 'a'),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', 'b'),
+    })
+
+    check_parsing('1.2', 0, {
+      --           012
+      ast = {
+        'Float(val=1.200000e+00):0:0:1.2',
+      },
+    }, {
+      hl('Float', '1.2'),
+    })
+
+    check_parsing('1.2 + 1.3e-5', 0, {
+      --           012345678901
+      --           0         1
+      ast = {
+        {
+          'BinaryPlus:0:3: +',
+          children = {
+            'Float(val=1.200000e+00):0:0:1.2',
+            'Float(val=1.300000e-05):0:5: 1.3e-5',
+          },
+        },
+      },
+    }, {
+      hl('Float', '1.2'),
+      hl('BinaryPlus', '+', 1),
+      hl('Float', '1.3e-5', 1),
+    })
+
+    check_parsing('a . 1.2 + 1.3e-5', 0, {
+      --           0123456789012345
+      --           0         1
+      ast = {
+        {
+          'BinaryPlus:0:7: +',
+          children = {
+            {
+              'Concat:0:1: .',
+              children = {
+                'PlainIdentifier(scope=0,ident=a):0:0:a',
+                {
+                  'ConcatOrSubscript:0:5:.',
+                  children = {
+                    'Integer(val=1):0:3: 1',
+                    'PlainKey(key=2):0:6:2',
+                  },
+                },
+              },
+            },
+            'Float(val=1.300000e-05):0:9: 1.3e-5',
+          },
+        },
+      },
+    }, {
+      hl('Identifier', 'a'),
+      hl('Concat', '.', 1),
+      hl('Number', '1', 1),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', '2'),
+      hl('BinaryPlus', '+', 1),
+      hl('Float', '1.3e-5', 1),
+    })
+
+    check_parsing('1.3e-5 + 1.2 . a', 0, {
+      --           0123456789012345
+      --           0         1
+      ast = {
+        {
+          'Concat:0:12: .',
+          children = {
+            {
+              'BinaryPlus:0:6: +',
+              children = {
+                'Float(val=1.300000e-05):0:0:1.3e-5',
+                'Float(val=1.200000e+00):0:8: 1.2',
+              },
+            },
+            'PlainIdentifier(scope=0,ident=a):0:14: a',
+          },
+        },
+      },
+    }, {
+      hl('Float', '1.3e-5'),
+      hl('BinaryPlus', '+', 1),
+      hl('Float', '1.2', 1),
+      hl('Concat', '.', 1),
+      hl('Identifier', 'a', 1),
+    })
+
+    check_parsing('1.3e-5 + a . 1.2', 0, {
+      --           0123456789012345
+      --           0         1
+      ast = {
+        {
+          'Concat:0:10: .',
+          children = {
+            {
+              'BinaryPlus:0:6: +',
+              children = {
+                'Float(val=1.300000e-05):0:0:1.3e-5',
+                'PlainIdentifier(scope=0,ident=a):0:8: a',
+              },
+            },
+            {
+              'ConcatOrSubscript:0:14:.',
+              children = {
+                'Integer(val=1):0:12: 1',
+                'PlainKey(key=2):0:15:2',
+              },
+            },
+          },
+        },
+      },
+    }, {
+      hl('Float', '1.3e-5'),
+      hl('BinaryPlus', '+', 1),
+      hl('Identifier', 'a', 1),
+      hl('Concat', '.', 1),
+      hl('Number', '1', 1),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', '2'),
+    })
+
+    check_parsing('1.2.3', 0, {
+      --           01234
+      ast = {
+        {
+          'ConcatOrSubscript:0:3:.',
+          children = {
+            {
+              'ConcatOrSubscript:0:1:.',
+              children = {
+                'Integer(val=1):0:0:1',
+                'PlainKey(key=2):0:2:2',
+              },
+            },
+            'PlainKey(key=3):0:4:3',
+          },
+        },
+      },
+    }, {
+      hl('Number', '1'),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', '2'),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', '3'),
+    })
+
+    check_parsing('a.1.2', 0, {
+      --           01234
+      ast = {
+        {
+          'ConcatOrSubscript:0:3:.',
+          children = {
+            {
+              'ConcatOrSubscript:0:1:.',
+              children = {
+                'PlainIdentifier(scope=0,ident=a):0:0:a',
+                'PlainKey(key=1):0:2:1',
+              },
+            },
+            'PlainKey(key=2):0:4:2',
+          },
+        },
+      },
+    }, {
+      hl('Identifier', 'a'),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', '1'),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', '2'),
+    })
+
+    check_parsing('a . 1.2', 0, {
+      --           0123456
+      ast = {
+        {
+          'Concat:0:1: .',
+          children = {
+            'PlainIdentifier(scope=0,ident=a):0:0:a',
+            {
+              'ConcatOrSubscript:0:5:.',
+              children = {
+                'Integer(val=1):0:3: 1',
+                'PlainKey(key=2):0:6:2',
+              },
+            },
+          },
+        },
+      },
+    }, {
+      hl('Identifier', 'a'),
+      hl('Concat', '.', 1),
+      hl('Number', '1', 1),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', '2'),
+    })
+
+    check_parsing('+a . +b', 0, {
+      --           0123456
+      ast = {
+        {
+          'Concat:0:2: .',
+          children = {
+            {
+              'UnaryPlus:0:0:+',
+              children = {
+                'PlainIdentifier(scope=0,ident=a):0:1:a',
+              },
+            },
+            {
+              'UnaryPlus:0:4: +',
+              children = {
+                'PlainIdentifier(scope=0,ident=b):0:6:b',
+              },
+            },
+          },
+        },
+      },
+    }, {
+      hl('UnaryPlus', '+'),
+      hl('Identifier', 'a'),
+      hl('Concat', '.', 1),
+      hl('UnaryPlus', '+', 1),
+      hl('Identifier', 'b'),
+    })
+  end)
 end)