From 21a5ce033c5a853bed3204ea9f0f7a3cfc1d164f Mon Sep 17 00:00:00 2001
From: ZyX <kp-pav@yandex.ru>
Date: Tue, 3 Oct 2017 01:30:02 +0300
Subject: [PATCH] viml/parser/expressions: Add support for the dot operator and
 numbers

---
 src/nvim/viml/parser/expressions.c         | 108 +++++++-
 src/nvim/viml/parser/expressions.h         |  23 +-
 test/unit/viml/expressions/parser_spec.lua | 294 +++++++++++++++++++++
 3 files changed, 416 insertions(+), 9 deletions(-)

diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c
index 5d892fb8f8..4babf4312c 100644
--- a/src/nvim/viml/parser/expressions.c
+++ b/src/nvim/viml/parser/expressions.c
@@ -915,7 +915,8 @@ static inline void viml_pexpr_debug_print_token(
 //
 // NVimUnaryPlus -> NVimUnaryOperator
 // NVimBinaryPlus -> NVimBinaryOperator
-// NVimConcatOrSubscript -> NVimBinaryOperator
+// NVimConcat -> NVimBinaryOperator
+// NVimConcatOrSubscript -> NVimConcat
 //
 // NVimRegister -> SpecialChar
 // NVimNumber -> Number
@@ -971,6 +972,7 @@ static const ExprOpLvl node_type_to_op_lvl[] = {
   [kExprNodeUnknownFigure] = kEOpLvlParens,
   [kExprNodeLambda] = kEOpLvlParens,
   [kExprNodeDictLiteral] = kEOpLvlParens,
+  [kExprNodeListLiteral] = kEOpLvlParens,
 
   [kExprNodeArrow] = kEOpLvlArrow,
 
@@ -985,17 +987,21 @@ static const ExprOpLvl node_type_to_op_lvl[] = {
   [kExprNodeComparison] = kEOpLvlComparison,
 
   [kExprNodeBinaryPlus] = kEOpLvlAddition,
+  [kExprNodeConcat] = kEOpLvlAddition,
 
   [kExprNodeUnaryPlus] = kEOpLvlUnary,
 
+  [kExprNodeConcatOrSubscript] = kEOpLvlSubscript,
   [kExprNodeSubscript] = kEOpLvlSubscript,
 
   [kExprNodeCurlyBracesIdentifier] = kEOpLvlComplexIdentifier,
 
   [kExprNodeComplexIdentifier] = kEOpLvlValue,
   [kExprNodePlainIdentifier] = kEOpLvlValue,
+  [kExprNodePlainKey] = kEOpLvlValue,
   [kExprNodeRegister] = kEOpLvlValue,
-  [kExprNodeListLiteral] = kEOpLvlValue,
+  [kExprNodeInteger] = kEOpLvlValue,
+  [kExprNodeFloat] = kEOpLvlValue,
 };
 
 static const ExprOpAssociativity node_type_to_op_ass[] = {
@@ -1008,6 +1014,7 @@ static const ExprOpAssociativity node_type_to_op_ass[] = {
   [kExprNodeUnknownFigure] = kEOpAssLeft,
   [kExprNodeLambda] = kEOpAssNo,
   [kExprNodeDictLiteral] = kEOpAssNo,
+  [kExprNodeListLiteral] = kEOpAssNo,
 
   // Does not really matter.
   [kExprNodeArrow] = kEOpAssNo,
@@ -1030,17 +1037,21 @@ static const ExprOpAssociativity node_type_to_op_ass[] = {
   [kExprNodeComparison] = kEOpAssRight,
 
   [kExprNodeBinaryPlus] = kEOpAssLeft,
+  [kExprNodeConcat] = kEOpAssLeft,
 
   [kExprNodeUnaryPlus] = kEOpAssNo,
 
+  [kExprNodeConcatOrSubscript] = kEOpAssLeft,
   [kExprNodeSubscript] = kEOpAssLeft,
 
   [kExprNodeCurlyBracesIdentifier] = kEOpAssLeft,
 
   [kExprNodeComplexIdentifier] = kEOpAssLeft,
   [kExprNodePlainIdentifier] = kEOpAssNo,
+  [kExprNodePlainKey] = kEOpAssNo,
   [kExprNodeRegister] = kEOpAssNo,
-  [kExprNodeListLiteral] = kEOpAssNo,
+  [kExprNodeInteger] = kEOpAssNo,
+  [kExprNodeFloat] = kEOpAssNo,
 };
 
 /// Get AST node priority level
@@ -1420,10 +1431,20 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags)
       [kENodeArgument] = kELFlagIsNotCmp,
       [kENodeArgumentSeparator] = kELFlagForbidScope,
     };
-    // FIXME Determine when (not) to allow floating-point numbers.
+    const bool is_concat_or_subscript = (
+        want_node == kENodeValue
+        && kv_size(ast_stack) > 1
+        && (*kv_Z(ast_stack, 1))->type == kExprNodeConcatOrSubscript);
     const int lexer_additional_flags = (
         kELFlagPeek
-        | ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0));
+        | ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0)
+        | ((want_node == kENodeValue
+            && (kv_size(ast_stack) == 1
+                || ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat
+                    && ((*kv_Z(ast_stack, 1))->type
+                        != kExprNodeConcatOrSubscript))))
+            ? kELFlagAllowFloat
+            : 0));
     LexExprToken cur_token = viml_pexpr_next_token(
         pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags);
     if (cur_token.type == kExprLexEOC) {
@@ -1456,11 +1477,42 @@ viml_pexpr_parse_process_token:
     ExprASTNode *cur_node = NULL;
     assert((want_node == kENodeValue || want_node == kENodeArgument)
            == (*top_node_p == NULL));
+    // Note: in Vim whether expression "cond?d.a:2" is valid depends both on
+    // "cond" and whether "d" is a dictionary: expression is valid if condition
+    // is true and "d" is a dictionary (with "a" key or it will complain about
+    // missing one, but this is not relevant); if any of the requirements is
+    // broken then this thing is parsed as "d . a:2" yielding missing colon
+    // error. This parser does not allow such ambiguity, especially because it
+    // simply can’t: whether "d" is a dictionary is not known at the parsing
+    // time.
+    //
+    // Here the example will always contain a concat with "a:2" consuming the
+    // colon, making it invalid both because there is no spare colon left for
+    // the ternary and because concatenating a dictionary with anything is not
+    // valid. There are more cases where this makes a difference, though.
+    const bool node_is_key = (
+        is_concat_or_subscript
+        && (cur_token.type == kExprLexPlainIdentifier
+            ? (!cur_token.data.var.autoload
+               && cur_token.data.var.scope == 0)
+            : (cur_token.type == kExprLexNumber))
+        && prev_token.type != kExprLexSpacing);
+    if (is_concat_or_subscript && !node_is_key) {
+      // Note: in Vim "d. a" (this is the reason behind `prev_token.type !=
+      // kExprLexSpacing` part of the condition) as well as any other "d.{expr}"
+      // where "{expr}" does not look like a key is invalid whenever "d" happens
+      // to be a dictionary. Since parser has no idea whether preceding
+      // expression is actually a dictionary it can’t outright reject anything,
+      // so it turns kExprNodeConcatOrSubscript into kExprNodeConcat instead,
+      // which will yield different errors than Vim does in a number of
+      // circumstances, and in any case runtime and not parse time errors.
+      (*kv_Z(ast_stack, 1))->type = kExprNodeConcat;
+    }
     if ((want_node == kENodeArgumentSeparator
          && tok_type != kExprLexComma
          && tok_type != kExprLexArrow)
         || (want_node == kENodeArgument
-            && !(tok_type == kExprLexPlainIdentifier
+            && !(cur_token.type == kExprLexPlainIdentifier
                  && cur_token.data.var.scope == 0
                  && !cur_token.data.var.autoload)
             && tok_type != kExprLexArrow)) {
@@ -1844,7 +1896,10 @@ viml_pexpr_parse_figure_brace_closing_error:
           want_node = (want_node == kENodeArgument
                        ? kENodeArgumentSeparator
                        : kENodeOperator);
-          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier);
+          NEW_NODE_WITH_CUR_POS(cur_node,
+                                (node_is_key
+                                 ? kExprNodePlainKey
+                                 : kExprNodePlainIdentifier));
           cur_node->data.var.scope = cur_token.data.var.scope;
           const size_t scope_shift = (cur_token.data.var.scope == 0
                                       ? 0
@@ -1854,6 +1909,7 @@ viml_pexpr_parse_figure_brace_closing_error:
           cur_node->data.var.ident_len = cur_token.len - scope_shift;
           *top_node_p = cur_node;
           if (scope_shift) {
+            assert(!node_is_key);
             viml_parser_highlight(pstate, cur_token.start, 1,
                                   HL(IdentifierScope));
             viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1,
@@ -1863,7 +1919,9 @@ viml_pexpr_parse_figure_brace_closing_error:
             viml_parser_highlight(pstate, shifted_pos(cur_token.start,
                                                       scope_shift),
                                   cur_token.len - scope_shift,
-                                  HL(Identifier));
+                                  (node_is_key
+                                   ? HL(IdentifierKey)
+                                   : HL(Identifier)));
           }
         } else {
           if (cur_token.data.var.scope == 0) {
@@ -1882,6 +1940,40 @@ viml_pexpr_parse_figure_brace_closing_error:
         }
         break;
       }
+      case kExprLexNumber: {
+        if (want_node != kENodeValue) {
+          OP_MISSING;
+        }
+        if (node_is_key) {
+          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainKey);
+          cur_node->data.var.ident = pline.data + cur_token.start.col;
+          cur_node->data.var.ident_len = cur_token.len;
+          HL_CUR_TOKEN(IdentifierKey);
+        } else if (cur_token.data.num.is_float) {
+          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeFloat);
+          cur_node->data.flt.value = cur_token.data.num.val.floating;
+          HL_CUR_TOKEN(Float);
+        } else {
+          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeInteger);
+          cur_node->data.num.value = cur_token.data.num.val.integer;
+          HL_CUR_TOKEN(Number);
+        }
+        want_node = kENodeOperator;
+        *top_node_p = cur_node;
+        break;
+      }
+      case kExprLexDot: {
+        ADD_VALUE_IF_MISSING(_("E15: Unexpected dot: %.*s"));
+        if (prev_token.type == kExprLexSpacing) {
+          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcat);
+          HL_CUR_TOKEN(Concat);
+        } else {
+          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcatOrSubscript);
+          HL_CUR_TOKEN(ConcatOrSubscript);
+        }
+        ADD_OP_NODE(cur_node);
+        break;
+      }
       case kExprLexParenthesis: {
         if (cur_token.data.brc.closing) {
           if (want_node == kENodeValue) {
diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h
index 29903490bb..0d496c87ba 100644
--- a/src/nvim/viml/parser/expressions.h
+++ b/src/nvim/viml/parser/expressions.h
@@ -166,6 +166,8 @@ typedef enum {
   /// Looks like "string", "g:Foo", etc: consists from a single 
   /// kExprLexPlainIdentifier token.
   kExprNodePlainIdentifier = 'i',
+  /// Plain dictionary key, for use with kExprNodeConcatOrSubscript
+  kExprNodePlainKey = 'k',
   /// Complex identifier: variable/function name with curly braces
   kExprNodeComplexIdentifier = 'I',
   /// Figure brace expression which is not yet known
@@ -180,6 +182,19 @@ typedef enum {
   kExprNodeColon = ':',  ///< Colon “operator”.
   kExprNodeArrow = '>',  ///< Arrow “operator”.
   kExprNodeComparison = '=',  ///< Various comparison operators.
+  /// Concat operator
+  ///
+  /// To be only used in cases when it is known for sure it is not a subscript.
+  kExprNodeConcat = '.',
+  /// Concat or subscript operator
+  ///
+  /// For cases when it is not obvious whether the expression is a concat or
+  /// a subscript. May only have a number or a plain identifier as the second
+  /// child. To make it easy to rule out curly braces there, that child is
+  /// a kExprNodePlainKey node rather than a kExprNodePlainIdentifier one.
+  kExprNodeConcatOrSubscript = 'S',
+  kExprNodeInteger = '0',  ///< Integral number.
+  kExprNodeFloat = '1',  ///< Floating-point number.
 } ExprASTNodeType;
 
 typedef struct expr_ast_node ExprASTNode;
@@ -219,7 +234,7 @@ struct expr_ast_node {
       /// Points to inside parser reader state.
       const char *ident;
       size_t ident_len;  ///< Actual identifier length.
-    } var;  ///< For kExprNodePlainIdentifier.
+    } var;  ///< For kExprNodePlainIdentifier and kExprNodePlainKey.
     struct {
       bool got_colon;  ///< True if colon was seen.
     } ter;  ///< For kExprNodeTernaryValue.
@@ -228,6 +243,12 @@ struct expr_ast_node {
       ExprCaseCompareStrategy ccs;  ///< Case comparison strategy.
       bool inv;  ///< True if comparison is to be inverted.
     } cmp;  ///< For kExprNodeComparison.
+    struct {
+      uvarnumber_T value;
+    } num;  ///< For kExprNodeInteger.
+    struct {
+      float_T value;
+    } flt;  ///< For kExprNodeFloat.
   } data;
 };
 
diff --git a/test/unit/viml/expressions/parser_spec.lua b/test/unit/viml/expressions/parser_spec.lua
index efa88455e4..8d96c29db7 100644
--- a/test/unit/viml/expressions/parser_spec.lua
+++ b/test/unit/viml/expressions/parser_spec.lua
@@ -77,6 +77,7 @@ make_enum_conv_tab(lib, {
   'kExprNodeNested',
   'kExprNodeCall',
   'kExprNodePlainIdentifier',
+  'kExprNodePlainKey',
   'kExprNodeComplexIdentifier',
   'kExprNodeUnknownFigure',
   'kExprNodeLambda',
@@ -86,6 +87,10 @@ make_enum_conv_tab(lib, {
   'kExprNodeColon',
   'kExprNodeArrow',
   'kExprNodeComparison',
+  'kExprNodeConcat',
+  'kExprNodeConcatOrSubscript',
+  'kExprNodeInteger',
+  'kExprNodeFloat',
 }, 'kExprNode', function(ret) east_node_type_tab = ret end)
 
 local function conv_east_node_type(typ)
@@ -118,6 +123,9 @@ local function eastnode2lua(pstate, eastnode, checked_nodes)
     typ = typ .. ('(scope=%s,ident=%s)'):format(
       tostring(intchar2lua(eastnode.data.var.scope)),
       ffi.string(eastnode.data.var.ident, eastnode.data.var.ident_len))
+  elseif typ == 'PlainKey' then
+    typ = typ .. ('(key=%s)'):format(
+      ffi.string(eastnode.data.var.ident, eastnode.data.var.ident_len))
   elseif (typ == 'UnknownFigure' or typ == 'DictLiteral'
           or typ == 'CurlyBracesIdentifier' or typ == 'Lambda') then
     typ = typ .. ('(%s)'):format(
@@ -128,6 +136,10 @@ local function eastnode2lua(pstate, eastnode, checked_nodes)
     typ = typ .. ('(type=%s,inv=%u,ccs=%s)'):format(
       conv_cmp_type(eastnode.data.cmp.type), eastnode.data.cmp.inv and 1 or 0,
       conv_ccs(eastnode.data.cmp.ccs))
+  elseif typ == 'Integer' then
+    typ = typ .. ('(val=%u)'):format(tonumber(eastnode.data.num.value))
+  elseif typ == 'Float' then
+    typ = typ .. ('(val=%e)'):format(tonumber(eastnode.data.flt.value))
   end
   ret_str = typ .. ':' .. ret_str
   local can_simplify = true
@@ -190,6 +202,8 @@ end)
 
 describe('Expressions parser', function()
   local function check_parsing(str, flags, exp_ast, exp_highlighting_fs)
+    flags = flags or 0
+
     local pstate = new_pstate({str})
     local east = lib.viml_pexpr_parse(pstate, flags)
     local ast = east2lua(pstate, east)
@@ -3649,4 +3663,284 @@ describe('Expressions parser', function()
       hl('Identifier', 'b', 1),
     })
   end)
+  itp('works with concat/subscript', function()
+    check_parsing('.', 0, {
+      --           0
+      ast = {
+        {
+          'ConcatOrSubscript:0:0:.',
+          children = {
+            'Missing:0:0:',
+          },
+        },
+      },
+      err = {
+        arg = '.',
+        msg = 'E15: Unexpected dot: %.*s',
+      },
+    }, {
+      hl('InvalidConcatOrSubscript', '.'),
+    })
+
+    check_parsing('a.', 0, {
+      --           01
+      ast = {
+        {
+          'ConcatOrSubscript:0:1:.',
+          children = {
+            'PlainIdentifier(scope=0,ident=a):0:0:a',
+          },
+        },
+      },
+      err = {
+        arg = '',
+        msg = 'E15: Expected value, got EOC: %.*s',
+      },
+    }, {
+      hl('Identifier', 'a'),
+      hl('ConcatOrSubscript', '.'),
+    })
+
+    check_parsing('a.b', 0, {
+      --           012
+      ast = {
+        {
+          'ConcatOrSubscript:0:1:.',
+          children = {
+            'PlainIdentifier(scope=0,ident=a):0:0:a',
+            'PlainKey(key=b):0:2:b',
+          },
+        },
+      },
+    }, {
+      hl('Identifier', 'a'),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', 'b'),
+    })
+
+    check_parsing('1.2', 0, {
+      --           012
+      ast = {
+        'Float(val=1.200000e+00):0:0:1.2',
+      },
+    }, {
+      hl('Float', '1.2'),
+    })
+
+    check_parsing('1.2 + 1.3e-5', 0, {
+      --           012345678901
+      --           0         1
+      ast = {
+        {
+          'BinaryPlus:0:3: +',
+          children = {
+            'Float(val=1.200000e+00):0:0:1.2',
+            'Float(val=1.300000e-05):0:5: 1.3e-5',
+          },
+        },
+      },
+    }, {
+      hl('Float', '1.2'),
+      hl('BinaryPlus', '+', 1),
+      hl('Float', '1.3e-5', 1),
+    })
+
+    check_parsing('a . 1.2 + 1.3e-5', 0, {
+      --           0123456789012345
+      --           0         1
+      ast = {
+        {
+          'BinaryPlus:0:7: +',
+          children = {
+            {
+              'Concat:0:1: .',
+              children = {
+                'PlainIdentifier(scope=0,ident=a):0:0:a',
+                {
+                  'ConcatOrSubscript:0:5:.',
+                  children = {
+                    'Integer(val=1):0:3: 1',
+                    'PlainKey(key=2):0:6:2',
+                  },
+                },
+              },
+            },
+            'Float(val=1.300000e-05):0:9: 1.3e-5',
+          },
+        },
+      },
+    }, {
+      hl('Identifier', 'a'),
+      hl('Concat', '.', 1),
+      hl('Number', '1', 1),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', '2'),
+      hl('BinaryPlus', '+', 1),
+      hl('Float', '1.3e-5', 1),
+    })
+
+    check_parsing('1.3e-5 + 1.2 . a', 0, {
+      --           0123456789012345
+      --           0         1
+      ast = {
+        {
+          'Concat:0:12: .',
+          children = {
+            {
+              'BinaryPlus:0:6: +',
+              children = {
+                'Float(val=1.300000e-05):0:0:1.3e-5',
+                'Float(val=1.200000e+00):0:8: 1.2',
+              },
+            },
+            'PlainIdentifier(scope=0,ident=a):0:14: a',
+          },
+        },
+      },
+    }, {
+      hl('Float', '1.3e-5'),
+      hl('BinaryPlus', '+', 1),
+      hl('Float', '1.2', 1),
+      hl('Concat', '.', 1),
+      hl('Identifier', 'a', 1),
+    })
+
+    check_parsing('1.3e-5 + a . 1.2', 0, {
+      --           0123456789012345
+      --           0         1
+      ast = {
+        {
+          'Concat:0:10: .',
+          children = {
+            {
+              'BinaryPlus:0:6: +',
+              children = {
+                'Float(val=1.300000e-05):0:0:1.3e-5',
+                'PlainIdentifier(scope=0,ident=a):0:8: a',
+              },
+            },
+            {
+              'ConcatOrSubscript:0:14:.',
+              children = {
+                'Integer(val=1):0:12: 1',
+                'PlainKey(key=2):0:15:2',
+              },
+            },
+          },
+        },
+      },
+    }, {
+      hl('Float', '1.3e-5'),
+      hl('BinaryPlus', '+', 1),
+      hl('Identifier', 'a', 1),
+      hl('Concat', '.', 1),
+      hl('Number', '1', 1),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', '2'),
+    })
+
+    check_parsing('1.2.3', 0, {
+      --           01234
+      ast = {
+        {
+          'ConcatOrSubscript:0:3:.',
+          children = {
+            {
+              'ConcatOrSubscript:0:1:.',
+              children = {
+                'Integer(val=1):0:0:1',
+                'PlainKey(key=2):0:2:2',
+              },
+            },
+            'PlainKey(key=3):0:4:3',
+          },
+        },
+      },
+    }, {
+      hl('Number', '1'),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', '2'),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', '3'),
+    })
+
+    check_parsing('a.1.2', 0, {
+      --           01234
+      ast = {
+        {
+          'ConcatOrSubscript:0:3:.',
+          children = {
+            {
+              'ConcatOrSubscript:0:1:.',
+              children = {
+                'PlainIdentifier(scope=0,ident=a):0:0:a',
+                'PlainKey(key=1):0:2:1',
+              },
+            },
+            'PlainKey(key=2):0:4:2',
+          },
+        },
+      },
+    }, {
+      hl('Identifier', 'a'),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', '1'),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', '2'),
+    })
+
+    check_parsing('a . 1.2', 0, {
+      --           0123456
+      ast = {
+        {
+          'Concat:0:1: .',
+          children = {
+            'PlainIdentifier(scope=0,ident=a):0:0:a',
+            {
+              'ConcatOrSubscript:0:5:.',
+              children = {
+                'Integer(val=1):0:3: 1',
+                'PlainKey(key=2):0:6:2',
+              },
+            },
+          },
+        },
+      },
+    }, {
+      hl('Identifier', 'a'),
+      hl('Concat', '.', 1),
+      hl('Number', '1', 1),
+      hl('ConcatOrSubscript', '.'),
+      hl('IdentifierKey', '2'),
+    })
+
+    check_parsing('+a . +b', 0, {
+      --           0123456
+      ast = {
+        {
+          'Concat:0:2: .',
+          children = {
+            {
+              'UnaryPlus:0:0:+',
+              children = {
+                'PlainIdentifier(scope=0,ident=a):0:1:a',
+              },
+            },
+            {
+              'UnaryPlus:0:4: +',
+              children = {
+                'PlainIdentifier(scope=0,ident=b):0:6:b',
+              },
+            },
+          },
+        },
+      },
+    }, {
+      hl('UnaryPlus', '+'),
+      hl('Identifier', 'a'),
+      hl('Concat', '.', 1),
+      hl('UnaryPlus', '+', 1),
+      hl('Identifier', 'b'),
+    })
+  end)
 end)