viml/parser/expressions: Add support for the dot operator and numbers

This commit is contained in:
ZyX 2017-10-03 01:30:02 +03:00
parent 163792e9b9
commit 21a5ce033c
3 changed files with 416 additions and 9 deletions

View File

@ -915,7 +915,8 @@ static inline void viml_pexpr_debug_print_token(
//
// NVimUnaryPlus -> NVimUnaryOperator
// NVimBinaryPlus -> NVimBinaryOperator
// NVimConcatOrSubscript -> NVimBinaryOperator
// NVimConcat -> NVimBinaryOperator
// NVimConcatOrSubscript -> NVimConcat
//
// NVimRegister -> SpecialChar
// NVimNumber -> Number
@ -971,6 +972,7 @@ static const ExprOpLvl node_type_to_op_lvl[] = {
[kExprNodeUnknownFigure] = kEOpLvlParens,
[kExprNodeLambda] = kEOpLvlParens,
[kExprNodeDictLiteral] = kEOpLvlParens,
[kExprNodeListLiteral] = kEOpLvlParens,
[kExprNodeArrow] = kEOpLvlArrow,
@ -985,17 +987,21 @@ static const ExprOpLvl node_type_to_op_lvl[] = {
[kExprNodeComparison] = kEOpLvlComparison,
[kExprNodeBinaryPlus] = kEOpLvlAddition,
[kExprNodeConcat] = kEOpLvlAddition,
[kExprNodeUnaryPlus] = kEOpLvlUnary,
[kExprNodeConcatOrSubscript] = kEOpLvlSubscript,
[kExprNodeSubscript] = kEOpLvlSubscript,
[kExprNodeCurlyBracesIdentifier] = kEOpLvlComplexIdentifier,
[kExprNodeComplexIdentifier] = kEOpLvlValue,
[kExprNodePlainIdentifier] = kEOpLvlValue,
[kExprNodePlainKey] = kEOpLvlValue,
[kExprNodeRegister] = kEOpLvlValue,
[kExprNodeListLiteral] = kEOpLvlValue,
[kExprNodeInteger] = kEOpLvlValue,
[kExprNodeFloat] = kEOpLvlValue,
};
static const ExprOpAssociativity node_type_to_op_ass[] = {
@ -1008,6 +1014,7 @@ static const ExprOpAssociativity node_type_to_op_ass[] = {
[kExprNodeUnknownFigure] = kEOpAssLeft,
[kExprNodeLambda] = kEOpAssNo,
[kExprNodeDictLiteral] = kEOpAssNo,
[kExprNodeListLiteral] = kEOpAssNo,
// Does not really matter.
[kExprNodeArrow] = kEOpAssNo,
@ -1030,17 +1037,21 @@ static const ExprOpAssociativity node_type_to_op_ass[] = {
[kExprNodeComparison] = kEOpAssRight,
[kExprNodeBinaryPlus] = kEOpAssLeft,
[kExprNodeConcat] = kEOpAssLeft,
[kExprNodeUnaryPlus] = kEOpAssNo,
[kExprNodeConcatOrSubscript] = kEOpAssLeft,
[kExprNodeSubscript] = kEOpAssLeft,
[kExprNodeCurlyBracesIdentifier] = kEOpAssLeft,
[kExprNodeComplexIdentifier] = kEOpAssLeft,
[kExprNodePlainIdentifier] = kEOpAssNo,
[kExprNodePlainKey] = kEOpAssNo,
[kExprNodeRegister] = kEOpAssNo,
[kExprNodeListLiteral] = kEOpAssNo,
[kExprNodeInteger] = kEOpAssNo,
[kExprNodeFloat] = kEOpAssNo,
};
/// Get AST node priority level
@ -1420,10 +1431,20 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags)
[kENodeArgument] = kELFlagIsNotCmp,
[kENodeArgumentSeparator] = kELFlagForbidScope,
};
// FIXME Determine when (not) to allow floating-point numbers.
const bool is_concat_or_subscript = (
want_node == kENodeValue
&& kv_size(ast_stack) > 1
&& (*kv_Z(ast_stack, 1))->type == kExprNodeConcatOrSubscript);
const int lexer_additional_flags = (
kELFlagPeek
| ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0));
| ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0)
| ((want_node == kENodeValue
&& (kv_size(ast_stack) == 1
|| ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat
&& ((*kv_Z(ast_stack, 1))->type
!= kExprNodeConcatOrSubscript))))
? kELFlagAllowFloat
: 0));
LexExprToken cur_token = viml_pexpr_next_token(
pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags);
if (cur_token.type == kExprLexEOC) {
@ -1456,11 +1477,42 @@ viml_pexpr_parse_process_token:
ExprASTNode *cur_node = NULL;
assert((want_node == kENodeValue || want_node == kENodeArgument)
== (*top_node_p == NULL));
// Note: in Vim whether expression "cond?d.a:2" is valid depends both on
// "cond" and whether "d" is a dictionary: expression is valid if condition
// is true and "d" is a dictionary (with "a" key or it will complain about
// missing one, but this is not relevant); if any of the requirements is
// broken then this thing is parsed as "d . a:2" yielding missing colon
// error. This parser does not allow such ambiguity, especially because it
// simply cannot: whether "d" is a dictionary is not known at parsing
// time.
//
// Here the example will always contain a concat with "a:2" consuming the
// colon, making the expression invalid both because there is no longer a
// spare colon for the ternary and because concatenating a dictionary with
// anything is not valid. There are more cases when this will make
// a difference though.
const bool node_is_key = (
is_concat_or_subscript
&& (cur_token.type == kExprLexPlainIdentifier
? (!cur_token.data.var.autoload
&& cur_token.data.var.scope == 0)
: (cur_token.type == kExprLexNumber))
&& prev_token.type != kExprLexSpacing);
if (is_concat_or_subscript && !node_is_key) {
// Note: in Vim "d. a" (this is the reason behind `prev_token.type !=
// kExprLexSpacing` part of the condition) as well as any other "d.{expr}"
// where "{expr}" does not look like a key is invalid whenever "d" happens
// to be a dictionary. Since parser has no idea whether preceding
// expression is actually a dictionary it cannot outright reject anything,
// so it turns kExprNodeConcatOrSubscript into kExprNodeConcat instead,
// which will yield different errors than Vim does in a number of
// circumstances, and in any case runtime rather than parse-time errors.
(*kv_Z(ast_stack, 1))->type = kExprNodeConcat;
}
if ((want_node == kENodeArgumentSeparator
&& tok_type != kExprLexComma
&& tok_type != kExprLexArrow)
|| (want_node == kENodeArgument
&& !(tok_type == kExprLexPlainIdentifier
&& !(cur_token.type == kExprLexPlainIdentifier
&& cur_token.data.var.scope == 0
&& !cur_token.data.var.autoload)
&& tok_type != kExprLexArrow)) {
@ -1844,7 +1896,10 @@ viml_pexpr_parse_figure_brace_closing_error:
want_node = (want_node == kENodeArgument
? kENodeArgumentSeparator
: kENodeOperator);
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier);
NEW_NODE_WITH_CUR_POS(cur_node,
(node_is_key
? kExprNodePlainKey
: kExprNodePlainIdentifier));
cur_node->data.var.scope = cur_token.data.var.scope;
const size_t scope_shift = (cur_token.data.var.scope == 0
? 0
@ -1854,6 +1909,7 @@ viml_pexpr_parse_figure_brace_closing_error:
cur_node->data.var.ident_len = cur_token.len - scope_shift;
*top_node_p = cur_node;
if (scope_shift) {
assert(!node_is_key);
viml_parser_highlight(pstate, cur_token.start, 1,
HL(IdentifierScope));
viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1,
@ -1863,7 +1919,9 @@ viml_pexpr_parse_figure_brace_closing_error:
viml_parser_highlight(pstate, shifted_pos(cur_token.start,
scope_shift),
cur_token.len - scope_shift,
HL(Identifier));
(node_is_key
? HL(IdentifierKey)
: HL(Identifier)));
}
} else {
if (cur_token.data.var.scope == 0) {
@ -1882,6 +1940,40 @@ viml_pexpr_parse_figure_brace_closing_error:
}
break;
}
case kExprLexNumber: {
if (want_node != kENodeValue) {
OP_MISSING;
}
if (node_is_key) {
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainKey);
cur_node->data.var.ident = pline.data + cur_token.start.col;
cur_node->data.var.ident_len = cur_token.len;
HL_CUR_TOKEN(IdentifierKey);
} else if (cur_token.data.num.is_float) {
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeFloat);
cur_node->data.flt.value = cur_token.data.num.val.floating;
HL_CUR_TOKEN(Float);
} else {
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeInteger);
cur_node->data.num.value = cur_token.data.num.val.integer;
HL_CUR_TOKEN(Number);
}
want_node = kENodeOperator;
*top_node_p = cur_node;
break;
}
case kExprLexDot: {
ADD_VALUE_IF_MISSING(_("E15: Unexpected dot: %.*s"));
if (prev_token.type == kExprLexSpacing) {
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcat);
HL_CUR_TOKEN(Concat);
} else {
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcatOrSubscript);
HL_CUR_TOKEN(ConcatOrSubscript);
}
ADD_OP_NODE(cur_node);
break;
}
case kExprLexParenthesis: {
if (cur_token.data.brc.closing) {
if (want_node == kENodeValue) {

View File

@ -166,6 +166,8 @@ typedef enum {
/// Looks like "string", "g:Foo", etc: consists of a single
/// kExprLexPlainIdentifier token.
kExprNodePlainIdentifier = 'i',
/// Plain dictionary key, for use with kExprNodeConcatOrSubscript
kExprNodePlainKey = 'k',
/// Complex identifier: variable/function name with curly braces
kExprNodeComplexIdentifier = 'I',
/// Figure brace expression which is not yet known
@ -180,6 +182,19 @@ typedef enum {
kExprNodeColon = ':', ///< Colon “operator”.
kExprNodeArrow = '>', ///< Arrow “operator”.
kExprNodeComparison = '=', ///< Various comparison operators.
/// Concat operator
///
/// To be only used in cases when it is known for sure it is not a subscript.
kExprNodeConcat = '.',
/// Concat or subscript operator
///
/// For cases when it is not obvious whether expression is a concat or
/// a subscript. May only have either a number or a plain identifier as the
/// second child. To make it easier to avoid curly braces in place of a
/// kExprNodePlainIdentifier node, kExprNodePlainKey is used.
kExprNodeConcatOrSubscript = 'S',
kExprNodeInteger = '0', ///< Integral number.
kExprNodeFloat = '1', ///< Floating-point number.
} ExprASTNodeType;
typedef struct expr_ast_node ExprASTNode;
@ -219,7 +234,7 @@ struct expr_ast_node {
/// Points to inside parser reader state.
const char *ident;
size_t ident_len; ///< Actual identifier length.
} var; ///< For kExprNodePlainIdentifier.
} var; ///< For kExprNodePlainIdentifier and kExprNodePlainKey.
struct {
bool got_colon; ///< True if colon was seen.
} ter; ///< For kExprNodeTernaryValue.
@ -228,6 +243,12 @@ struct expr_ast_node {
ExprCaseCompareStrategy ccs; ///< Case comparison strategy.
bool inv; ///< True if comparison is to be inverted.
} cmp; ///< For kExprNodeComparison.
struct {
uvarnumber_T value;
} num; ///< For kExprNodeInteger.
struct {
float_T value;
} flt; ///< For kExprNodeFloat.
} data;
};

View File

@ -77,6 +77,7 @@ make_enum_conv_tab(lib, {
'kExprNodeNested',
'kExprNodeCall',
'kExprNodePlainIdentifier',
'kExprNodePlainKey',
'kExprNodeComplexIdentifier',
'kExprNodeUnknownFigure',
'kExprNodeLambda',
@ -86,6 +87,10 @@ make_enum_conv_tab(lib, {
'kExprNodeColon',
'kExprNodeArrow',
'kExprNodeComparison',
'kExprNodeConcat',
'kExprNodeConcatOrSubscript',
'kExprNodeInteger',
'kExprNodeFloat',
}, 'kExprNode', function(ret) east_node_type_tab = ret end)
local function conv_east_node_type(typ)
@ -118,6 +123,9 @@ local function eastnode2lua(pstate, eastnode, checked_nodes)
typ = typ .. ('(scope=%s,ident=%s)'):format(
tostring(intchar2lua(eastnode.data.var.scope)),
ffi.string(eastnode.data.var.ident, eastnode.data.var.ident_len))
elseif typ == 'PlainKey' then
typ = typ .. ('(key=%s)'):format(
ffi.string(eastnode.data.var.ident, eastnode.data.var.ident_len))
elseif (typ == 'UnknownFigure' or typ == 'DictLiteral'
or typ == 'CurlyBracesIdentifier' or typ == 'Lambda') then
typ = typ .. ('(%s)'):format(
@ -128,6 +136,10 @@ local function eastnode2lua(pstate, eastnode, checked_nodes)
typ = typ .. ('(type=%s,inv=%u,ccs=%s)'):format(
conv_cmp_type(eastnode.data.cmp.type), eastnode.data.cmp.inv and 1 or 0,
conv_ccs(eastnode.data.cmp.ccs))
elseif typ == 'Integer' then
typ = typ .. ('(val=%u)'):format(tonumber(eastnode.data.num.value))
elseif typ == 'Float' then
typ = typ .. ('(val=%e)'):format(tonumber(eastnode.data.flt.value))
end
ret_str = typ .. ':' .. ret_str
local can_simplify = true
@ -190,6 +202,8 @@ end)
describe('Expressions parser', function()
local function check_parsing(str, flags, exp_ast, exp_highlighting_fs)
flags = flags or 0
local pstate = new_pstate({str})
local east = lib.viml_pexpr_parse(pstate, flags)
local ast = east2lua(pstate, east)
@ -3649,4 +3663,284 @@ describe('Expressions parser', function()
hl('Identifier', 'b', 1),
})
end)
itp('works with concat/subscript', function()
check_parsing('.', 0, {
-- 0
ast = {
{
'ConcatOrSubscript:0:0:.',
children = {
'Missing:0:0:',
},
},
},
err = {
arg = '.',
msg = 'E15: Unexpected dot: %.*s',
},
}, {
hl('InvalidConcatOrSubscript', '.'),
})
check_parsing('a.', 0, {
-- 01
ast = {
{
'ConcatOrSubscript:0:1:.',
children = {
'PlainIdentifier(scope=0,ident=a):0:0:a',
},
},
},
err = {
arg = '',
msg = 'E15: Expected value, got EOC: %.*s',
},
}, {
hl('Identifier', 'a'),
hl('ConcatOrSubscript', '.'),
})
check_parsing('a.b', 0, {
-- 012
ast = {
{
'ConcatOrSubscript:0:1:.',
children = {
'PlainIdentifier(scope=0,ident=a):0:0:a',
'PlainKey(key=b):0:2:b',
},
},
},
}, {
hl('Identifier', 'a'),
hl('ConcatOrSubscript', '.'),
hl('IdentifierKey', 'b'),
})
check_parsing('1.2', 0, {
-- 012
ast = {
'Float(val=1.200000e+00):0:0:1.2',
},
}, {
hl('Float', '1.2'),
})
check_parsing('1.2 + 1.3e-5', 0, {
-- 012345678901
-- 0 1
ast = {
{
'BinaryPlus:0:3: +',
children = {
'Float(val=1.200000e+00):0:0:1.2',
'Float(val=1.300000e-05):0:5: 1.3e-5',
},
},
},
}, {
hl('Float', '1.2'),
hl('BinaryPlus', '+', 1),
hl('Float', '1.3e-5', 1),
})
check_parsing('a . 1.2 + 1.3e-5', 0, {
-- 0123456789012345
-- 0 1
ast = {
{
'BinaryPlus:0:7: +',
children = {
{
'Concat:0:1: .',
children = {
'PlainIdentifier(scope=0,ident=a):0:0:a',
{
'ConcatOrSubscript:0:5:.',
children = {
'Integer(val=1):0:3: 1',
'PlainKey(key=2):0:6:2',
},
},
},
},
'Float(val=1.300000e-05):0:9: 1.3e-5',
},
},
},
}, {
hl('Identifier', 'a'),
hl('Concat', '.', 1),
hl('Number', '1', 1),
hl('ConcatOrSubscript', '.'),
hl('IdentifierKey', '2'),
hl('BinaryPlus', '+', 1),
hl('Float', '1.3e-5', 1),
})
check_parsing('1.3e-5 + 1.2 . a', 0, {
-- 0123456789012345
-- 0 1
ast = {
{
'Concat:0:12: .',
children = {
{
'BinaryPlus:0:6: +',
children = {
'Float(val=1.300000e-05):0:0:1.3e-5',
'Float(val=1.200000e+00):0:8: 1.2',
},
},
'PlainIdentifier(scope=0,ident=a):0:14: a',
},
},
},
}, {
hl('Float', '1.3e-5'),
hl('BinaryPlus', '+', 1),
hl('Float', '1.2', 1),
hl('Concat', '.', 1),
hl('Identifier', 'a', 1),
})
check_parsing('1.3e-5 + a . 1.2', 0, {
-- 0123456789012345
-- 0 1
ast = {
{
'Concat:0:10: .',
children = {
{
'BinaryPlus:0:6: +',
children = {
'Float(val=1.300000e-05):0:0:1.3e-5',
'PlainIdentifier(scope=0,ident=a):0:8: a',
},
},
{
'ConcatOrSubscript:0:14:.',
children = {
'Integer(val=1):0:12: 1',
'PlainKey(key=2):0:15:2',
},
},
},
},
},
}, {
hl('Float', '1.3e-5'),
hl('BinaryPlus', '+', 1),
hl('Identifier', 'a', 1),
hl('Concat', '.', 1),
hl('Number', '1', 1),
hl('ConcatOrSubscript', '.'),
hl('IdentifierKey', '2'),
})
check_parsing('1.2.3', 0, {
-- 01234
ast = {
{
'ConcatOrSubscript:0:3:.',
children = {
{
'ConcatOrSubscript:0:1:.',
children = {
'Integer(val=1):0:0:1',
'PlainKey(key=2):0:2:2',
},
},
'PlainKey(key=3):0:4:3',
},
},
},
}, {
hl('Number', '1'),
hl('ConcatOrSubscript', '.'),
hl('IdentifierKey', '2'),
hl('ConcatOrSubscript', '.'),
hl('IdentifierKey', '3'),
})
check_parsing('a.1.2', 0, {
-- 01234
ast = {
{
'ConcatOrSubscript:0:3:.',
children = {
{
'ConcatOrSubscript:0:1:.',
children = {
'PlainIdentifier(scope=0,ident=a):0:0:a',
'PlainKey(key=1):0:2:1',
},
},
'PlainKey(key=2):0:4:2',
},
},
},
}, {
hl('Identifier', 'a'),
hl('ConcatOrSubscript', '.'),
hl('IdentifierKey', '1'),
hl('ConcatOrSubscript', '.'),
hl('IdentifierKey', '2'),
})
check_parsing('a . 1.2', 0, {
-- 0123456
ast = {
{
'Concat:0:1: .',
children = {
'PlainIdentifier(scope=0,ident=a):0:0:a',
{
'ConcatOrSubscript:0:5:.',
children = {
'Integer(val=1):0:3: 1',
'PlainKey(key=2):0:6:2',
},
},
},
},
},
}, {
hl('Identifier', 'a'),
hl('Concat', '.', 1),
hl('Number', '1', 1),
hl('ConcatOrSubscript', '.'),
hl('IdentifierKey', '2'),
})
check_parsing('+a . +b', 0, {
-- 0123456
ast = {
{
'Concat:0:2: .',
children = {
{
'UnaryPlus:0:0:+',
children = {
'PlainIdentifier(scope=0,ident=a):0:1:a',
},
},
{
'UnaryPlus:0:4: +',
children = {
'PlainIdentifier(scope=0,ident=b):0:6:b',
},
},
},
},
},
}, {
hl('UnaryPlus', '+'),
hl('Identifier', 'a'),
hl('Concat', '.', 1),
hl('UnaryPlus', '+', 1),
hl('Identifier', 'b'),
})
end)
end)