viml/parser/expressions: Start creating expressions parser

Currently supported nodes:

- Register as it is one of the simplest value nodes (even numbers are
  not that simple with that dot handling).
- Plus, both unary and binary.
- Parenthesis, both nesting and calling.

Note regarding unit tests: they store AST and highlighting data in
strings in place of tables because luassert fails to do a good job at
representing big tables. Squashing a bunch of data into a single string
simply yields a more readable result.
This commit is contained in:
ZyX 2017-09-03 21:58:16 +03:00
parent 919223c23a
commit 430e516d3a
5 changed files with 1615 additions and 4 deletions

View File

@ -13,10 +13,18 @@
#include "nvim/types.h"
#include "nvim/charset.h"
#include "nvim/ascii.h"
#include "nvim/lib/kvec.h"
#include "nvim/viml/parser/expressions.h"
#include "nvim/viml/parser/parser.h"
/// Stack of pointers to AST node slots (`ExprASTNode **`) used by the parser
typedef kvec_withinit_t(ExprASTNode **, 16) ExprASTStack;
/// Kind of node the expression parser expects to see next
typedef enum {
kELvlOperator, ///< Operators: function call, subscripts, binary operators, …
kELvlValue, ///< Actual value: literals, variables, nested expressions.
} ExprASTLevel;
#ifdef INCLUDE_GENERATED_DECLARATIONS
# include "viml/parser/expressions.c.generated.h"
#endif
@ -144,6 +152,7 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const bool peek)
// Environment variable.
case '$': {
// FIXME: Parser function can't be thread-safe with vim_isIDc.
CHARREG(kExprLexEnv, vim_isIDc);
break;
}
@ -183,6 +192,7 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const bool peek)
ret.data.var.autoload = (
memchr(pline.data + 2, AUTOLOAD_CHAR, ret.len - 2)
!= NULL);
// FIXME: Resolve ambiguity with an argument to the lexer function.
// Previous CHARREG stopped at autoload character in order to make it
// possible to detect `is#`. Continue now with autoload characters
// included.
@ -372,3 +382,618 @@ viml_pexpr_next_token_adv_return:
}
return ret;
}
// start = s ternary_expr s EOC
// ternary_expr = binop_expr
// ( s Question s ternary_expr s Colon s ternary_expr s )?
// binop_expr = unaryop_expr ( binop unaryop_expr )?
// unaryop_expr = ( unaryop )? subscript_expr
// subscript_expr = subscript_expr subscript
// | value_expr
// subscript = Bracket('[') s ternary_expr s Bracket(']')
// | s Parenthesis('(') call_args Parenthesis(')')
// | Dot ( PlainIdentifier | Number )+
// # Note: `s` before Parenthesis('(') is only valid if preceding subscript_expr
// # is PlainIdentifier
// value_expr = ( float | Number
// | DoubleQuotedString | SingleQuotedString
// | paren_expr
// | list_literal
// | lambda_literal
// | dict_literal
// | Environment
// | Option
// | Register
// | var )
// float = Number Dot Number ( PlainIdentifier('e') ( Plus | Minus )? Number )?
// # Note: `1.2.3` is concat and not float. `"abc".2.3` is also concat without
// # floats.
// paren_expr = Parenthesis('(') s ternary_expr s Parenthesis(')')
// list_literal = Bracket('[') s
// ( ternary_expr s Comma s )*
// ternary_expr? s
// Bracket(']')
// dict_literal = FigureBrace('{') s
// ( ternary_expr s Colon s ternary_expr s Comma s )*
// ( ternary_expr s Colon s ternary_expr s )?
// FigureBrace('}')
// lambda_literal = FigureBrace('{') s
// ( PlainIdentifier s Comma s )*
// PlainIdentifier s
// Arrow s
// ternary_expr s
// FigureBrace('}')
// var = varchunk+
// varchunk = PlainIdentifier
// | Comparison("is" | "is#" | "isnot" | "isnot#")
// | FigureBrace('{') s ternary_expr s FigureBrace('}')
// call_args = ( s ternary_expr s Comma s )* s ternary_expr? s
// binop = s ( Plus | Minus | Dot
// | Comparison
// | Multiplication
// | Or
// | And ) s
// unaryop = s ( Not | Plus | Minus ) s
// s = Spacing?
//
// Binary operator precedence and associativity:
//
// Operator | Precedence | Associativity
// ---------+------------+-----------------
// || | 2 | left
// && | 3 | left
// cmp* | 4 | not associative
// + - . | 5 | left
// * / % | 6 | left
//
// * comparison operators:
//
// == ==# ==? != !=# !=?
// =~ =~# =~? !~ !~# !~?
// > ># >? <= <=# <=?
// < <# <? >= >=# >=?
// is is# is? isnot isnot# isnot?
//
// Used highlighting groups and assumed linkage:
//
// NVimInvalid -> Error
// NVimInvalidValue -> NVimInvalid
// NVimInvalidOperator -> NVimInvalid
// NVimInvalidDelimiter -> NVimInvalid
//
// NVimOperator -> Operator
// NVimUnaryOperator -> NVimOperator
// NVimBinaryOperator -> NVimOperator
// NVimComparisonOperator -> NVimOperator
// NVimTernaryOperator -> NVimOperator
//
// NVimParenthesis -> Delimiter
//
// NVimInvalidSpacing -> NVimInvalid
// NVimInvalidTernaryOperator -> NVimInvalidOperator
// NVimInvalidRegister -> NVimInvalidValue
// NVimInvalidClosingBracket -> NVimInvalidDelimiter
//
// NVimUnaryPlus -> NVimUnaryOperator
// NVimBinaryPlus -> NVimBinaryOperator
// NVimRegister -> SpecialChar
// NVimNestingParenthesis -> NVimParenthesis
// NVimCallingParenthesis -> NVimParenthesis
/// Allocate a new AST node
///
/// Only the type and the two link pointers are initialized here; position,
/// length and node-specific data are left for the caller to fill in.
///
/// @param[in]  type  Node type to allocate.
///
/// @return Freshly allocated node with `children` and `next` set to NULL.
static inline ExprASTNode *viml_pexpr_new_node(const ExprASTNodeType type)
  FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_MALLOC
{
  ExprASTNode *const node = xmalloc(sizeof(*node));
  node->next = NULL;
  node->children = NULL;
  node->type = type;
  return node;
}
/// Operator priority levels
///
/// Values are compared numerically by viml_pexpr_handle_bop(), so the order of
/// the enumerators matters.
typedef enum {
kEOpLvlInvalid = 0,
kEOpLvlParens,
kEOpLvlTernary,
kEOpLvlOr,
kEOpLvlAnd,
kEOpLvlComparison,
kEOpLvlAddition, ///< Addition, subtraction and concatenation.
kEOpLvlMultiplication, ///< Multiplication, division and modulo.
kEOpLvlUnary, ///< Unary operations: not, minus, plus.
kEOpLvlSubscript, ///< Subscripts.
kEOpLvlValue, ///< Values: literals, variables, nested expressions, …
} ExprOpLvl;
/// Operator associativity, looked up per node type in node_type_to_op_ass
typedef enum {
kEOpAssNo= 'n', ///< Not associative / not applicable.
kEOpAssLeft = 'l', ///< Left associativity.
kEOpAssRight = 'r', ///< Right associativity.
} ExprOpAssociativity;
/// Priority level of each AST node type, indexed by ExprASTNodeType
///
/// Note that a missing operator is given the same level as multiplication.
static const ExprOpLvl node_type_to_op_lvl[] = {
[kExprNodeMissing] = kEOpLvlInvalid,
[kExprNodeOpMissing] = kEOpLvlMultiplication,
[kExprNodeNested] = kEOpLvlParens,
[kExprNodeComplexIdentifier] = kEOpLvlParens,
[kExprNodeTernary] = kEOpLvlTernary,
[kExprNodeBinaryPlus] = kEOpLvlAddition,
[kExprNodeUnaryPlus] = kEOpLvlUnary,
[kExprNodeSubscript] = kEOpLvlSubscript,
[kExprNodeCall] = kEOpLvlSubscript,
[kExprNodeRegister] = kEOpLvlValue,
[kExprNodeListLiteral] = kEOpLvlValue,
[kExprNodePlainIdentifier] = kEOpLvlValue,
};
/// Associativity of each AST node type, indexed by ExprASTNodeType
static const ExprOpAssociativity node_type_to_op_ass[] = {
[kExprNodeMissing] = kEOpAssNo,
[kExprNodeOpMissing] = kEOpAssNo,
[kExprNodeNested] = kEOpAssNo,
[kExprNodeComplexIdentifier] = kEOpAssLeft,
[kExprNodeTernary] = kEOpAssNo,
[kExprNodeBinaryPlus] = kEOpAssLeft,
[kExprNodeUnaryPlus] = kEOpAssNo,
[kExprNodeSubscript] = kEOpAssLeft,
[kExprNodeCall] = kEOpAssLeft,
[kExprNodeRegister] = kEOpAssNo,
[kExprNodeListLiteral] = kEOpAssNo,
[kExprNodePlainIdentifier] = kEOpAssNo,
};
#ifdef UNIT_TESTING
#include <stdio.h>

/// Dump the AST stack to stderr
///
/// Debugging helper, only compiled when UNIT_TESTING is defined. Prints one
/// line per stack entry: the address of the slot and, when the slot is
/// filled, the address of the node, its type character and its position and
/// length.
///
/// @param[in]  ast_stack  Stack to print.
/// @param[in]  msg  Text printed right before "stack:" in the header line.
REAL_FATTR_UNUSED
static inline void viml_pexpr_debug_print_ast_stack(
    const ExprASTStack *const ast_stack,
    const char *const msg)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE
{
  const size_t depth = kv_size(*ast_stack);
  fprintf(stderr, "\n%sstack: %zu:\n", msg, depth);
  for (size_t idx = 0; idx < depth; idx++) {
    const ExprASTNode *const *const slot = (
        (const ExprASTNode *const *)kv_A(*ast_stack, idx));
    const ExprASTNode *const node = *slot;
    if (node != NULL) {
      fprintf(stderr, "- %p : %p : %c : %zu:%zu:%zu\n",
              (void *)slot, (void *)node, node->type,
              node->start.line, node->start.col,
              node->len);
      continue;
    }
    // Empty slot: a place where a node is still expected.
    fprintf(stderr, "- %p : NULL\n", (void *)slot);
  }
}

/// Print a local `ast_stack` variable; `msg` is stringified, not evaluated
#define PSTACK(msg) \
  viml_pexpr_debug_print_ast_stack(&ast_stack, #msg)
/// Same as PSTACK, for scopes where `ast_stack` is a pointer
#define PSTACK_P(msg) \
  viml_pexpr_debug_print_ast_stack(ast_stack, #msg)
#endif
/// Handle binary operator
///
/// This function is responsible for handling priority levels as well.
///
/// The topmost stack entry is always popped. Further entries are popped until
/// one is found whose node has a lower priority level than `bop_node`, or the
/// same level while being non-associative. `bop_node` then replaces the node
/// in the last popped slot, and the replaced node becomes its first child.
/// Finally that slot and the slot for the second child are pushed back.
///
/// @param[in,out]  ast_stack  AST stack, must not be empty and its inspected
///                            entries must point to non-NULL nodes.
/// @param[in,out]  bop_node  Operator node to insert into the tree.
/// @param[out]  want_level_p  Set to kELvlValue: a value is expected next.
static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack,
                                  ExprASTNode *const bop_node,
                                  ExprASTLevel *const want_level_p)
  FUNC_ATTR_NONNULL_ALL
{
  assert(kv_size(*ast_stack));
  const ExprOpLvl bop_node_lvl = node_type_to_op_lvl[bop_node->type];
  ExprASTNode **top_node_p = NULL;
  ExprASTNode *top_node = NULL;
  do {
    ExprASTNode **const new_top_node_p = kv_last(*ast_stack);
    ExprASTNode *const new_top_node = *new_top_node_p;
    assert(new_top_node != NULL);
    // The first entry is taken unconditionally, priorities only matter for
    // the entries below it.
    if (top_node_p != NULL) {
      const ExprOpLvl new_top_node_lvl = (
          node_type_to_op_lvl[new_top_node->type]);
      const ExprOpAssociativity new_top_node_ass = (
          node_type_to_op_ass[new_top_node->type]);
      if (bop_node_lvl > new_top_node_lvl
          || (bop_node_lvl == new_top_node_lvl
              && new_top_node_ass == kEOpAssNo)) {
        break;
      }
    }
    kv_drop(*ast_stack, 1);
    top_node_p = new_top_node_p;
    top_node = new_top_node;
  } while (kv_size(*ast_stack));
  // FIXME Handle right and no associativity correctly
  *top_node_p = bop_node;
  bop_node->children = top_node;
  assert(bop_node->children->next == NULL);
  kvi_push(*ast_stack, top_node_p);
  // Slot where the right operand is going to be stored.
  kvi_push(*ast_stack, &bop_node->children->next);
  *want_level_p = kELvlValue;
}
/// Get highlight group name
///
/// Expands to "NVimInvalid" followed by the group when the `is_invalid`
/// variable from the expansion scope is true, to "NVim" followed by the group
/// otherwise.
#define HL(g) (is_invalid ? "NVimInvalid" #g : "NVim" #g)
/// Highlight current token with the given group
///
/// Uses `pstate` and `cur_token` from the scope where it is expanded.
#define HL_CUR_TOKEN(g) \
viml_parser_highlight(pstate, cur_token.start, cur_token.len, \
HL(g))
/// Allocate new node, saving some values
///
/// Currently a plain alias of viml_pexpr_new_node().
#define NEW_NODE(type) \
viml_pexpr_new_node(type)
/// Set position of the given node to position from the given token
///
/// Arguments are parenthesized so that arbitrary expressions may be passed.
/// Both arguments are evaluated twice, so they must not have side effects.
///
/// @param cur_node  Node to modify.
/// @param cur_token  Token to set position from.
#define POS_FROM_TOKEN(cur_node, cur_token) \
  do { \
    (cur_node)->start = (cur_token).start; \
    (cur_node)->len = (cur_token).len; \
  } while (0)
/// Allocate new node and set its position from the current token
///
/// If previous token happened to contain spacing then it will be included.
///
/// Uses `cur_token` and `prev_token` from the scope where it is expanded.
/// `cur_node` is evaluated more than once and must be an assignable
/// expression without side effects.
///
/// @param cur_node  Variable to save allocated node to.
/// @param typ  Node type.
#define NEW_NODE_WITH_CUR_POS(cur_node, typ) \
  do { \
    (cur_node) = NEW_NODE(typ); \
    POS_FROM_TOKEN((cur_node), cur_token); \
    if (prev_token.type == kExprLexSpacing) { \
      (cur_node)->start = prev_token.start; \
      (cur_node)->len += prev_token.len; \
    } \
  } while (0)
// TODO(ZyX-I): actual condition
/// Check whether it is possible to have next expression after current
///
/// For :echo: `:echo @a @a` is a valid expression. `:echo (@a @a)` is not.
///
/// Currently true when the local `ast_stack` holds exactly one entry.
#define MAY_HAVE_NEXT_EXPR \
(kv_size(ast_stack) == 1)
/// Record missing operator: for things like
///
/// :echo @a @a
///
/// (allowed) or
///
/// :echo (@a @a)
///
/// (parsed as OpMissing(@a, @a)).
///
/// Relies on names from the expansion scope: `flags`, `ast`, `ast_stack`,
/// `pstate`, `top_node_p`, `cur_token`, `prev_token`, `cur_node`, `want_level`
/// and `is_invalid`, as well as on the viml_pexpr_parse_end and
/// viml_pexpr_parse_process_token labels. Control never continues past the
/// macro: both branches end with a goto.
#define OP_MISSING \
do { \
if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) { \
/* Multiple expressions allowed, return without calling */ \
/* viml_parser_advance(). */ \
goto viml_pexpr_parse_end; \
} else { \
assert(*top_node_p != NULL); \
ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Missing operator: %.*s")); \
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOpMissing); \
cur_node->len = 0; \
viml_pexpr_handle_bop(&ast_stack, cur_node, &want_level); \
is_invalid = true; \
goto viml_pexpr_parse_process_token; \
} \
} while (0)
/// Record an error in the AST, keeping the first one if there already is one
///
/// The error argument is the rest of the line `start` points into, beginning
/// at the `start` column.
///
/// @param[out]  ret_ast  AST to set error in. Left untouched if it is already
///                       marked as incorrect.
/// @param[in]  pstate  Parser state, used to get error message argument.
/// @param[in]  msg  Error message, assumed to be already translated and
///                  containing a single %token "%.*s".
/// @param[in]  start  Position at which error occurred.
static inline void east_set_error(ExprAST *const ret_ast,
                                  const ParserState *const pstate,
                                  const char *const msg,
                                  const ParserPosition start)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE
{
  if (ret_ast->correct) {
    const ParserLine *const line = &pstate->reader.lines.items[start.line];
    ret_ast->err.arg = line->data + start.col;
    ret_ast->err.arg_len = (int)(line->size - start.col);
    ret_ast->err.msg = msg;
    ret_ast->correct = false;
  }
}
/// Set error from the given kExprLexInvalid token and given message
///
/// Uses `ast` and `pstate` from the scope where it is expanded; the error
/// position is the start of the token.
#define ERROR_FROM_TOKEN_AND_MSG(cur_token, msg) \
east_set_error(&ast, pstate, msg, cur_token.start)
/// Set error from the given kExprLexInvalid token
///
/// The message is taken from the token itself (`data.err.msg`).
#define ERROR_FROM_TOKEN(cur_token) \
ERROR_FROM_TOKEN_AND_MSG(cur_token, cur_token.data.err.msg)
/// Parse one VimL expression
///
/// Errors do not stop parsing: the first one encountered is recorded in the
/// returned AST (see east_set_error()) and parsing continues, producing
/// kExprNodeMissing/kExprNodeOpMissing nodes where needed.
///
/// @param pstate Parser state.
/// @param[in] flags Additional flags, see ExprParserFlags
///
/// @return Parsed AST.
ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags)
FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
{
ExprAST ast = {
.correct = true,
.err = {
.msg = NULL,
.arg_len = 0,
.arg = NULL,
},
.root = NULL,
};
ExprASTStack ast_stack;
kvi_init(ast_stack);
// The bottom of the stack is always the slot for the root node.
kvi_push(ast_stack, &ast.root);
// Expressions stack:
// 1. *last is NULL if want_level is kELvlValue. Indicates where expression
// is to be put.
// 2. *last is not NULL otherwise, indicates current expression to be used as
// an operator argument.
ExprASTLevel want_level = kELvlValue;
LexExprToken prev_token = { .type = kExprLexMissing };
bool highlighted_prev_spacing = false;
do {
// Token is only peeked here, it is consumed by viml_parser_advance() at the
// end of the iteration.
LexExprToken cur_token = viml_pexpr_next_token(pstate, true);
if (cur_token.type == kExprLexEOC) {
if (flags & kExprFlagsDisallowEOC) {
if (cur_token.len == 0) {
// It is end of string, break.
break;
} else {
// It is NL, NUL or bar.
//
// Note: `<C-r>=1 | 2<CR>` actually yields 1 in Vim without any
// errors. This will be changed here.
cur_token.type = kExprLexInvalid;
cur_token.data.err.msg = _("E15: Unexpected EOC character: %.*s");
const ParserLine pline = (
pstate->reader.lines.items[cur_token.start.line]);
const char eoc_char = pline.data[cur_token.start.col];
// NUL and NL are then processed as spacing, anything else as `||`.
cur_token.data.err.type = ((eoc_char == NUL || eoc_char == NL)
? kExprLexSpacing
: kExprLexOr);
}
} else {
break;
}
}
LexExprTokenType tok_type = cur_token.type;
const bool token_invalid = (tok_type == kExprLexInvalid);
bool is_invalid = token_invalid;
// Re-entered via goto after an invalid token was replaced by the token type
// it stands for, and by OP_MISSING after inserting the missing operator node.
viml_pexpr_parse_process_token:
if (tok_type == kExprLexSpacing) {
if (is_invalid) {
viml_parser_highlight(pstate, cur_token.start, cur_token.len,
HL(Spacing));
} else {
// Do not do anything: let regular spacing be highlighted as normal.
// This also allows later to highlight spacing as invalid.
}
goto viml_pexpr_parse_cycle_end;
} else if (is_invalid && prev_token.type == kExprLexSpacing
&& !highlighted_prev_spacing) {
// Blame the preceding spacing instead of the current token: the spacing
// gets the "invalid" highlighting and the flag is reset for the token.
viml_parser_highlight(pstate, prev_token.start, prev_token.len,
HL(Spacing));
is_invalid = false;
highlighted_prev_spacing = true;
}
ExprASTNode **const top_node_p = kv_last(ast_stack);
ExprASTNode *cur_node = NULL;
// Keep these two asserts separate for debugging purposes.
assert(want_level == kELvlValue || *top_node_p != NULL);
assert(want_level != kELvlValue || *top_node_p == NULL);
// NOTE(review): there is no default case, token types not listed below leave
// the switch without any handling and are just advanced over.
switch (tok_type) {
case kExprLexEOC: {
// NOTE(review): there is no break here, so with assertions compiled out
// this falls through into the kExprLexInvalid case.
assert(false);
}
case kExprLexInvalid: {
ERROR_FROM_TOKEN(cur_token);
tok_type = cur_token.data.err.type;
goto viml_pexpr_parse_process_token;
}
case kExprLexRegister: {
if (want_level == kELvlValue) {
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeRegister);
cur_node->data.reg.name = cur_token.data.reg.name;
*top_node_p = cur_node;
want_level = kELvlOperator;
viml_parser_highlight(pstate, cur_token.start, cur_token.len,
HL(Register));
} else {
// Register in operator position: e.g. @a @a
OP_MISSING;
}
break;
}
case kExprLexPlus: {
if (want_level == kELvlValue) {
// Value level: assume unary plus
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnaryPlus);
*top_node_p = cur_node;
// The operand goes into the children slot of the new node.
kvi_push(ast_stack, &cur_node->children);
HL_CUR_TOKEN(UnaryPlus);
} else if (want_level < kELvlValue) {
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeBinaryPlus);
viml_pexpr_handle_bop(&ast_stack, cur_node, &want_level);
HL_CUR_TOKEN(BinaryPlus);
}
want_level = kELvlValue;
break;
}
case kExprLexParenthesis: {
if (cur_token.data.brc.closing) {
if (want_level == kELvlValue) {
if (kv_size(ast_stack) > 1) {
const ExprASTNode *const prev_top_node = *kv_Z(ast_stack, 1);
if (prev_top_node->type == kExprNodeCall) {
// Function call without arguments, this is not an error.
// But further code does not expect NULL nodes.
kv_drop(ast_stack, 1);
goto viml_pexpr_parse_no_paren_closing_error;
}
}
// Closing parenthesis where a value was expected: record the error and
// fill the empty slot with a zero-length "missing" node.
is_invalid = true;
ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value: %.*s"));
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing);
cur_node->len = 0;
*top_node_p = cur_node;
} else {
// Always drop the topmost value: when want_level != kELvlValue
// topmost item on stack is a *finished* left operand, which may as
// well be "(@a)" which needs not be finished.
kv_drop(ast_stack, 1);
}
viml_pexpr_parse_no_paren_closing_error: {}
// Unwind the stack until the slot holding the opening parenthesis (nested
// expression or call) is found or the stack is exhausted.
ExprASTNode **new_top_node_p = NULL;
while (kv_size(ast_stack)
&& (new_top_node_p == NULL
|| ((*new_top_node_p)->type != kExprNodeNested
&& (*new_top_node_p)->type != kExprNodeCall))) {
new_top_node_p = kv_pop(ast_stack);
}
if (new_top_node_p != NULL
&& ((*new_top_node_p)->type == kExprNodeNested
|| (*new_top_node_p)->type == kExprNodeCall)) {
if ((*new_top_node_p)->type == kExprNodeNested) {
HL_CUR_TOKEN(NestingParenthesis);
} else {
HL_CUR_TOKEN(CallingParenthesis);
}
} else {
// “Always drop the topmost value” branch has got rid of the single
// value stack had, so there is nothing known to enclose. Correct
// this.
if (new_top_node_p == NULL) {
new_top_node_p = top_node_p;
}
is_invalid = true;
HL_CUR_TOKEN(NestingParenthesis);
ERROR_FROM_TOKEN_AND_MSG(
cur_token, _("E15: Unexpected closing parenthesis: %.*s"));
cur_node = NEW_NODE(kExprNodeNested);
cur_node->start = cur_token.start;
cur_node->len = 0;
// Unexpected closing parenthesis, assume that it was wanted to
// enclose everything in ().
cur_node->children = *new_top_node_p;
*new_top_node_p = cur_node;
assert(cur_node->next == NULL);
}
// The closed expression is a finished value: operator is expected next.
kvi_push(ast_stack, new_top_node_p);
want_level = kELvlOperator;
} else {
if (want_level == kELvlValue) {
// Opening parenthesis in value position: nested expression.
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNested);
*top_node_p = cur_node;
kvi_push(ast_stack, &cur_node->children);
HL_CUR_TOKEN(NestingParenthesis);
} else if (want_level == kELvlOperator) {
if (prev_token.type == kExprLexSpacing) {
// For some reason "function (args)" is a function call, but
// "(funcref) (args)" is not. AFAIR this somehow involves
// compatibility and Bram was commenting that this is
// intentionally inconsistent and he is not very happy with the
// situation himself.
if ((*top_node_p)->type != kExprNodePlainIdentifier
&& (*top_node_p)->type != kExprNodeComplexIdentifier) {
OP_MISSING;
}
}
// Opening parenthesis in operator position: function call, handled like
// a binary operator with the callee as the left operand.
NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCall);
viml_pexpr_handle_bop(&ast_stack, cur_node, &want_level);
HL_CUR_TOKEN(CallingParenthesis);
} else {
// Currently it is impossible to reach this.
assert(false);
}
want_level = kELvlValue;
}
break;
}
}
viml_pexpr_parse_cycle_end:
prev_token = cur_token;
highlighted_prev_spacing = false;
viml_parser_advance(pstate, cur_token.len);
} while (true);
viml_pexpr_parse_end:
// Final checks: a value must not be pending, and anything left on the stack
// below the topmost entry must not be an unclosed parenthesis.
if (want_level == kELvlValue) {
east_set_error(&ast, pstate, _("E15: Expected value: %.*s"), pstate->pos);
} else if (kv_size(ast_stack) != 1) {
// Something may be wrong, check whether it really is.
// Pointer to ast.root must never be dropped, so “!= 1” is expected to be
// the same as “> 1”.
assert(kv_size(ast_stack));
// Topmost stack item must be a *finished* value, so it must not be
// analyzed. E.g. it may contain an already finished nested expression.
kv_drop(ast_stack, 1);
// Stops at the first error found: east_set_error() clears ast.correct.
while (ast.correct && kv_size(ast_stack)) {
const ExprASTNode *const cur_node = (*kv_pop(ast_stack));
// This should only happen when want_level == kELvlValue.
assert(cur_node != NULL);
switch (cur_node->type) {
case kExprNodeOpMissing:
case kExprNodeMissing: {
// Error should've been already reported.
break;
}
case kExprNodeCall: {
// TODO(ZyX-I): Rehighlight as invalid?
east_set_error(
&ast, pstate,
_("E116: Missing closing parenthesis for function call: %.*s"),
cur_node->start);
break;
}
case kExprNodeNested: {
// TODO(ZyX-I): Rehighlight as invalid?
east_set_error(
&ast, pstate,
_("E110: Missing closing parenthesis for nested expression"
": %.*s"),
cur_node->start);
break;
}
case kExprNodeBinaryPlus:
case kExprNodeUnaryPlus:
case kExprNodeRegister: {
// It is OK to see these in the stack.
break;
}
// TODO(ZyX-I): handle other values
}
}
}
kvi_destroy(ast_stack);
return ast;
}
#undef NEW_NODE
#undef HL

View File

@ -111,6 +111,80 @@ typedef struct {
} data; ///< Additional data, if needed.
} LexExprToken;
/// Expression AST node type
///
/// Values are printable characters: the parser's debugging helper prints the
/// type with `%c`.
typedef enum {
kExprNodeMissing = 'X',
kExprNodeOpMissing = '_',
kExprNodeTernary = '?', ///< Ternary operator, valid one has three children.
kExprNodeRegister = '@', ///< Register, no children.
kExprNodeSubscript = 's', ///< Subscript, should have two or three children.
kExprNodeListLiteral = 'l', ///< List literal, any number of children.
kExprNodeUnaryPlus = 'p',
kExprNodeBinaryPlus = '+',
kExprNodeNested = 'e', ///< Nested parenthesised expression.
kExprNodeCall = 'c', ///< Function call.
/// Plain identifier: simple variable/function name
///
/// Looks like "string", "g:Foo", etc: consists from a single
/// kExprLexPlainIdentifier token.
kExprNodePlainIdentifier = 'i',
/// Complex identifier: variable/function name with curly braces
kExprNodeComplexIdentifier = 'I',
} ExprASTNodeType;
typedef struct expr_ast_node ExprASTNode;
/// Structure representing one AST node
struct expr_ast_node {
ExprASTNodeType type; ///< Node type.
/// Node children: e.g. for 1 + 2 nodes 1 and 2 will be children of +.
ExprASTNode *children;
/// Next node: e.g. for 1 + 2 child nodes 1 and 2 are put into a single-linked
/// list: `(+)->children` references only node 1, node 2 is in
/// `(+)->children->next`.
ExprASTNode *next;
/// Position where the node starts. The parser takes it from the token the
/// node was created for, or from the spacing token right before it.
ParserPosition start;
/// Length of the node text, counted from `start`. Zero for nodes which were
/// synthesized to represent something missing.
size_t len;
union {
struct {
int name; ///< Register name, may be -1 if name not present.
} reg; ///< For kExprNodeRegister.
} data;
};
/// Flags altering the behaviour of the expression parser
///
/// Values are distinct bits meant to be combined with bitwise OR and passed as
/// the `flags` argument of viml_pexpr_parse().
typedef enum {
  /// Allow multiple expressions in a row: e.g. for :echo
  ///
  /// Parser will still parse only one of them though.
  kExprFlagsMulti = (1 << 0),
  /// Disallow NL, NUL and bar to be EOC
  ///
  /// When parsing expressions input by user bar is assumed to be a binary
  /// operator and other two are spacings.
  kExprFlagsDisallowEOC = (1 << 1),
  /// Print errors when encountered
  ///
  /// Without the flag they are only taken into account when parsing.
  kExprFlagsPrintError = (1 << 2),
} ExprParserFlags;
/// Structure representing complete AST for one expression
typedef struct {
/// True if represented AST is correct and can be executed. Incorrect ones may
/// still be used for completion, or in linters.
bool correct;
/// When AST is not correct this message will be printed.
///
/// Uses `emsgf(msg, arg_len, arg);`, `msg` is assumed to contain only `%.*s`.
struct {
const char *msg; ///< Message format, NULL when there is no error.
int arg_len; ///< Length of `arg` in bytes.
const char *arg; ///< Message argument, not necessarily NUL-terminated.
} err;
/// Root node of the AST.
ExprASTNode *root;
} ExprAST;
#ifdef INCLUDE_GENERATED_DECLARATIONS
# include "viml/parser/expressions.h.generated.h"
#endif

View File

@ -1,5 +1,6 @@
local helpers = require('test.unit.helpers')(nil)
local ptr2key = helpers.ptr2key
local cimport = helpers.cimport
local to_cstr = helpers.to_cstr
local ffi = helpers.ffi
@ -91,10 +92,6 @@ local function populate_partial(pt, lua_pt, processed)
return pt
end
-- Convert a pointer to a string usable as a table key.
-- NOTE(review): this shadows the `ptr2key` taken from `helpers` at the top of
-- the file; confirm which of the two definitions is meant to stay.
local ptr2key = function(ptr)
return tostring(ptr)
end
local lst2tbl
local dct2tbl

View File

@ -783,6 +783,31 @@ local function kvi_new(ct)
return kvi_init(ffi.new(ct))
end
--- Build a table mapping numeric enum values to their names.
---
--- The table is built inside `child_call_once` and handed to `set_cb`.
---
--- @param lib        Library object the enum constants are read from.
--- @param values     List of enum constant names.
--- @param skip_pref  Prefix stripped from the names stored in the table.
--- @param set_cb     Callback receiving the resulting table.
local function make_enum_conv_tab(lib, values, skip_pref, set_cb)
  child_call_once(function()
    local conv_tab = {}
    local pref_len = #skip_pref
    for _, name in ipairs(values) do
      local has_pref = (name:sub(1, pref_len) == skip_pref)
      -- Strings are always truthy in Lua, so the and/or idiom is safe here.
      local short_name = has_pref and name:sub(pref_len + 1) or name
      conv_tab[tonumber(lib[name])] = short_name
    end
    set_cb(conv_tab)
  end)
end
--- Convert a pointer to a Lua number holding its address.
local function ptr2addr(ptr)
  local as_void = ffi.cast('void *', ptr)
  local as_int = ffi.cast('intptr_t', as_void)
  return tonumber(as_int)
end
-- Scratch buffer reused by every ptr2key() call.
local s = ffi.new('char[64]', {0})
-- Format a pointer with C `%p` and return the result as a Lua string, which
-- makes it usable as a table key.
local function ptr2key(ptr)
ffi.C.snprintf(s, ffi.sizeof(s), '%p', ffi.cast('void *', ptr))
return ffi.string(s)
end
local module = {
cimport = cimport,
cppimport = cppimport,
@ -808,6 +833,9 @@ local module = {
kvi_size = kvi_size,
kvi_init = kvi_init,
kvi_new = kvi_new,
make_enum_conv_tab = make_enum_conv_tab,
ptr2addr = ptr2addr,
ptr2key = ptr2key,
}
return function()
return module

View File

@ -0,0 +1,887 @@
local helpers = require('test.unit.helpers')(after_each)
local viml_helpers = require('test.unit.viml.helpers')
local itp = helpers.gen_itp(it)
local make_enum_conv_tab = helpers.make_enum_conv_tab
local child_call_once = helpers.child_call_once
local conv_enum = helpers.conv_enum
local ptr2key = helpers.ptr2key
local cimport = helpers.cimport
local ffi = helpers.ffi
local eq = helpers.eq
local pline2lua = viml_helpers.pline2lua
local new_pstate = viml_helpers.new_pstate
local intchar2lua = viml_helpers.intchar2lua
local pstate_set_str = viml_helpers.pstate_set_str
local lib = cimport('./src/nvim/viml/parser/expressions.h')
-- Table mapping ExprASTNodeType values to names without the kExprNode prefix;
-- filled by the callback passed to make_enum_conv_tab() below.
local east_node_type_tab
make_enum_conv_tab(lib, {
'kExprNodeMissing',
'kExprNodeOpMissing',
'kExprNodeTernary',
'kExprNodeRegister',
'kExprNodeSubscript',
'kExprNodeListLiteral',
'kExprNodeUnaryPlus',
'kExprNodeBinaryPlus',
'kExprNodeNested',
'kExprNodeCall',
'kExprNodePlainIdentifier',
'kExprNodeComplexIdentifier',
}, 'kExprNode', function(ret) east_node_type_tab = ret end)
-- Convert a node type enum value using east_node_type_tab.
local function conv_east_node_type(typ)
return conv_enum(east_node_type_tab, typ)
end
local eastnodelist2lua

--- Convert one AST node to its Lua representation.
---
--- A node without children becomes a plain string "Type:line:col:text"; a node
--- with children becomes a table with that string at index 1 and the children
--- under `children`. A node seen twice is reported as `{ duplicate = key }`
--- and the first occurrence gets a `duplicate_key` field.
---
--- @param pstate         Parser state used to fetch the node text.
--- @param eastnode       Node to convert.
--- @param checked_nodes  Table of already visited nodes, keyed by pointer.
local function eastnode2lua(pstate, eastnode, checked_nodes)
  local key = ptr2key(eastnode)
  local seen = checked_nodes[key]
  if seen then
    seen.duplicate_key = key
    return { duplicate = key }
  end

  local typ = conv_east_node_type(eastnode.type)
  local ret = {}
  checked_nodes[key] = ret
  ret.children = eastnodelist2lua(pstate, eastnode.children, checked_nodes)

  local str = pstate_set_str(pstate, eastnode.start, eastnode.len)
  local pos_str
  if str.error then
    pos_str = 'error:' .. str.error
  else
    pos_str = ('%u:%u:%s'):format(str.start.line, str.start.col, str.str)
  end

  if typ == 'Register' then
    local reg_name = tostring(intchar2lua(eastnode.data.reg.name))
    typ = typ .. ('(name=%s)'):format(reg_name)
  end
  local ret_str = typ .. ':' .. pos_str

  -- Nothing besides the description: collapse the node into a string.
  if next(ret) == nil then
    return ret_str
  end
  ret[1] = ret_str
  return ret
end
--- Convert a linked list of sibling AST nodes to a Lua list.
---
--- Follows the `next` links starting at `eastnode`. Returns nil rather than
--- an empty table when there are no nodes.
eastnodelist2lua = function(pstate, eastnode, checked_nodes)
  local nodes = {}
  local cur = eastnode
  while cur ~= nil do
    nodes[#nodes + 1] = eastnode2lua(pstate, cur, checked_nodes)
    cur = cur.next
  end
  if #nodes == 0 then
    return nil
  end
  return nodes
end
--- Convert a parsed ExprAST to a Lua table with `err` and `ast` fields.
---
--- `err` is nil for a correct AST; otherwise it holds the message and the
--- argument formatted as "length:text".
local function east2lua(pstate, east)
  local checked_nodes = {}
  local err = nil
  if not east.correct then
    local arg_len = east.err.arg_len
    err = {
      msg = ffi.string(east.err.msg),
      arg = ('%u:%s'):format(
        tonumber(arg_len),
        ffi.string(east.err.arg, arg_len)),
    }
  end
  return {
    err = err,
    ast = eastnodelist2lua(pstate, east.root, checked_nodes),
  }
end
--- Convert the highlighting recorded in the parser state to a list of strings.
---
--- Each chunk in `pstate.colors` becomes "group:line:col:text".
---
--- @param pstate  Parser state after parsing.
--- @return List of chunk descriptions, in the order they were recorded.
local function phl2lua(pstate)
  local ret = {}
  for i = 0, (tonumber(pstate.colors.size) - 1) do
    local chunk = pstate.colors.items[i]
    local chunk_tbl = pstate_set_str(
      pstate, chunk.start, chunk.end_col - chunk.start.col, {
        group = ffi.string(chunk.group),
      })
    -- Declared local: the original assignment leaked a global `chunk_str`.
    local chunk_str = ('%s:%u:%u:%s'):format(
      chunk_tbl.group,
      chunk_tbl.start.line,
      chunk_tbl.start.col,
      chunk_tbl.str)
    ret[i + 1] = chunk_str
  end
  return ret
end
-- Presumably raises the depth up to which luassert formats nested tables in
-- failure messages, so that whole ASTs are shown (confirm against luassert).
child_call_once(function()
assert:set_parameter('TableFormatLevel', 1000000)
end)
describe('Expressions parser', function()
itp('works', function()
local function check_parsing(str, flags, exp_ast, exp_highlighting_fs)
local pstate = new_pstate({str})
local east = lib.viml_pexpr_parse(pstate, flags)
local ast = east2lua(pstate, east)
eq(exp_ast, ast)
if exp_highlighting_fs then
local exp_highlighting = {}
local next_col = 0
for i, h in ipairs(exp_highlighting_fs) do
exp_highlighting[i], next_col = h(next_col)
end
eq(exp_highlighting, phl2lua(pstate))
end
end
local function hl(group, str, shift)
return function(next_col)
local col = next_col + (shift or 0)
return (('%s:%u:%u:%s'):format(
'NVim' .. group,
0,
col,
str)), (col + #str)
end
end
check_parsing('@a', 0, {
ast = {
'Register(name=a):0:0:@a',
},
}, {
hl('Register', '@a'),
})
check_parsing('+@a', 0, {
ast = {
{
'UnaryPlus:0:0:+',
children = {
'Register(name=a):0:1:@a',
},
},
},
}, {
hl('UnaryPlus', '+'),
hl('Register', '@a'),
})
check_parsing('@a+@b', 0, {
ast = {
{
'BinaryPlus:0:2:+',
children = {
'Register(name=a):0:0:@a',
'Register(name=b):0:3:@b',
},
},
},
}, {
hl('Register', '@a'),
hl('BinaryPlus', '+'),
hl('Register', '@b'),
})
check_parsing('@a+@b+@c', 0, {
ast = {
{
'BinaryPlus:0:5:+',
children = {
{
'BinaryPlus:0:2:+',
children = {
'Register(name=a):0:0:@a',
'Register(name=b):0:3:@b',
},
},
'Register(name=c):0:6:@c',
},
},
},
}, {
hl('Register', '@a'),
hl('BinaryPlus', '+'),
hl('Register', '@b'),
hl('BinaryPlus', '+'),
hl('Register', '@c'),
})
check_parsing('+@a+@b', 0, {
ast = {
{
'BinaryPlus:0:3:+',
children = {
{
'UnaryPlus:0:0:+',
children = {
'Register(name=a):0:1:@a',
},
},
'Register(name=b):0:4:@b',
},
},
},
}, {
hl('UnaryPlus', '+'),
hl('Register', '@a'),
hl('BinaryPlus', '+'),
hl('Register', '@b'),
})
check_parsing('+@a++@b', 0, {
ast = {
{
'BinaryPlus:0:3:+',
children = {
{
'UnaryPlus:0:0:+',
children = {
'Register(name=a):0:1:@a',
},
},
{
'UnaryPlus:0:4:+',
children = {
'Register(name=b):0:5:@b',
},
},
},
},
},
}, {
hl('UnaryPlus', '+'),
hl('Register', '@a'),
hl('BinaryPlus', '+'),
hl('UnaryPlus', '+'),
hl('Register', '@b'),
})
check_parsing('@a@b', 0, {
ast = {
{
'OpMissing:0:2:',
children = {
'Register(name=a):0:0:@a',
'Register(name=b):0:2:@b',
},
},
},
err = {
arg = '2:@b',
msg = 'E15: Missing operator: %.*s',
},
}, {
hl('Register', '@a'),
hl('InvalidRegister', '@b'),
})
check_parsing(' @a \t @b', 0, {
ast = {
{
'OpMissing:0:3:',
children = {
'Register(name=a):0:0: @a',
'Register(name=b):0:3: \t @b',
},
},
},
err = {
arg = '2:@b',
msg = 'E15: Missing operator: %.*s',
},
}, {
hl('Register', '@a', 1),
hl('InvalidSpacing', ' \t '),
hl('Register', '@b'),
})
check_parsing('+', 0, {
ast = {
'UnaryPlus:0:0:+',
},
err = {
arg = '0:',
msg = 'E15: Expected value: %.*s',
},
}, {
hl('UnaryPlus', '+'),
})
check_parsing(' +', 0, {
ast = {
'UnaryPlus:0:0: +',
},
err = {
arg = '0:',
msg = 'E15: Expected value: %.*s',
},
}, {
hl('UnaryPlus', '+', 1),
})
check_parsing('@a+ ', 0, {
ast = {
{
'BinaryPlus:0:2:+',
children = {
'Register(name=a):0:0:@a',
},
},
},
err = {
arg = '0:',
msg = 'E15: Expected value: %.*s',
},
}, {
hl('Register', '@a'),
hl('BinaryPlus', '+'),
})
check_parsing('(@a)', 0, {
ast = {
{
'Nested:0:0:(',
children = {
'Register(name=a):0:1:@a',
},
},
},
}, {
hl('NestingParenthesis', '('),
hl('Register', '@a'),
hl('NestingParenthesis', ')'),
})
check_parsing('()', 0, {
ast = {
{
'Nested:0:0:(',
children = {
'Missing:0:1:',
},
},
},
err = {
arg = '1:)',
msg = 'E15: Expected value: %.*s',
},
}, {
hl('NestingParenthesis', '('),
hl('InvalidNestingParenthesis', ')'),
})
check_parsing(')', 0, {
ast = {
{
'Nested:0:0:',
children = {
'Missing:0:0:',
},
},
},
err = {
arg = '1:)',
msg = 'E15: Expected value: %.*s',
},
}, {
hl('InvalidNestingParenthesis', ')'),
})
check_parsing('+)', 0, {
ast = {
{
'Nested:0:1:',
children = {
{
'UnaryPlus:0:0:+',
children = {
'Missing:0:1:',
},
},
},
},
},
err = {
arg = '1:)',
msg = 'E15: Expected value: %.*s',
},
}, {
hl('UnaryPlus', '+'),
hl('InvalidNestingParenthesis', ')'),
})
check_parsing('+@a(@b)', 0, {
ast = {
{
'UnaryPlus:0:0:+',
children = {
{
'Call:0:3:(',
children = {
'Register(name=a):0:1:@a',
'Register(name=b):0:4:@b',
},
},
},
},
},
}, {
hl('UnaryPlus', '+'),
hl('Register', '@a'),
hl('CallingParenthesis', '('),
hl('Register', '@b'),
hl('CallingParenthesis', ')'),
})
check_parsing('@a+@b(@c)', 0, {
ast = {
{
'BinaryPlus:0:2:+',
children = {
'Register(name=a):0:0:@a',
{
'Call:0:5:(',
children = {
'Register(name=b):0:3:@b',
'Register(name=c):0:6:@c',
},
},
},
},
},
}, {
hl('Register', '@a'),
hl('BinaryPlus', '+'),
hl('Register', '@b'),
hl('CallingParenthesis', '('),
hl('Register', '@c'),
hl('CallingParenthesis', ')'),
})
check_parsing('@a()', 0, {
ast = {
{
'Call:0:2:(',
children = {
'Register(name=a):0:0:@a',
},
},
},
}, {
hl('Register', '@a'),
hl('CallingParenthesis', '('),
hl('CallingParenthesis', ')'),
})
-- Whitespace between a value and an opening parenthesis: the expected AST is
-- not a Call. An OpMissing node joins @a with a Nested node (whose text
-- includes the leading space) that holds a Missing placeholder. The expected
-- error is "Missing operator", the space is expected to be highlighted as
-- InvalidSpacing, and the closing parenthesis as InvalidNestingParenthesis.
check_parsing('@a ()', 0, {
ast = {
{
'OpMissing:0:2:',
children = {
'Register(name=a):0:0:@a',
{
'Nested:0:2: (',
children = {
'Missing:0:4:',
},
},
},
},
},
err = {
arg = '2:()',
msg = 'E15: Missing operator: %.*s',
},
}, {
hl('Register', '@a'),
hl('InvalidSpacing', ' '),
hl('NestingParenthesis', '('),
hl('InvalidNestingParenthesis', ')'),
})
-- Binary plus with a parenthesised right operand and spaces around the
-- operator. Expected node texts include the whitespace that precedes the
-- token (' +', ' ('), and the node column is that of the whitespace.
-- NOTE(review): the third hl() argument (1) is presumably the number of
-- skipped leading whitespace characters -- confirm against the hl() helper.
check_parsing(
'@a + (@b)', 0, {
ast = {
{
'BinaryPlus:0:2: +',
children = {
'Register(name=a):0:0:@a',
{
'Nested:0:4: (',
children = {
'Register(name=b):0:6:@b',
},
},
},
},
},
}, {
hl('Register', '@a'),
hl('BinaryPlus', '+', 1),
hl('NestingParenthesis', '(', 1),
hl('Register', '@b'),
hl('NestingParenthesis', ')'),
})
-- Unary plus inside a nested expression on the right of a binary plus: the
-- Nested node's only child is expected to be UnaryPlus (column 6), with the
-- register @b (column 7) as its operand.
check_parsing(
'@a + (+@b)', 0, {
ast = {
{
'BinaryPlus:0:2: +',
children = {
'Register(name=a):0:0:@a',
{
'Nested:0:4: (',
children = {
{
'UnaryPlus:0:6:+',
children = {
'Register(name=b):0:7:@b',
},
},
},
},
},
},
},
}, {
hl('Register', '@a'),
hl('BinaryPlus', '+', 1),
hl('NestingParenthesis', '(', 1),
hl('UnaryPlus', '+'),
hl('Register', '@b'),
hl('NestingParenthesis', ')'),
})
-- Binary plus nested inside parentheses on the right of another binary plus:
-- the outer BinaryPlus (column 2) is expected to have @a and a Nested node as
-- children, and the Nested node holds the inner BinaryPlus (column 8) of @b
-- and @c. Tokens preceded by a space carry that space in their node text.
check_parsing(
'@a + (@b + @c)', 0, {
ast = {
{
'BinaryPlus:0:2: +',
children = {
'Register(name=a):0:0:@a',
{
'Nested:0:4: (',
children = {
{
'BinaryPlus:0:8: +',
children = {
'Register(name=b):0:6:@b',
'Register(name=c):0:10: @c',
},
},
},
},
},
},
},
}, {
hl('Register', '@a'),
hl('BinaryPlus', '+', 1),
hl('NestingParenthesis', '(', 1),
hl('Register', '@b'),
hl('BinaryPlus', '+', 1),
hl('Register', '@c', 1),
hl('NestingParenthesis', ')'),
})
-- Nested expression as the left operand: the BinaryPlus at column 4 is
-- expected to have the Nested node as its first child and @b as its second.
check_parsing('(@a)+@b', 0, {
ast = {
{
'BinaryPlus:0:4:+',
children = {
{
'Nested:0:0:(',
children = {
'Register(name=a):0:1:@a',
},
},
'Register(name=b):0:5:@b',
},
},
},
}, {
hl('NestingParenthesis', '('),
hl('Register', '@a'),
hl('NestingParenthesis', ')'),
hl('BinaryPlus', '+'),
hl('Register', '@b'),
})
-- Parenthesised expression used as a callee: the Call node (column 7) is
-- expected to have the Nested node as its first child and the argument @c as
-- its second. The first pair of parentheses is expected to be highlighted as
-- NestingParenthesis and the second pair as CallingParenthesis.
check_parsing('@a+(@b)(@c)', 0, {
--             01234567890  (column ruler for the input string above)
ast = {
{
'BinaryPlus:0:2:+',
children = {
'Register(name=a):0:0:@a',
{
'Call:0:7:(',
children = {
{
'Nested:0:3:(',
children = { 'Register(name=b):0:4:@b' },
},
'Register(name=c):0:8:@c',
},
},
},
},
},
}, {
hl('Register', '@a'),
hl('BinaryPlus', '+'),
hl('NestingParenthesis', '('),
hl('Register', '@b'),
hl('NestingParenthesis', ')'),
hl('CallingParenthesis', '('),
hl('Register', '@c'),
hl('CallingParenthesis', ')'),
})
-- Doubly nested expression used as a callee: the Call node (column 9) is
-- expected to have the outer Nested node (column 3) as its first child, which
-- in turn wraps the inner Nested node (column 4) around @b. The argument @c
-- is the Call node's second child.
check_parsing('@a+((@b))(@c)', 0, {
--             012345678901234567890123456789  (ones digit of the column)
--             0         1         2           (tens digit of the column)
ast = {
{
'BinaryPlus:0:2:+',
children = {
'Register(name=a):0:0:@a',
{
'Call:0:9:(',
children = {
{
'Nested:0:3:(',
children = {
{
'Nested:0:4:(',
children = { 'Register(name=b):0:5:@b' }
},
},
},
'Register(name=c):0:10:@c',
},
},
},
},
},
}, {
hl('Register', '@a'),
hl('BinaryPlus', '+'),
hl('NestingParenthesis', '('),
hl('NestingParenthesis', '('),
hl('Register', '@b'),
hl('NestingParenthesis', ')'),
hl('NestingParenthesis', ')'),
hl('CallingParenthesis', '('),
hl('Register', '@c'),
hl('CallingParenthesis', ')'),
})
-- Chained binary plus with a doubly nested middle operand: the expected tree
-- groups to the left. The root BinaryPlus (column 9) has the first BinaryPlus
-- (column 2) as its first child and @c as its second.
check_parsing('@a+((@b))+@c', 0, {
--             012345678901234567890123456789  (ones digit of the column)
--             0         1         2           (tens digit of the column)
ast = {
{
'BinaryPlus:0:9:+',
children = {
{
'BinaryPlus:0:2:+',
children = {
'Register(name=a):0:0:@a',
{
'Nested:0:3:(',
children = {
{
'Nested:0:4:(',
children = { 'Register(name=b):0:5:@b' }
},
},
},
},
},
'Register(name=c):0:10:@c',
},
},
},
}, {
hl('Register', '@a'),
hl('BinaryPlus', '+'),
hl('NestingParenthesis', '('),
hl('NestingParenthesis', '('),
hl('Register', '@b'),
hl('NestingParenthesis', ')'),
hl('NestingParenthesis', ')'),
hl('BinaryPlus', '+'),
hl('Register', '@c'),
})
-- Combined case mixing every construct exercised by the smaller cases: chained
-- binary plus, nested expressions, unary plus, and calls, including calls
-- whose callee is itself a call or a nested expression.
-- The expected tree groups the four binary pluses to the left: the root is the
-- last one (column 31), and its first child is the previous one (column 23),
-- and so on. The last operand is a Call (column 53) whose callee is a Nested
-- node wrapping two chained calls.
-- The block comment after the input is a column ruler for that input (tens
-- digit on one line, ones digit on the next).
check_parsing(
'@a + (@b + @c) + @d(@e) + (+@f) + ((+@g(@h))(@j)(@k))(@l)', 0, {--[[
| | | | | | | | || | | || | | ||| || || || ||
000000000011111111112222222222333333333344444444445555555
012345678901234567890123456789012345678901234567890123456
]]
ast = {{
'BinaryPlus:0:31: +',
children = {
{
'BinaryPlus:0:23: +',
children = {
{
'BinaryPlus:0:14: +',
children = {
{
'BinaryPlus:0:2: +',
children = {
'Register(name=a):0:0:@a',
{
'Nested:0:4: (',
children = {
{
'BinaryPlus:0:8: +',
children = {
'Register(name=b):0:6:@b',
'Register(name=c):0:10: @c',
},
},
},
},
},
},
{
'Call:0:19:(',
children = {
'Register(name=d):0:16: @d',
'Register(name=e):0:20:@e',
},
},
},
},
{
'Nested:0:25: (',
children = {
{
'UnaryPlus:0:27:+',
children = {
'Register(name=f):0:28:@f',
},
},
},
},
},
},
{
'Call:0:53:(',
children = {
{
'Nested:0:33: (',
children = {
{
'Call:0:48:(',
children = {
{
'Call:0:44:(',
children = {
{
'Nested:0:35:(',
children = {
{
'UnaryPlus:0:36:+',
children = {
{
'Call:0:39:(',
children = {
'Register(name=g):0:37:@g',
'Register(name=h):0:40:@h',
},
},
},
},
},
},
'Register(name=j):0:45:@j',
},
},
'Register(name=k):0:49:@k',
},
},
},
},
'Register(name=l):0:54:@l',
},
},
},
}},
}, {
hl('Register', '@a'),
hl('BinaryPlus', '+', 1),
hl('NestingParenthesis', '(', 1),
hl('Register', '@b'),
hl('BinaryPlus', '+', 1),
hl('Register', '@c', 1),
hl('NestingParenthesis', ')'),
hl('BinaryPlus', '+', 1),
hl('Register', '@d', 1),
hl('CallingParenthesis', '('),
hl('Register', '@e'),
hl('CallingParenthesis', ')'),
hl('BinaryPlus', '+', 1),
hl('NestingParenthesis', '(', 1),
hl('UnaryPlus', '+'),
hl('Register', '@f'),
hl('NestingParenthesis', ')'),
hl('BinaryPlus', '+', 1),
hl('NestingParenthesis', '(', 1),
hl('NestingParenthesis', '('),
hl('UnaryPlus', '+'),
hl('Register', '@g'),
hl('CallingParenthesis', '('),
hl('Register', '@h'),
hl('CallingParenthesis', ')'),
hl('NestingParenthesis', ')'),
hl('CallingParenthesis', '('),
hl('Register', '@j'),
hl('CallingParenthesis', ')'),
hl('CallingParenthesis', '('),
hl('Register', '@k'),
hl('CallingParenthesis', ')'),
hl('NestingParenthesis', ')'),
hl('CallingParenthesis', '('),
hl('Register', '@l'),
hl('CallingParenthesis', ')'),
})
-- Closing parenthesis with no matching opening one after a complete value:
-- the expected AST is a Nested node (column 2, empty text) wrapping the
-- register. The expected error is "Unexpected closing parenthesis", and the
-- parenthesis is expected to be highlighted as InvalidNestingParenthesis.
check_parsing('@a)', 0, {
--             012  (column ruler for the input string above)
ast = {
{
'Nested:0:2:',
children = {
'Register(name=a):0:0:@a',
},
},
},
err = {
arg = '1:)',
msg = 'E15: Unexpected closing parenthesis: %.*s',
},
}, {
hl('Register', '@a'),
hl('InvalidNestingParenthesis', ')'),
})
-- Unterminated nested expression: the AST is expected to be the same as for a
-- properly closed '(@a)', but an E110 error is expected whose argument is the
-- text starting at the unclosed opening parenthesis.
check_parsing('(@a', 0, {
--             012  (column ruler for the input string above)
ast = {
{
'Nested:0:0:(',
children = {
'Register(name=a):0:1:@a',
},
},
},
err = {
arg = '3:(@a',
msg = 'E110: Missing closing parenthesis for nested expression: %.*s',
},
}, {
hl('NestingParenthesis', '('),
hl('Register', '@a'),
})
-- Unterminated function call: the Call node with callee @a and argument @b is
-- still expected, but an E116 error is expected whose argument is the text
-- starting at the unclosed calling parenthesis.
check_parsing('@a(@b', 0, {
--             01234  (column ruler for the input string above)
ast = {
{
'Call:0:2:(',
children = {
'Register(name=a):0:0:@a',
'Register(name=b):0:3:@b',
},
},
},
err = {
arg = '3:(@b',
msg = 'E116: Missing closing parenthesis for function call: %.*s',
},
}, {
hl('Register', '@a'),
hl('CallingParenthesis', '('),
hl('Register', '@b'),
})
end)
end)