mirror of
https://github.com/neovim/neovim.git
synced 2024-12-21 19:55:04 -07:00
vim-patch:8.2.2278: falling back to old regexp engine can break some patterns
Problem: Falling back to old regexp engine can break some patterns.
Solution: Do not fall back once [[:lower:]] or [[:upper:]] is used.
(Christian Brabandt, closes vim/vim#7572)
66c50c5653
This commit is contained in:
parent
7e0d50b16e
commit
a7061359b7
@ -692,6 +692,7 @@ static char_u *regparse; ///< Input-scan pointer.
|
||||
static int prevchr_len; ///< byte length of previous char
|
||||
static int num_complex_braces; ///< Complex \{...} count
|
||||
static int regnpar; ///< () count.
|
||||
static bool wants_nfa; ///< regex should use NFA engine
|
||||
static int regnzpar; ///< \z() count.
|
||||
static int re_has_z; ///< \z item detected
|
||||
static char_u *regcode; ///< Code-emit pointer, or JUST_CALC_SIZE
|
||||
@ -7248,7 +7249,7 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
|
||||
// Check for error compiling regexp with initial engine.
|
||||
if (prog == NULL) {
|
||||
#ifdef BT_REGEXP_DEBUG_LOG
|
||||
// Debugging log for NFA.
|
||||
// Debugging log for BT engine.
|
||||
if (regexp_engine != BACKTRACKING_ENGINE) {
|
||||
FILE *f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
|
||||
if (f) {
|
||||
@ -7265,6 +7266,7 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
|
||||
// But don't try if an error message was given.
|
||||
if (regexp_engine == AUTOMATIC_ENGINE && !called_emsg) {
|
||||
regexp_engine = BACKTRACKING_ENGINE;
|
||||
report_re_switch(expr);
|
||||
prog = bt_regengine.regcomp(expr, re_flags);
|
||||
}
|
||||
}
|
||||
|
@ -328,6 +328,11 @@ static int *post_start; ///< holds the postfix form of r.e.
|
||||
static int *post_end;
|
||||
static int *post_ptr;
|
||||
|
||||
// Set when the pattern should use the NFA engine.
|
||||
// E.g. [[:upper:]] only allows 8bit characters for BT engine,
|
||||
// while NFA engine handles multibyte characters correctly.
|
||||
static bool wants_nfa;
|
||||
|
||||
static int nstate; ///< Number of states in the NFA. Also used when executing.
|
||||
static int istate; ///< Index in the state vector, used in alloc_state()
|
||||
|
||||
@ -377,6 +382,7 @@ nfa_regcomp_start (
|
||||
post_start = (int *)xmalloc(postfix_size);
|
||||
post_ptr = post_start;
|
||||
post_end = post_start + nstate_max;
|
||||
wants_nfa = false;
|
||||
rex.nfa_has_zend = false;
|
||||
rex.nfa_has_backref = false;
|
||||
|
||||
@ -1618,6 +1624,7 @@ collection:
|
||||
EMIT(NFA_CLASS_GRAPH);
|
||||
break;
|
||||
case CLASS_LOWER:
|
||||
wants_nfa = true;
|
||||
EMIT(NFA_CLASS_LOWER);
|
||||
break;
|
||||
case CLASS_PRINT:
|
||||
@ -1630,6 +1637,7 @@ collection:
|
||||
EMIT(NFA_CLASS_SPACE);
|
||||
break;
|
||||
case CLASS_UPPER:
|
||||
wants_nfa = true;
|
||||
EMIT(NFA_CLASS_UPPER);
|
||||
break;
|
||||
case CLASS_XDIGIT:
|
||||
@ -1998,10 +2006,17 @@ static int nfa_regpiece(void)
|
||||
return OK;
|
||||
}
|
||||
|
||||
// The engine is very inefficient (uses too many states) when the maximum
|
||||
// is much larger than the minimum and when the maximum is large. Bail out
|
||||
// if we can use the other engine.
|
||||
if ((nfa_re_flags & RE_AUTO) && (maxval > 500 || maxval > minval + 200)) {
|
||||
// The engine is very inefficient (uses too many states) when the
|
||||
// maximum is much larger than the minimum and when the maximum is
|
||||
// large. However, when maxval is MAX_LIMIT, it is okay, as this
|
||||
// will emit NFA_STAR.
|
||||
// Bail out if we can use the other engine, but only, when the
|
||||
// pattern does not need the NFA engine like (e.g. [[:upper:]]\{2,\}
|
||||
// does not work with characters > 8 bit with the BT engine)
|
||||
if ((nfa_re_flags & RE_AUTO)
|
||||
&& (maxval > 500 || maxval > minval + 200)
|
||||
&& (maxval != MAX_LIMIT && minval < 200)
|
||||
&& !wants_nfa) {
|
||||
return FAIL;
|
||||
}
|
||||
|
||||
|
@ -542,6 +542,52 @@ func Test_match_start_of_line_combining()
|
||||
bwipe!
|
||||
endfunc
|
||||
|
||||
" Check that [[:upper:]] matches for automatic engine
|
||||
func Test_match_char_class_upper()
|
||||
new
|
||||
let _engine=&regexpengine
|
||||
|
||||
" Test 1: [[:upper:]]\{2,\}
|
||||
set regexpengine=0
|
||||
call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
|
||||
call cursor(1,1)
|
||||
let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>"
|
||||
exe search_cmd
|
||||
call assert_equal(4, searchcount().total, 'TEST 1')
|
||||
set regexpengine=1
|
||||
exe search_cmd
|
||||
call assert_equal(2, searchcount().total, 'TEST 1')
|
||||
set regexpengine=2
|
||||
exe search_cmd
|
||||
call assert_equal(4, searchcount().total, 'TEST 1')
|
||||
|
||||
" Test 2: [[:upper:]].\+
|
||||
let search_cmd='norm /\<[[:upper:]].\+\>' .. "\<CR>"
|
||||
set regexpengine=0
|
||||
exe search_cmd
|
||||
call assert_equal(2, searchcount().total, 'TEST 2')
|
||||
set regexpengine=1
|
||||
exe search_cmd
|
||||
call assert_equal(1, searchcount().total, 'TEST 2')
|
||||
set regexpengine=2
|
||||
exe search_cmd
|
||||
call assert_equal(2, searchcount().total, 'TEST 2')
|
||||
|
||||
" Test 3: [[:lower:]]\+
|
||||
let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>"
|
||||
set regexpengine=0
|
||||
exe search_cmd
|
||||
call assert_equal(4, searchcount().total, 'TEST 3 lower')
|
||||
set regexpengine=1
|
||||
exe search_cmd
|
||||
call assert_equal(2, searchcount().total, 'TEST 3 lower')
|
||||
set regexpengine=2
|
||||
exe search_cmd
|
||||
call assert_equal(4, searchcount().total, 'TEST 3 lower')
|
||||
|
||||
" clean up
|
||||
let &regexpengine=_engine
|
||||
bwipe!
|
||||
endfunc
|
||||
|
||||
" vim: shiftwidth=2 sts=2 expandtab
|
||||
|
Loading…
Reference in New Issue
Block a user