vim-patch:7.4.497 #2295

Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

https://github.com/vim/vim/releases/tag/v7-4-497

Helped-by: David Bürgin <676c7473@gmail.com>
Helped-by: Justin M. Keyes <justinkz@gmail.com>
Helped-by: Scott Prager <splinterofchaos@gmail.com>
This commit is contained in:
David Bürgin 2015-03-28 22:28:37 +01:00 committed by Justin M. Keyes
parent a69e464f70
commit d3bb177f1e
10 changed files with 239 additions and 73 deletions

View File

@ -331,4 +331,16 @@ if(BUSTED_PRG)
-DTEST_TYPE=functional
-P ${PROJECT_SOURCE_DIR}/cmake/RunTests.cmake
DEPENDS nvim tty-test)
add_custom_target(benchmark
COMMAND ${CMAKE_COMMAND}
-DBUSTED_PRG=${BUSTED_PRG}
-DNVIM_PRG=$<TARGET_FILE:nvim>
-DWORKING_DIR=${CMAKE_CURRENT_SOURCE_DIR}
-DBUSTED_OUTPUT_TYPE=${BUSTED_OUTPUT_TYPE}
-DTEST_DIR=${CMAKE_CURRENT_SOURCE_DIR}/test
-DBUILD_DIR=${CMAKE_BINARY_DIR}
-DTEST_TYPE=benchmark
-P ${PROJECT_SOURCE_DIR}/cmake/RunTests.cmake
DEPENDS nvim tty-test)
endif()

View File

@ -89,6 +89,9 @@ test: functionaltest
unittest: | nvim
+$(BUILD_CMD) -C build unittest
benchmark: | nvim
+$(BUILD_CMD) -C build benchmark
clean:
+test -d build && $(BUILD_CMD) -C build clean || true
$(MAKE) -C src/nvim/testdir clean

View File

@ -5383,6 +5383,10 @@ A jump table for the options with a short description can be found at |Q_op|.
Note that when using the NFA engine and the pattern contains something
that is not supported the pattern will not match. This is only useful
for debugging the regexp engine.
Using automatic selection enables Vim to switch the engine, if the
default engine becomes too costly. E.g., when the NFA engine uses too
many states. This should prevent Vim from hanging on a combination of
a complex pattern with long text.
*'relativenumber'* *'rnu'* *'norelativenumber'* *'nornu'*
'relativenumber' 'rnu' boolean (default off)

View File

@ -6914,13 +6914,8 @@ static regengine_T bt_regengine =
bt_regcomp,
bt_regfree,
bt_regexec_nl,
bt_regexec_multi
#ifdef REGEXP_DEBUG
,(char_u *)""
#endif
#ifdef DEBUG
,NULL
#endif
bt_regexec_multi,
(char_u *)""
};
@ -6934,21 +6929,14 @@ static regengine_T nfa_regengine =
nfa_regcomp,
nfa_regfree,
nfa_regexec_nl,
nfa_regexec_multi
#ifdef REGEXP_DEBUG
,(char_u *)""
#endif
#ifdef DEBUG
, NULL
#endif
nfa_regexec_multi,
(char_u *)""
};
/* Which regexp engine to use? Needed for vim_regcomp().
* Must match with 'regexpengine'. */
static int regexp_engine = 0;
#define AUTOMATIC_ENGINE 0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE 2
#ifdef REGEXP_DEBUG
static char_u regname[][30] = {
"AUTOMATIC Regexp Engine",
@ -6990,10 +6978,8 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
regexp_engine = AUTOMATIC_ENGINE;
}
}
#ifdef REGEXP_DEBUG
bt_regengine.expr = expr;
nfa_regengine.expr = expr;
#endif
/*
* First try the NFA engine, unless backtracking was requested.
@ -7003,11 +6989,12 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
else
prog = bt_regengine.regcomp(expr, re_flags);
if (prog == NULL) { /* error compiling regexp with initial engine */
// Check for error compiling regexp with initial engine.
if (prog == NULL) {
#ifdef BT_REGEXP_DEBUG_LOG
if (regexp_engine != BACKTRACKING_ENGINE) { /* debugging log for NFA */
FILE *f;
f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
// Debugging log for NFA.
if (regexp_engine != BACKTRACKING_ENGINE) {
FILE *f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
if (f) {
fprintf(f, "Syntax error in \"%s\"\n", expr);
fclose(f);
@ -7016,12 +7003,22 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
BT_REGEXP_DEBUG_LOG_NAME);
}
#endif
/*
* If the NFA engine failed, the backtracking engine won't work either.
*
if (regexp_engine == AUTOMATIC_ENGINE)
prog = bt_regengine.regcomp(expr, re_flags);
*/
// If the NFA engine failed, try the backtracking engine.
// Disabled for now, both engines fail on the same patterns.
// Re-enable when regcomp() fails when the pattern would work better
// with the other engine.
//
// if (regexp_engine == AUTOMATIC_ENGINE) {
// prog = bt_regengine.regcomp(expr, re_flags);
// regexp_engine = BACKTRACKING_ENGINE;
// }
}
if (prog != NULL) {
// Store the info needed to call regcomp() again when the engine turns out
// to be very slow when executing it.
prog->re_engine = regexp_engine;
prog->re_flags = re_flags;
}
return prog;
@ -7036,29 +7033,62 @@ void vim_regfree(regprog_T *prog)
prog->engine->regfree(prog);
}
/*
* Match a regexp against a string.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
* Uses curbuf for line count and 'iskeyword'.
*
* Return TRUE if there is a match, FALSE if not.
*/
int
vim_regexec (
regmatch_T *rmp,
char_u *line, /* string to match against */
colnr_T col /* column to start looking for match */
)
static void report_re_switch(char_u *pat)
{
return rmp->regprog->engine->regexec_nl(rmp, line, col, false);
if (p_verbose > 0) {
verbose_enter();
MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
MSG_PUTS(pat);
verbose_leave();
}
}
/*
* Like vim_regexec(), but consider a "\n" in "line" to be a line break.
*/
/// Matches a regexp against a string.
/// "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
/// Uses curbuf for line count and 'iskeyword'.
/// When "nl" is true consider a "\n" in "line" to be a line break.
///
/// @param rmp
/// @param line the string to match against
/// @param col the column to start looking for match
/// @param nl
///
/// @return TRUE if there is a match, FALSE if not.
static int vim_regexec_both(regmatch_T *rmp, char_u *line, colnr_T col, bool nl)
{
int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
// NFA engine aborted because it's very slow, use backtracking engine instead.
if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
&& result == NFA_TOO_EXPENSIVE) {
int save_p_re = p_re;
int re_flags = rmp->regprog->re_flags;
char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
p_re = BACKTRACKING_ENGINE;
vim_regfree(rmp->regprog);
report_re_switch(pat);
rmp->regprog = vim_regcomp(pat, re_flags);
if (rmp->regprog != NULL) {
result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
}
free(pat);
p_re = save_p_re;
}
return result;
}
int vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
{
return vim_regexec_both(rmp, line, col, false);
}
// Like vim_regexec(), but consider a "\n" in "line" to be a line break.
int vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
{
return rmp->regprog->engine->regexec_nl(rmp, line, col, true);
return vim_regexec_both(rmp, line, col, true);
}
/*
@ -7078,5 +7108,28 @@ long vim_regexec_multi(
proftime_T *tm /* timeout limit or NULL */
)
{
return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
int result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col,
tm);
// NFA engine aborted because it's very slow, use backtracking engine instead.
if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
&& result == NFA_TOO_EXPENSIVE) {
int save_p_re = p_re;
int re_flags = rmp->regprog->re_flags;
char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
p_re = BACKTRACKING_ENGINE;
vim_regfree(rmp->regprog);
report_re_switch(pat);
rmp->regprog = vim_regcomp(pat, re_flags);
if (rmp->regprog != NULL) {
result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col,
tm);
}
free(pat);
p_re = save_p_re;
}
return result;
}

View File

@ -30,6 +30,16 @@
*/
#define NFA_MAX_BRACES 20
// In the NFA engine: how many states are allowed.
#define NFA_MAX_STATES 100000
#define NFA_TOO_EXPENSIVE -1
// Which regexp engine to use? Needed for vim_regcomp().
// Must match with 'regexpengine'.
#define AUTOMATIC_ENGINE 0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE 2
typedef struct regengine regengine_T;
/*
@ -40,6 +50,8 @@ typedef struct regengine regengine_T;
typedef struct regprog {
regengine_T *engine;
unsigned regflags;
unsigned re_engine; ///< Automatic, backtracking or NFA engine.
unsigned re_flags; ///< Second argument for vim_regcomp().
} regprog_T;
/*
@ -48,9 +60,11 @@ typedef struct regprog {
* See regexp.c for an explanation.
*/
typedef struct {
/* These two members implement regprog_T */
// These four members implement regprog_T.
regengine_T *engine;
unsigned regflags;
unsigned re_engine;
unsigned re_flags; ///< Second argument for vim_regcomp().
int regstart;
char_u reganch;
@ -78,9 +92,11 @@ struct nfa_state {
* Structure used by the NFA matcher.
*/
typedef struct {
/* These two members implement regprog_T */
// These four members implement regprog_T.
regengine_T *engine;
unsigned regflags;
unsigned re_engine;
unsigned re_flags; ///< Second argument for vim_regcomp().
nfa_state_T *start; /* points into state[] */
@ -91,9 +107,7 @@ typedef struct {
int has_zend; /* pattern contains \ze */
int has_backref; /* pattern contains \1 .. \9 */
int reghasz;
#ifdef DEBUG
char_u *pattern;
#endif
int nsubexp; /* number of () */
int nstate;
nfa_state_T state[1]; /* actually longer.. */
@ -143,9 +157,7 @@ struct regengine {
int (*regexec_nl)(regmatch_T*, char_u*, colnr_T, bool);
long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T,
proftime_T*);
#ifdef DEBUG
char_u *expr;
#endif
};
#endif // NVIM_REGEXP_DEFS_H

View File

@ -2347,7 +2347,6 @@ static void nfa_set_code(int c)
}
#ifdef REGEXP_DEBUG
static FILE *log_fd;
/*
@ -2468,7 +2467,6 @@ static void nfa_dump(nfa_regprog_T *prog)
}
}
#endif /* REGEXP_DEBUG */
#endif /* REGEXP_DEBUG */
/*
* Parse r.e. @expr and convert it into postfix form.
@ -4908,6 +4906,12 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
nextlist->n = 0; /* clear nextlist */
nextlist->has_pim = FALSE;
++nfa_listid;
if (prog->re_engine == AUTOMATIC_ENGINE && nfa_listid >= NFA_MAX_STATES) {
// Too many states, retry with old engine.
nfa_match = NFA_TOO_EXPENSIVE;
goto theend;
}
thislist->id = nfa_listid;
nextlist->id = nfa_listid + 1;
@ -5082,6 +5086,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
*/
result = recursive_regmatch(t->state, NULL, prog,
submatch, m, &listids);
if (result == NFA_TOO_EXPENSIVE) {
nfa_match = result;
goto theend;
}
/* for \@! and \@<! it is a match when the result is
* FALSE */
@ -5180,6 +5188,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
/* First try matching the pattern. */
result = recursive_regmatch(t->state, NULL, prog,
submatch, m, &listids);
if (result == NFA_TOO_EXPENSIVE) {
nfa_match = result;
goto theend;
}
if (result) {
int bytelen;
@ -6019,6 +6031,7 @@ nextchar:
log_fd = NULL;
#endif
theend:
/* Free memory */
free(list[0].t);
free(list[1].t);
@ -6068,8 +6081,12 @@ static long nfa_regtry(nfa_regprog_T *prog, colnr_T col)
clear_sub(&subs.synt);
clear_sub(&m.synt);
if (nfa_regmatch(prog, start, &subs, &m) == FALSE)
int result = nfa_regmatch(prog, start, &subs, &m);
if (result == FALSE) {
return 0;
} else if (result == NFA_TOO_EXPENSIVE) {
return result;
}
cleanup_subexpr();
if (REG_MULTI) {
@ -6186,9 +6203,7 @@ nfa_regexec_both (
nfa_nsubexpr = prog->nsubexp;
nfa_listid = 1;
nfa_alt_listid = 2;
#ifdef REGEXP_DEBUG
nfa_regengine.expr = prog->pattern;
#endif
if (prog->reganch && col > 0)
return 0L;
@ -6228,9 +6243,7 @@ nfa_regexec_both (
retval = nfa_regtry(prog, col);
#ifdef REGEXP_DEBUG
nfa_regengine.expr = NULL;
#endif
theend:
return retval;
@ -6248,9 +6261,7 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
if (expr == NULL)
return NULL;
#ifdef REGEXP_DEBUG
nfa_regengine.expr = expr;
#endif
init_class_tab();
@ -6325,10 +6336,8 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
#endif
/* Remember whether this pattern has any \z specials in it. */
prog->reghasz = re_has_z;
#ifdef REGEXP_DEBUG
prog->pattern = vim_strsave(expr);
nfa_regengine.expr = NULL;
#endif
out:
free(post_start);
@ -6342,9 +6351,7 @@ fail:
#ifdef REGEXP_DEBUG
nfa_postfix_dump(expr, FAIL);
#endif
#ifdef REGEXP_DEBUG
nfa_regengine.expr = NULL;
#endif
goto out;
}
@ -6355,9 +6362,7 @@ static void nfa_regfree(regprog_T *prog)
{
if (prog != NULL) {
free(((nfa_regprog_T *)prog)->match_text);
#ifdef REGEXP_DEBUG
free(((nfa_regprog_T *)prog)->pattern);
#endif
free(prog);
}
}

View File

@ -243,7 +243,7 @@ static int included_patches[] = {
500,
499,
//498 NA
//497,
497,
//496 NA
//495 NA
494,

View File

@ -0,0 +1,67 @@
-- Test for benchmarking RE engine.
local helpers = require('test.functional.helpers')
local feed, insert, source = helpers.feed, helpers.insert, helpers.source
local clear, execute, wait = helpers.clear, helpers.execute, helpers.wait
-- Temporary file for gathering benchmarking results for each regexp engine.
local result_file = 'benchmark.out'
-- Fixture containing an HTML fragment that can make a search appear to freeze.
local sample_file = 'test/benchmark/samples/re.freeze.txt'
-- Vim script code that does both the work and the benchmarking of that work.
local measure_cmd =
[[call Measure(%d, ']] .. sample_file .. [[', '\s\+\%%#\@<!$', '+5')]]
local measure_script = [[
func! Measure(re, file, pattern, arg)
let sstart=reltime()
execute 'set re=' . a:re
execute 'split' a:arg a:file
call search(a:pattern, '', '', 10000)
q!
$put =printf('file: %s, re: %d, time: %s', a:file, a:re, reltimestr(reltime(sstart)))
endfunc]]
describe('regexp search', function()
-- The test cases rely on a small Vim script, which we source here, and also
-- on a temporary result file, which we prepare and write to disk.
setup(function()
clear()
source(measure_script)
insert('" Benchmark_results:')
execute('write! ' .. result_file)
end)
-- At the end of the test run we just print the contents of the result file
-- for human inspection and promptly delete the file.
teardown(function()
print ''
for line in io.lines(result_file) do
print(line)
end
os.remove(result_file)
end)
it('is working with regexpengine=0', function()
local regexpengine = 0
execute(string.format(measure_cmd, regexpengine))
execute('write')
wait()
end)
it('is working with regexpengine=1', function()
local regexpengine = 1
execute(string.format(measure_cmd, regexpengine))
execute('write')
wait()
end)
it('is working with regexpengine=2', function()
local regexpengine = 2
execute(string.format(measure_cmd, regexpengine))
execute('write')
wait()
end)
end)

View File

@ -0,0 +1,4 @@
-- Modules loaded here will not be cleared and reloaded by Busted.
-- Busted started doing this to help provide more isolation. See issue #62
-- for more information about this.
local helpers = require('test.functional.helpers')

File diff suppressed because one or more lines are too long