vim-patch:7.4.497 #2295

Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)

https://github.com/vim/vim/releases/tag/v7-4-497

Helped-by: David Bürgin <676c7473@gmail.com>
Helped-by: Justin M. Keyes <justinkz@gmail.com>
Helped-by: Scott Prager <splinterofchaos@gmail.com>
This commit is contained in:
David Bürgin 2015-03-28 22:28:37 +01:00 committed by Justin M. Keyes
parent a69e464f70
commit d3bb177f1e
10 changed files with 239 additions and 73 deletions

View File

@ -331,4 +331,16 @@ if(BUSTED_PRG)
-DTEST_TYPE=functional -DTEST_TYPE=functional
-P ${PROJECT_SOURCE_DIR}/cmake/RunTests.cmake -P ${PROJECT_SOURCE_DIR}/cmake/RunTests.cmake
DEPENDS nvim tty-test) DEPENDS nvim tty-test)
# "benchmark" target: runs cmake/RunTests.cmake in script mode (-P) with
# TEST_TYPE=benchmark. The -D definitions before -P hand the busted program,
# the nvim binary path, and the working/test/build directories to that script.
# The nvim and tty-test targets are built first (DEPENDS).
add_custom_target(benchmark
COMMAND ${CMAKE_COMMAND}
-DBUSTED_PRG=${BUSTED_PRG}
-DNVIM_PRG=$<TARGET_FILE:nvim>
-DWORKING_DIR=${CMAKE_CURRENT_SOURCE_DIR}
-DBUSTED_OUTPUT_TYPE=${BUSTED_OUTPUT_TYPE}
-DTEST_DIR=${CMAKE_CURRENT_SOURCE_DIR}/test
-DBUILD_DIR=${CMAKE_BINARY_DIR}
-DTEST_TYPE=benchmark
-P ${PROJECT_SOURCE_DIR}/cmake/RunTests.cmake
DEPENDS nvim tty-test)
endif() endif()

View File

@ -89,6 +89,9 @@ test: functionaltest
unittest: | nvim unittest: | nvim
+$(BUILD_CMD) -C build unittest +$(BUILD_CMD) -C build unittest
benchmark: | nvim
+$(BUILD_CMD) -C build benchmark
clean: clean:
+test -d build && $(BUILD_CMD) -C build clean || true +test -d build && $(BUILD_CMD) -C build clean || true
$(MAKE) -C src/nvim/testdir clean $(MAKE) -C src/nvim/testdir clean

View File

@ -5383,6 +5383,10 @@ A jump table for the options with a short description can be found at |Q_op|.
Note that when using the NFA engine and the pattern contains something Note that when using the NFA engine and the pattern contains something
that is not supported the pattern will not match. This is only useful that is not supported the pattern will not match. This is only useful
for debugging the regexp engine. for debugging the regexp engine.
Using automatic selection enables Vim to switch the engine if the
default engine becomes too costly, e.g., when the NFA engine uses too
many states. This should prevent Vim from hanging on a combination of
a complex pattern with long text.
*'relativenumber'* *'rnu'* *'norelativenumber'* *'nornu'* *'relativenumber'* *'rnu'* *'norelativenumber'* *'nornu'*
'relativenumber' 'rnu' boolean (default off) 'relativenumber' 'rnu' boolean (default off)

View File

@ -6914,13 +6914,8 @@ static regengine_T bt_regengine =
bt_regcomp, bt_regcomp,
bt_regfree, bt_regfree,
bt_regexec_nl, bt_regexec_nl,
bt_regexec_multi bt_regexec_multi,
#ifdef REGEXP_DEBUG (char_u *)""
,(char_u *)""
#endif
#ifdef DEBUG
,NULL
#endif
}; };
@ -6934,21 +6929,14 @@ static regengine_T nfa_regengine =
nfa_regcomp, nfa_regcomp,
nfa_regfree, nfa_regfree,
nfa_regexec_nl, nfa_regexec_nl,
nfa_regexec_multi nfa_regexec_multi,
#ifdef REGEXP_DEBUG (char_u *)""
,(char_u *)""
#endif
#ifdef DEBUG
, NULL
#endif
}; };
/* Which regexp engine to use? Needed for vim_regcomp(). /* Which regexp engine to use? Needed for vim_regcomp().
* Must match with 'regexpengine'. */ * Must match with 'regexpengine'. */
static int regexp_engine = 0; static int regexp_engine = 0;
#define AUTOMATIC_ENGINE 0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE 2
#ifdef REGEXP_DEBUG #ifdef REGEXP_DEBUG
static char_u regname[][30] = { static char_u regname[][30] = {
"AUTOMATIC Regexp Engine", "AUTOMATIC Regexp Engine",
@ -6990,10 +6978,8 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
regexp_engine = AUTOMATIC_ENGINE; regexp_engine = AUTOMATIC_ENGINE;
} }
} }
#ifdef REGEXP_DEBUG
bt_regengine.expr = expr; bt_regengine.expr = expr;
nfa_regengine.expr = expr; nfa_regengine.expr = expr;
#endif
/* /*
* First try the NFA engine, unless backtracking was requested. * First try the NFA engine, unless backtracking was requested.
@ -7003,11 +6989,12 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
else else
prog = bt_regengine.regcomp(expr, re_flags); prog = bt_regengine.regcomp(expr, re_flags);
if (prog == NULL) { /* error compiling regexp with initial engine */ // Check for error compiling regexp with initial engine.
if (prog == NULL) {
#ifdef BT_REGEXP_DEBUG_LOG #ifdef BT_REGEXP_DEBUG_LOG
if (regexp_engine != BACKTRACKING_ENGINE) { /* debugging log for NFA */ // Debugging log for NFA.
FILE *f; if (regexp_engine != BACKTRACKING_ENGINE) {
f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a"); FILE *f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
if (f) { if (f) {
fprintf(f, "Syntax error in \"%s\"\n", expr); fprintf(f, "Syntax error in \"%s\"\n", expr);
fclose(f); fclose(f);
@ -7016,12 +7003,22 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags)
BT_REGEXP_DEBUG_LOG_NAME); BT_REGEXP_DEBUG_LOG_NAME);
} }
#endif #endif
/* // If the NFA engine failed, try the backtracking engine.
* If the NFA engine failed, the backtracking engine won't work either. // Disabled for now, both engines fail on the same patterns.
* // Re-enable when regcomp() fails when the pattern would work better
if (regexp_engine == AUTOMATIC_ENGINE) // with the other engine.
prog = bt_regengine.regcomp(expr, re_flags); //
*/ // if (regexp_engine == AUTOMATIC_ENGINE) {
// prog = bt_regengine.regcomp(expr, re_flags);
// regexp_engine = BACKTRACKING_ENGINE;
// }
}
if (prog != NULL) {
// Store the info needed to call regcomp() again when the engine turns out
// to be very slow when executing it.
prog->re_engine = regexp_engine;
prog->re_flags = re_flags;
} }
return prog; return prog;
@ -7036,29 +7033,62 @@ void vim_regfree(regprog_T *prog)
prog->engine->regfree(prog); prog->engine->regfree(prog);
} }
/* static void report_re_switch(char_u *pat)
* Match a regexp against a string.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
* Uses curbuf for line count and 'iskeyword'.
*
* Return TRUE if there is a match, FALSE if not.
*/
int
vim_regexec (
regmatch_T *rmp,
char_u *line, /* string to match against */
colnr_T col /* column to start looking for match */
)
{ {
return rmp->regprog->engine->regexec_nl(rmp, line, col, false); if (p_verbose > 0) {
verbose_enter();
MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
MSG_PUTS(pat);
verbose_leave();
}
} }
/* /// Matches a regexp against a string.
* Like vim_regexec(), but consider a "\n" in "line" to be a line break. /// "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
*/ /// Uses curbuf for line count and 'iskeyword'.
/// When "nl" is true consider a "\n" in "line" to be a line break.
///
/// @param rmp
/// @param line the string to match against
/// @param col the column to start looking for match
/// @param nl
///
/// @return TRUE if there is a match, FALSE if not.
static int vim_regexec_both(regmatch_T *rmp, char_u *line, colnr_T col, bool nl)
{
  // First attempt with whatever engine the program was compiled for.
  int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);

  // NFA engine aborted because it's very slow, use backtracking engine instead.
  // This is only done when the engine was selected automatically.
  if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
      && result == NFA_TOO_EXPENSIVE) {
    int save_p_re = p_re;
    int re_flags = rmp->regprog->re_flags;
    // Copy the pattern before the program that owns it is freed.
    char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);

    // Temporarily force the backtracking engine for the recompile.
    p_re = BACKTRACKING_ENGINE;
    vim_regfree(rmp->regprog);
    report_re_switch(pat);
    rmp->regprog = vim_regcomp(pat, re_flags);
    if (rmp->regprog != NULL) {
      result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
    } else {
      // Recompiling failed: NFA_TOO_EXPENSIVE is non-zero and would be taken
      // for a match by callers, so report "no match" instead.
      result = FALSE;
    }
    free(pat);
    p_re = save_p_re;
  }

  return result;
}
/// Matches a regexp against a string by calling vim_regexec_both() with
/// "nl" set to false.
///
/// @param rmp   passed through to vim_regexec_both()
/// @param line  the string to match against
/// @param col   the column to start looking for a match
///
/// @return the value returned by vim_regexec_both()
int vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col)
{
return vim_regexec_both(rmp, line, col, false);
}
// Like vim_regexec(), but consider a "\n" in "line" to be a line break.
int vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col) int vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col)
{ {
return rmp->regprog->engine->regexec_nl(rmp, line, col, true); return vim_regexec_both(rmp, line, col, true);
} }
/* /*
@ -7078,5 +7108,28 @@ long vim_regexec_multi(
proftime_T *tm /* timeout limit or NULL */ proftime_T *tm /* timeout limit or NULL */
) )
{ {
return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm); int result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col,
tm);
// NFA engine aborted because it's very slow, use backtracking engine instead.
if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
&& result == NFA_TOO_EXPENSIVE) {
int save_p_re = p_re;
int re_flags = rmp->regprog->re_flags;
char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
p_re = BACKTRACKING_ENGINE;
vim_regfree(rmp->regprog);
report_re_switch(pat);
rmp->regprog = vim_regcomp(pat, re_flags);
if (rmp->regprog != NULL) {
result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col,
tm);
}
free(pat);
p_re = save_p_re;
}
return result;
} }

View File

@ -30,6 +30,16 @@
*/ */
#define NFA_MAX_BRACES 20 #define NFA_MAX_BRACES 20
// In the NFA engine: how many states are allowed.
#define NFA_MAX_STATES 100000
#define NFA_TOO_EXPENSIVE -1
// Which regexp engine to use? Needed for vim_regcomp().
// Must match with 'regexpengine'.
#define AUTOMATIC_ENGINE 0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE 2
typedef struct regengine regengine_T; typedef struct regengine regengine_T;
/* /*
@ -38,8 +48,10 @@ typedef struct regengine regengine_T;
* structures are used. See code below. * structures are used. See code below.
*/ */
typedef struct regprog { typedef struct regprog {
regengine_T *engine; regengine_T *engine;
unsigned regflags; unsigned regflags;
unsigned re_engine; ///< Automatic, backtracking or NFA engine.
unsigned re_flags; ///< Second argument for vim_regcomp().
} regprog_T; } regprog_T;
/* /*
@ -48,9 +60,11 @@ typedef struct regprog {
* See regexp.c for an explanation. * See regexp.c for an explanation.
*/ */
typedef struct { typedef struct {
/* These two members implement regprog_T */ // These four members implement regprog_T.
regengine_T *engine; regengine_T *engine;
unsigned regflags; unsigned regflags;
unsigned re_engine;
unsigned re_flags; ///< Second argument for vim_regcomp().
int regstart; int regstart;
char_u reganch; char_u reganch;
@ -78,9 +92,11 @@ struct nfa_state {
* Structure used by the NFA matcher. * Structure used by the NFA matcher.
*/ */
typedef struct { typedef struct {
/* These two members implement regprog_T */ // These four members implement regprog_T.
regengine_T *engine; regengine_T *engine;
unsigned regflags; unsigned regflags;
unsigned re_engine;
unsigned re_flags; ///< Second argument for vim_regcomp().
nfa_state_T *start; /* points into state[] */ nfa_state_T *start; /* points into state[] */
@ -91,9 +107,7 @@ typedef struct {
int has_zend; /* pattern contains \ze */ int has_zend; /* pattern contains \ze */
int has_backref; /* pattern contains \1 .. \9 */ int has_backref; /* pattern contains \1 .. \9 */
int reghasz; int reghasz;
#ifdef DEBUG
char_u *pattern; char_u *pattern;
#endif
int nsubexp; /* number of () */ int nsubexp; /* number of () */
int nstate; int nstate;
nfa_state_T state[1]; /* actually longer.. */ nfa_state_T state[1]; /* actually longer.. */
@ -143,9 +157,7 @@ struct regengine {
int (*regexec_nl)(regmatch_T*, char_u*, colnr_T, bool); int (*regexec_nl)(regmatch_T*, char_u*, colnr_T, bool);
long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T, long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T,
proftime_T*); proftime_T*);
#ifdef DEBUG
char_u *expr; char_u *expr;
#endif
}; };
#endif // NVIM_REGEXP_DEFS_H #endif // NVIM_REGEXP_DEFS_H

View File

@ -2347,7 +2347,6 @@ static void nfa_set_code(int c)
} }
#ifdef REGEXP_DEBUG
static FILE *log_fd; static FILE *log_fd;
/* /*
@ -2468,7 +2467,6 @@ static void nfa_dump(nfa_regprog_T *prog)
} }
} }
#endif /* REGEXP_DEBUG */ #endif /* REGEXP_DEBUG */
#endif /* REGEXP_DEBUG */
/* /*
* Parse r.e. @expr and convert it into postfix form. * Parse r.e. @expr and convert it into postfix form.
@ -4908,6 +4906,12 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
nextlist->n = 0; /* clear nextlist */ nextlist->n = 0; /* clear nextlist */
nextlist->has_pim = FALSE; nextlist->has_pim = FALSE;
++nfa_listid; ++nfa_listid;
if (prog->re_engine == AUTOMATIC_ENGINE && nfa_listid >= NFA_MAX_STATES) {
// Too many states, retry with old engine.
nfa_match = NFA_TOO_EXPENSIVE;
goto theend;
}
thislist->id = nfa_listid; thislist->id = nfa_listid;
nextlist->id = nfa_listid + 1; nextlist->id = nfa_listid + 1;
@ -5082,6 +5086,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
*/ */
result = recursive_regmatch(t->state, NULL, prog, result = recursive_regmatch(t->state, NULL, prog,
submatch, m, &listids); submatch, m, &listids);
if (result == NFA_TOO_EXPENSIVE) {
nfa_match = result;
goto theend;
}
/* for \@! and \@<! it is a match when the result is /* for \@! and \@<! it is a match when the result is
* FALSE */ * FALSE */
@ -5180,6 +5188,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm
/* First try matching the pattern. */ /* First try matching the pattern. */
result = recursive_regmatch(t->state, NULL, prog, result = recursive_regmatch(t->state, NULL, prog,
submatch, m, &listids); submatch, m, &listids);
if (result == NFA_TOO_EXPENSIVE) {
nfa_match = result;
goto theend;
}
if (result) { if (result) {
int bytelen; int bytelen;
@ -6019,6 +6031,7 @@ nextchar:
log_fd = NULL; log_fd = NULL;
#endif #endif
theend:
/* Free memory */ /* Free memory */
free(list[0].t); free(list[0].t);
free(list[1].t); free(list[1].t);
@ -6068,8 +6081,12 @@ static long nfa_regtry(nfa_regprog_T *prog, colnr_T col)
clear_sub(&subs.synt); clear_sub(&subs.synt);
clear_sub(&m.synt); clear_sub(&m.synt);
if (nfa_regmatch(prog, start, &subs, &m) == FALSE) int result = nfa_regmatch(prog, start, &subs, &m);
if (result == FALSE) {
return 0; return 0;
} else if (result == NFA_TOO_EXPENSIVE) {
return result;
}
cleanup_subexpr(); cleanup_subexpr();
if (REG_MULTI) { if (REG_MULTI) {
@ -6186,9 +6203,7 @@ nfa_regexec_both (
nfa_nsubexpr = prog->nsubexp; nfa_nsubexpr = prog->nsubexp;
nfa_listid = 1; nfa_listid = 1;
nfa_alt_listid = 2; nfa_alt_listid = 2;
#ifdef REGEXP_DEBUG
nfa_regengine.expr = prog->pattern; nfa_regengine.expr = prog->pattern;
#endif
if (prog->reganch && col > 0) if (prog->reganch && col > 0)
return 0L; return 0L;
@ -6228,9 +6243,7 @@ nfa_regexec_both (
retval = nfa_regtry(prog, col); retval = nfa_regtry(prog, col);
#ifdef REGEXP_DEBUG
nfa_regengine.expr = NULL; nfa_regengine.expr = NULL;
#endif
theend: theend:
return retval; return retval;
@ -6248,9 +6261,7 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
if (expr == NULL) if (expr == NULL)
return NULL; return NULL;
#ifdef REGEXP_DEBUG
nfa_regengine.expr = expr; nfa_regengine.expr = expr;
#endif
init_class_tab(); init_class_tab();
@ -6325,10 +6336,8 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags)
#endif #endif
/* Remember whether this pattern has any \z specials in it. */ /* Remember whether this pattern has any \z specials in it. */
prog->reghasz = re_has_z; prog->reghasz = re_has_z;
#ifdef REGEXP_DEBUG
prog->pattern = vim_strsave(expr); prog->pattern = vim_strsave(expr);
nfa_regengine.expr = NULL; nfa_regengine.expr = NULL;
#endif
out: out:
free(post_start); free(post_start);
@ -6342,9 +6351,7 @@ fail:
#ifdef REGEXP_DEBUG #ifdef REGEXP_DEBUG
nfa_postfix_dump(expr, FAIL); nfa_postfix_dump(expr, FAIL);
#endif #endif
#ifdef REGEXP_DEBUG
nfa_regengine.expr = NULL; nfa_regengine.expr = NULL;
#endif
goto out; goto out;
} }
@ -6355,9 +6362,7 @@ static void nfa_regfree(regprog_T *prog)
{ {
if (prog != NULL) { if (prog != NULL) {
free(((nfa_regprog_T *)prog)->match_text); free(((nfa_regprog_T *)prog)->match_text);
#ifdef REGEXP_DEBUG
free(((nfa_regprog_T *)prog)->pattern); free(((nfa_regprog_T *)prog)->pattern);
#endif
free(prog); free(prog);
} }
} }

View File

@ -243,7 +243,7 @@ static int included_patches[] = {
500, 500,
499, 499,
//498 NA //498 NA
//497, 497,
//496 NA //496 NA
//495 NA //495 NA
494, 494,

View File

@ -0,0 +1,67 @@
-- Test for benchmarking RE engine.
local helpers = require('test.functional.helpers')
local feed, insert, source = helpers.feed, helpers.insert, helpers.source
local clear, execute, wait = helpers.clear, helpers.execute, helpers.wait
-- Temporary file for gathering benchmarking results for each regexp engine.
-- It is created in setup() and printed and removed in teardown().
local result_file = 'benchmark.out'
-- Fixture containing an HTML fragment that can make a search appear to freeze.
local sample_file = 'test/benchmark/samples/re.freeze.txt'
-- Ex command template that calls Measure(). The %d placeholder is filled in
-- with the 'regexpengine' value through string.format(), which is why the
-- literal percent sign of the search pattern is written as %%.
local measure_cmd =
[[call Measure(%d, ']] .. sample_file .. [[', '\s\+\%%#\@<!$', '+5')]]
-- Vim script code that does both the work and the benchmarking of that work.
-- Measure(re, file, pattern, arg) sets 'regexpengine' to "re", opens "file"
-- in a split (passing "arg" to :split), runs search() for "pattern", closes
-- the split again and appends a line with the elapsed time after the last
-- line of the buffer that is current at that point (presumably the results
-- buffer prepared in setup() -- confirm).
local measure_script = [[
func! Measure(re, file, pattern, arg)
let sstart=reltime()
execute 'set re=' . a:re
execute 'split' a:arg a:file
call search(a:pattern, '', '', 10000)
q!
$put =printf('file: %s, re: %d, time: %s', a:file, a:re, reltimestr(reltime(sstart)))
endfunc]]
describe('regexp search', function()
  -- Shared preparation for all cases: call clear(), source the Vim script
  -- that defines Measure(), insert a header line and write the buffer to the
  -- temporary result file.
  setup(function()
    clear()
    source(measure_script)
    insert('" Benchmark_results:')
    execute('write! ' .. result_file)
  end)

  -- After the whole run, echo the collected result lines for human
  -- inspection and then delete the temporary result file.
  teardown(function()
    print('')
    for result_line in io.lines(result_file) do
      print(result_line)
    end
    os.remove(result_file)
  end)

  -- One test case per 'regexpengine' value (0, 1 and 2). Each case runs the
  -- measurement command for its engine, writes the buffer and waits.
  for regexpengine = 0, 2 do
    local case_name =
      string.format('is working with regexpengine=%d', regexpengine)

    it(case_name, function()
      execute(string.format(measure_cmd, regexpengine))
      execute('write')
      wait()
    end)
  end
end)

View File

@ -0,0 +1,4 @@
-- Modules loaded here will not be cleared and reloaded by Busted.
-- Busted started doing this to help provide more isolation. See issue #62
-- for more information about this.
local helpers = require('test.functional.helpers')

File diff suppressed because one or more lines are too long