From d3bb177f1e1dcbc4104d7bba6f2ac4ebe3ffc9c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?David=20B=C3=BCrgin?= <676c7473@gmail.com> Date: Sat, 28 Mar 2015 22:28:37 +0100 Subject: [PATCH] vim-patch:7.4.497 #2295 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: With some regexp patterns the NFA engine uses many states and becomes very slow. To the user it looks like Vim freezes. Solution: When the number of states reaches a limit fall back to the old engine. (Christian Brabandt) https://github.com/vim/vim/releases/tag/v7-4-497 Helped-by: David Bürgin <676c7473@gmail.com> Helped-by: Justin M. Keyes Helped-by: Scott Prager --- CMakeLists.txt | 12 ++ Makefile | 3 + runtime/doc/options.txt | 4 + src/nvim/regexp.c | 149 ++++++++++++++++-------- src/nvim/regexp_defs.h | 30 +++-- src/nvim/regexp_nfa.c | 35 +++--- src/nvim/version.c | 2 +- test/benchmark/bench_re_freeze_spec.lua | 67 +++++++++++ test/benchmark/preload.lua | 4 + test/benchmark/samples/re.freeze.txt | 6 + 10 files changed, 239 insertions(+), 73 deletions(-) create mode 100644 test/benchmark/bench_re_freeze_spec.lua create mode 100644 test/benchmark/preload.lua create mode 100644 test/benchmark/samples/re.freeze.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index a026af7a1a..4219c69b62 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -331,4 +331,16 @@ if(BUSTED_PRG) -DTEST_TYPE=functional -P ${PROJECT_SOURCE_DIR}/cmake/RunTests.cmake DEPENDS nvim tty-test) + + add_custom_target(benchmark + COMMAND ${CMAKE_COMMAND} + -DBUSTED_PRG=${BUSTED_PRG} + -DNVIM_PRG=$ + -DWORKING_DIR=${CMAKE_CURRENT_SOURCE_DIR} + -DBUSTED_OUTPUT_TYPE=${BUSTED_OUTPUT_TYPE} + -DTEST_DIR=${CMAKE_CURRENT_SOURCE_DIR}/test + -DBUILD_DIR=${CMAKE_BINARY_DIR} + -DTEST_TYPE=benchmark + -P ${PROJECT_SOURCE_DIR}/cmake/RunTests.cmake + DEPENDS nvim tty-test) endif() diff --git a/Makefile b/Makefile index 1683c558e8..721474a3ec 100644 --- a/Makefile +++ b/Makefile @@ -89,6 +89,9 @@ test: functionaltest unittest: | nvim +$(BUILD_CMD) -C build unittest +benchmark: | nvim + +$(BUILD_CMD) -C build benchmark + clean: +test -d build && $(BUILD_CMD) -C build clean || true $(MAKE) -C src/nvim/testdir clean diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt index 0c964ae519..c0f1888d84 100644 --- a/runtime/doc/options.txt +++ b/runtime/doc/options.txt @@ -5383,6 +5383,10 @@ A jump table for the options with a short description can be found at |Q_op|. Note that when using the NFA engine and the pattern contains something that is not supported the pattern will not match. This is only useful for debugging the regexp engine. + Using automatic selection enables Vim to switch the engine, if the + default engine becomes too costly. E.g., when the NFA engine uses too + many states. This should prevent Vim from hanging on a combination of + a complex pattern with long text. *'relativenumber'* *'rnu'* *'norelativenumber'* *'nornu'* 'relativenumber' 'rnu' boolean (default off) diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 8b7033b64b..d5e963c2f8 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -6914,13 +6914,8 @@ static regengine_T bt_regengine = bt_regcomp, bt_regfree, bt_regexec_nl, - bt_regexec_multi -#ifdef REGEXP_DEBUG - ,(char_u *)"" -#endif -#ifdef DEBUG - ,NULL -#endif + bt_regexec_multi, + (char_u *)"" }; @@ -6934,21 +6929,14 @@ static regengine_T nfa_regengine = nfa_regcomp, nfa_regfree, nfa_regexec_nl, - nfa_regexec_multi -#ifdef REGEXP_DEBUG - ,(char_u *)"" -#endif -#ifdef DEBUG - , NULL -#endif + nfa_regexec_multi, + (char_u *)"" }; /* Which regexp engine to use? Needed for vim_regcomp(). * Must match with 'regexpengine'. */ static int regexp_engine = 0; -#define AUTOMATIC_ENGINE 0 -#define BACKTRACKING_ENGINE 1 -#define NFA_ENGINE 2 + #ifdef REGEXP_DEBUG static char_u regname[][30] = { "AUTOMATIC Regexp Engine", @@ -6990,10 +6978,8 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags) regexp_engine = AUTOMATIC_ENGINE; } } -#ifdef REGEXP_DEBUG bt_regengine.expr = expr; nfa_regengine.expr = expr; -#endif /* * First try the NFA engine, unless backtracking was requested. @@ -7003,11 +6989,12 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags) else prog = bt_regengine.regcomp(expr, re_flags); - if (prog == NULL) { /* error compiling regexp with initial engine */ + // Check for error compiling regexp with initial engine. + if (prog == NULL) { #ifdef BT_REGEXP_DEBUG_LOG - if (regexp_engine != BACKTRACKING_ENGINE) { /* debugging log for NFA */ - FILE *f; - f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a"); + // Debugging log for NFA. + if (regexp_engine != BACKTRACKING_ENGINE) { + FILE *f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a"); if (f) { fprintf(f, "Syntax error in \"%s\"\n", expr); fclose(f); @@ -7016,12 +7003,22 @@ regprog_T *vim_regcomp(char_u *expr_arg, int re_flags) BT_REGEXP_DEBUG_LOG_NAME); } #endif - /* - * If the NFA engine failed, the backtracking engine won't work either. - * - if (regexp_engine == AUTOMATIC_ENGINE) - prog = bt_regengine.regcomp(expr, re_flags); - */ + // If the NFA engine failed, try the backtracking engine. + // Disabled for now, both engines fail on the same patterns. + // Re-enable when regcomp() fails when the pattern would work better + // with the other engine. + // + // if (regexp_engine == AUTOMATIC_ENGINE) { + // prog = bt_regengine.regcomp(expr, re_flags); + // regexp_engine = BACKTRACKING_ENGINE; + // } + } + + if (prog != NULL) { + // Store the info needed to call regcomp() again when the engine turns out + // to be very slow when executing it. + prog->re_engine = regexp_engine; + prog->re_flags = re_flags; } return prog; @@ -7036,29 +7033,62 @@ void vim_regfree(regprog_T *prog) prog->engine->regfree(prog); } -/* - * Match a regexp against a string. - * "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). - * Uses curbuf for line count and 'iskeyword'. - * - * Return TRUE if there is a match, FALSE if not. - */ -int -vim_regexec ( - regmatch_T *rmp, - char_u *line, /* string to match against */ - colnr_T col /* column to start looking for match */ -) +static void report_re_switch(char_u *pat) { - return rmp->regprog->engine->regexec_nl(rmp, line, col, false); + if (p_verbose > 0) { + verbose_enter(); + MSG_PUTS(_("Switching to backtracking RE engine for pattern: ")); + MSG_PUTS(pat); + verbose_leave(); + } } -/* - * Like vim_regexec(), but consider a "\n" in "line" to be a line break. - */ +/// Matches a regexp against a string. +/// "rmp->regprog" is a compiled regexp as returned by vim_regcomp(). +/// Uses curbuf for line count and 'iskeyword'. +/// When "nl" is true consider a "\n" in "line" to be a line break. +/// +/// @param rmp +/// @param line the string to match against +/// @param col the column to start looking for match +/// @param nl +/// +/// @return TRUE if there is a match, FALSE if not. +static int vim_regexec_both(regmatch_T *rmp, char_u *line, colnr_T col, bool nl) +{ + int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl); + + // NFA engine aborted because it's very slow, use backtracking engine instead. + if (rmp->regprog->re_engine == AUTOMATIC_ENGINE + && result == NFA_TOO_EXPENSIVE) { + int save_p_re = p_re; + int re_flags = rmp->regprog->re_flags; + char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern); + + p_re = BACKTRACKING_ENGINE; + vim_regfree(rmp->regprog); + report_re_switch(pat); + rmp->regprog = vim_regcomp(pat, re_flags); + if (rmp->regprog != NULL) { + result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl); + } + + free(pat); + p_re = save_p_re; + } + + return result; +} + +int vim_regexec(regmatch_T *rmp, char_u *line, colnr_T col) +{ + return vim_regexec_both(rmp, line, col, false); +} + +// Like vim_regexec(), but consider a "\n" in "line" to be a line break. int vim_regexec_nl(regmatch_T *rmp, char_u *line, colnr_T col) { - return rmp->regprog->engine->regexec_nl(rmp, line, col, true); + return vim_regexec_both(rmp, line, col, true); } /* @@ -7078,5 +7108,28 @@ long vim_regexec_multi( proftime_T *tm /* timeout limit or NULL */ ) { - return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm); + int result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, + tm); + + // NFA engine aborted because it's very slow, use backtracking engine instead. + if (rmp->regprog->re_engine == AUTOMATIC_ENGINE + && result == NFA_TOO_EXPENSIVE) { + int save_p_re = p_re; + int re_flags = rmp->regprog->re_flags; + char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern); + + p_re = BACKTRACKING_ENGINE; + vim_regfree(rmp->regprog); + report_re_switch(pat); + rmp->regprog = vim_regcomp(pat, re_flags); + if (rmp->regprog != NULL) { + result = rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, + tm); + } + + free(pat); + p_re = save_p_re; + } + + return result; } diff --git a/src/nvim/regexp_defs.h b/src/nvim/regexp_defs.h index 1e00f14ac6..6426ee441b 100644 --- a/src/nvim/regexp_defs.h +++ b/src/nvim/regexp_defs.h @@ -30,6 +30,16 @@ */ #define NFA_MAX_BRACES 20 +// In the NFA engine: how many states are allowed. +#define NFA_MAX_STATES 100000 +#define NFA_TOO_EXPENSIVE -1 + +// Which regexp engine to use? Needed for vim_regcomp(). +// Must match with 'regexpengine'. +#define AUTOMATIC_ENGINE 0 +#define BACKTRACKING_ENGINE 1 +#define NFA_ENGINE 2 + typedef struct regengine regengine_T; /* @@ -38,8 +48,10 @@ typedef struct regengine regengine_T; * structures are used. See code below. */ typedef struct regprog { - regengine_T *engine; + regengine_T *engine; unsigned regflags; + unsigned re_engine; ///< Automatic, backtracking or NFA engine. + unsigned re_flags; ///< Second argument for vim_regcomp(). } regprog_T; /* @@ -48,9 +60,11 @@ typedef struct regprog { * See regexp.c for an explanation. */ typedef struct { - /* These two members implement regprog_T */ - regengine_T *engine; + // These four members implement regprog_T. + regengine_T *engine; unsigned regflags; + unsigned re_engine; + unsigned re_flags; ///< Second argument for vim_regcomp(). int regstart; char_u reganch; @@ -78,9 +92,11 @@ struct nfa_state { * Structure used by the NFA matcher. */ typedef struct { - /* These two members implement regprog_T */ - regengine_T *engine; + // These four members implement regprog_T. + regengine_T *engine; unsigned regflags; + unsigned re_engine; + unsigned re_flags; ///< Second argument for vim_regcomp(). nfa_state_T *start; /* points into state[] */ @@ -91,9 +107,7 @@ typedef struct { int has_zend; /* pattern contains \ze */ int has_backref; /* pattern contains \1 .. \9 */ int reghasz; -#ifdef DEBUG char_u *pattern; -#endif int nsubexp; /* number of () */ int nstate; nfa_state_T state[1]; /* actually longer.. */ @@ -143,9 +157,7 @@ struct regengine { int (*regexec_nl)(regmatch_T*, char_u*, colnr_T, bool); long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T, proftime_T*); -#ifdef DEBUG char_u *expr; -#endif }; #endif // NVIM_REGEXP_DEFS_H diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 6ed42303ef..1de167c40f 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -2347,7 +2347,6 @@ static void nfa_set_code(int c) } -#ifdef REGEXP_DEBUG static FILE *log_fd; /* @@ -2468,7 +2467,6 @@ static void nfa_dump(nfa_regprog_T *prog) } } #endif /* REGEXP_DEBUG */ -#endif /* REGEXP_DEBUG */ /* * Parse r.e. @expr and convert it into postfix form. @@ -4908,6 +4906,12 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm nextlist->n = 0; /* clear nextlist */ nextlist->has_pim = FALSE; ++nfa_listid; + if (prog->re_engine == AUTOMATIC_ENGINE && nfa_listid >= NFA_MAX_STATES) { + // Too many states, retry with old engine. + nfa_match = NFA_TOO_EXPENSIVE; + goto theend; + } + thislist->id = nfa_listid; nextlist->id = nfa_listid + 1; @@ -5082,6 +5086,10 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *subm */ result = recursive_regmatch(t->state, NULL, prog, submatch, m, &listids); + if (result == NFA_TOO_EXPENSIVE) { + nfa_match = result; + goto theend; + } /* for \@! and \@state, NULL, prog, submatch, m, &listids); + if (result == NFA_TOO_EXPENSIVE) { + nfa_match = result; + goto theend; + } if (result) { int bytelen; @@ -6019,6 +6031,7 @@ nextchar: log_fd = NULL; #endif +theend: /* Free memory */ free(list[0].t); free(list[1].t); @@ -6068,8 +6081,12 @@ static long nfa_regtry(nfa_regprog_T *prog, colnr_T col) clear_sub(&subs.synt); clear_sub(&m.synt); - if (nfa_regmatch(prog, start, &subs, &m) == FALSE) + int result = nfa_regmatch(prog, start, &subs, &m); + if (result == FALSE) { return 0; + } else if (result == NFA_TOO_EXPENSIVE) { + return result; + } cleanup_subexpr(); if (REG_MULTI) { @@ -6186,9 +6203,7 @@ nfa_regexec_both ( nfa_nsubexpr = prog->nsubexp; nfa_listid = 1; nfa_alt_listid = 2; -#ifdef REGEXP_DEBUG nfa_regengine.expr = prog->pattern; -#endif if (prog->reganch && col > 0) return 0L; @@ -6228,9 +6243,7 @@ nfa_regexec_both ( retval = nfa_regtry(prog, col); -#ifdef REGEXP_DEBUG nfa_regengine.expr = NULL; -#endif theend: return retval; @@ -6248,9 +6261,7 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) if (expr == NULL) return NULL; -#ifdef REGEXP_DEBUG nfa_regengine.expr = expr; -#endif init_class_tab(); @@ -6325,10 +6336,8 @@ static regprog_T *nfa_regcomp(char_u *expr, int re_flags) #endif /* Remember whether this pattern has any \z specials in it. */ prog->reghasz = re_has_z; -#ifdef REGEXP_DEBUG prog->pattern = vim_strsave(expr); nfa_regengine.expr = NULL; -#endif out: free(post_start); @@ -6342,9 +6351,7 @@ fail: #ifdef REGEXP_DEBUG nfa_postfix_dump(expr, FAIL); #endif -#ifdef REGEXP_DEBUG nfa_regengine.expr = NULL; -#endif goto out; } @@ -6355,9 +6362,7 @@ static void nfa_regfree(regprog_T *prog) { if (prog != NULL) { free(((nfa_regprog_T *)prog)->match_text); -#ifdef REGEXP_DEBUG free(((nfa_regprog_T *)prog)->pattern); -#endif free(prog); } } diff --git a/src/nvim/version.c b/src/nvim/version.c index 8cdc06dba5..bbe5500d3d 100644 --- a/src/nvim/version.c +++ b/src/nvim/version.c @@ -243,7 +243,7 @@ static int included_patches[] = { 500, 499, //498 NA - //497, + 497, //496 NA //495 NA 494, diff --git a/test/benchmark/bench_re_freeze_spec.lua b/test/benchmark/bench_re_freeze_spec.lua new file mode 100644 index 0000000000..7242f43c8e --- /dev/null +++ b/test/benchmark/bench_re_freeze_spec.lua @@ -0,0 +1,67 @@ +-- Test for benchmarking RE engine. + +local helpers = require('test.functional.helpers') +local feed, insert, source = helpers.feed, helpers.insert, helpers.source +local clear, execute, wait = helpers.clear, helpers.execute, helpers.wait + +-- Temporary file for gathering benchmarking results for each regexp engine. +local result_file = 'benchmark.out' +-- Fixture containing an HTML fragment that can make a search appear to freeze. +local sample_file = 'test/benchmark/samples/re.freeze.txt' + +-- Vim script code that does both the work and the benchmarking of that work. +local measure_cmd = + [[call Measure(%d, ']] .. sample_file .. [[', '\s\+\%%#\@55555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555 +