Merge pull request #30526 from lewis6991/fix/linematchnul

This commit is contained in:
Lewis Russell 2024-09-30 12:09:36 +01:00 committed by GitHub
commit 2c88cfcac4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 165 additions and 125 deletions

View File

@ -713,42 +713,42 @@ vim.diff({a}, {b}, {opts}) *vim.diff()*
Parameters: ~
• {a} (`string`) First string to compare
• {b} (`string`) Second string to compare
• {opts} (`table`) Optional parameters:
• {on_hunk}
(`fun(start_a: integer, count_a: integer, start_b: integer, count_b: integer): integer`)
• {opts} (`table?`) Optional parameters:
• {on_hunk}?
(`fun(start_a: integer, count_a: integer, start_b: integer, count_b: integer): integer?`)
Invoked for each hunk in the diff. Return a negative number
to cancel the callback for any remaining hunks. Arguments:
• `start_a` (`integer`): Start line of hunk in {a}.
• `count_a` (`integer`): Hunk size in {a}.
• `start_b` (`integer`): Start line of hunk in {b}.
• `count_b` (`integer`): Hunk size in {b}.
• {result_type} (`'unified'|'indices'`, default: `'unified'`)
• {result_type}? (`'unified'|'indices'`, default: `'unified'`)
Form of the returned diff:
• `unified`: String in unified format.
• `indices`: Array of hunk locations. Note: This option is
ignored if `on_hunk` is used.
• {linematch} (`boolean|integer`) Run linematch on the
• {linematch}? (`boolean|integer`) Run linematch on the
resulting hunks from xdiff. When integer, only hunks upto
this size in lines are run through linematch. Requires
`result_type = indices`, ignored otherwise.
• {algorithm} (`'myers'|'minimal'|'patience'|'histogram'`,
• {algorithm}? (`'myers'|'minimal'|'patience'|'histogram'`,
default: `'myers'`) Diff algorithm to use. Values:
• `myers`: the default algorithm
• `minimal`: spend extra time to generate the smallest
possible diff
• `patience`: patience diff algorithm
• `histogram`: histogram diff algorithm
• {ctxlen} (`integer`) Context length
• {interhunkctxlen} (`integer`) Inter hunk context length
• {ignore_whitespace} (`boolean`) Ignore whitespace
• {ignore_whitespace_change} (`boolean`) Ignore whitespace
• {ctxlen}? (`integer`) Context length
• {interhunkctxlen}? (`integer`) Inter hunk context length
• {ignore_whitespace}? (`boolean`) Ignore whitespace
• {ignore_whitespace_change}? (`boolean`) Ignore whitespace
change
• {ignore_whitespace_change_at_eol} (`boolean`) Ignore
• {ignore_whitespace_change_at_eol}? (`boolean`) Ignore
whitespace change at end-of-line.
• {ignore_cr_at_eol} (`boolean`) Ignore carriage return at
• {ignore_cr_at_eol}? (`boolean`) Ignore carriage return at
end-of-line
• {ignore_blank_lines} (`boolean`) Ignore blank lines
• {indent_heuristic} (`boolean`) Use the indent heuristic for
• {ignore_blank_lines}? (`boolean`) Ignore blank lines
• {indent_heuristic}? (`boolean`) Use the indent heuristic for
the internal diff library.
Return: ~

View File

@ -11,19 +11,19 @@
--- - `count_a` (`integer`): Hunk size in {a}.
--- - `start_b` (`integer`): Start line of hunk in {b}.
--- - `count_b` (`integer`): Hunk size in {b}.
--- @field on_hunk fun(start_a: integer, count_a: integer, start_b: integer, count_b: integer): integer
--- @field on_hunk? fun(start_a: integer, count_a: integer, start_b: integer, count_b: integer): integer?
---
--- Form of the returned diff:
--- - `unified`: String in unified format.
--- - `indices`: Array of hunk locations.
--- Note: This option is ignored if `on_hunk` is used.
--- (default: `'unified'`)
--- @field result_type 'unified'|'indices'
--- @field result_type? 'unified'|'indices'
---
--- Run linematch on the resulting hunks from xdiff. When integer, only hunks
--- upto this size in lines are run through linematch.
--- Requires `result_type = indices`, ignored otherwise.
--- @field linematch boolean|integer
--- @field linematch? boolean|integer
---
--- Diff algorithm to use. Values:
--- - `myers`: the default algorithm
@ -31,15 +31,15 @@
--- - `patience`: patience diff algorithm
--- - `histogram`: histogram diff algorithm
--- (default: `'myers'`)
--- @field algorithm 'myers'|'minimal'|'patience'|'histogram'
--- @field ctxlen integer Context length
--- @field interhunkctxlen integer Inter hunk context length
--- @field ignore_whitespace boolean Ignore whitespace
--- @field ignore_whitespace_change boolean Ignore whitespace change
--- @field ignore_whitespace_change_at_eol boolean Ignore whitespace change at end-of-line.
--- @field ignore_cr_at_eol boolean Ignore carriage return at end-of-line
--- @field ignore_blank_lines boolean Ignore blank lines
--- @field indent_heuristic boolean Use the indent heuristic for the internal diff library.
--- @field algorithm? 'myers'|'minimal'|'patience'|'histogram'
--- @field ctxlen? integer Context length
--- @field interhunkctxlen? integer Inter hunk context length
--- @field ignore_whitespace? boolean Ignore whitespace
--- @field ignore_whitespace_change? boolean Ignore whitespace change
--- @field ignore_whitespace_change_at_eol? boolean Ignore whitespace change at end-of-line.
--- @field ignore_cr_at_eol? boolean Ignore carriage return at end-of-line
--- @field ignore_blank_lines? boolean Ignore blank lines
--- @field indent_heuristic? boolean Use the indent heuristic for the internal diff library.
-- luacheck: no unused args
@ -65,7 +65,7 @@
---
---@param a string First string to compare
---@param b string Second string to compare
---@param opts vim.diff.Opts
---@param opts? vim.diff.Opts
---@return string|integer[][]?
--- See {opts.result_type}. `nil` if {opts.on_hunk} is given.
function vim.diff(a, b, opts) end

View File

@ -881,6 +881,7 @@ def CheckIncludes(filename, lines, error):
"nvim/func_attr.h",
"termkey/termkey.h",
"vterm/vterm.h",
"xdiff/xdiff.h",
]
for i in check_includes_ignore:

View File

@ -2005,7 +2005,7 @@ static void run_linematch_algorithm(diff_T *dp)
{
// define buffers for diff algorithm
mmfile_t diffbufs_mm[DB_COUNT];
const char *diffbufs[DB_COUNT];
const mmfile_t *diffbufs[DB_COUNT];
int diff_length[DB_COUNT];
size_t ndiffs = 0;
for (int i = 0; i < DB_COUNT; i++) {
@ -2015,9 +2015,7 @@ static void run_linematch_algorithm(diff_T *dp)
diff_write_buffer(curtab->tp_diffbuf[i], &diffbufs_mm[ndiffs],
dp->df_lnum[i], dp->df_lnum[i] + dp->df_count[i] - 1);
// we want to get the char* to the diff buffer that was just written
// we add it to the array of char*, diffbufs
diffbufs[ndiffs] = diffbufs_mm[ndiffs].ptr;
diffbufs[ndiffs] = &diffbufs_mm[ndiffs];
// keep track of the length of this diff block to pass it to the linematch
// algorithm

View File

@ -10,6 +10,8 @@
#include "nvim/macros_defs.h"
#include "nvim/memory.h"
#include "nvim/pos_defs.h"
#include "nvim/strings.h"
#include "xdiff/xdiff.h"
#define LN_MAX_BUFS 8
#define LN_DECISION_MAX 255 // pow(2, LN_MAX_BUFS(8)) - 1 = 255
@ -29,48 +31,48 @@ struct diffcmppath_S {
# include "linematch.c.generated.h"
#endif
static size_t line_len(const char *s)
static size_t line_len(const mmfile_t *m)
{
char *end = strchr(s, '\n');
char *s = m->ptr;
size_t n = (size_t)m->size;
char *end = strnchr(s, &n, '\n');
if (end) {
return (size_t)(end - s);
}
return strlen(s);
return (size_t)m->size;
}
#define MATCH_CHAR_MAX_LEN 800
/// Same as matching_chars but ignore whitespace
///
/// @param s1
/// @param s2
static int matching_chars_iwhite(const char *s1, const char *s2)
static int matching_chars_iwhite(const mmfile_t *s1, const mmfile_t *s2)
{
// the newly processed strings that will be compared
// delete the white space characters, and/or replace all upper case with lower
char *strsproc[2];
const char *strsorig[2] = { s1, s2 };
// delete the white space characters
mmfile_t sp[2];
char p[2][MATCH_CHAR_MAX_LEN];
for (int k = 0; k < 2; k++) {
size_t d = 0;
size_t i = 0;
size_t slen = line_len(strsorig[k]);
strsproc[k] = xmalloc((slen + 1) * sizeof(char));
while (d + i < slen) {
char e = strsorig[k][i + d];
const mmfile_t *s = k == 0 ? s1 : s2;
size_t pi = 0;
size_t slen = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(s));
for (size_t i = 0; i <= slen; i++) {
char e = s->ptr[i];
if (e != ' ' && e != '\t') {
strsproc[k][i] = e;
i++;
} else {
d++;
p[k][pi] = e;
pi++;
}
}
strsproc[k][i] = NUL;
}
int matching = matching_chars(strsproc[0], strsproc[1]);
xfree(strsproc[0]);
xfree(strsproc[1]);
return matching;
}
#define MATCH_CHAR_MAX_LEN 800
sp[k] = (mmfile_t){
.ptr = p[k],
.size = (int)pi,
};
}
return matching_chars(&sp[0], &sp[1]);
}
/// Return matching characters between "s1" and "s2" whilst respecting sequence order.
/// Consider the case of two strings 'AAACCC' and 'CCCAAA', the
@ -83,12 +85,14 @@ static int matching_chars_iwhite(const char *s1, const char *s2)
/// matching_chars("abcdefg", "gfedcba") -> 1 // all characters in common,
/// // but only at most 1 in sequence
///
/// @param s1
/// @param s2
static int matching_chars(const char *s1, const char *s2)
/// @param m1
/// @param m2
static int matching_chars(const mmfile_t *m1, const mmfile_t *m2)
{
size_t s1len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(s1));
size_t s2len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(s2));
size_t s1len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(m1));
size_t s2len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(m2));
char *s1 = m1->ptr;
char *s2 = m2->ptr;
int matrix[2][MATCH_CHAR_MAX_LEN] = { 0 };
bool icur = 1; // save space by storing only two rows for i axis
for (size_t i = 0; i < s1len; i++) {
@ -119,13 +123,13 @@ static int matching_chars(const char *s1, const char *s2)
/// @param sp
/// @param fomvals
/// @param n
static int count_n_matched_chars(const char **sp, const size_t n, bool iwhite)
static int count_n_matched_chars(mmfile_t **sp, const size_t n, bool iwhite)
{
int matched_chars = 0;
int matched = 0;
for (size_t i = 0; i < n; i++) {
for (size_t j = i + 1; j < n; j++) {
if (sp[i] != NULL && sp[j] != NULL) {
if (sp[i]->ptr != NULL && sp[j]->ptr != NULL) {
matched++;
// TODO(lewis6991): handle whitespace ignoring higher up in the stack
matched_chars += iwhite ? matching_chars_iwhite(sp[i], sp[j])
@ -143,15 +147,17 @@ static int count_n_matched_chars(const char **sp, const size_t n, bool iwhite)
return matched_chars;
}
void fastforward_buf_to_lnum(const char **s, linenr_T lnum)
mmfile_t fastforward_buf_to_lnum(mmfile_t s, linenr_T lnum)
{
for (int i = 0; i < lnum - 1; i++) {
*s = strchr(*s, '\n');
if (!*s) {
return;
s.ptr = strnchr(s.ptr, (size_t *)&s.size, '\n');
if (!s.ptr) {
break;
}
(*s)++;
s.ptr++;
s.size--;
}
return s;
}
/// try all the different ways to compare these lines and use the one that
@ -167,25 +173,25 @@ void fastforward_buf_to_lnum(const char **s, linenr_T lnum)
/// @param diff_blk
static void try_possible_paths(const int *df_iters, const size_t *paths, const int npaths,
const int path_idx, int *choice, diffcmppath_T *diffcmppath,
const int *diff_len, const size_t ndiffs, const char **diff_blk,
const int *diff_len, const size_t ndiffs, const mmfile_t **diff_blk,
bool iwhite)
{
if (path_idx == npaths) {
if ((*choice) > 0) {
int from_vals[LN_MAX_BUFS] = { 0 };
const int *to_vals = df_iters;
const char *current_lines[LN_MAX_BUFS];
mmfile_t mm[LN_MAX_BUFS]; // stack memory for current_lines
mmfile_t *current_lines[LN_MAX_BUFS];
for (size_t k = 0; k < ndiffs; k++) {
from_vals[k] = df_iters[k];
// get the index at all of the places
if ((*choice) & (1 << k)) {
from_vals[k]--;
const char *p = diff_blk[k];
fastforward_buf_to_lnum(&p, df_iters[k]);
current_lines[k] = p;
mm[k] = fastforward_buf_to_lnum(*diff_blk[k], df_iters[k]);
} else {
current_lines[k] = NULL;
mm[k] = (mmfile_t){ 0 };
}
current_lines[k] = &mm[k];
}
size_t unwrapped_idx_from = unwrap_indexes(from_vals, diff_len, ndiffs);
size_t unwrapped_idx_to = unwrap_indexes(to_vals, diff_len, ndiffs);
@ -244,7 +250,7 @@ static size_t unwrap_indexes(const int *values, const int *diff_len, const size_
/// @param ndiffs
/// @param diff_blk
static void populate_tensor(int *df_iters, const size_t ch_dim, diffcmppath_T *diffcmppath,
const int *diff_len, const size_t ndiffs, const char **diff_blk,
const int *diff_len, const size_t ndiffs, const mmfile_t **diff_blk,
bool iwhite)
{
if (ch_dim == ndiffs) {
@ -327,7 +333,7 @@ static void populate_tensor(int *df_iters, const size_t ch_dim, diffcmppath_T *d
/// @param ndiffs
/// @param [out] [allocated] decisions
/// @return the length of decisions
size_t linematch_nbuffers(const char **diff_blk, const int *diff_len, const size_t ndiffs,
size_t linematch_nbuffers(const mmfile_t **diff_blk, const int *diff_len, const size_t ndiffs,
int **decisions, bool iwhite)
{
assert(ndiffs <= LN_MAX_BUFS);

View File

@ -3,6 +3,7 @@
#include <stddef.h> // IWYU pragma: keep
#include "nvim/pos_defs.h" // IWYU pragma: keep
#include "xdiff/xdiff.h"
#ifdef INCLUDE_GENERATED_DECLARATIONS
# include "linematch.h.generated.h"

View File

@ -67,11 +67,11 @@ static void get_linematch_results(lua_State *lstate, mmfile_t *ma, mmfile_t *mb,
int count_a, int start_b, int count_b, bool iwhite)
{
// get the pointer to char of the start of the diff to pass it to linematch algorithm
const char *diff_begin[2] = { ma->ptr, mb->ptr };
int diff_length[2] = { count_a, count_b };
mmfile_t ma0 = fastforward_buf_to_lnum(*ma, (linenr_T)start_a + 1);
mmfile_t mb0 = fastforward_buf_to_lnum(*mb, (linenr_T)start_b + 1);
fastforward_buf_to_lnum(&diff_begin[0], (linenr_T)start_a + 1);
fastforward_buf_to_lnum(&diff_begin[1], (linenr_T)start_b + 1);
const mmfile_t *diff_begin[2] = { &ma0, &mb0 };
int diff_length[2] = { count_a, count_b };
int *decisions = NULL;
size_t decisions_length = linematch_nbuffers(diff_begin, diff_length, 2, &decisions, iwhite);

View File

@ -496,6 +496,20 @@ char *vim_strchr(const char *const string, const int c)
}
}
// Sized version of strchr that can handle embedded NULs.
// Adjusts n to the new size.
char *strnchr(const char *p, size_t *n, int c)
{
while (*n > 0) {
if (*p == c) {
return (char *)p;
}
p++;
(*n)--;
}
return NULL;
}
// Sort an array of strings.
static int sort_compare(const void *s1, const void *s2)

View File

@ -12,15 +12,11 @@ describe('xdiff bindings', function()
end)
describe('can diff text', function()
before_each(function()
exec_lua [[
a1 = 'Hello\n'
b1 = 'Helli\n'
local a1 = 'Hello\n'
local b1 = 'Helli\n'
a2 = 'Hello\nbye\nfoo\n'
b2 = 'Helli\nbye\nbar\nbaz\n'
]]
end)
local a2 = 'Hello\nbye\nfoo\n'
local b2 = 'Helli\nbye\nbar\nbaz\n'
it('with no callback', function()
eq(
@ -30,7 +26,9 @@ describe('xdiff bindings', function()
'+Helli',
'',
}, '\n'),
exec_lua('return vim.diff(a1, b1)')
exec_lua(function()
return vim.diff(a1, b1)
end)
)
eq(
@ -44,63 +42,81 @@ describe('xdiff bindings', function()
'+baz',
'',
}, '\n'),
exec_lua('return vim.diff(a2, b2)')
exec_lua(function()
return vim.diff(a2, b2)
end)
)
end)
it('with callback', function()
exec_lua([[on_hunk = function(sa, ca, sb, cb)
exp[#exp+1] = {sa, ca, sb, cb}
end]])
eq(
{ { 1, 1, 1, 1 } },
exec_lua [[
exp = {}
assert(vim.diff(a1, b1, {on_hunk = on_hunk}) == nil)
exec_lua(function()
local exp = {} --- @type table[]
assert(vim.diff(a1, b1, {
on_hunk = function(...)
exp[#exp + 1] = { ... }
end,
}) == nil)
return exp
]]
end)
)
eq(
{ { 1, 1, 1, 1 }, { 3, 1, 3, 2 } },
exec_lua [[
exp = {}
assert(vim.diff(a2, b2, {on_hunk = on_hunk}) == nil)
exec_lua(function()
local exp = {} --- @type table[]
assert(vim.diff(a2, b2, {
on_hunk = function(...)
exp[#exp + 1] = { ... }
end,
}) == nil)
return exp
]]
end)
)
-- gives higher precedence to on_hunk over result_type
eq(
{ { 1, 1, 1, 1 }, { 3, 1, 3, 2 } },
exec_lua [[
exp = {}
assert(vim.diff(a2, b2, {on_hunk = on_hunk, result_type='indices'}) == nil)
exec_lua(function()
local exp = {} --- @type table[]
assert(vim.diff(a2, b2, {
on_hunk = function(...)
exp[#exp + 1] = { ... }
end,
result_type = 'indices',
}) == nil)
return exp
]]
end)
)
end)
it('with error callback', function()
exec_lua [[
on_hunk = function(sa, ca, sb, cb)
error('ERROR1')
end
]]
eq(
[[error running function on_hunk: [string "<nvim>"]:0: ERROR1]],
pcall_err(exec_lua, [[vim.diff(a1, b1, {on_hunk = on_hunk})]])
[[.../xdiff_spec.lua:0: error running function on_hunk: .../xdiff_spec.lua:0: ERROR1]],
pcall_err(exec_lua, function()
vim.diff(a1, b1, {
on_hunk = function()
error('ERROR1')
end,
})
end)
)
end)
it('with hunk_lines', function()
eq({ { 1, 1, 1, 1 } }, exec_lua([[return vim.diff(a1, b1, {result_type = 'indices'})]]))
eq(
{ { 1, 1, 1, 1 } },
exec_lua(function()
return vim.diff(a1, b1, { result_type = 'indices' })
end)
)
eq(
{ { 1, 1, 1, 1 }, { 3, 1, 3, 2 } },
exec_lua([[return vim.diff(a2, b2, {result_type = 'indices'})]])
exec_lua(function()
return vim.diff(a2, b2, { result_type = 'indices' })
end)
)
end)
@ -143,16 +159,11 @@ describe('xdiff bindings', function()
'+}',
'',
}, '\n'),
exec_lua(
[[
local args = {...}
return vim.diff(args[1], args[2], {
algorithm = 'patience'
exec_lua(function()
return vim.diff(a, b, {
algorithm = 'patience',
})
]],
a,
b
)
end)
)
end)
end)
@ -174,4 +185,13 @@ describe('xdiff bindings', function()
pcall_err(exec_lua, [[vim.diff('a', 'b', { on_hunk = true })]])
)
end)
it('can handle strings with embedded NUL characters (GitHub #30305)', function()
eq(
{ { 0, 0, 1, 1 }, { 1, 0, 3, 2 } },
exec_lua(function()
return vim.diff('\n', '\0\n\n\nb', { linematch = true, result_type = 'indices' })
end)
)
end)
end)