mirror of
https://github.com/neovim/neovim.git
synced 2024-12-29 14:41:06 -07:00
lua: support getting UTF-32 and UTF-16 sizes of replaced text
This commit is contained in:
parent
b0e26199ec
commit
c0993ed343
@ -208,14 +208,17 @@ they are allowed.
|
||||
|
||||
|nvim_buf_attach| will take keyword args for the callbacks. "on_lines" will
|
||||
receive parameters ("lines", {buf}, {changedtick}, {firstline}, {lastline},
|
||||
{new_lastline}, {old_bytecount}).
|
||||
{new_lastline}, {old_byte_size}[, {old_utf32_size}, {old_utf16_size}]).
|
||||
Unlike remote channel events the text contents are not passed. The new text can
|
||||
be accessed inside the callback as
|
||||
|
||||
`vim.api.nvim_buf_get_lines(buf, firstline, new_lastline, true)`
|
||||
|
||||
{old_bytecount} is the total size of the replaced region {firstline} to
|
||||
{lastline} in bytes, including the final newline after {lastline}.
|
||||
{old_byte_size} is the total size of the replaced region {firstline} to
|
||||
{lastline} in bytes, including the final newline after {lastline}. If
|
||||
`utf_sizes` is set to true in |nvim_buf_attach()| keyword args, then the
|
||||
UTF-32 and UTF-16 sizes of the deleted region are also passed as additional
|
||||
arguments {old_utf32_size} and {old_utf16_size}.
|
||||
|
||||
"on_changedtick" is invoked when |b:changedtick| was incremented but no text
|
||||
was changed. The parameters received are ("changedtick", {buf}, {changedtick}).
|
||||
|
@ -109,9 +109,11 @@ String buffer_get_line(Buffer buffer, Integer index, Error *err)
|
||||
/// `nvim_buf_lines_event`. Otherwise, the first notification will be
|
||||
/// a `nvim_buf_changedtick_event`. Not used for lua callbacks.
|
||||
/// @param opts Optional parameters.
|
||||
/// `on_lines`: lua callback received on change.
|
||||
/// `on_lines`: lua callback received on change.
|
||||
/// `on_changedtick`: lua callback received on changedtick
|
||||
/// increment without text change.
|
||||
/// `utf_sizes`: include UTF-32 and UTF-16 size of
|
||||
/// the replaced region.
|
||||
/// See |api-buffer-updates-lua| for more information
|
||||
/// @param[out] err Error details, if any
|
||||
/// @return False when updates couldn't be enabled because the buffer isn't
|
||||
@ -156,6 +158,12 @@ Boolean nvim_buf_attach(uint64_t channel_id,
|
||||
}
|
||||
cb.on_detach = v->data.luaref;
|
||||
v->data.integer = LUA_NOREF;
|
||||
} else if (is_lua && strequal("utf_sizes", k.data)) {
|
||||
if (v->type != kObjectTypeBoolean) {
|
||||
api_set_error(err, kErrorTypeValidation, "utf_sizes must be boolean");
|
||||
goto error;
|
||||
}
|
||||
cb.utf_sizes = v->data.boolean;
|
||||
} else {
|
||||
api_set_error(err, kErrorTypeValidation, "unexpected key: %s", k.data);
|
||||
goto error;
|
||||
@ -1196,6 +1204,7 @@ Dictionary nvim__buf_stats(Buffer buffer, Error *err)
|
||||
// NB: this should be zero at any time API functions are called,
|
||||
// this exists to debug issues
|
||||
PUT(rv, "dirty_bytes", INTEGER_OBJ((Integer)buf->deleted_bytes));
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
@ -459,8 +459,9 @@ typedef struct {
|
||||
LuaRef on_lines;
|
||||
LuaRef on_changedtick;
|
||||
LuaRef on_detach;
|
||||
bool utf_sizes;
|
||||
} BufUpdateCallbacks;
|
||||
#define BUF_UPDATE_CALLBACKS_INIT { LUA_NOREF, LUA_NOREF, LUA_NOREF }
|
||||
#define BUF_UPDATE_CALLBACKS_INIT { LUA_NOREF, LUA_NOREF, LUA_NOREF, false }
|
||||
|
||||
#define BUF_HAS_QF_ENTRY 1
|
||||
#define BUF_HAS_LL_ENTRY 2
|
||||
@ -802,12 +803,24 @@ struct file_buffer {
|
||||
|
||||
kvec_t(BufhlLine *) b_bufhl_move_space; // temporary space for highlights
|
||||
|
||||
// array of channelids which have asked to receive updates for this
|
||||
// array of channel_id:s which have asked to receive updates for this
|
||||
// buffer.
|
||||
kvec_t(uint64_t) update_channels;
|
||||
// array of lua callbacks for buffer updates.
|
||||
kvec_t(BufUpdateCallbacks) update_callbacks;
|
||||
|
||||
// whether an update callback has requested codepoint size of deleted regions.
|
||||
bool update_need_codepoints;
|
||||
|
||||
// Measurements of the deleted or replaced region since the last update
|
||||
// event. Some consumers of buffer changes need to know the byte size (like
|
||||
// tree-sitter) or the corresponding UTF-32/UTF-16 size (like LSP) of the
|
||||
// deleted text.
|
||||
size_t deleted_bytes;
|
||||
size_t deleted_codepoints;
|
||||
size_t deleted_codeunits;
|
||||
|
||||
// The number of times the current line has been flushed in the memline.
|
||||
int flush_count;
|
||||
|
||||
int b_diff_failed; // internal diff failed for this buffer
|
||||
|
@ -26,6 +26,9 @@ bool buf_updates_register(buf_T *buf, uint64_t channel_id,
|
||||
|
||||
if (channel_id == LUA_INTERNAL_CALL) {
|
||||
kv_push(buf->update_callbacks, cb);
|
||||
if (cb.utf_sizes) {
|
||||
buf->update_need_codepoints = true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -169,7 +172,9 @@ void buf_updates_send_changes(buf_T *buf,
|
||||
int64_t num_removed,
|
||||
bool send_tick)
|
||||
{
|
||||
size_t deleted_bytes = ml_flush_deleted_bytes(buf);
|
||||
size_t deleted_codepoints, deleted_codeunits;
|
||||
size_t deleted_bytes = ml_flush_deleted_bytes(buf, &deleted_codepoints,
|
||||
&deleted_codeunits);
|
||||
|
||||
if (!buf_updates_active(buf)) {
|
||||
return;
|
||||
@ -233,8 +238,8 @@ void buf_updates_send_changes(buf_T *buf,
|
||||
bool keep = true;
|
||||
if (cb.on_lines != LUA_NOREF) {
|
||||
Array args = ARRAY_DICT_INIT;
|
||||
Object items[6];
|
||||
args.size = 6;
|
||||
Object items[8];
|
||||
args.size = 6; // may be increased to 8 below
|
||||
args.items = items;
|
||||
|
||||
// the first argument is always the buffer handle
|
||||
@ -254,6 +259,11 @@ void buf_updates_send_changes(buf_T *buf,
|
||||
|
||||
// byte count of previous contents
|
||||
args.items[5] = INTEGER_OBJ((Integer)deleted_bytes);
|
||||
if (cb.utf_sizes) {
|
||||
args.size = 8;
|
||||
args.items[6] = INTEGER_OBJ((Integer)deleted_codepoints);
|
||||
args.items[7] = INTEGER_OBJ((Integer)deleted_codeunits);
|
||||
}
|
||||
textlock++;
|
||||
Object res = executor_exec_lua_cb(cb.on_lines, "lines", args, true);
|
||||
textlock--;
|
||||
|
@ -1756,6 +1756,8 @@ failed:
|
||||
linecnt--;
|
||||
}
|
||||
curbuf->deleted_bytes = 0;
|
||||
curbuf->deleted_codepoints = 0;
|
||||
curbuf->deleted_codeunits = 0;
|
||||
linecnt = curbuf->b_ml.ml_line_count - linecnt;
|
||||
if (filesize == 0)
|
||||
linecnt = 0;
|
||||
|
@ -627,6 +627,8 @@ EXTERN pos_T Insstart_orig;
|
||||
EXTERN int orig_line_count INIT(= 0); /* Line count when "gR" started */
|
||||
EXTERN int vr_lines_changed INIT(= 0); /* #Lines changed by "gR" so far */
|
||||
|
||||
// increase around internal delete/replace
|
||||
EXTERN int inhibit_delete_count INIT(= 0);
|
||||
|
||||
/*
|
||||
* These flags are set based upon 'fileencoding'.
|
||||
|
@ -1438,6 +1438,39 @@ int utf16_to_utf8(const wchar_t *strw, char **str)
|
||||
|
||||
#endif
|
||||
|
||||
/// Measure the length of a string in corresponding UTF-32 and UTF-16 units.
|
||||
///
|
||||
/// Invalid UTF-8 bytes, or embedded surrogates, count as one code point/unit
|
||||
/// each.
|
||||
///
|
||||
/// The out parameters are incremented. This is used to measure the size of
|
||||
/// a buffer region consisting of multiple line segments.
|
||||
///
|
||||
/// @param s the string
|
||||
/// @param len maximum length (an earlier NUL terminates)
|
||||
/// @param[out] codepoints incremented with UTF-32 code point size
|
||||
/// @param[out] codeunits incremented with UTF-16 code unit size
|
||||
void mb_utflen(const char_u *s, size_t len, size_t *codepoints,
|
||||
size_t *codeunits)
|
||||
FUNC_ATTR_NONNULL_ALL
|
||||
{
|
||||
size_t count = 0, extra = 0;
|
||||
size_t clen;
|
||||
for (size_t i = 0; i < len && s[i] != NUL; i += clen) {
|
||||
clen = utf_ptr2len_len(s+i, len-i);
|
||||
// NB: gets the byte value of invalid sequence bytes.
|
||||
// we only care whether the char fits in the BMP or not
|
||||
int c = (clen > 1) ? utf_ptr2char(s+i) : s[i];
|
||||
count++;
|
||||
if (c > 0xFFFF) {
|
||||
extra++;
|
||||
}
|
||||
}
|
||||
*codepoints += count;
|
||||
*codeunits += count + extra;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Version of strnicmp() that handles multi-byte characters.
|
||||
* Needed for Big5, Shift-JIS and UTF-8 encoding. Other DBCS encodings can
|
||||
|
@ -2383,6 +2383,23 @@ static int ml_append_int(
|
||||
return OK;
|
||||
}
|
||||
|
||||
void ml_add_deleted_len(char_u *ptr, ssize_t len)
|
||||
{
|
||||
if (inhibit_delete_count) {
|
||||
return;
|
||||
}
|
||||
if (len == -1) {
|
||||
len = STRLEN(ptr);
|
||||
}
|
||||
curbuf->deleted_bytes += len+1;
|
||||
if (curbuf->update_need_codepoints) {
|
||||
mb_utflen(ptr, len, &curbuf->deleted_codepoints,
|
||||
&curbuf->deleted_codeunits);
|
||||
curbuf->deleted_codepoints++; // NL char
|
||||
curbuf->deleted_codeunits++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Replace line lnum, with buffering, in current buffer.
|
||||
*
|
||||
@ -2408,19 +2425,17 @@ int ml_replace(linenr_T lnum, char_u *line, bool copy)
|
||||
if (copy) {
|
||||
line = vim_strsave(line);
|
||||
}
|
||||
if (curbuf->b_ml.ml_line_lnum != lnum) { /* other line buffered */
|
||||
ml_flush_line(curbuf); /* flush it */
|
||||
} else if (curbuf->b_ml.ml_flags & ML_LINE_DIRTY) { /* same line allocated */
|
||||
// TODO FIXME: see other "TODO FIXME"
|
||||
curbuf->deleted_bytes += STRLEN(curbuf->b_ml.ml_line_ptr)+1;
|
||||
xfree(curbuf->b_ml.ml_line_ptr); /* free it */
|
||||
readlen = false; // already read it.
|
||||
if (curbuf->b_ml.ml_line_lnum != lnum) { // other line buffered
|
||||
ml_flush_line(curbuf); // flush it
|
||||
} else if (curbuf->b_ml.ml_flags & ML_LINE_DIRTY) { // same line allocated
|
||||
ml_add_deleted_len(curbuf->b_ml.ml_line_ptr, -1);
|
||||
readlen = false; // already added the length
|
||||
|
||||
xfree(curbuf->b_ml.ml_line_ptr); // free it
|
||||
}
|
||||
|
||||
if (readlen) {
|
||||
if (true) { // TODO: buffer updates active
|
||||
curbuf->deleted_bytes += STRLEN(ml_get_buf(curbuf, lnum, false))+1;
|
||||
}
|
||||
if (readlen && kv_size(curbuf->update_callbacks)) {
|
||||
ml_add_deleted_len(ml_get_buf(curbuf, lnum, false), -1);
|
||||
}
|
||||
|
||||
curbuf->b_ml.ml_line_ptr = line;
|
||||
@ -2504,7 +2519,10 @@ static int ml_delete_int(buf_T *buf, linenr_T lnum, bool message)
|
||||
else
|
||||
line_size = ((dp->db_index[idx - 1]) & DB_INDEX_MASK) - line_start;
|
||||
|
||||
buf->deleted_bytes += line_size;
|
||||
// Line should always have an NL char internally (represented as NUL),
|
||||
// even if 'noeol' is set.
|
||||
assert(line_size >= 1);
|
||||
ml_add_deleted_len((char_u *)dp + line_start, line_size-1);
|
||||
|
||||
/*
|
||||
* special case: If there is only one line in the data block it becomes empty.
|
||||
@ -2690,10 +2708,14 @@ void ml_clearmarked(void)
|
||||
return;
|
||||
}
|
||||
|
||||
size_t ml_flush_deleted_bytes(buf_T *buf)
|
||||
size_t ml_flush_deleted_bytes(buf_T *buf, size_t *codepoints, size_t *codeunits)
|
||||
{
|
||||
size_t ret = buf->deleted_bytes;
|
||||
*codepoints = buf->deleted_codepoints;
|
||||
*codeunits = buf->deleted_codeunits;
|
||||
buf->deleted_bytes = 0;
|
||||
buf->deleted_codepoints = 0;
|
||||
buf->deleted_codeunits = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -780,6 +780,7 @@ open_line (
|
||||
did_append = FALSE;
|
||||
}
|
||||
|
||||
inhibit_delete_count++;
|
||||
if (newindent
|
||||
|| did_si
|
||||
) {
|
||||
@ -821,6 +822,7 @@ open_line (
|
||||
did_si = false;
|
||||
}
|
||||
}
|
||||
inhibit_delete_count--;
|
||||
|
||||
/*
|
||||
* In REPLACE mode, for each character in the extra leader, there must be
|
||||
@ -1685,7 +1687,7 @@ int del_bytes(colnr_T count, bool fixpos_arg, bool use_delcombine)
|
||||
bool was_alloced = ml_line_alloced(); // check if oldp was allocated
|
||||
char_u *newp;
|
||||
if (was_alloced) {
|
||||
curbuf->deleted_bytes += (size_t)oldlen+1;
|
||||
ml_add_deleted_len(curbuf->b_ml.ml_line_ptr, oldlen);
|
||||
newp = oldp; // use same allocated memory
|
||||
} else { // need to allocate a new line
|
||||
newp = xmalloc((size_t)(oldlen + 1 - count));
|
||||
|
@ -13,7 +13,8 @@ local origlines = {"original line 1",
|
||||
"original line 3",
|
||||
"original line 4",
|
||||
"original line 5",
|
||||
"original line 6"}
|
||||
"original line 6",
|
||||
" indented line"}
|
||||
|
||||
describe('lua: buffer event callbacks', function()
|
||||
before_each(function()
|
||||
@ -21,14 +22,14 @@ describe('lua: buffer event callbacks', function()
|
||||
exec_lua([[
|
||||
local events = {}
|
||||
|
||||
function test_register(bufnr, id, changedtick)
|
||||
function test_register(bufnr, id, changedtick, utf_sizes)
|
||||
local function callback(...)
|
||||
table.insert(events, {id, ...})
|
||||
if test_unreg == id then
|
||||
return true
|
||||
end
|
||||
end
|
||||
local opts = {on_lines=callback, on_detach=callback}
|
||||
local opts = {on_lines=callback, on_detach=callback, utf_sizes=utf_sizes}
|
||||
if changedtick then
|
||||
opts.on_changedtick = callback
|
||||
end
|
||||
@ -48,18 +49,26 @@ describe('lua: buffer event callbacks', function()
|
||||
-- assert the wrong thing), but masks errors with unflushed lines (as
|
||||
-- nvim_buf_get_offset forces a flush of the memline). To be safe run the
|
||||
-- test both ways.
|
||||
local function check(verify)
|
||||
local function check(verify,utf_sizes)
|
||||
local lastsize
|
||||
meths.buf_set_lines(0, 0, -1, true, origlines)
|
||||
if verify then
|
||||
lastsize = meths.buf_get_offset(0, meths.buf_line_count(0))
|
||||
end
|
||||
exec_lua("return test_register(...)", 0, "test1")
|
||||
exec_lua("return test_register(...)", 0, "test1",false,utf_sizes)
|
||||
local tick = meths.buf_get_changedtick(0)
|
||||
|
||||
local verify_name = "test1"
|
||||
local function check_events(expected)
|
||||
local events = exec_lua("return get_events(...)" )
|
||||
if utf_sizes then
|
||||
-- this test case uses ASCII only, so sizes should be the same.
|
||||
-- Unicode is tested below.
|
||||
for _, event in ipairs(expected) do
|
||||
event[9] = event[8]
|
||||
event[10] = event[8]
|
||||
end
|
||||
end
|
||||
eq(expected, events)
|
||||
if verify then
|
||||
for _, event in ipairs(events) do
|
||||
@ -75,6 +84,7 @@ describe('lua: buffer event callbacks', function()
|
||||
end
|
||||
end
|
||||
|
||||
command('set autoindent')
|
||||
command('normal! GyyggP')
|
||||
tick = tick + 1
|
||||
check_events({{ "test1", "lines", 1, tick, 0, 0, 1, 0}})
|
||||
@ -83,7 +93,7 @@ describe('lua: buffer event callbacks', function()
|
||||
tick = tick + 1
|
||||
check_events({{ "test1", "lines", 1, tick, 3, 5, 4, 32 }})
|
||||
|
||||
exec_lua("return test_register(...)", 0, "test2", true)
|
||||
exec_lua("return test_register(...)", 0, "test2", true, utf_sizes)
|
||||
tick = tick + 1
|
||||
command('undo')
|
||||
|
||||
@ -124,7 +134,13 @@ describe('lua: buffer event callbacks', function()
|
||||
tick = tick + 1
|
||||
check_events({{ "test2", "lines", 1, tick, 4, 5, 5, 19 }})
|
||||
|
||||
feed('<esc>')
|
||||
feed('<esc>Go')
|
||||
tick = tick + 1
|
||||
check_events({{ "test2", "lines", 1, tick, 11, 11, 12, 0 }})
|
||||
|
||||
feed('x')
|
||||
tick = tick + 1
|
||||
check_events({{ "test2", "lines", 1, tick, 11, 12, 12, 5 }})
|
||||
|
||||
command('bwipe!')
|
||||
check_events({{ "test2", "detach", 1 }})
|
||||
@ -137,4 +153,54 @@ describe('lua: buffer event callbacks', function()
|
||||
it('works with verify', function()
|
||||
check(true)
|
||||
end)
|
||||
|
||||
it('works with utf_sizes and ASCII text', function()
|
||||
check(false,true)
|
||||
end)
|
||||
|
||||
it('works with utf_sizes and unicode text', function()
|
||||
local unicode_text = {"ascii text",
|
||||
"latin text åäö",
|
||||
"BMP text ɧ αλφά",
|
||||
"BMP text 汉语 ↥↧",
|
||||
"SMP 🤦 🦄🦃",
|
||||
"combining å بِيَّة"}
|
||||
meths.buf_set_lines(0, 0, -1, true, unicode_text)
|
||||
feed('gg')
|
||||
exec_lua("return test_register(...)", 0, "test1", false, true)
|
||||
local tick = meths.buf_get_changedtick(0)
|
||||
|
||||
feed('dd')
|
||||
tick = tick + 1
|
||||
eq({{ "test1", "lines", 1, tick, 0, 1, 0, 11, 11, 11 }}, exec_lua("return get_events(...)" ))
|
||||
|
||||
feed('A<bs>')
|
||||
tick = tick + 1
|
||||
eq({{ "test1", "lines", 1, tick, 0, 1, 1, 18, 15, 15 }}, exec_lua("return get_events(...)" ))
|
||||
|
||||
feed('<esc>jylp')
|
||||
tick = tick + 1
|
||||
eq({{ "test1", "lines", 1, tick, 1, 2, 2, 21, 16, 16 }}, exec_lua("return get_events(...)" ))
|
||||
|
||||
feed('+eea<cr>')
|
||||
tick = tick + 1
|
||||
eq({{ "test1", "lines", 1, tick, 2, 3, 4, 23, 15, 15 }}, exec_lua("return get_events(...)" ))
|
||||
|
||||
feed('<esc>jdw')
|
||||
tick = tick + 1
|
||||
-- non-BMP chars count as 2 UTF-16 code units
|
||||
eq({{ "test1", "lines", 1, tick, 4, 5, 5, 18, 9, 12 }}, exec_lua("return get_events(...)" ))
|
||||
|
||||
feed('+rx')
|
||||
tick = tick + 1
|
||||
-- count the individual codepoints of a composed character.
|
||||
eq({{ "test1", "lines", 1, tick, 5, 6, 6, 27, 20, 20 }}, exec_lua("return get_events(...)" ))
|
||||
|
||||
feed('kJ')
|
||||
tick = tick + 1
|
||||
-- NB: this is inefficient (but not really wrong).
|
||||
eq({{ "test1", "lines", 1, tick, 4, 5, 5, 14, 5, 8 },
|
||||
{ "test1", "lines", 1, tick+1, 5, 6, 5, 27, 20, 20 }}, exec_lua("return get_events(...)" ))
|
||||
end)
|
||||
|
||||
end)
|
||||
|
Loading…
Reference in New Issue
Block a user