fix(multibyte): handle backspace of wide clusters in replace mode

Make utf_head_off more robust against invalid sequences
and embedded NUL chars
This commit is contained in:
bfredl 2024-09-04 12:09:42 +02:00
parent 7b7c95dac9
commit fa99afe35e
9 changed files with 195 additions and 150 deletions

View File

@ -28,6 +28,7 @@
#include "nvim/cursor.h" #include "nvim/cursor.h"
#include "nvim/decoration.h" #include "nvim/decoration.h"
#include "nvim/drawscreen.h" #include "nvim/drawscreen.h"
#include "nvim/edit.h"
#include "nvim/errors.h" #include "nvim/errors.h"
#include "nvim/eval.h" #include "nvim/eval.h"
#include "nvim/eval/typval.h" #include "nvim/eval/typval.h"

View File

@ -756,10 +756,8 @@ void ins_char_bytes(char *buf, size_t charlen)
// put back when BS is used. The bytes of a multi-byte character are // put back when BS is used. The bytes of a multi-byte character are
// done the other way around, so that the first byte is popped off // done the other way around, so that the first byte is popped off
// first (it tells the byte length of the character). // first (it tells the byte length of the character).
replace_push(NUL); replace_push_nul();
for (size_t i = 0; i < oldlen; i++) { replace_push(oldp + col, oldlen);
i += (size_t)replace_push_mb(oldp + col + i) - 1;
}
} }
char *newp = xmalloc(linelen + newlen - oldlen); char *newp = xmalloc(linelen + newlen - oldlen);
@ -1137,12 +1135,10 @@ bool open_line(int dir, int flags, int second_line_indent, bool *did_do_comment)
// on the line onto the replace stack. We'll push any other characters // on the line onto the replace stack. We'll push any other characters
// that might be replaced at the start of the next line (due to // that might be replaced at the start of the next line (due to
// autoindent etc) a bit later. // autoindent etc) a bit later.
replace_push(NUL); // Call twice because BS over NL expects it replace_push_nul(); // Call twice because BS over NL expects it
replace_push(NUL); replace_push_nul();
p = saved_line + curwin->w_cursor.col; p = saved_line + curwin->w_cursor.col;
while (*p != NUL) { replace_push(p, strlen(p));
p += replace_push_mb(p);
}
saved_line[curwin->w_cursor.col] = NUL; saved_line[curwin->w_cursor.col] = NUL;
} }
@ -1691,13 +1687,13 @@ bool open_line(int dir, int flags, int second_line_indent, bool *did_do_comment)
// stack, preceded by a NUL, so they can be put back when a BS is // stack, preceded by a NUL, so they can be put back when a BS is
// entered. // entered.
if (REPLACE_NORMAL(State)) { if (REPLACE_NORMAL(State)) {
replace_push(NUL); // end of extra blanks replace_push_nul(); // end of extra blanks
} }
if (curbuf->b_p_ai || (flags & OPENLINE_DELSPACES)) { if (curbuf->b_p_ai || (flags & OPENLINE_DELSPACES)) {
while ((*p_extra == ' ' || *p_extra == '\t') while ((*p_extra == ' ' || *p_extra == '\t')
&& !utf_iscomposing_first(utf_ptr2char(p_extra + 1))) { && !utf_iscomposing_first(utf_ptr2char(p_extra + 1))) {
if (REPLACE_NORMAL(State)) { if (REPLACE_NORMAL(State)) {
replace_push(*p_extra); replace_push(p_extra, 1); // always ascii, len = 1
} }
p_extra++; p_extra++;
less_cols_off++; less_cols_off++;
@ -1794,7 +1790,7 @@ bool open_line(int dir, int flags, int second_line_indent, bool *did_do_comment)
// must be a NUL on the replace stack, for when it is deleted with BS // must be a NUL on the replace stack, for when it is deleted with BS
if (REPLACE_NORMAL(State)) { if (REPLACE_NORMAL(State)) {
for (colnr_T n = 0; n < curwin->w_cursor.col; n++) { for (colnr_T n = 0; n < curwin->w_cursor.col; n++) {
replace_push(NUL); replace_push_nul();
} }
} }
newcol += curwin->w_cursor.col; newcol += curwin->w_cursor.col;
@ -1808,7 +1804,7 @@ bool open_line(int dir, int flags, int second_line_indent, bool *did_do_comment)
// must be a NUL on the replace stack, for when it is deleted with BS. // must be a NUL on the replace stack, for when it is deleted with BS.
if (REPLACE_NORMAL(State)) { if (REPLACE_NORMAL(State)) {
while (lead_len-- > 0) { while (lead_len-- > 0) {
replace_push(NUL); replace_push_nul();
} }
} }

View File

@ -136,6 +136,8 @@ static TriState dont_sync_undo = kFalse; // CTRL-G U prevents syncing undo
static linenr_T o_lnum = 0; static linenr_T o_lnum = 0;
static kvec_t(char) replace_stack = KV_INITIAL_VALUE;
static void insert_enter(InsertState *s) static void insert_enter(InsertState *s)
{ {
s->did_backspace = true; s->did_backspace = true;
@ -1618,9 +1620,8 @@ void undisplay_dollar(void)
/// type == INDENT_SET set indent to "amount" /// type == INDENT_SET set indent to "amount"
/// ///
/// @param round if true, round the indent to 'shiftwidth' (only with _INC and _DEC). /// @param round if true, round the indent to 'shiftwidth' (only with _INC and _DEC).
/// @param replaced replaced character, put on replace stack
/// @param call_changed_bytes call changed_bytes() /// @param call_changed_bytes call changed_bytes()
void change_indent(int type, int amount, int round, int replaced, bool call_changed_bytes) void change_indent(int type, int amount, int round, bool call_changed_bytes)
{ {
int insstart_less; // reduction for Insstart.col int insstart_less; // reduction for Insstart.col
colnr_T orig_col = 0; // init for GCC colnr_T orig_col = 0; // init for GCC
@ -1767,12 +1768,8 @@ void change_indent(int type, int amount, int round, int replaced, bool call_chan
replace_join(0); // remove a NUL from the replace stack replace_join(0); // remove a NUL from the replace stack
start_col--; start_col--;
} }
while (start_col < (int)curwin->w_cursor.col || replaced) { while (start_col < (int)curwin->w_cursor.col) {
replace_push(NUL); replace_push_nul();
if (replaced) {
replace_push(replaced);
replaced = NUL;
}
start_col++; start_col++;
} }
} }
@ -2325,7 +2322,7 @@ int stop_arrow(void)
static void stop_insert(pos_T *end_insert_pos, int esc, int nomove) static void stop_insert(pos_T *end_insert_pos, int esc, int nomove)
{ {
stop_redo_ins(); stop_redo_ins();
replace_flush(); // abandon replace stack kv_destroy(replace_stack); // abandon replace stack (reinitializes)
// Save the inserted text for later redo with ^@ and CTRL-A. // Save the inserted text for later redo with ^@ and CTRL-A.
// Don't do it when "restart_edit" was set and nothing was inserted, // Don't do it when "restart_edit" was set and nothing was inserted,
@ -2802,57 +2799,51 @@ static bool echeck_abbr(int c)
// that the NL replaced. The extra one stores the characters after the cursor // that the NL replaced. The extra one stores the characters after the cursor
// that were deleted (always white space). // that were deleted (always white space).
static uint8_t *replace_stack = NULL;
static ssize_t replace_stack_nr = 0; // next entry in replace stack
static ssize_t replace_stack_len = 0; // max. number of entries
/// Push character that is replaced onto the replace stack. /// Push character that is replaced onto the replace stack.
/// ///
/// replace_offset is normally 0, in which case replace_push will add a new /// replace_offset is normally 0, in which case replace_push will add a new
/// character at the end of the stack. If replace_offset is not 0, that many /// character at the end of the stack. If replace_offset is not 0, that many
/// characters will be left on the stack above the newly inserted character. /// characters will be left on the stack above the newly inserted character.
/// ///
/// @param c character that is replaced (NUL is none) /// @param str character that is replaced (NUL is none)
void replace_push(int c) /// @param len length of character in bytes
void replace_push(char *str, size_t len)
{ {
if (replace_stack_nr < replace_offset) { // nothing to do // TODO(bfredl): replace_offset is suss af, if we don't need it, this
// function is just kv_concat() :p
if (kv_size(replace_stack) < (size_t)replace_offset) { // nothing to do
return; return;
} }
if (replace_stack_len <= replace_stack_nr) { kv_ensure_space(replace_stack, len);
replace_stack_len += 50;
replace_stack = xrealloc(replace_stack, (size_t)replace_stack_len); char *p = replace_stack.items + kv_size(replace_stack) - replace_offset;
}
uint8_t *p = replace_stack + replace_stack_nr - replace_offset;
if (replace_offset) { if (replace_offset) {
memmove(p + 1, p, (size_t)replace_offset); memmove(p + len, p, (size_t)replace_offset);
} }
*p = (uint8_t)c; memcpy(p, str, len);
replace_stack_nr++; kv_size(replace_stack) += len;
} }
/// Push a character onto the replace stack. Handles a multi-byte character in /// push NUL as separator between entries in the stack
/// reverse byte order, so that the first byte is popped off first. void replace_push_nul(void)
///
/// @return the number of bytes done (includes composing characters).
int replace_push_mb(char *p)
{ {
int l = utfc_ptr2len(p); replace_push("", 1);
// TODO(bfredl): stop doing this insantity and instead use utf_head_off() when popping.
// or just keep a secondary array with char byte lenghts
for (int j = l - 1; j >= 0; j--) {
replace_push(p[j]);
}
return l;
} }
/// Pop one item from the replace stack. /// Check top of replace stack, pop it if it was NUL
/// ///
/// @return -1 if stack is empty, replaced character or NUL otherwise /// when a non-NUL byte is found, use mb_replace_pop_ins() to
static int replace_pop(void) /// pop one complete multibyte character.
///
/// @return -1 if stack is empty, last byte of char or NUL otherwise
static int replace_pop_if_nul(void)
{ {
return (replace_stack_nr == 0) ? -1 : (int)replace_stack[--replace_stack_nr]; int ch = (kv_size(replace_stack)) ? (uint8_t)kv_A(replace_stack, kv_size(replace_stack) - 1) : -1;
if (ch == NUL) {
kv_size(replace_stack)--;
}
return ch;
} }
/// Join the top two items on the replace stack. This removes the "off"'th NUL /// Join the top two items on the replace stack. This removes the "off"'th NUL
@ -2861,11 +2852,11 @@ static int replace_pop(void)
/// @param off offset for which NUL to remove /// @param off offset for which NUL to remove
static void replace_join(int off) static void replace_join(int off)
{ {
for (ssize_t i = replace_stack_nr; --i >= 0;) { for (ssize_t i = (ssize_t)kv_size(replace_stack); --i >= 0;) {
if (replace_stack[i] == NUL && off-- <= 0) { if (kv_A(replace_stack, i) == NUL && off-- <= 0) {
replace_stack_nr--; kv_size(replace_stack)--;
memmove(replace_stack + i, replace_stack + i + 1, memmove(&kv_A(replace_stack, i), &kv_A(replace_stack, i + 1),
(size_t)(replace_stack_nr - i)); (kv_size(replace_stack) - (size_t)i));
return; return;
} }
} }
@ -2875,72 +2866,25 @@ static void replace_join(int off)
/// before the cursor. Can only be used in MODE_REPLACE or MODE_VREPLACE state. /// before the cursor. Can only be used in MODE_REPLACE or MODE_VREPLACE state.
static void replace_pop_ins(void) static void replace_pop_ins(void)
{ {
int cc;
int oldState = State; int oldState = State;
State = MODE_NORMAL; // don't want MODE_REPLACE here State = MODE_NORMAL; // don't want MODE_REPLACE here
while ((cc = replace_pop()) > 0) { while ((replace_pop_if_nul()) > 0) {
mb_replace_pop_ins(cc); mb_replace_pop_ins();
dec_cursor(); dec_cursor();
} }
State = oldState; State = oldState;
} }
// Insert bytes popped from the replace stack. "cc" is the first byte. If it /// Insert multibyte char popped from the replace stack.
// indicates a multi-byte char, pop the other bytes too. ///
static void mb_replace_pop_ins(int cc) /// caller must already have checked the top of the stack is not NUL!!
static void mb_replace_pop_ins(void)
{ {
int n; int len = utf_head_off(&kv_A(replace_stack, 0),
uint8_t buf[MB_MAXBYTES + 1]; &kv_A(replace_stack, kv_size(replace_stack) - 1)) + 1;
kv_size(replace_stack) -= (size_t)len;
if ((n = MB_BYTE2LEN(cc)) > 1) { ins_bytes_len(&kv_A(replace_stack, kv_size(replace_stack)), (size_t)len);
buf[0] = (uint8_t)cc;
for (int i = 1; i < n; i++) {
buf[i] = (uint8_t)replace_pop();
}
ins_bytes_len((char *)buf, (size_t)n);
} else {
ins_char(cc);
}
// Handle composing chars.
while (true) {
int c = replace_pop();
if (c == -1) { // stack empty
break;
}
if ((n = MB_BYTE2LEN(c)) == 1) {
// Not a multi-byte char, put it back.
replace_push(c);
break;
}
buf[0] = (uint8_t)c;
assert(n > 1);
for (int i = 1; i < n; i++) {
buf[i] = (uint8_t)replace_pop();
}
// TODO(bfredl): by fixing replace_push_mb, upgrade to use
// the new composing algorithm
if (utf_iscomposing_legacy(utf_ptr2char((char *)buf))) {
ins_bytes_len((char *)buf, (size_t)n);
} else {
// Not a composing char, put it back.
for (int i = n - 1; i >= 0; i--) {
replace_push(buf[i]);
}
break;
}
}
}
// make the replace stack empty
// (called when exiting replace mode)
static void replace_flush(void)
{
XFREE_CLEAR(replace_stack);
replace_stack_len = 0;
replace_stack_nr = 0;
} }
// Handle doing a BS for one character. // Handle doing a BS for one character.
@ -2955,7 +2899,7 @@ static void replace_do_bs(int limit_col)
colnr_T start_vcol; colnr_T start_vcol;
const int l_State = State; const int l_State = State;
int cc = replace_pop(); int cc = replace_pop_if_nul();
if (cc > 0) { if (cc > 0) {
int orig_len = 0; int orig_len = 0;
int orig_vcols = 0; int orig_vcols = 0;
@ -2969,7 +2913,6 @@ static void replace_do_bs(int limit_col)
if (l_State & VREPLACE_FLAG) { if (l_State & VREPLACE_FLAG) {
orig_len = get_cursor_pos_len(); orig_len = get_cursor_pos_len();
} }
replace_push(cc);
replace_pop_ins(); replace_pop_ins();
if (l_State & VREPLACE_FLAG) { if (l_State & VREPLACE_FLAG) {
@ -3628,9 +3571,9 @@ static void ins_shift(int c, int lastc)
if (lastc == '^') { if (lastc == '^') {
old_indent = get_indent(); // remember curr. indent old_indent = get_indent(); // remember curr. indent
} }
change_indent(INDENT_SET, 0, true, 0, true); change_indent(INDENT_SET, 0, true, true);
} else { } else {
change_indent(c == Ctrl_D ? INDENT_DEC : INDENT_INC, 0, true, 0, true); change_indent(c == Ctrl_D ? INDENT_DEC : INDENT_INC, 0, true, true);
} }
if (did_ai && *skipwhite(get_cursor_line_ptr()) != NUL) { if (did_ai && *skipwhite(get_cursor_line_ptr()) != NUL) {
@ -3749,7 +3692,7 @@ static bool ins_bs(int c, int mode, int *inserted_space_p)
// cc >= 0: NL was replaced, put original characters back // cc >= 0: NL was replaced, put original characters back
cc = -1; cc = -1;
if (State & REPLACE_FLAG) { if (State & REPLACE_FLAG) {
cc = replace_pop(); // returns -1 if NL was inserted cc = replace_pop_if_nul(); // returns -1 if NL was inserted
} }
// In replace mode, in the line we started replacing, we only move the // In replace mode, in the line we started replacing, we only move the
// cursor. // cursor.
@ -3795,9 +3738,9 @@ static bool ins_bs(int c, int mode, int *inserted_space_p)
// restore characters (blanks) deleted after cursor // restore characters (blanks) deleted after cursor
while (cc > 0) { while (cc > 0) {
colnr_T save_col = curwin->w_cursor.col; colnr_T save_col = curwin->w_cursor.col;
mb_replace_pop_ins(cc); mb_replace_pop_ins();
curwin->w_cursor.col = save_col; curwin->w_cursor.col = save_col;
cc = replace_pop(); cc = replace_pop_if_nul();
} }
// restore the characters that NL replaced // restore the characters that NL replaced
replace_pop_ins(); replace_pop_ins();
@ -3906,7 +3849,7 @@ static bool ins_bs(int c, int mode, int *inserted_space_p)
} else { } else {
ins_str(" "); ins_str(" ");
if ((State & REPLACE_FLAG)) { if ((State & REPLACE_FLAG)) {
replace_push(NUL); replace_push_nul();
} }
} }
} }
@ -4316,7 +4259,7 @@ static bool ins_tab(void)
} else { } else {
ins_str(" "); ins_str(" ");
if (State & REPLACE_FLAG) { // no char replaced if (State & REPLACE_FLAG) { // no char replaced
replace_push(NUL); replace_push_nul();
} }
} }
} }
@ -4483,7 +4426,7 @@ bool ins_eol(int c)
// character under the cursor. Only push a NUL on the replace stack, // character under the cursor. Only push a NUL on the replace stack,
// nothing to put back when the NL is deleted. // nothing to put back when the NL is deleted.
if ((State & REPLACE_FLAG) && !(State & VREPLACE_FLAG)) { if ((State & REPLACE_FLAG) && !(State & VREPLACE_FLAG)) {
replace_push(NUL); replace_push_nul();
} }
// In MODE_VREPLACE state, a NL replaces the rest of the line, and starts // In MODE_VREPLACE state, a NL replaces the rest of the line, and starts
@ -4684,7 +4627,7 @@ static void ins_try_si(int c)
i = get_indent(); i = get_indent();
curwin->w_cursor = old_pos; curwin->w_cursor = old_pos;
if (State & VREPLACE_FLAG) { if (State & VREPLACE_FLAG) {
change_indent(INDENT_SET, i, false, NUL, true); change_indent(INDENT_SET, i, false, true);
} else { } else {
set_indent(i, SIN_CHANGED); set_indent(i, SIN_CHANGED);
} }

View File

@ -1407,7 +1407,7 @@ void fixthisline(IndentGetter get_the_indent)
return; return;
} }
change_indent(INDENT_SET, amount, false, 0, true); change_indent(INDENT_SET, amount, false, true);
if (linewhite(curwin->w_cursor.lnum)) { if (linewhite(curwin->w_cursor.lnum)) {
did_ai = true; // delete the indent if the line stays empty did_ai = true; // delete the indent if the line stays empty
} }

View File

@ -523,12 +523,14 @@ int utf_ptr2cells(const char *p_in)
} }
/// Convert a UTF-8 byte sequence to a character number. /// Convert a UTF-8 byte sequence to a character number.
/// Doesn't handle ascii! only multibyte and illegal sequences. /// Doesn't handle ascii! only multibyte and illegal sequences. ASCII (including NUL)
/// are treated like illegal sequences.
/// ///
/// @param[in] p String to convert. /// @param[in] p String to convert.
/// @param[in] len Length of the character in bytes, 0 or 1 if illegal. /// @param[in] len Length of the character in bytes, 0 or 1 if illegal.
/// ///
/// @return Unicode codepoint. A negative value when the sequence is illegal. /// @return Unicode codepoint. A negative value when the sequence is illegal (or
/// ASCII, including NUL).
int32_t utf_ptr2CharInfo_impl(uint8_t const *p, uintptr_t const len) int32_t utf_ptr2CharInfo_impl(uint8_t const *p, uintptr_t const len)
FUNC_ATTR_PURE FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
{ {
@ -1780,15 +1782,15 @@ int utf_head_off(const char *base_in, const char *p_in)
start--; start--;
} }
uint8_t cur_len = utf8len_tab[*start]; const uint8_t last_len = utf8len_tab[*start];
int32_t cur_code = utf_ptr2CharInfo_impl(start, (uintptr_t)cur_len); int32_t cur_code = utf_ptr2CharInfo_impl(start, (uintptr_t)last_len);
if (cur_code < 0) { if (cur_code < 0 || p - start >= last_len) {
return 0; // p must be part of an illegal sequence return 0; // p must be part of an illegal sequence
} }
const uint8_t * const safe_end = start + cur_len; const uint8_t * const safe_end = start + last_len;
int cur_bc = utf8proc_get_property(cur_code)->boundclass; int cur_bc = utf8proc_get_property(cur_code)->boundclass;
if (always_break(cur_bc)) { if (always_break(cur_bc) || start == base) {
return (int)(p - start); return (int)(p - start);
} }
@ -1796,18 +1798,23 @@ int utf_head_off(const char *base_in, const char *p_in)
const uint8_t *cur_pos = start; const uint8_t *cur_pos = start;
const uint8_t *const p_start = start; const uint8_t *const p_start = start;
if (start == base) { while (true) {
return (int)(p - start); if (start[-1] == NUL) {
} break;
}
start--;
if (*start < 0x80) { // stop on ascii, we are done
break;
}
start--;
while (*start >= 0x80) { // stop on ascii, we are done
while (start > base && (*start & 0xc0) == 0x80 && (cur_pos - start) < 6) { while (start > base && (*start & 0xc0) == 0x80 && (cur_pos - start) < 6) {
start--; start--;
} }
int32_t prev_code = utf_ptr2CharInfo_impl(start, (uintptr_t)utf8len_tab[*start]); int prev_len = utf8len_tab[*start];
if (prev_code < 0) { int32_t prev_code = utf_ptr2CharInfo_impl(start, (uintptr_t)prev_len);
if (prev_code < 0 || prev_len < cur_pos - start) {
start = cur_pos; // start at valid sequence after invalid bytes start = cur_pos; // start at valid sequence after invalid bytes
break; break;
} }
@ -1822,12 +1829,10 @@ int utf_head_off(const char *base_in, const char *p_in)
cur_pos = start; cur_pos = start;
cur_bc = prev_bc; cur_bc = prev_bc;
cur_code = prev_code; cur_code = prev_code;
start--;
} }
// hot path: we are already on the first codepoint of a sequence // hot path: we are already on the first codepoint of a sequence
if (start == p_start) { if (start == p_start && last_len > p - start) {
return (int)(p - start); return (int)(p - start);
} }

View File

@ -306,7 +306,7 @@ void shift_line(bool left, bool round, int amount, int call_changed_bytes)
// Set new indent // Set new indent
if (State & VREPLACE_FLAG) { if (State & VREPLACE_FLAG) {
change_indent(INDENT_SET, count, false, NUL, call_changed_bytes); change_indent(INDENT_SET, count, false, call_changed_bytes);
} else { } else {
set_indent(count, call_changed_bytes ? SIN_CHANGED : 0); set_indent(count, call_changed_bytes ? SIN_CHANGED : 0);
} }

View File

@ -400,7 +400,7 @@ void internal_format(int textwidth, int second_indent, int flags, bool format_on
} }
if (second_indent >= 0) { if (second_indent >= 0) {
if (State & VREPLACE_FLAG) { if (State & VREPLACE_FLAG) {
change_indent(INDENT_SET, second_indent, false, NUL, true); change_indent(INDENT_SET, second_indent, false, true);
} else if (leader_len > 0 && second_indent - leader_len > 0) { } else if (leader_len > 0 && second_indent - leader_len > 0) {
int padding = second_indent - leader_len; int padding = second_indent - leader_len;

View File

@ -351,4 +351,97 @@ describe('insert-mode', function()
eq(2, api.nvim_win_get_cursor(0)[1]) eq(2, api.nvim_win_get_cursor(0)[1])
end) end)
end) end)
it('backspace after replacing multibyte chars', function()
local screen = Screen.new(30, 3)
screen:attach()
api.nvim_buf_set_lines(0, 0, -1, true, { 'test ȧ̟̜̝̅̚m̆̉̐̐̇̈ å' })
feed('^Rabcdefghi')
screen:expect([[
abcdefghi^ |
{1:~ }|
{5:-- REPLACE --} |
]])
feed('<bs>')
screen:expect([[
abcdefgh^å |
{1:~ }|
{5:-- REPLACE --} |
]])
feed('<bs>')
screen:expect([[
abcdefg^ å |
{1:~ }|
{5:-- REPLACE --} |
]])
feed('<bs>')
screen:expect([[
abcdef^m̆̉̐̐̇̈ å |
{1:~ }|
{5:-- REPLACE --} |
]])
feed('<bs>')
screen:expect([[
abcde^ȧ̟̜̝̅̚m̆̉̐̐̇̈ å |
{1:~ }|
{5:-- REPLACE --} |
]])
feed('<bs>')
screen:expect([[
abcd^ ȧ̟̜̝̅̚m̆̉̐̐̇̈ å |
{1:~ }|
{5:-- REPLACE --} |
]])
feed('<esc>')
api.nvim_buf_set_lines(0, 0, -1, true, { 'wow 🧑🌾🏳x' })
feed('^Rabcd')
screen:expect([[
abcd^🧑🌾🏳x |
{1:~ }|
{5:-- REPLACE --} |
]])
feed('e')
screen:expect([[
abcde^🏳x |
{1:~ }|
{5:-- REPLACE --} |
]])
feed('f')
screen:expect([[
abcdef^x |
{1:~ }|
{5:-- REPLACE --} |
]])
feed('<bs>')
screen:expect([[
abcde^🏳x |
{1:~ }|
{5:-- REPLACE --} |
]])
feed('<bs>')
screen:expect([[
abcd^🧑🌾🏳x |
{1:~ }|
{5:-- REPLACE --} |
]])
feed('<bs>')
screen:expect([[
abc^ 🧑🌾🏳x |
{1:~ }|
{5:-- REPLACE --} |
]])
end)
end) end)

View File

@ -4,7 +4,6 @@ local itp = t.gen_itp(it)
local ffi = t.ffi local ffi = t.ffi
local eq = t.eq local eq = t.eq
local to_cstr = t.to_cstr local to_cstr = t.to_cstr
local ok = t.ok
local lib = t.cimport( local lib = t.cimport(
'./src/nvim/mbyte.h', './src/nvim/mbyte.h',
@ -302,7 +301,10 @@ describe('mbyte', function()
local mb_glyphs = {} local mb_glyphs = {}
while pos < len do while pos < len do
local clen = lib.utfc_ptr2len(cstr + pos) local clen = lib.utfc_ptr2len(cstr + pos)
ok(clen > 0) -- otherwise we get stuck if clen == 0 then
eq(0, string.byte(str, pos + 1)) -- only NUL bytes can have length zero
clen = 1 -- but skip it, otherwise we get stuck
end
if clen > 1 then if clen > 1 then
table.insert(mb_glyphs, string.sub(str, pos + 1, pos + clen)) table.insert(mb_glyphs, string.sub(str, pos + 1, pos + clen))
end end
@ -325,13 +327,18 @@ describe('mbyte', function()
-- stylua doesn't like ZWJ chars.. -- stylua doesn't like ZWJ chars..
-- stylua: ignore start -- stylua: ignore start
check('hej och hå 🧑‍🌾!', { 'å', '🧑‍🌾' }) check('hej och hå 🧑‍🌾!', { 'å', '🧑‍🌾' })
-- emoji only (various kinds of combinations, use g8 to see them)
-- emoji (various kinds of combinations, use g8 to see them)
check("🏳️‍⚧️🧑‍🌾❤️😂🏴‍☠️", {"🏳️‍⚧️", "🧑‍🌾", "❤️", "😂", "🏴‍☠️"}) check("🏳️‍⚧️🧑‍🌾❤️😂🏴‍☠️", {"🏳️‍⚧️", "🧑‍🌾", "❤️", "😂", "🏴‍☠️"})
check('🏳xy🧑🌾\r❤️😂å🏴‍☠️€', { '🏳️‍⚧️', '🧑‍🌾', '❤️', '😂', 'å', '🏴‍☠️', '€' }) check('🏳xy🧑🌾\r❤️😂å🏴‍☠️€', { '🏳️‍⚧️', '🧑‍🌾', '❤️', '😂', 'å', '🏴‍☠️', '€' })
check('🏳️‍⚧️\000🧑‍🌾\000❤️\000😂\000å\000🏴‍☠️\000€', { '🏳️‍⚧️', '🧑‍🌾', '❤️', '😂', 'å', '🏴‍☠️', '€' })
check('\195🏳️‍⚧️\198🧑‍🌾\165❤️\168\195😂\255🏴‍☠️\129€\165', { '🏳️‍⚧️', '🧑‍🌾', '❤️', '😂', '🏴‍☠️', '€' })
check('🇦🅱️ 🇦🇽 🇦🇨🇦 🇲🇽🇹🇱',{'🇦', '🅱️', '🇦🇽', '🇦🇨', '🇦', '🇲🇽', '🇹🇱'}) check('🇦🅱️ 🇦🇽 🇦🇨🇦 🇲🇽🇹🇱',{'🇦', '🅱️', '🇦🇽', '🇦🇨', '🇦', '🇲🇽', '🇹🇱'})
check('🏴󠁧󠁢󠁳󠁣󠁴󠁿🏴󠁧󠁢󠁷󠁬󠁳󠁿', {'🏴󠁧󠁢󠁳󠁣󠁴󠁿', '🏴󠁧󠁢󠁷󠁬󠁳󠁿'}) check('🏴󠁧󠁢󠁳󠁣󠁴󠁿🏴󠁧󠁢󠁷󠁬󠁳󠁿', {'🏴󠁧󠁢󠁳󠁣󠁴󠁿', '🏴󠁧󠁢󠁷󠁬󠁳󠁿'})
check('å\165ü\195aëq\168β\000\169\255', {'å', 'ü', 'ë', 'β', ''})
lib.p_arshape = true -- default lib.p_arshape = true -- default
check('سلام', { 'س', 'لا', 'م' }) check('سلام', { 'س', 'لا', 'م' })
lib.p_arshape = false lib.p_arshape = false