fix(lsp): correctly align start and end range to codepoints during incremental sync (#16670)

Closes #16624

Fixes two issues with aligning the start position and end position to
codepoints when calculating the start and end range.

When aligning the start position:
* use aligned byte index to calculate character index rather than 
  the unadjusted byte

When aligning the end position:
* do not adjust the end byte if it falls on a UTF-8 codepoint
* align byte to the first byte of the next codepoint rather than the
  last byte of the current codepoint
* compute the character end range from the aligned byte index

This commit also adds test coverage, including multibyte operations
that failed before this commit.
This commit is contained in:
github-actions[bot] 2021-12-21 09:24:50 -08:00 committed by GitHub
parent 7f4fa077cd
commit 222d1414dc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 277 additions and 7 deletions

View File

@ -105,15 +105,16 @@ local function align_end_position(line, byte, offset_encoding)
char = compute_line_length(line, offset_encoding) + 1
else
-- Modifying line, find the nearest utf codepoint
local offset = str_utf_end(line, byte)
local offset = str_utf_start(line, byte)
-- If the byte does not fall on the start of the character, then
-- align to the start of the next character.
if offset > 0 then
char = byte_to_utf(line, byte, offset_encoding) + 1
byte = byte + offset
else
if offset < 0 then
byte = byte + str_utf_end(line, byte) + 1
end
if byte <= #line then
char = byte_to_utf(line, byte, offset_encoding)
byte = byte + offset
else
char = compute_line_length(line, offset_encoding) + 1
end
-- Extending line, find the nearest utf codepoint for the last valid character
end
@ -167,7 +168,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline,
char_idx = compute_line_length(prev_line, offset_encoding) + 1
else
byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx)
char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding)
char_idx = byte_to_utf(prev_line, byte_idx, offset_encoding)
end
-- Return the start difference (shared for new and prev lines)

View File

@ -164,6 +164,201 @@ describe('incremental synchronization', function()
}
test_edit({"a"}, {"rb"}, expected_text_changes, 'utf-16', '\n')
end)
-- `dd` on the only line of the buffer: the expected change spans from the
-- start of line 0 to the start of line 1 and replaces it with nothing.
it('deleting a line', function()
  local removed_range = {
    ['start'] = { line = 0, character = 0 },
    ['end'] = { line = 1, character = 0 },
  }
  -- 12 presumably counts the 11 characters of "hello world" plus the '\n'
  -- line ending handed to test_edit.
  local expected = {
    { range = removed_range, rangeLength = 12, text = '' },
  }
  test_edit({"hello world"}, {"dd"}, expected, 'utf-16', '\n')
end)
-- `jdd` moves to the empty second line and deletes it: the expected change
-- spans from the start of line 1 to the start of line 2.
it('deleting an empty line', function()
  local removed_range = {
    ['start'] = { line = 1, character = 0 },
    ['end'] = { line = 2, character = 0 },
  }
  -- The deleted line has no characters, so only one unit of length remains
  -- (presumably the line ending).
  local expected = {
    { range = removed_range, rangeLength = 1, text = '' },
  }
  test_edit({"hello world", ""}, {"jdd"}, expected, 'utf-16', '\n')
end)
-- `yyp` duplicates the only line: the expected change is a pure insertion
-- (empty range, rangeLength 0) at the start of line 1.
it('adding a line', function()
  local insertion_point = { line = 1, character = 0 }
  local expected = {
    {
      range = { ['start'] = insertion_point, ['end'] = insertion_point },
      rangeLength = 0,
      text = 'hello world\n',
    },
  }
  test_edit({"hello world"}, {"yyp"}, expected, 'utf-16', '\n')
end)
-- `o` opens a new line below the only line: the expected change inserts a
-- bare line ending at the start of line 1 without replacing anything.
it('adding an empty line', function()
  local insertion_point = { line = 1, character = 0 }
  local expected = {
    {
      range = { ['start'] = insertion_point, ['end'] = insertion_point },
      rangeLength = 0,
      text = '\n',
    },
  }
  test_edit({"hello world"}, {"o"}, expected, 'utf-16', '\n')
end)
end)
describe('multi line edit', function()
-- A visual-mode deletion spanning three lines, followed by undo and redo.
-- Each step is expected to be reported as its own text change, in order.
it('deletion and insertion', function()
  -- Builds one expected text change from its start/end positions, the length
  -- of the replaced range, and the replacement text.
  local function change(start_line, start_char, end_line, end_char, range_length, text)
    return {
      range = {
        ['start'] = { line = start_line, character = start_char },
        ['end'] = { line = end_line, character = end_char },
      },
      rangeLength = range_length,
      text = text,
    }
  end

  local original_lines = {
    "\\begin{document}",
    "test_fdsa",
    "hello world",
    "1234 asdf",
    "\\end{document}"
  }

  local expected_text_changes = {
    -- delete "_fdsa" from end of line 1
    change(1, 4, 1, 9, 5, ''),
    -- delete "hello world\n" from line 2
    change(2, 0, 3, 0, 12, ''),
    -- delete "1234" from beginning of line 2
    change(2, 0, 2, 4, 4, ''),
    -- add " asdf" to end of line 1
    change(1, 4, 1, 4, 0, ' asdf'),
    -- delete " asdf\n" from line 2
    change(2, 0, 3, 0, 6, ''),
    -- undo entire deletion
    change(1, 4, 1, 9, 5, "_fdsa\nhello world\n1234 asdf"),
    -- redo entire deletion
    change(1, 4, 3, 9, 27, ' asdf'),
  }

  test_edit(original_lines, {"jf_vejjbhhdu<C-R>"}, expected_text_changes, 'utf-16', '\n')
end)
end)
describe('multi-operation edits', function()
@ -297,6 +492,80 @@ describe('incremental synchronization', function()
}
test_edit({"🔥"}, {"x"}, expected_text_changes, 'utf-16', '\n')
end)
-- Replaces one multibyte character with another that shares its leading
-- UTF-8 bytes, so a byte-wise comparison first differs in the middle of the
-- codepoint. The reported range must still cover the whole character.
it('replacing a multibyte character with matching prefix', function()
  -- ⟨ is e29fa8, ⟩ is e29fa9
  -- The fixture line must contain ⟨: `r` needs a character under the cursor
  -- to replace, and the expected change below replaces exactly one character.
  local original_lines = {
    "\\begin{document}",
    "⟨",
    "\\end{document}",
  }
  local expected_text_changes = {
    {
      range = {
        ['start'] = {
          character = 0,
          line = 1
        },
        ['end'] = {
          character = 1,
          line = 1
        }
      },
      rangeLength = 1,
      -- A replacement sends the new character, not an empty string.
      text = '⟩'
    }
  }
  test_edit(original_lines, {"jr⟩"}, expected_text_changes, 'utf-16', '\n')
end)
-- Replaces one multibyte character with another that shares its trailing
-- UTF-8 bytes, so a byte-wise comparison from the end stops in the middle of
-- the codepoint. The reported range must still cover the whole character.
it('replacing a multibyte character with matching suffix', function()
  -- ฟ is e0b89f, ḟ is e1b89f
  -- The fixture line must contain ฟ: `r` needs a character under the cursor
  -- to replace, and the expected change below replaces exactly one character.
  local original_lines = {
    "\\begin{document}",
    "ฟ",
    "\\end{document}",
  }
  local expected_text_changes = {
    {
      range = {
        ['start'] = {
          character = 0,
          line = 1
        },
        ['end'] = {
          character = 1,
          line = 1
        }
      },
      rangeLength = 1,
      -- A replacement sends the new character, not an empty string.
      text = 'ḟ'
    }
  }
  test_edit(original_lines, {"jrḟ"}, expected_text_changes, 'utf-16', '\n')
end)
-- `ji ` inserts a single space at the start of line 1: the expected change is
-- a pure insertion (empty range, rangeLength 0) at character 0.
-- NOTE(review): the title mentions a multibyte character, but the fixture's
-- second line is empty here -- confirm whether a character was lost from it.
it('inserting before a multibyte character', function()
  local original_lines = {
    "\\begin{document}",
    "",
    "\\end{document}",
  }
  local insertion_point = { line = 1, character = 0 }
  local expected = {
    {
      range = { ['start'] = insertion_point, ['end'] = insertion_point },
      rangeLength = 0,
      text = ' ',
    },
  }
  test_edit(original_lines, {"ji "}, expected, 'utf-16', '\n')
end)
it('deleting a multibyte character from a long line', function()
local expected_text_changes = {
{