mirror of
https://github.com/neovim/neovim.git
synced 2024-12-19 18:55:14 -07:00
eec529cf9e
14:13:04,119 INFO - # ./test/unit/helpers.lua @ 760: mbyte utf_char2bytes for chars 0xa000 - 0xafff 14:13:06,307 WARN - E908: using an invalid value as a String /usr/home/quickbuild/buildagent/workspace/root/neovim/pull-requests-automated/.deps/usr/bin/luajit: ./test/unit/helpers.lua:459: write() error: 32: Broken pipe 14:13:06,308 WARN - stack traceback: 14:13:06,308 WARN - [C]: in function 'throw' 14:13:06,308 WARN - ...quests-automated/.deps/usr/share/lua/5.1/busted/core.lua:149: in function 'error' 14:13:06,308 WARN - ...ts-automated/.deps/usr/share/lua/5.1/luassert/assert.lua:171: in function 'assert' 14:13:06,308 WARN - ./test/unit/helpers.lua:459: in function 'write' 14:13:06,308 WARN - ./test/unit/helpers.lua:626: in function 'hook' 14:13:06,308 WARN - ./test/unit/helpers.lua:574: in function <./test/unit/helpers.lua:557> 14:13:06,308 WARN - [C]: in function 'type' 14:13:06,308 WARN - ...d/.deps/usr/share/lua/5.1/busted/outputHandlers/base.lua:57: in function 'copyElement' 14:13:06,308 WARN - ...d/.deps/usr/share/lua/5.1/busted/outputHandlers/base.lua:66: in function 'format' 14:13:06,308 WARN - ...d/.deps/usr/share/lua/5.1/busted/outputHandlers/base.lua:172: in function 'fn' 14:13:06,308 WARN - ...-requests-automated/.deps/usr/share/lua/5.1/mediator.lua:103: in function 'publish' 14:13:06,308 WARN - ...quests-automated/.deps/usr/share/lua/5.1/busted/core.lua:201: in function 'safe' 14:13:06,308 WARN - ...quests-automated/.deps/usr/share/lua/5.1/busted/core.lua:312: in function 'execute' 14:13:06,308 WARN - ...sts-automated/.deps/usr/share/lua/5.1/busted/execute.lua:58: in function 'execute' 14:13:06,308 WARN - ...ests-automated/.deps/usr/share/lua/5.1/busted/runner.lua:197: in function <...ests-automated/.deps/usr/share/lua/5.1/busted/runner.lua:11> 14:13:06,308 WARN - ./.deps/usr/lib/luarocks/rocks/busted/2.0.0-1/bin/busted:3: in main chunk 14:13:06,308 WARN - [C]: at 0x004041a0 14:13:06,323 WARN - Terminated 14:13:06,325 INFO - Executing post-execute 
action... 14:13:06,526 INFO - Checking step execute condition... 14:13:06,526 INFO - Step execute condition satisfied, executing... 14:13:06,706 INFO - Executing pre-execute action... 14:13:06,706 INFO - Running step...
293 lines
9.4 KiB
Lua
293 lines
9.4 KiB
Lua
local helpers = require("test.unit.helpers")(after_each)
|
|
local itp = helpers.gen_itp(it)
|
|
|
|
local ffi = helpers.ffi
|
|
local eq = helpers.eq
|
|
|
|
local mbyte = helpers.cimport("./src/nvim/mbyte.h")
|
|
local charset = helpers.cimport('./src/nvim/charset.h')
|
|
|
|
describe('mbyte', function()
|
|
-- Skip this whole suite on the quickbuild CI. A single pending placeholder is
-- registered so the skip is visible in the report, and the describe body
-- then returns early so that none of the tests below get registered there.
-- Without the early return the placeholder alone would not prevent the
-- remaining tests from running.
if helpers.isCI('quickbuild') then
  pending("crashes on quickbuild", function() end)
  return
end
|
|
|
|
-- Array type that receives the composing characters of a decoded sequence
local intp = ffi.typeof('int[?]')

-- Allocate a fresh composing-character array for one decoder call.
-- The array is created with 1 as its single initializer; the tests in this
-- file rely on that value to recognise a slot the decoder did not write.
local function to_intp()
  -- how to get MAX_MCO from globals.h?
  -- Seven slots: the tests in this file treat MAX_MCO as six, plus one more.
  local slot_count, filler = 7, 1
  return intp(slot_count, filler)
end
|
|
|
|
-- Convert from bytes to string: `bytes` is a sequence of byte values and the
-- result is the Lua string made of exactly those bytes, in the same order.
local function to_string(bytes)
  local chars = {}
  for idx, byte in ipairs(bytes) do
    chars[idx] = string.char(byte)
  end
  return table.concat(chars)
end
|
|
|
|
-- The hook body is empty: no per-test setup is performed.
before_each(function() end)
|
|
|
|
itp('utf_ptr2char', function()
  -- Any single byte followed by NUL is expected to come back as that byte's
  -- own value.
  for byte = 0, 255 do
    local input = to_string({byte, 0})
    eq(byte, mbyte.utf_ptr2char(input))
  end

  -- Disabled checks: ill-formed byte sequences that should not be
  -- recognized as UTF-8.
  --   First byte:  0xc0 or 0xc1
  --   Second byte: 0x80 .. 0xbf
  --eq(0x00c0, mbyte.utf_ptr2char(to_string({0xc0, 0x80})))
  --eq(0x00c1, mbyte.utf_ptr2char(to_string({0xc1, 0xbf})))
  --
  -- Sequences with more than four bytes
end)
|
|
|
|
-- Round-trip every value in 0x0000 - 0xFFFF through utf_char2bytes and
-- utf_ptr2char, registering one test per 0x1000-sized chunk.
for n = 0, 0xF do
  local first = n * 0x1000
  local last = first + 0xFFF
  itp(string.format('utf_char2bytes for chars 0x%x - 0x%x', first, last), function()
    local char_p = ffi.typeof('char[?]')
    for c = first, last do
      -- Fresh four-byte buffer, created with 0 as its initializer
      local buf = char_p(4, 0)
      mbyte.utf_char2bytes(c, buf)
      -- Decoding the encoded bytes must give back the original value
      eq(c, mbyte.utf_ptr2char(buf))
      -- The by-value and by-pointer word-character checks must agree
      eq(charset.vim_iswordc(c), charset.vim_iswordp(buf))
    end
  end)
end
|
|
|
|
describe('utfc_ptr2char_len', function()
  -- Run utfc_ptr2char_len on `bytes` with length `len` and a fresh
  -- composing-character array, then verify the returned base character and
  -- the leading slots of the array against `composing` (slot 0 first).
  local function check(bytes, len, base, composing)
    local pcc = to_intp()
    eq(base, mbyte.utfc_ptr2char_len(to_string(bytes), pcc, len))
    for slot, expected in ipairs(composing) do
      eq(expected, pcc[slot - 1])
    end
  end

  itp('1-byte sequences', function()
    -- A single array is shared by all 256 calls
    local pcc = to_intp()
    for byte = 0, 255 do
      eq(byte, mbyte.utfc_ptr2char_len(to_string({byte}), pcc, 1))
      eq(0, pcc[0])
    end
  end)

  itp('2-byte sequences', function()
    -- No combining characters
    check({0x7f, 0x7f}, 2, 0x007f, {0})
    -- No combining characters
    check({0x7f, 0x80}, 2, 0x007f, {0})

    -- No UTF-8 sequence
    check({0xc2, 0x7f}, 2, 0x00c2, {0})
    -- One UTF-8 character
    check({0xc2, 0x80}, 2, 0x0080, {0})
    -- No UTF-8 sequence
    check({0xc2, 0xc0}, 2, 0x00c2, {0})
  end)

  itp('3-byte sequences', function()
    -- No second UTF-8 character
    check({0x7f, 0x80, 0x80}, 3, 0x007f, {0})
    -- No combining character
    check({0x7f, 0xc2, 0x80}, 3, 0x007f, {0})

    -- Combining character is U+0300
    check({0x7f, 0xcc, 0x80}, 3, 0x007f, {0x0300, 0x0000})

    -- No UTF-8 sequence
    check({0xc2, 0x7f, 0xcc}, 3, 0x00c2, {0})
    -- Incomplete combining character
    check({0xc2, 0x80, 0xcc}, 3, 0x0080, {0})

    -- One UTF-8 character
    check({0xe2, 0x83, 0x90}, 3, 0x20d0, {0})
  end)

  itp('4-byte sequences', function()
    -- No following combining character
    check({0x7f, 0x7f, 0xcc, 0x80}, 4, 0x007f, {0})
    -- No second UTF-8 character
    check({0x7f, 0xc2, 0xcc, 0x80}, 4, 0x007f, {0})

    -- Combining character U+0300
    check({0x7f, 0xcc, 0x80, 0xcc}, 4, 0x007f, {0x0300, 0x0000})

    -- No UTF-8 sequence
    check({0xc2, 0x7f, 0xcc, 0x80}, 4, 0x00c2, {0})
    -- No following UTF-8 character
    check({0xc2, 0x80, 0xcc, 0xcc}, 4, 0x0080, {0})
    -- Combining character U+0301
    check({0xc2, 0x80, 0xcc, 0x81}, 4, 0x0080, {0x0301, 0x0000})

    -- One UTF-8 character
    check({0xf4, 0x80, 0x80, 0x80}, 4, 0x100000, {0})
  end)

  itp('5+-byte sequences', function()
    -- No following combining character
    check({0x7f, 0x7f, 0xcc, 0x80, 0x80}, 5, 0x007f, {0})
    -- No second UTF-8 character
    check({0x7f, 0xc2, 0xcc, 0x80, 0x80}, 5, 0x007f, {0})

    -- Combining character U+0300
    -- NOTE(review): only four bytes are listed while the length is 5; the
    -- fifth byte is presumably the string's terminating NUL - confirm intent.
    check({0x7f, 0xcc, 0x80, 0xcc}, 5, 0x007f, {0x0300, 0x0000})

    -- Combining characters U+0300 and U+0301
    check({0x7f, 0xcc, 0x80, 0xcc, 0x81}, 5, 0x007f,
          {0x0300, 0x0301, 0x0000})
    -- Combining characters U+0300, U+0301, U+0302
    check({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82}, 7, 0x007f,
          {0x0300, 0x0301, 0x0302, 0x0000})
    -- Combining characters U+0300, U+0301, U+0302, U+0303
    check({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83}, 9, 0x007f,
          {0x0300, 0x0301, 0x0302, 0x0303, 0x0000})
    -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304
    check({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84},
          11, 0x007f,
          {0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0000})
    -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304,
    -- U+0305; the slot after them must keep its initial value 1
    check({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84,
           0xcc, 0x85},
          13, 0x007f,
          {0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 1})

    -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304,
    -- U+0305, U+0306, but only save six (= MAX_MCO).
    check({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84,
           0xcc, 0x85, 0xcc, 0x86},
          15, 0x007f,
          {0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0001})

    -- Only three following combining characters U+0300, U+0301, U+0302
    check({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xc2, 0x80, 0xcc, 0x84,
           0xcc, 0x85},
          13, 0x007f,
          {0x0300, 0x0301, 0x0302, 0x0000})

    -- No UTF-8 sequence
    check({0xc2, 0x7f, 0xcc, 0x80, 0x80}, 5, 0x00c2, {0})
    -- No following UTF-8 character
    check({0xc2, 0x80, 0xcc, 0xcc, 0x80}, 5, 0x0080, {0})
    -- Combining character U+0301
    check({0xc2, 0x80, 0xcc, 0x81, 0x7f}, 5, 0x0080, {0x0301, 0x0000})
    -- Combining character U+0301
    check({0xc2, 0x80, 0xcc, 0x81, 0xcc}, 5, 0x0080, {0x0301, 0x0000})

    -- One UTF-8 character
    check({0xf4, 0x80, 0x80, 0x80, 0x7f}, 5, 0x100000, {0})

    -- One UTF-8 character
    check({0xf4, 0x80, 0x80, 0x80, 0x80}, 5, 0x100000, {0})
    -- One UTF-8 character
    check({0xf4, 0x80, 0x80, 0x80, 0xcc}, 5, 0x100000, {0})

    -- Combining characters U+1AB0 and U+0301
    check({0xf4, 0x80, 0x80, 0x80, 0xe1, 0xaa, 0xb0, 0xcc, 0x81}, 9, 0x100000,
          {0x1ab0, 0x0301, 0x0000})
  end)
end)
|
|
|
|
end)
|