mirror of
https://github.com/neovim/neovim.git
synced 2024-12-19 18:55:14 -07:00
12b062b2c8
Used sed -r -i -e '/ helpers =/ s/$/\nlocal itp = helpers.gen_itp(it)/; s/^(\s*)it\(/\1itp(/' test/unit/**/*_spec.lua to alter all tests. Locally they all run fine now. Reasoning: 1. General: state from one test should not affect other tests. 2. Local: the Travis build is failing with something that may be output from the garbage collector. This should prevent the state of the garbage collector from interfering as well.
278 lines
8.8 KiB
Lua
278 lines
8.8 KiB
Lua
local helpers = require("test.unit.helpers")
|
|
local itp = helpers.gen_itp(it)
|
|
|
|
local ffi = helpers.ffi
|
|
local eq = helpers.eq
|
|
|
|
local mbyte = helpers.cimport("./src/nvim/mbyte.h")
|
|
|
|
describe('mbyte', function()
|
|
|
|
-- C array type used to receive composing characters.
local intp = ffi.typeof('int[?]')

-- Allocate a fresh scratch array to pass as the composing-character
-- output argument of utfc_ptr2char_len.
-- NOTE(review): the single initializer 1 presumably fills every element,
-- which lets the tests tell untouched slots (still 1) from written ones;
-- confirm against the LuaJIT FFI initializer rules.
local function to_intp()
  -- how to get MAX_MCO from globals.h?
  local slot_count, fill_value = 7, 1
  return intp(slot_count, fill_value)
end
|
|
|
|
-- Build a Lua string from a sequence of byte values, one character per
-- entry, in order. An empty sequence yields the empty string.
local function to_string(bytes)
  local count = #bytes
  local chars = {}
  local idx = 1
  while idx <= count do
    chars[idx] = string.char(bytes[idx])
    idx = idx + 1
  end
  return table.concat(chars)
end
|
|
|
|
-- The hook body is empty, so no per-test setup is performed.
before_each(function() end)
|
|
|
|
itp('utf_ptr2char', function()
  -- For strings with length 1 the first byte is returned.
  -- Every input is the byte under test followed by a zero byte.
  for byte = 0, 255 do
    local input = to_string({byte, 0})
    eq(byte, mbyte.utf_ptr2char(input))
  end

  -- Some ill formed byte sequences that should not be recognized as UTF-8
  --   First byte: 0xc0 or 0xc1
  --   Second byte: 0x80 .. 0xbf
  -- (these two assertions are currently disabled)
  --eq(0x00c0, mbyte.utf_ptr2char(to_string({0xc0, 0x80})))
  --eq(0x00c1, mbyte.utf_ptr2char(to_string({0xc1, 0xbf})))
  --
  -- Sequences with more than four bytes
end)
|
|
|
|
|
|
describe('utfc_ptr2char_len', function()
|
|
|
|
itp('1-byte sequences', function()
  -- One scratch array is shared by all 256 calls.
  local pcc = to_intp()
  local byte = 0
  while byte <= 255 do
    -- With a length of 1 the byte itself is expected back and no
    -- composing character may be reported.
    local input = to_string({byte})
    eq(byte, mbyte.utfc_ptr2char_len(input, pcc, 1))
    eq(0, pcc[0])
    byte = byte + 1
  end
end)
|
|
|
|
itp('2-byte sequences', function()
  -- Run utfc_ptr2char_len on `bytes` (length 2) with a fresh scratch
  -- array, then verify the returned character and that no composing
  -- character was stored.
  local function check(bytes, base)
    local pcc = to_intp()
    eq(base, mbyte.utfc_ptr2char_len(to_string(bytes), pcc, 2))
    eq(0, pcc[0])
  end

  -- No combining characters
  check({0x7f, 0x7f}, 0x007f)
  -- No combining characters
  check({0x7f, 0x80}, 0x007f)

  -- No UTF-8 sequence
  check({0xc2, 0x7f}, 0x00c2)
  -- One UTF-8 character
  check({0xc2, 0x80}, 0x0080)
  -- No UTF-8 sequence
  check({0xc2, 0xc0}, 0x00c2)
end)
|
|
|
|
itp('3-byte sequences', function()
  -- Run utfc_ptr2char_len on `bytes` (length 3) with a fresh scratch
  -- array. `base` is the expected return value; `composing` lists the
  -- expected contents of the scratch array starting at index 0.
  local function check(bytes, base, composing)
    local pcc = to_intp()
    eq(base, mbyte.utfc_ptr2char_len(to_string(bytes), pcc, 3))
    for pos, expected in ipairs(composing) do
      eq(expected, pcc[pos - 1])
    end
  end

  -- No second UTF-8 character
  check({0x7f, 0x80, 0x80}, 0x007f, {0})
  -- No combining character
  check({0x7f, 0xc2, 0x80}, 0x007f, {0})

  -- Combining character is U+0300
  check({0x7f, 0xcc, 0x80}, 0x007f, {0x0300, 0x0000})

  -- No UTF-8 sequence
  check({0xc2, 0x7f, 0xcc}, 0x00c2, {0})
  -- Incomplete combining character
  check({0xc2, 0x80, 0xcc}, 0x0080, {0})

  -- One UTF-8 character
  check({0xe2, 0x83, 0x90}, 0x20d0, {0})
end)
|
|
|
|
itp('4-byte sequences', function()
  -- Run utfc_ptr2char_len on `bytes` (length 4) with a fresh scratch
  -- array. `base` is the expected return value; `composing` lists the
  -- expected contents of the scratch array starting at index 0.
  local function check(bytes, base, composing)
    local pcc = to_intp()
    eq(base, mbyte.utfc_ptr2char_len(to_string(bytes), pcc, 4))
    for pos, expected in ipairs(composing) do
      eq(expected, pcc[pos - 1])
    end
  end

  -- No following combining character
  check({0x7f, 0x7f, 0xcc, 0x80}, 0x007f, {0})
  -- No second UTF-8 character
  check({0x7f, 0xc2, 0xcc, 0x80}, 0x007f, {0})

  -- Combining character U+0300
  check({0x7f, 0xcc, 0x80, 0xcc}, 0x007f, {0x0300, 0x0000})

  -- No UTF-8 sequence
  check({0xc2, 0x7f, 0xcc, 0x80}, 0x00c2, {0})
  -- No following UTF-8 character
  check({0xc2, 0x80, 0xcc, 0xcc}, 0x0080, {0})
  -- Combining character U+0301
  check({0xc2, 0x80, 0xcc, 0x81}, 0x0080, {0x0301, 0x0000})

  -- One UTF-8 character
  check({0xf4, 0x80, 0x80, 0x80}, 0x100000, {0})
end)
|
|
|
|
itp('5+-byte sequences', function()
  -- Run utfc_ptr2char_len on `bytes` with the given `len` and a fresh
  -- scratch array. `base` is the expected return value; `composing` lists
  -- the expected contents of the scratch array starting at index 0.
  local function check(bytes, len, base, composing)
    local pcc = to_intp()
    eq(base, mbyte.utfc_ptr2char_len(to_string(bytes), pcc, len))
    for pos, expected in ipairs(composing) do
      eq(expected, pcc[pos - 1])
    end
  end

  -- No following combining character
  check({0x7f, 0x7f, 0xcc, 0x80, 0x80}, 5, 0x007f, {0})
  -- No second UTF-8 character
  check({0x7f, 0xc2, 0xcc, 0x80, 0x80}, 5, 0x007f, {0})

  -- Combining character U+0300
  -- NOTE(review): only four bytes are listed but the length passed is 5;
  -- presumably the fifth byte read is the string's terminating NUL. Confirm.
  check({0x7f, 0xcc, 0x80, 0xcc}, 5, 0x007f, {0x0300, 0x0000})

  -- Combining characters U+0300 and U+0301
  check({0x7f, 0xcc, 0x80, 0xcc, 0x81}, 5, 0x007f,
        {0x0300, 0x0301, 0x0000})
  -- Combining characters U+0300, U+0301, U+0302
  check({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82}, 7, 0x007f,
        {0x0300, 0x0301, 0x0302, 0x0000})
  -- Combining characters U+0300, U+0301, U+0302, U+0303
  check({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83}, 9, 0x007f,
        {0x0300, 0x0301, 0x0302, 0x0303, 0x0000})
  -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304
  check({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84},
        11, 0x007f,
        {0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0000})
  -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304,
  -- U+0305; index 6 is expected to still hold 1.
  check({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84,
         0xcc, 0x85},
        13, 0x007f,
        {0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 1})

  -- Combining characters U+0300, U+0301, U+0302, U+0303, U+0304,
  -- U+0305, U+0306, but only save six (= MAX_MCO).
  check({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xcc, 0x83, 0xcc, 0x84,
         0xcc, 0x85, 0xcc, 0x86},
        15, 0x007f,
        {0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0001})

  -- Only three following combining characters U+0300, U+0301, U+0302
  check({0x7f, 0xcc, 0x80, 0xcc, 0x81, 0xcc, 0x82, 0xc2, 0x80, 0xcc, 0x84,
         0xcc, 0x85},
        13, 0x007f,
        {0x0300, 0x0301, 0x0302, 0x0000})

  -- No UTF-8 sequence
  check({0xc2, 0x7f, 0xcc, 0x80, 0x80}, 5, 0x00c2, {0})
  -- No following UTF-8 character
  check({0xc2, 0x80, 0xcc, 0xcc, 0x80}, 5, 0x0080, {0})
  -- Combining character U+0301
  check({0xc2, 0x80, 0xcc, 0x81, 0x7f}, 5, 0x0080, {0x0301, 0x0000})
  -- Combining character U+0301
  check({0xc2, 0x80, 0xcc, 0x81, 0xcc}, 5, 0x0080, {0x0301, 0x0000})

  -- One UTF-8 character
  check({0xf4, 0x80, 0x80, 0x80, 0x7f}, 5, 0x100000, {0})

  -- One UTF-8 character
  check({0xf4, 0x80, 0x80, 0x80, 0x80}, 5, 0x100000, {0})
  -- One UTF-8 character
  check({0xf4, 0x80, 0x80, 0x80, 0xcc}, 5, 0x100000, {0})

  -- Combining characters U+1AB0 and U+0301
  check({0xf4, 0x80, 0x80, 0x80, 0xe1, 0xaa, 0xb0, 0xcc, 0x81}, 9, 0x100000,
        {0x1ab0, 0x0301, 0x0000})
end)
|
|
|
|
end)
|
|
|
|
end)
|