diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt
index d0b28ce875..719953bc22 100644
--- a/runtime/doc/builtin.txt
+++ b/runtime/doc/builtin.txt
@@ -77,6 +77,7 @@ changenr()			Number	current change number
 chanclose({id} [, {stream}])	Number	Closes a channel or one of its streams
 chansend({id}, {data})		Number	Writes {data} to channel
 char2nr({expr} [, {utf8}])	Number	ASCII/UTF-8 value of first char in {expr}
+charclass({string})		Number	character class of {string}
 charcol({expr})			Number	column number of cursor or mark
 charidx({string}, {idx} [, {countcc}])
 				Number	char index of byte {idx} in {string}
@@ -1064,7 +1065,19 @@ char2nr({string} [, {utf8}])					*char2nr()*
 
 		Can also be used as a |method|: >
 			GetChar()->char2nr()
-<
+
+charclass({string})					*charclass()*
+		Return the character class of the first character in {string}.
+		The character class is one of:
+			0	blank
+			1	punctuation
+			2	word character
+			3	emoji
+			other	specific Unicode class
+		The class is used in patterns and word motions.
+		Returns 0 if {string} is empty or not a |String|.
+
+
 							*charcol()*
 charcol({expr})	Same as |col()| but returns the character index of the column
 		position given with {expr} instead of the byte position.
diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt
index 235925c033..0c907bfb68 100644
--- a/runtime/doc/usr_41.txt
+++ b/runtime/doc/usr_41.txt
@@ -606,6 +606,7 @@ String manipulation:					*string-functions*
 	strtrans()		translate a string to make it printable
 	tolower()		turn a string to lowercase
 	toupper()		turn a string to uppercase
+	charclass()		class of a character
 	match()			position where a pattern matches in a string
 	matchend()		position where a pattern match ends in a string
 	matchfuzzy()		fuzzy matches a string in a list of strings
diff --git a/runtime/tools/emoji_list.vim b/runtime/tools/emoji_list.vim
new file mode 100644
index 0000000000..c335b8c88f
--- /dev/null
+++ b/runtime/tools/emoji_list.vim
@@ -0,0 +1,21 @@
+" Script to fill the window with emoji characters, one per line.
+" Source this script: :source %
+
+if &modified
+  new
+else
+  enew
+endif
+
+lua << EOF
+  local lnum = 1
+  for c = 0x100, 0x1ffff do
+    local cs = vim.fn.nr2char(c)
+    if vim.fn.charclass(cs) == 3 then
+      vim.fn.setline(lnum, '|' .. cs .. '| ' .. vim.fn.strwidth(cs))
+      lnum = lnum + 1
+    end
+  end
+EOF
+
+set nomodified
diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua
index a2272f0c98..e4e9b34ec6 100644
--- a/src/nvim/eval.lua
+++ b/src/nvim/eval.lua
@@ -72,6 +72,7 @@ return {
     chanclose={args={1, 2}},
     chansend={args=2},
     char2nr={args={1, 2}, base=1},
+    charclass={args=1, base=1},
     charcol={args=1, base=1},
     charidx={args={2, 3}, base=1},
     chdir={args=1, base=1},
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index e4d2d35c1b..378a08131d 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -1182,6 +1182,11 @@ int utf_class_tab(const int c, const uint64_t *const chartab)
     return 1;  // punctuation
   }
 
+  // emoji
+  if (intable(emoji_all, ARRAY_SIZE(emoji_all), c)) {
+    return 3;
+  }
+
   // binary search in table
   while (top >= bot) {
     mid = (bot + top) / 2;
@@ -1194,11 +1199,6 @@ int utf_class_tab(const int c, const uint64_t *const chartab)
     }
   }
 
-  // emoji
-  if (intable(emoji_all, ARRAY_SIZE(emoji_all), c)) {
-    return 3;
-  }
-
   // most other characters are "word" characters
   return 2;
 }
@@ -2858,3 +2858,22 @@ void f_setcellwidths(typval_T *argvars, typval_T *rettv, FunPtr fptr)
   xfree(cw_table_save);
   redraw_all_later(NOT_VALID);
 }
+
+/// "charclass(string)" function
+///
+/// Stores the character class of the first character of the String argument
+/// in the return value. An empty (or NULL) String yields 0, the "blank" class.
+/// A non-String argument reports e_stringreq and leaves the return value as is.
+void f_charclass(typval_T *argvars, typval_T *rettv, FunPtr fptr)
+{
+  if (argvars[0].v_type != VAR_STRING) {
+    emsg(_(e_stringreq));
+    return;
+  }
+  if (argvars[0].vval.v_string == NULL || *argvars[0].vval.v_string == NUL) {
+    // No first character to classify; an empty String is valid input.
+    rettv->vval.v_number = 0;
+    return;
+  }
+  rettv->vval.v_number = mb_get_class((const char_u *)argvars[0].vval.v_string);
+}
diff --git a/src/nvim/testdir/test_functions.vim b/src/nvim/testdir/test_functions.vim
index c11e7b4fea..e0e0c1ca38 100644
--- a/src/nvim/testdir/test_functions.vim
+++ b/src/nvim/testdir/test_functions.vim
@@ -1769,6 +1769,15 @@ func Test_char2nr()
   call assert_equal(12354, char2nr('あ', 1))
 endfunc
 
+func Test_charclass()
+  call assert_equal(0, charclass(' '))
+  call assert_equal(1, charclass('.'))
+  call assert_equal(2, charclass('x'))
+  call assert_equal(3, charclass("\u203c"))
+  " An empty string is a valid String; it must not raise "String required".
+  call assert_equal(0, charclass(''))
+endfunc
+
 func Test_eventhandler()
   call assert_equal(0, eventhandler())
 endfunc