From 7ed4837298414488e238763953d40a4c7f2d7976 Mon Sep 17 00:00:00 2001 From: Jan Edmund Lazo Date: Thu, 20 Feb 2020 22:53:38 -0500 Subject: [PATCH] vim-patch:8.1.1122: char2nr() does not handle composing characters Problem: char2nr() does not handle composing characters. Solution: Add str2list() and list2str(). (Ozaki Kiichi, closes vim/vim#4190) https://github.com/vim/vim/commit/9d40128afd7fcd038ff6532722b55b1a8c189ce8 'utf8' optional param is noop unlike Vim. --- runtime/doc/eval.txt | 29 +++++++++++++++++++++++ runtime/doc/usr_41.txt | 6 +++-- src/nvim/eval.lua | 2 ++ src/nvim/eval/funcs.c | 41 ++++++++++++++++++++++++++++++++ src/nvim/testdir/test_utf8.vim | 43 ++++++++++++++++++++++++++++++++++ 5 files changed, 119 insertions(+), 2 deletions(-) diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt index b8739ff9b3..113a92d5e4 100644 --- a/runtime/doc/eval.txt +++ b/runtime/doc/eval.txt @@ -2240,6 +2240,7 @@ libcallnr({lib}, {func}, {arg}) Number idem, but return a Number line({expr}) Number line nr of cursor, last line or mark line2byte({lnum}) Number byte count of line {lnum} lispindent({lnum}) Number Lisp indent for line {lnum} +list2str({list} [, {utf8}]) String turn numbers in {list} into a String localtime() Number current time log({expr}) Float natural logarithm (base e) of {expr} log10({expr}) Float logarithm of Float {expr} to base 10 @@ -2394,6 +2395,8 @@ sqrt({expr}) Float square root of {expr} stdioopen({dict}) Number open stdio in a headless instance. stdpath({what}) String/List returns the standard path(s) for {what} str2float({expr}) Float convert String to Float +str2list({expr} [, {utf8}]) List convert each character of {expr} to + ASCII/UTF8 value str2nr({expr} [, {base}]) Number convert String to Number strchars({expr} [, {skipcc}]) Number character length of the String {expr} strcharpart({str}, {start} [, {len}]) @@ -5687,6 +5690,20 @@ lispindent({lnum}) *lispindent()* When {lnum} is invalid or Vim was not compiled the |+lispindent| feature, -1 is returned. +list2str({list} [, {utf8}]) *list2str()* + Convert each number in {list} to a character string can + concatenate them all. Examples: > + list2str([32]) returns " " + list2str([65, 66, 67]) returns "ABC" +< The same can be done (slowly) with: > + join(map(list, {nr, val -> nr2char(val)}), '') +< |str2list()| does the opposite. + + When {utf8} is omitted or zero, the current 'encoding' is used. + With {utf8} is 1, always return utf-8 characters. + With utf-8 composing characters work as expected: > + list2str([97, 769]) returns "á" +< localtime() *localtime()* Return the current time, measured as seconds since 1st Jan 1970. See also |strftime()| and |getftime()|. @@ -8197,6 +8214,18 @@ str2float({expr}) *str2float()* |substitute()|: > let f = str2float(substitute(text, ',', '', 'g')) +str2list({expr} [, {utf8}]) *str2list()* + Return a list containing the number values which represent + each character in String {expr}. Examples: > + str2list(" ") returns [32] + str2list("ABC") returns [65, 66, 67] +< |list2str()| does the opposite. + + When {utf8} is omitted or zero, the current 'encoding' is used. + With {utf8} set to 1, always treat the String as utf-8 + characters. With utf-8 composing characters are handled + properly: > + str2list("á") returns [97, 769] str2nr({expr} [, {base}]) *str2nr()* Convert string {expr} to a number. diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt index c770950a96..5f9253cbd0 100644 --- a/runtime/doc/usr_41.txt +++ b/runtime/doc/usr_41.txt @@ -578,8 +578,10 @@ used for. You can find an alphabetical list here: |functions|. Use CTRL-] on the function name to jump to detailed help on it. String manipulation: *string-functions* - nr2char() get a character by its ASCII value - char2nr() get ASCII value of a character + nr2char() get a character by its number value + list2str() get a character string from a list of numbers + char2nr() get number value of a character + str2list() get list of numbers from a string str2nr() convert a string to a Number str2float() convert a string to a Float printf() format a string according to % items diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua index 17d9cc56aa..4654f8cef6 100644 --- a/src/nvim/eval.lua +++ b/src/nvim/eval.lua @@ -214,6 +214,7 @@ return { line={args=1}, line2byte={args=1}, lispindent={args=1}, + list2str={args={1, 2}}, localtime={}, log={args=1, func="float_op_wrapper", data="&log"}, log10={args=1, func="float_op_wrapper", data="&log10"}, @@ -321,6 +322,7 @@ return { sqrt={args=1, func="float_op_wrapper", data="&sqrt"}, stdpath={args=1}, str2float={args=1}, + str2list={args={1, 2}}, str2nr={args={1, 2}}, strcharpart={args={2, 3}}, strchars={args={1,2}}, diff --git a/src/nvim/eval/funcs.c b/src/nvim/eval/funcs.c index 36782e4223..bd57cec794 100644 --- a/src/nvim/eval/funcs.c +++ b/src/nvim/eval/funcs.c @@ -5246,6 +5246,36 @@ static void f_lispindent(typval_T *argvars, typval_T *rettv, FunPtr fptr) } } +// "list2str()" function +static void f_list2str(typval_T *argvars, typval_T *rettv, FunPtr fptr) +{ + garray_T ga; + + rettv->v_type = VAR_STRING; + rettv->vval.v_string = NULL; + if (argvars[0].v_type != VAR_LIST) { + EMSG(_(e_invarg)); + return; + } + + list_T *const l = argvars[0].vval.v_list; + if (l == NULL) { + return; // empty list results in empty string + } + + ga_init(&ga, 1, 80); + char_u buf[MB_MAXBYTES + 1]; + + TV_LIST_ITER_CONST(l, li, { + buf[utf_char2bytes(tv_get_number(TV_LIST_ITEM_TV(li)), buf)] = NUL; + ga_concat(&ga, buf); + }); + ga_append(&ga, NUL); + + rettv->v_type = VAR_STRING; + rettv->vval.v_string = ga.ga_data; +} + /* * "localtime()" function */ @@ -9353,6 +9383,17 @@ static void f_str2float(typval_T *argvars, typval_T *rettv, FunPtr fptr) rettv->v_type = VAR_FLOAT; } +// "str2list()" function +static void f_str2list(typval_T *argvars, typval_T *rettv, FunPtr fptr) +{ + tv_list_alloc_ret(rettv, kListLenUnknown); + const char_u *p = (const char_u *)tv_get_string(&argvars[0]); + + for (; *p != NUL; p += utf_ptr2len(p)) { + tv_list_append_number(rettv->vval.v_list, utf_ptr2char(p)); + } +} + // "str2nr()" function static void f_str2nr(typval_T *argvars, typval_T *rettv, FunPtr fptr) { diff --git a/src/nvim/testdir/test_utf8.vim b/src/nvim/testdir/test_utf8.vim index b1f33f56dd..1b4ce4c4af 100644 --- a/src/nvim/testdir/test_utf8.vim +++ b/src/nvim/testdir/test_utf8.vim @@ -60,3 +60,46 @@ func Test_getvcol() call assert_equal(2, virtcol("'[")) call assert_equal(2, virtcol("']")) endfunc + +func Test_list2str_str2list_utf8() + " One Unicode codepoint + let s = "\u3042\u3044" + let l = [0x3042, 0x3044] + call assert_equal(l, str2list(s, 1)) + call assert_equal(s, list2str(l, 1)) + if &enc ==# 'utf-8' + call assert_equal(str2list(s), str2list(s, 1)) + call assert_equal(list2str(l), list2str(l, 1)) + endif + + " With composing characters + let s = "\u304b\u3099\u3044" + let l = [0x304b, 0x3099, 0x3044] + call assert_equal(l, str2list(s, 1)) + call assert_equal(s, list2str(l, 1)) + if &enc ==# 'utf-8' + call assert_equal(str2list(s), str2list(s, 1)) + call assert_equal(list2str(l), list2str(l, 1)) + endif + + " Null list is the same as an empty list + call assert_equal('', list2str([])) + " call assert_equal('', list2str(test_null_list())) +endfunc + +func Test_list2str_str2list_latin1() + " When 'encoding' is not multi-byte can still get utf-8 string. + " But we need to create the utf-8 string while 'encoding' is utf-8. + let s = "\u3042\u3044" + let l = [0x3042, 0x3044] + + let save_encoding = &encoding + " set encoding=latin1 + + let lres = str2list(s, 1) + let sres = list2str(l, 1) + + let &encoding = save_encoding + call assert_equal(l, lres) + call assert_equal(s, sres) +endfunc