diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index c56ab70774..447f1c89e2 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -396,6 +396,7 @@ setbufline({expr}, {lnum}, {text}) Number set line {lnum} to {text} in buffer {expr} setbufvar({buf}, {varname}, {val}) set {varname} in buffer {buf} to {val} +setcellwidths({list}) none set character cell width overrides setcharpos({expr}, {list}) Number set the {expr} position to {list} setcharsearch({dict}) Dict set character search from {dict} setcmdpos({pos}) Number set cursor position in command-line @@ -6817,6 +6818,29 @@ setbufvar({buf}, {varname}, {val}) *setbufvar()* third argument: > GetValue()->setbufvar(buf, varname) + +setcellwidths({list}) *setcellwidths()* + Specify overrides for cell widths of character ranges. This + tells Vim how wide characters are, counted in screen cells. + This overrides 'ambiwidth'. Example: > + call setcellwidths([[0x111, 0x111, 1], + \ [0x2194, 0x2199, 2]]) + +< *E1109* *E1110* *E1111* *E1112* *E1113* *E1114* + The {list} argument is a list of lists, each with three + numbers. These three numbers are [low, high, width]. "low" + and "high" can be the same, in which case this refers to one + character. Otherwise it is the range of characters from "low" + to "high" (inclusive). "width" is either 1 or 2, indicating + the character width in screen cells. + An error is given if the argument is invalid, also when a + range overlaps with another. + Only characters with value 0x100 and higher can be used. + + To clear the overrides pass an empty list: > + call setcellwidths([]) + + setcharpos({expr}, {list}) *setcharpos()* Same as |setpos()| but uses the specified column number as the character index instead of the byte index in the line. 
diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt
index 9d03397821..28922e9c7f 100644
--- a/runtime/doc/options.txt
+++ b/runtime/doc/options.txt
@@ -591,7 +591,10 @@ A jump table for the options with a short description can be found at |Q_op|.
 	"double":	Use twice the width of ASCII characters.
 							*E834* *E835*
 	The value "double" cannot be used if 'listchars' or 'fillchars'
 	contains a character that would be double width.
+
+	The values are overruled for characters specified with
+	|setcellwidths()|.
 
 	There are a number of CJK fonts for which the width of glyphs for
 	those characters are solely based on how many octets they take in
diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt
index 008b9b4e58..bc2f7f077b 100644
--- a/runtime/doc/usr_41.txt
+++ b/runtime/doc/usr_41.txt
@@ -619,6 +619,7 @@ String manipulation: *string-functions*
 	strchars()		length of a string in characters
 	strwidth()		size of string when displayed
 	strdisplaywidth()	size of string when displayed, deals with tabs
+	setcellwidths()		set character cell width overrides
 	substitute()		substitute a pattern match with a string
 	submatch()		get a specific match in ":s" and substitute()
 	strpart()		get part of a string using byte index
diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua
index 6d8776d08b..a2272f0c98 100644
--- a/src/nvim/eval.lua
+++ b/src/nvim/eval.lua
@@ -327,6 +327,7 @@ return {
     serverstop={args=1},
     setbufline={args=3, base=3},
     setbufvar={args=3, base=3},
+    setcellwidths={args=1, base=1},
     setcharpos={args=2, base=2},
     setcharsearch={args=1, base=1},
     setcmdpos={args=1, base=1},
diff --git a/src/nvim/generators/gen_unicode_tables.lua b/src/nvim/generators/gen_unicode_tables.lua
index aa96c97bc1..36553f4649 100644
--- a/src/nvim/generators/gen_unicode_tables.lua
+++ b/src/nvim/generators/gen_unicode_tables.lua
@@ -12,8 +12,8 @@
 --    2 then interval applies only to first, third, fifth, … character in range.
 --    Fourth value is number that should be added to the codepoint to yield
 --    folded/lower/upper codepoint.
--- 4. emoji_width and emoji_all tables: sorted lists of non-overlapping closed
---    intervals of Emoji characters. emoji_width contains all the characters
+-- 4. emoji_wide and emoji_all tables: sorted lists of non-overlapping closed
+--    intervals of Emoji characters. emoji_wide contains all the characters
 --    which don't have ambiguous or double width, and emoji_all has all Emojis.
 if arg[1] == '--help' then
   print('Usage:')
@@ -288,7 +288,7 @@ local build_emoji_table = function(ut_fp, emojiprops, doublewidth, ambiwidth)
   end
   ut_fp:write('};\n')
 
-  ut_fp:write('static const struct interval emoji_width[] = {\n')
+  ut_fp:write('static const struct interval emoji_wide[] = {\n')
   for _, p in ipairs(emojiwidth) do
     ut_fp:write(make_range(p[1], p[2]))
   end
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index 223b4d6845..66262ebfad 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -74,6 +74,19 @@ struct interval {
 # include "unicode_tables.generated.h"
 #endif
 
+static char e_list_item_nr_is_not_list[]
+  = N_("E1109: List item %d is not a List");
+static char e_list_item_nr_does_not_contain_3_numbers[]
+  = N_("E1110: List item %d does not contain 3 numbers");
+static char e_list_item_nr_range_invalid[]
+  = N_("E1111: List item %d range invalid");
+static char e_list_item_nr_cell_width_invalid[]
+  = N_("E1112: List item %d cell width invalid");
+static char e_overlapping_ranges_for_nr[]
+  = N_("E1113: Overlapping ranges for 0x%lx");
+static char e_only_values_of_0x100_and_higher_supported[]
+  = N_("E1114: Only values of 0x100 and higher supported");
+
 // To speed up BYTELEN(); keep a lookup table to quickly get the length in
 // bytes of a UTF-8 character from the first byte of a UTF-8 string. Bytes
 // which are illegal when used as the first byte have a 1. The NUL byte has
@@ -472,13 +485,18 @@ static bool intable(const struct interval *table, size_t n_items, int c)
 int utf_char2cells(int c)
 {
   if (c >= 0x100) {
+    int n = cw_value(c);
+    if (n != 0) {
+      return n;
+    }
+
     if (!utf_printable(c)) {
       return 6;  // unprintable, displays
     }
     if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) {
       return 2;
     }
-    if (p_emoji && intable(emoji_width, ARRAY_SIZE(emoji_width), c)) {
+    if (p_emoji && intable(emoji_wide, ARRAY_SIZE(emoji_wide), c)) {
       return 2;
     }
   } else if (c >= 0x80 && !vim_isprintc(c)) {
@@ -2678,3 +2696,160 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp
 
   return retval;
 }
+
+/// Table set by setcellwidths().
+typedef struct {
+  long first;
+  long last;
+  char width;
+} cw_interval_T;
+
+static cw_interval_T *cw_table = NULL;
+static size_t cw_table_size = 0;
+
+/// Return the value of the cellwidth table for the character `c`.
+///
+/// @param c The source character.
+/// @return 1 or 2 when `c` is in the cellwidth table, 0 if not.
+static int cw_value(int c)
+{
+  if (cw_table == NULL) {
+    return 0;
+  }
+
+  // first quick check for Latin1 etc. characters
+  if (c < cw_table[0].first) {
+    return 0;
+  }
+
+  // binary search in table
+  int bot = 0;
+  int top = (int)cw_table_size - 1;
+  while (top >= bot) {
+    int mid = (bot + top) / 2;
+    if (cw_table[mid].last < c) {
+      bot = mid + 1;
+    } else if (cw_table[mid].first > c) {
+      top = mid - 1;
+    } else {
+      return cw_table[mid].width;
+    }
+  }
+  return 0;
+}
+
+/// qsort() comparator that orders inner lists by their first number.
+///
+/// qsort() hands over pointers to the array elements, so each argument points
+/// to a `const list_T *` entry of the array being sorted.
+static int tv_nr_compare(const void *a1, const void *a2)
+{
+  const list_T *const l1 = *(const list_T *const *)a1;
+  const list_T *const l2 = *(const list_T *const *)a2;
+  const varnumber_T n1 = TV_LIST_ITEM_TV(tv_list_first(l1))->vval.v_number;
+  const varnumber_T n2 = TV_LIST_ITEM_TV(tv_list_first(l2))->vval.v_number;
+
+  // Compare instead of subtracting: the difference may not fit in an int.
+  return n1 == n2 ? 0 : n1 > n2 ? 1 : -1;
+}
+
+/// "setcellwidths()" function
+void f_setcellwidths(typval_T *argvars, typval_T *rettv, FunPtr fptr)
+{
+  if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) {
+    emsg(_(e_listreq));
+    return;
+  }
+  const list_T *const l = argvars[0].vval.v_list;
+  const int len = tv_list_len(l);
+  if (len == 0) {
+    // Clearing the table.
+    xfree(cw_table);
+    cw_table = NULL;
+    cw_table_size = 0;
+    return;
+  }
+
+  // The inner lists, in argument order until sorted below.
+  const list_T **ptrs = xmalloc(sizeof(const list_T *) * (size_t)len);
+
+  // Check that all entries are a list with three numbers, the range is
+  // valid and the cell width is valid.
+  int item = 0;
+  TV_LIST_ITER_CONST(l, li, {
+    const typval_T *const li_tv = TV_LIST_ITEM_TV(li);
+
+    if (li_tv->v_type != VAR_LIST || li_tv->vval.v_list == NULL) {
+      semsg(_(e_list_item_nr_is_not_list), item);
+      xfree(ptrs);
+      return;
+    }
+
+    const list_T *const li_l = li_tv->vval.v_list;
+    const listitem_T *lili = tv_list_first(li_l);
+    int i = 0;
+    varnumber_T n1 = 0;
+    for (; lili != NULL; lili = TV_LIST_ITEM_NEXT(li_l, lili), i++) {
+      const typval_T *const lili_tv = TV_LIST_ITEM_TV(lili);
+      if (lili_tv->v_type != VAR_NUMBER) {
+        break;
+      }
+      if (i == 0) {
+        n1 = lili_tv->vval.v_number;
+        if (n1 < 0x100) {
+          emsg(_(e_only_values_of_0x100_and_higher_supported));
+          xfree(ptrs);
+          return;
+        }
+      } else if (i == 1 && lili_tv->vval.v_number < n1) {
+        semsg(_(e_list_item_nr_range_invalid), item);
+        xfree(ptrs);
+        return;
+      } else if (i == 2 && (lili_tv->vval.v_number < 1 || lili_tv->vval.v_number > 2)) {
+        semsg(_(e_list_item_nr_cell_width_invalid), item);
+        xfree(ptrs);
+        return;
+      }
+    }
+
+    // A non-number stops the loop early and leaves `lili` non-NULL, so this
+    // also rejects three numbers followed by something else.
+    if (i != 3 || lili != NULL) {
+      semsg(_(e_list_item_nr_does_not_contain_3_numbers), item);
+      xfree(ptrs);
+      return;
+    }
+
+    // Store the inner list itself: `lili` has walked off its end by now.
+    ptrs[item++] = li_l;
+  });
+
+  // Sort the list on the first number.
+  qsort((void *)ptrs, (size_t)len, sizeof(const list_T *), tv_nr_compare);
+
+  cw_interval_T *table = xmalloc(sizeof(cw_interval_T) * (size_t)len);
+
+  // Store the items in the new table, walking the sorted array so that the
+  // overlap check and the binary search in cw_value() see ascending ranges.
+  for (item = 0; item < len; item++) {
+    const list_T *const li_l = ptrs[item];
+    const listitem_T *lili = tv_list_first(li_l);
+    const varnumber_T n1 = TV_LIST_ITEM_TV(lili)->vval.v_number;
+    if (item > 0 && n1 <= table[item - 1].last) {
+      semsg(_(e_overlapping_ranges_for_nr), (long)n1);
+      xfree(ptrs);
+      xfree(table);
+      return;
+    }
+    table[item].first = n1;
+    lili = TV_LIST_ITEM_NEXT(li_l, lili);
+    table[item].last = TV_LIST_ITEM_TV(lili)->vval.v_number;
+    lili = TV_LIST_ITEM_NEXT(li_l, lili);
+    table[item].width = (char)TV_LIST_ITEM_TV(lili)->vval.v_number;
+  }
+
+  xfree(ptrs);
+  xfree(cw_table);
+  cw_table = table;
+  cw_table_size = (size_t)len;
+}
diff --git a/src/nvim/mbyte.h b/src/nvim/mbyte.h
index ffa8411675..2a9afcbd03 100644
--- a/src/nvim/mbyte.h
+++ b/src/nvim/mbyte.h
@@ -5,6 +5,7 @@
 #include
 #include
 
+#include "nvim/eval/typval.h"
 #include "nvim/func_attr.h"
 #include "nvim/mbyte_defs.h"
 #include "nvim/os/os_defs.h"  // For indirect
diff --git a/src/nvim/testdir/test_utf8.vim b/src/nvim/testdir/test_utf8.vim
index 9b010a5dbc..c5dfd85e5e 100644
--- a/src/nvim/testdir/test_utf8.vim
+++ b/src/nvim/testdir/test_utf8.vim
@@ -140,6 +140,41 @@ func Test_list2str_str2list_latin1()
   call assert_equal(s, sres)
 endfunc
 
+func Test_setcellwidths()
+  call setcellwidths([
+        \ [0x1330, 0x1330, 2],
+        \ [9999, 10000, 1],
+        \ [0x1337, 0x1339, 2],
+        \])
+
+  call assert_equal(2, strwidth("\u1330"))
+  call assert_equal(1, strwidth("\u1336"))
+  call assert_equal(2, strwidth("\u1337"))
+  call assert_equal(2, strwidth("\u1339"))
+  call assert_equal(1, strwidth("\u133a"))
+
+  call setcellwidths([])
+
+  call assert_fails('call setcellwidths(1)', 'E714:')
+
+  call assert_fails('call setcellwidths([1, 2, 0])', 'E1109:')
+
+  call assert_fails('call setcellwidths([[0x101]])', 'E1110:')
+  call assert_fails('call setcellwidths([[0x101, 0x102]])', 'E1110:')
+  call assert_fails('call setcellwidths([[0x101, 0x102, 1, 4]])', 'E1110:')
+  call assert_fails('call setcellwidths([["a"]])', 'E1110:')
+
+  call assert_fails('call setcellwidths([[0x102, 0x101, 1]])', 'E1111:')
+
+  call assert_fails('call setcellwidths([[0x101, 0x102, 0]])', 'E1112:')
+  call assert_fails('call setcellwidths([[0x101, 0x102, 3]])', 'E1112:')
+
+  call assert_fails('call setcellwidths([[0x111, 0x122, 1], [0x115, 0x116, 2]])', 'E1113:')
+  call assert_fails('call setcellwidths([[0x111, 0x122, 1], [0x122, 0x123, 2]])', 'E1113:')
+
+  call assert_fails('call setcellwidths([[0x33, 0x44, 2]])', 'E1114:')
+endfunc
+
 func Test_print_overlong()
   " Text with more composing characters than MB_MAXBYTES.
   new