From 50a576ba576b5eff44d1c476da013eab11b4f3ed Mon Sep 17 00:00:00 2001
From: bfredl
Date: Mon, 2 Sep 2024 12:00:19 +0200
Subject: [PATCH] fix(mbyte): mark any 0xFE0F sequence as a TUI ambiguous width char
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some sequences beginning with ASCII might be rendered as emoji, as for
instance emoji 1️⃣ which is encoded as ASCII 0x31 + U+FE0F + U+20E3.

While it is tricky to make the width of such sequences configurable,
we can make the TUI be careful with such sequences and reset the cursor,
just like for Extended_Pictographic based sequences.
---
 src/nvim/mbyte.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index db4730408b..6fd51e773d 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -1339,13 +1339,32 @@ int utf_class_tab(const int c, const uint64_t *const chartab)
 
+/// Check whether the character sequence at "p" has to be treated as having an
+/// ambiguous display width.
+///
+/// @param p  NUL-terminated UTF-8 text; only the first character and the
+///           sequence directly following it are inspected.
+///
+/// @return true if the first character is non-ASCII and either ambiguous-width
+///         or emoji-like, or if it is directly followed by U+FE0F (VS-16).
 bool utf_ambiguous_width(const char *p)
 {
-  int c = utf_ptr2char(p);
-  if (c < 0x80) {
+  // be quick if there is nothing to print or just a single byte (ASCII-only)
+  if (p[0] == NUL || p[1] == NUL) {
     return false;
   }
 
-  const utf8proc_property_t *prop = utf8proc_get_property(c);
-  return prop->ambiguous_width || prop_is_emojilike(prop);
+  CharInfo info = utf_ptr2CharInfo(p);
+  if (info.value >= 0x80) {
+    const utf8proc_property_t *prop = utf8proc_get_property(info.value);
+    if (prop->ambiguous_width || prop_is_emojilike(prop)) {
+      return true;
+    }
+  }
+
+  // Check if the second sequence is U+FE0F (VS-16, UTF-8: EF B8 8F), which can
+  // turn even an ASCII base character into an emoji. Use strncmp() rather than
+  // memcmp(): it stops at the terminating NUL, so nothing past the end of the
+  // string is read when there is no second sequence.
+  return strncmp(p + info.len, "\xef\xb8\x8f", 3) == 0;
 }
 
 // Return the folded-case equivalent of "a", which is a UCS-4 character.  Uses