docs(lua): vim.str_utf_{start,end,pos} #24424

Closes #24422
2024-12-19 18:55:14 -07:00 · 2023-07-29 16:08:32 +02:00 · 2023-07-29 16:08:32 +02:00 · 2736cb3adf
commit 2736cb3adf
parent 42333ea98d
2 changed files with 106 additions and 0 deletions
--- a/runtime/doc/lua.txt
+++ b/runtime/doc/lua.txt
@@ -969,6 +969,65 @@ vim.str_byteindex({str}, {index}, {use_utf16})           *vim.str_byteindex()*
      • {index}      (number)
      • {use_utf16}  any|nil

+vim.str_utf_end({str}, {index})                            *vim.str_utf_end()*
+    Gets the distance (in bytes) from the last byte of the codepoint
+    (character) that {index} points to.
+
+    Examples: >lua
+
+     -- The character 'æ' is stored as the bytes '\xc3\xa6' (using UTF-8)
+
+     -- Returns 0 because the index is pointing at the last byte of a character
+     vim.str_utf_end('æ', 2)
+
+     -- Returns 1 because the index is pointing at the second last byte of a character
+     vim.str_utf_end('æ', 1)
+<
+
+    Parameters: ~
+      • {str}    (string)
+      • {index}  (number)
+
+    Return: ~
+        (number)
+
+vim.str_utf_pos({str})                                     *vim.str_utf_pos()*
+    Gets a list of the starting byte positions of each UTF-8 codepoint in the
+    given string.
+
+    Embedded NUL bytes are treated as terminating the string.
+
+    Parameters: ~
+      • {str}  (string)
+
+    Return: ~
+        (table)
+
+vim.str_utf_start({str}, {index})                        *vim.str_utf_start()*
+    Gets the distance (in bytes) from the starting byte of the codepoint
+    (character) that {index} points to.
+
+    The result can be added to {index} to get the starting byte of a
+    character.
+
+    Examples: >lua
+
+     -- The character 'æ' is stored as the bytes '\xc3\xa6' (using UTF-8)
+
+     -- Returns 0 because the index is pointing at the first byte of a character
+     vim.str_utf_start('æ', 1)
+
+     -- Returns -1 because the index is pointing at the second byte of a character
+     vim.str_utf_start('æ', 2)
+<
+
+    Parameters: ~
+      • {str}    (string)
+      • {index}  (number)
+
+    Return: ~
+        (number)
+
 vim.str_utfindex({str}, {index})                          *vim.str_utfindex()*
    Convert byte index to UTF-32 and UTF-16 indices. If {index} is not
    supplied, the length of the string is used. All indices are zero-based.
--- a/runtime/lua/vim/_meta/builtin.lua
+++ b/runtime/lua/vim/_meta/builtin.lua
@@ -118,6 +118,53 @@ function vim.stricmp(a, b) end
 --- @param use_utf16? any
 function vim.str_byteindex(str, index, use_utf16) end

+--- Gets a list of the starting byte positions of each UTF-8 codepoint in the given string.
+---
+--- Embedded NUL bytes are treated as terminating the string.
+--- @param str string
+--- @return table
+function vim.str_utf_pos(str) end
+
+--- Gets the distance (in bytes) from the starting byte of the codepoint (character) that {index}
+--- points to.
+---
+--- The result can be added to {index} to get the starting byte of a character.
+---
+--- Examples:
+--- <pre>lua
+--- -- The character 'æ' is stored as the bytes '\xc3\xa6' (using UTF-8)
+---
+--- -- Returns 0 because the index is pointing at the first byte of a character
+--- vim.str_utf_start('æ', 1)
+---
+--- -- Returns -1 because the index is pointing at the second byte of a character
+--- vim.str_utf_start('æ', 2)
+--- </pre>
+---
+--- @param str string
+--- @param index number
+--- @return number
+function vim.str_utf_start(str, index) end
+
+--- Gets the distance (in bytes) from the last byte of the codepoint (character) that {index} points
+--- to.
+---
+--- Examples:
+--- <pre>lua
+--- -- The character 'æ' is stored as the bytes '\xc3\xa6' (using UTF-8)
+---
+--- -- Returns 0 because the index is pointing at the last byte of a character
+--- vim.str_utf_end('æ', 2)
+---
+--- -- Returns 1 because the index is pointing at the second last byte of a character
+--- vim.str_utf_end('æ', 1)
+--- </pre>
+---
+--- @param str string
+--- @param index number
+--- @return number
+function vim.str_utf_end(str, index) end
+
 --- Convert byte index to UTF-32 and UTF-16 indices. If {index} is not
 --- supplied, the length of the string is used. All indices are zero-based.
 ---