docs(lua): vim.str_utf_{start,end,pos} #24424

Closes #24422
2024-12-19 18:55:14 -07:00 · 2023-07-29 16:08:32 +02:00 · 2023-07-29 16:08:32 +02:00 · 2736cb3adf
commit 2736cb3adf
parent 42333ea98d
2 changed files with 106 additions and 0 deletions
--- a/runtime/doc/lua.txt
+++ b/runtime/doc/lua.txt
@ -969,6 +969,65 @@ vim.str_byteindex({str}, {index}, {use_utf16})           *vim.str_byteindex()*
      • {index}      (number)
      • {use_utf16}  any|nil
 vim.str_utf_end({str}, {index})                            *vim.str_utf_end()*
    Gets the distance (in bytes) from the last byte of the codepoint
    (character) that {index} points to.
    Examples: >lua
     -- The character 'æ' is stored as the bytes '\xc3\xa6' (using UTF-8)
     -- Returns 0 because the index is pointing at the last byte of a character
     vim.str_utf_end('æ', 2)
     -- Returns 1 because the index is pointing at the second-to-last byte of a character
     vim.str_utf_end('æ', 1)
 <
    Parameters: ~
      • {str}    (string)
      • {index}  (number)
    Return: ~
        (number)
 vim.str_utf_pos({str})                                     *vim.str_utf_pos()*
    Gets a list of the starting byte positions of each UTF-8 codepoint in the
    given string.
    Embedded NUL bytes are treated as terminating the string.
    Parameters: ~
      • {str}  (string)
    Return: ~
        (table)
 vim.str_utf_start({str}, {index})                        *vim.str_utf_start()*
    Gets the distance (in bytes) from the starting byte of the codepoint
    (character) that {index} points to.
    The result can be added to {index} to get the starting byte of a
    character.
    Examples: >lua
     -- The character 'æ' is stored as the bytes '\xc3\xa6' (using UTF-8)
     -- Returns 0 because the index is pointing at the first byte of a character
     vim.str_utf_start('æ', 1)
     -- Returns -1 because the index is pointing at the second byte of a character
     vim.str_utf_start('æ', 2)
 <
    Parameters: ~
      • {str}    (string)
      • {index}  (number)
    Return: ~
        (number)
 vim.str_utfindex({str}, {index})                          *vim.str_utfindex()*
    Convert byte index to UTF-32 and UTF-16 indices. If {index} is not
    supplied, the length of the string is used. All indices are zero-based.
--- a/runtime/lua/vim/_meta/builtin.lua
+++ b/runtime/lua/vim/_meta/builtin.lua
@ -118,6 +118,53 @@ function vim.stricmp(a, b) end
 --- @param use_utf16? any
 function vim.str_byteindex(str, index, use_utf16) end
 --- Gets a list of the starting byte positions of each UTF-8 codepoint in the given string.
 ---
 --- Embedded NUL bytes are treated as terminating the string.
 ---
 --- @param str string String whose codepoint starting positions are listed
 --- @return table
 function vim.str_utf_pos(str) end
 --- Gets the distance (in bytes) from the starting byte of the codepoint (character) that {index}
 --- points to.
 ---
 --- The result can be added to {index} to get the starting byte of a character.
 ---
 --- Examples:
 --- <pre>lua
 --- -- The character 'æ' is stored as the bytes '\xc3\xa6' (using UTF-8)
 ---
 --- -- Returns 0 because the index is pointing at the first byte of a character
 --- vim.str_utf_start('æ', 1)
 ---
 --- -- Returns -1 because the index is pointing at the second byte of a character
 --- vim.str_utf_start('æ', 2)
 --- </pre>
 ---
 --- @param str string String containing the character
 --- @param index number Byte index into {str} (in the examples, 1 is the first byte)
 --- @return number
 function vim.str_utf_start(str, index) end
 --- Gets the distance (in bytes) from the last byte of the codepoint (character) that {index} points
 --- to.
 ---
 --- Examples:
 --- <pre>lua
 --- -- The character 'æ' is stored as the bytes '\xc3\xa6' (using UTF-8)
 ---
 --- -- Returns 0 because the index is pointing at the last byte of a character
 --- vim.str_utf_end('æ', 2)
 ---
 --- -- Returns 1 because the index is pointing at the second-to-last byte of a character
 --- vim.str_utf_end('æ', 1)
 --- </pre>
 ---
 --- @param str string String containing the character
 --- @param index number Byte index into {str} (in the examples, 1 is the first byte)
 --- @return number
 function vim.str_utf_end(str, index) end
 --- Convert byte index to UTF-32 and UTF-16 indices. If {index} is not
 --- supplied, the length of the string is used. All indices are zero-based.
 ---