From 99b5ffd688247f25295f3dd06e57c0d9ad85b072 Mon Sep 17 00:00:00 2001
From: Gregory Anders <greg@gpanders.com>
Date: Tue, 26 Nov 2024 13:56:01 -0600
Subject: [PATCH] perf(vim.text): use lookup table implementation for hex
 encoding (#30080)

Co-authored-by: glepnir <glephunter@gmail.com>
---
 runtime/lua/vim/text.lua          | 24 ++++++++++++--
 test/benchmark/text_spec.lua      | 52 +++++++++++++++++++++++++++++++
 test/functional/lua/text_spec.lua | 16 ++++++++++
 3 files changed, 89 insertions(+), 3 deletions(-)
 create mode 100644 test/benchmark/text_spec.lua

diff --git a/runtime/lua/vim/text.lua b/runtime/lua/vim/text.lua
index d45c8021c6..f910ab3a1d 100644
--- a/runtime/lua/vim/text.lua
+++ b/runtime/lua/vim/text.lua
@@ -2,6 +2,18 @@
 
 local M = {}
 
+-- Hex digit lookup tables: `itoa` maps a nibble value (0-15) to its uppercase
+-- digit; `atoi` maps a digit character (upper or lower case) back to its value.
+local alphabet = '0123456789ABCDEF'
+local atoi = {} ---@type table<string, integer>
+local itoa = {} ---@type table<integer, string>
+for value = 0, #alphabet - 1 do
+  local digit = alphabet:sub(value + 1, value + 1)
+  itoa[value] = digit
+  atoi[digit] = value
+  atoi[digit:lower()] = value
+end
+
 --- Hex encode a string.
 ---
 --- @param str string String to encode
@@ -9,7 +21,9 @@ local M = {}
 function M.hexencode(str)
   local enc = {} ---@type string[]
   for i = 1, #str do
-    enc[i] = string.format('%02X', str:byte(i, i + 1))
+    local byte = str:byte(i)
+    enc[2 * i - 1] = itoa[math.floor(byte / 16)] -- high nibble -> first hex digit
+    enc[2 * i] = itoa[byte % 16] -- low nibble -> second hex digit
   end
   return table.concat(enc)
 end
@@ -26,8 +40,12 @@ function M.hexdecode(enc)
 
   local str = {} ---@type string[]
   for i = 1, #enc, 2 do
-    local n = assert(tonumber(enc:sub(i, i + 1), 16))
-    str[#str + 1] = string.char(n)
+    local u = atoi[enc:sub(i, i)]
+    local l = atoi[enc:sub(i + 1, i + 1)]
+    if not u or not l then
+      return nil, 'string must contain only hex characters'
+    end
+    str[(i + 1) / 2] = string.char(u * 16 + l)
   end
   return table.concat(str), nil
 end
diff --git a/test/benchmark/text_spec.lua b/test/benchmark/text_spec.lua
new file mode 100644
index 0000000000..9cfeaf765b
--- /dev/null
+++ b/test/benchmark/text_spec.lua
@@ -0,0 +1,52 @@
+describe('vim.text', function()
+  --- @param t number[] Non-empty list of samples; returns their arithmetic mean
+  local function mean(t)
+    local count, total = #t, 0
+    assert(count > 0)
+    for _, sample in ipairs(t) do
+      total = total + sample
+    end
+    return total / count
+  end
+
+  --- @param t number[] Non-empty list sorted in ascending order; returns its median
+  local function median(t)
+    local mid = math.floor(#t / 2)
+    if #t % 2 == 0 then
+      return (t[mid] + t[mid + 1]) / 2 -- even count: average the two middle values
+    end
+    return t[mid + 1]
+  end
+
+  --- Time `N` (>= 1) calls of `f(input)`; print min/max/median/mean in milliseconds.
+  --- @param f fun(input: string): any
+  local function measure(f, input, N)
+    local stats = {} ---@type number[]
+    for run = 1, N do
+      local tic = vim.uv.hrtime()
+      f(input)
+      stats[run] = (vim.uv.hrtime() - tic) / 1000000 -- hrtime() is in ns; store ms
+    end
+    table.sort(stats) -- ascending, so stats[1] is the min and stats[N] the max
+    print(
+      string.format(
+        '\nN: %d, Min: %0.6f ms, Max: %0.6f ms, Median: %0.6f ms, Mean: %0.6f ms',
+        N,
+        stats[1],
+        stats[N],
+        median(stats),
+        mean(stats)
+      )
+    )
+  end
+
+  local input, output = string.rep('😂', 2 ^ 16), string.rep('F09F9882', 2 ^ 16) -- 'F09F9882' = UTF-8 bytes of 😂 in hex
+
+  it('hexencode', function()
+    measure(vim.text.hexencode, input, 100) -- time 100 encodes of the raw string
+  end)
+
+  it('hexdecode', function()
+    measure(vim.text.hexdecode, output, 100) -- time 100 decodes of its hex form
+  end)
+end)
diff --git a/test/functional/lua/text_spec.lua b/test/functional/lua/text_spec.lua
index be471bfd62..dd08a6ec04 100644
--- a/test/functional/lua/text_spec.lua
+++ b/test/functional/lua/text_spec.lua
@@ -26,5 +26,21 @@ describe('vim.text', function()
       eq(output, vim.text.hexencode(input))
       eq(input, vim.text.hexdecode(output))
     end)
+
+    it('errors on invalid input', function()
+      --- Assert that decoding `enc` returns nil plus exactly `expected_err`.
+      --- @param enc string
+      --- @param expected_err string
+      local function check_fails(enc, expected_err)
+        local res, err = vim.text.hexdecode(enc)
+        eq(nil, res)
+        eq(expected_err, err)
+      end
+
+      -- Odd number of hex characters
+      check_fails('ABC', 'string must have an even number of hex characters')
+      -- Non-hexadecimal input (even length)
+      check_fails('nothex', 'string must contain only hex characters')
+    end)
   end)
 end)