Compare commits

...

4 Commits

Author SHA1 Message Date
Mathias Fußenegger
8fc2951efc
Merge de17f182bb into 7121983c45 2024-12-18 14:40:43 +00:00
Mathias Fussenegger
de17f182bb fixup! perf(lsp): use string.buffer for rpc loop 2024-12-16 10:16:19 +01:00
Mathias Fussenegger
29ee7e600e perf(lsp): use string.buffer for rpc loop
Avoids some table allocations.
In a quick test over 50000 iterations it reduces the time from 130 ms to
74 ms.

For the test setup details see:

https://github.com/mfussenegger/nvim-dap/pull/1394#issue-2725352391
2024-12-16 10:16:19 +01:00
Mathias Fussenegger
b781e570b3 perf(lsp): optimize content length extraction from rpc headers
- No redundant `:gsub` to turn `-` in `Content-Length` into `_`
- No table allocations only to add and later get the content-length
  header
2024-12-16 10:16:16 +01:00

View File

@ -16,34 +16,21 @@ local function format_message_with_content_length(message)
})
end
---@class (private) vim.lsp.rpc.Headers: {string: any}
---@field content_length integer
--- Parses an LSP Message's header
--- Extract content-length from the msg header
---
---@param header string The header to parse.
---@return vim.lsp.rpc.Headers#parsed headers
local function parse_headers(header)
assert(type(header) == 'string', 'header must be a string')
--- @type vim.lsp.rpc.Headers
local headers = {}
for line in vim.gsplit(header, '\r\n', { plain = true }) do
---@param header string The header to parse
---@return integer?
local function get_content_length(header)
for line in header:gmatch('(.-)\r\n') do
if line == '' then
break
end
--- @type string?, string?
local key, value = line:match('^%s*(%S+)%s*:%s*(.+)%s*$')
if key then
key = key:lower():gsub('%-', '_') --- @type string
headers[key] = value
else
log.error('invalid header line %q', line)
error(string.format('invalid header line %q', line))
local key, value = line:match('^%s*(%S+)%s*:%s*(%d+)%s*$')
if key and key:lower() == 'content-length' then
return tonumber(value)
end
end
headers.content_length = tonumber(headers.content_length)
or error(string.format('Content-Length not found in headers. %q', header))
return headers
error('Content-Length not found in header: ' .. header)
end
-- This is the start of any possible header patterns. The gsub converts it to a
@ -52,70 +39,102 @@ local header_start_pattern = ('content'):gsub('%w', function(c)
return '[' .. c .. c:upper() .. ']'
end)
local has_strbuffer, strbuffer = pcall(require, "string.buffer")
--- The actual workhorse.
local function request_parser_loop()
local buffer = '' -- only for header part
while true do
-- A message can only be complete if it has a double CRLF and also the full
-- payload, so first let's check for the CRLFs
local start, finish = buffer:find('\r\n\r\n', 1, true)
-- Start parsing the headers
if start then
-- This is a workaround for servers sending initial garbage before
-- sending headers, such as if a bash script sends stdout. It assumes
-- that we know all of the headers ahead of time. At this moment, the
-- only valid headers start with "Content-*", so that's the thing we will
-- be searching for.
-- TODO(ashkan) I'd like to remove this, but it seems permanent :(
local buffer_start = buffer:find(header_start_pattern)
if not buffer_start then
error(
string.format(
"Headers were expected, a different response was received. The server response was '%s'.",
buffer
)
)
end
local headers = parse_headers(buffer:sub(buffer_start, start - 1))
local content_length = headers.content_length
-- Use table instead of just string to buffer the message. It prevents
-- a ton of strings allocating.
-- ref. http://www.lua.org/pil/11.6.html
---@type string[]
local body_chunks = { buffer:sub(finish + 1) }
local body_length = #body_chunks[1]
-- Keep waiting for data until we have enough.
while body_length < content_length do
---@type string
---@type function
local request_parser_loop
if has_strbuffer then
request_parser_loop = function()
local buf = strbuffer.new()
while true do
local msg = buf:tostring()
local header_end = msg:find('\r\n\r\n', 1, true)
if header_end then
local header = buf:get(header_end + 1)
buf:skip(2) -- skip past header boundary
local content_length = get_content_length(header)
while #buf < content_length do
local chunk = coroutine.yield()
buf:put(chunk)
end
local body = buf:get(content_length)
local chunk = coroutine.yield(body)
buf:put(chunk)
else
local chunk = coroutine.yield()
or error('Expected more data for the body. The server may have died.') -- TODO hmm.
table.insert(body_chunks, chunk)
body_length = body_length + #chunk
buf:put(chunk)
end
local last_chunk = body_chunks[#body_chunks]
end
end
else
request_parser_loop = function()
local buffer = '' -- only for header part
while true do
-- A message can only be complete if it has a double CRLF and also the full
-- payload, so first let's check for the CRLFs
local header_end, body_start = buffer:find('\r\n\r\n', 1, true)
-- Start parsing the headers
if header_end then
-- This is a workaround for servers sending initial garbage before
-- sending headers, such as if a bash script sends stdout. It assumes
-- that we know all of the headers ahead of time. At this moment, the
-- only valid headers start with "Content-*", so that's the thing we will
-- be searching for.
-- TODO(ashkan) I'd like to remove this, but it seems permanent :(
local buffer_start = buffer:find(header_start_pattern)
if not buffer_start then
error(
string.format(
"Headers were expected, a different response was received. The server response was '%s'.",
buffer
)
)
end
local header = buffer:sub(buffer_start, header_end + 1)
local content_length = get_content_length(header)
-- Use table instead of just string to buffer the message. It prevents
-- a ton of strings allocating.
-- ref. http://www.lua.org/pil/11.6.html
---@type string[]
local body_chunks = { buffer:sub(body_start + 1) }
local body_length = #body_chunks[1]
-- Keep waiting for data until we have enough.
while body_length < content_length do
---@type string
local chunk = coroutine.yield()
or error('Expected more data for the body. The server may have died.') -- TODO hmm.
table.insert(body_chunks, chunk)
body_length = body_length + #chunk
end
local last_chunk = body_chunks[#body_chunks]
body_chunks[#body_chunks] = last_chunk:sub(1, content_length - body_length - 1)
local rest = ''
if body_length > content_length then
rest = last_chunk:sub(content_length - body_length)
body_chunks[#body_chunks] = last_chunk:sub(1, content_length - body_length - 1)
local rest = ''
if body_length > content_length then
rest = last_chunk:sub(content_length - body_length)
end
local body = table.concat(body_chunks)
-- Yield our data.
--- @type string
local data = coroutine.yield(body)
or error('Expected more data for the body. The server may have died.')
buffer = rest .. data
else
-- Get more data since we don't have enough.
--- @type string
local data = coroutine.yield()
or error('Expected more data for the header. The server may have died.')
buffer = buffer .. data
end
local body = table.concat(body_chunks)
-- Yield our data.
--- @type string
local data = coroutine.yield(headers, body)
or error('Expected more data for the body. The server may have died.')
buffer = rest .. data
else
-- Get more data since we don't have enough.
--- @type string
local data = coroutine.yield()
or error('Expected more data for the header. The server may have died.')
buffer = buffer .. data
end
end
end
local M = {}
--- Mapping of error codes used by the client
@ -237,7 +256,7 @@ local default_dispatchers = {
--- @param on_exit? fun()
--- @param on_error fun(err: any)
function M.create_read_loop(handle_body, on_exit, on_error)
local parse_chunk = coroutine.wrap(request_parser_loop) --[[@as fun(chunk: string?): vim.lsp.rpc.Headers?, string?]]
local parse_chunk = coroutine.wrap(request_parser_loop) --[[@as fun(chunk: string?): string]]
parse_chunk()
return function(err, chunk)
if err then
@ -253,9 +272,9 @@ function M.create_read_loop(handle_body, on_exit, on_error)
end
while true do
local headers, body = parse_chunk(chunk)
if headers then
handle_body(assert(body))
local body = parse_chunk(chunk)
if body then
handle_body(body)
chunk = ''
else
break