-- Converts Vim :help files to HTML. Validates |tag| links and document syntax (parser errors).
--
-- NOTE: :helptags checks for duplicate tags, whereas this script checks _links_ (to tags).
--
-- USAGE (GENERATE HTML):
--   1. Run `make helptags` first; this script depends on vim.fn.taglist().
--   2. nvim -V1 -es --clean +"lua require('scripts.gen_help_html').gen('./build/runtime/doc/', 'target/dir/')"
--      - Read the docstring at gen().
--   3. cd target/dir/ && jekyll serve --host 0.0.0.0
--   4. Visit http://localhost:4000/…/help.txt.html
--
-- USAGE (VALIDATE):
--   1. nvim -V1 -es +"lua require('scripts.gen_help_html').validate()"
--      - validate() is 10x faster than gen(), so it is used in CI.
--
-- SELF-TEST MODE:
--   1. nvim -V1 -es +"lua require('scripts.gen_help_html')._test()"
--
-- NOTES:
--   * gen() and validate() are the primary entrypoints. validate() only exists because gen() is too
--     slow (~1 min) to run in per-commit CI.
--   * visit_node() is the core function used by gen() to traverse the document tree and produce HTML.
--   * visit_validate() is the core function used by validate().
--   * Files in `new_layout` will be generated with a "flow" layout instead of preformatted/fixed-width layout.

-- Module table.
local M = {}

-- Tag name => path of the help file that defines it (get_tagname() takes the basename of the value).
-- Starts out nil; presumably filled in by gen()/validate() -- confirm against the rest of the file.
local tagmap
-- Starts out nil; presumably the list of help files being processed -- confirm against the rest
-- of the file.
local helpfiles

-- Problems collected while validating:
--   invalid_links[tagname]  = basename of the help file containing the bad |link|
--   invalid_urls[url]       = basename of the help file containing the URL
--   invalid_spelling[word]  = list of help file basenames where the word was seen
local invalid_links, invalid_urls, invalid_spelling = {}, {}, {}

-- Discouraged spelling => preferred spelling.
local spell_dict = {
  Neovim = 'Nvim',
  NeoVim = 'Nvim',
  neovim = 'Nvim',
  lua = 'Lua',
}

-- These files are generated with "flow" layout (non fixed-width, wrapped text paragraphs).
-- All other files are "legacy" files which require fixed-width layout.
local new_layout = {}
for _, helpfile in ipairs({
  'api.txt',
  'channel.txt',
  'develop.txt',
  'luaref.txt',
  'nvim.txt',
  'pi_health.txt',
  'provider.txt',
  'ui.txt',
}) do
  new_layout[helpfile] = true
end

-- TODO: These known invalid |links| require an update to the relevant docs.
-- Tags that are known to be invalid and are therefore not reported (see ignore_invalid()).
-- Only the keys are consulted; each value names a help file (presumably the one containing the
-- bad link).
local exclude_invalid = {
  ["'previewpopup'"] = "quickref.txt",
  ["'pvp'"] = "quickref.txt",
  ["'string'"] = "eval.txt",
  Query = "treesitter.txt",
  ["eq?"] = "treesitter.txt",
  ["lsp-request"] = "lsp.txt",
  matchit = "vim_diff.txt",
  ["matchit.txt"] = "help.txt",
  ["set!"] = "treesitter.txt",
  ["v:_null_blob"] = "builtin.txt",
  ["v:_null_dict"] = "builtin.txt",
  ["v:_null_list"] = "builtin.txt",
  ["v:_null_string"] = "builtin.txt",
  ["vim.lsp.buf_request()"] = "lsp.txt",
  ["vim.lsp.util.get_progress_messages()"] = "lsp.txt",
  ["vim.treesitter.start()"] = "treesitter.txt",
}

-- Writes `text` to the file `fname`, replacing any existing content.
--
-- Raises an error naming the file (and the reason reported by io.open) if it cannot be opened.
local function tofile(fname, text)
  local f, err = io.open(fname, 'w')
  if not f then
    -- Report the file name: `f` is nil on this path and says nothing useful.
    error(('failed to write: %s (%s)'):format(fname, tostring(err)))
  end
  f:write(text)
  f:close()
end

-- Escapes the characters that are special in HTML text: & < >
--
-- Done in a single pass so that the "&" of a freshly inserted entity is never escaped again.
-- Returns only the escaped string (the gsub() match count is dropped).
local function html_esc(s)
  local entities = {
    ['&'] = '&amp;',
    ['<'] = '&lt;',
    ['>'] = '&gt;',
  }
  return (s:gsub('[&<>]', entities))
end

-- Percent-encodes `s` for use in a URL.
local function url_encode(s)
  -- Credit: tpope / vim-unimpaired
  -- NOTE: these chars intentionally *not* escaped: ' ( )
  return vim.fn.substitute(
    vim.fn.iconv(s, 'latin1', 'utf-8'),
    [=[[^A-Za-z0-9()'_.~-]]=],
    [=[\="%".printf("%02X",char2nr(submatch(0)))]=],
    'g'
  )
end

-- Replaces every tab with 8 spaces (no tab-stop alignment). Returns only the resulting string.
local function expandtabs(s)
  return (s:gsub('\t', (' '):rep(8)))
end

-- Uppercases the first character of each space/tab-delimited word.
--
-- NOTE: every word is prefixed with a space, so the result starts with a space
-- ("foo bar" => " Foo Bar").
local function to_titlecase(s)
  local text = ''
  for w in vim.gsplit(s, '[ \t]+') do
    text = ('%s %s%s'):format(text, vim.fn.toupper(w:sub(1, 1)), w:sub(2))
  end
  return text
end

-- Builds an anchor name from heading text: lowercase, whitespace runs replaced by "-".
-- Returns 'unknown' if `text` is nil.
local function to_heading_tag(text)
  -- Prepend "_" to avoid conflicts with actual :help tags.
  return text and string.format('_%s', vim.fn.tolower((text:gsub('%s+', '-')))) or 'unknown'
end

-- Gets the basename of path `f` without its ".txt" extension.
local function basename_noext(f)
  -- Anchor the pattern so only the extension is removed, and wrap gsub() in parens so its match
  -- count is not passed along as a stray second argument.
  return vim.fs.basename((f:gsub('%.txt$', '')))
end

-- Returns true if `s` is empty or consists only of spaces and tabs.
local function is_blank(s)
  -- A quoted string is required here: inside [[...]] the sequence \t is a literal backslash
  -- followed by "t", not a tab.
  return s:find('^[ \t]*$') ~= nil
end

-- Trims CR, tab, newline and space characters from `s`.
--
-- `dir` is passed to vim.fn.trim(): 0 (default) trims both ends, 1 only the start, 2 only the end.
local function trim(s, dir)
  return vim.fn.trim(s, '\r\t\n ', dir or 0)
end

-- Remove common punctuation from URLs.
--
-- TODO: fix this in the parser instead... https://github.com/neovim/tree-sitter-vimdoc
--
-- @returns (fixed_url, removed_chars) where `removed_chars` is in the order found in the input.
local function fix_url(url)
  local removed_chars = ''
  local fixed_url = url
  -- Remove up to one of each char from end of the URL, in this order.
  for _, c in ipairs({ '.', ')' }) do
    if fixed_url:sub(-1) == c then
      -- Prepend, so that the removed chars keep the order they had in the input.
      removed_chars = c .. removed_chars
      fixed_url = fixed_url:sub(1, -2)
    end
  end
  return fixed_url, removed_chars
end

-- Checks if a given line is a "noise" line that doesn't look good in HTML form.
--
-- @param line         Text of the line to check.
-- @param noise_lines  (optional) list of noise lines seen so far. When given, a detected noise
--                     line is appended to it, and an empty list means `line` is the first line
--                     (which is always treated as noise).
-- @returns true if `line` is noise, else false.
local function is_noise(line, noise_lines)
  local noisy = (
    -- First line is always noise.
    (noise_lines ~= nil and next(noise_lines) == nil)
    or line:find('Type .*gO.* to see the table of contents')
    -- Title line of traditional :help pages.
    -- Example: "NVIM REFERENCE MANUAL by ..."
    -- (Quoted string, not [[...]]: in a long-bracket string \t would be a backslash and a "t".)
    or line:find('^%s*N?VIM[ \t]*REFERENCE[ \t]*MANUAL')
    -- First line of traditional :help pages.
    -- Example: "*api.txt* Nvim"
    or line:find('%s*%*?[a-zA-Z]+%.txt%*?%s+N?[vV]im%s*$')
    -- modeline
    -- Example: "vim:tw=78:ts=8:sw=4:sts=4:et:ft=help:norl:"
    or line:find('^%s*vi[m]%:.*ft=help')
    or line:find('^%s*vi[m]%:.*filetype=help')
    or line:find('[*>]local%-additions[*<]')
  )
  if noisy then
    -- table.insert(stats.noise_lines, getbuflinestr(root, opt.buf, 0))
    table.insert(noise_lines or {}, line)
    return true
  end
  return false
end

-- Creates a github issue URL at neovim/tree-sitter-vimdoc with prefilled content.
--
-- @param fname        Source help file; its basename goes into the issue title.
-- @param to_fname     Generated HTML file; its basename is used to link the published page.
-- @param sample_text  Text quoted (URL-encoded) in the issue body as context.
local function get_bug_url_vimdoc(fname, to_fname, sample_text)
  local this_url = string.format('https://neovim.io/doc/user/%s', vim.fs.basename(to_fname))
  local bug_url = (
    'https://github.com/neovim/tree-sitter-vimdoc/issues/new?labels=bug&title=parse+error%3A+'
    .. vim.fs.basename(fname)
    .. '+&body=Found+%60tree-sitter-vimdoc%60+parse+error+at%3A+'
    .. this_url
    .. '%0D%0DContext%3A%0D%0D%60%60%60%0D'
    .. url_encode(sample_text)
    .. '%0D%60%60%60'
  )
  return bug_url
end

-- Creates a github issue URL at neovim/neovim with prefilled content.
-- @param fname        Source help file; its basename goes into the issue title.
-- @param to_fname     Generated HTML file; its basename is used to link the published page.
-- @param sample_text  Text quoted (URL-encoded) in the issue body as context.
-- @param token_name   (optional) name of the unhandled token, mentioned in the issue body.
local function get_bug_url_nvim(fname, to_fname, sample_text, token_name)
  local this_url = string.format('https://neovim.io/doc/user/%s', vim.fs.basename(to_fname))
  local bug_url = (
    'https://github.com/neovim/neovim/issues/new?labels=bug&title=user+docs+HTML%3A+'
    .. vim.fs.basename(fname)
    .. '+&body=%60gen_help_html.lua%60+problem+at%3A+'
    .. this_url
    .. '%0D'
    .. (token_name and '+unhandled+token%3A+%60' .. token_name .. '%60' or '')
    .. '%0DContext%3A%0D%0D%60%60%60%0D'
    .. url_encode(sample_text)
    .. '%0D%60%60%60'
  )
  return bug_url
end

-- Gets a "foo.html" name from a "foo.txt" helpfile name.
--
-- Returns nil if `f` is nil. Always returns a single value.
local function get_helppage(f)
  if not f then
    return nil
  end
  -- Special case: help.txt is the "main landing page" of :help files, not index.txt.
  if f == 'index.txt' then
    return 'vimindex.html'
  elseif f == 'help.txt' then
    return 'index.html'
  end
  -- Parenthesized so that gsub()'s match count does not leak out as a second return value.
  return (f:gsub('%.txt$', '.html'))
end

-- Counts leading spaces (tab=8) to decide the indent size of multiline text.
--
-- Blank lines (empty or whitespace-only) are ignored.
-- Returns the smallest indent found, or 0 if every line is blank.
local function get_indent(s)
  local min_indent = nil
  for line in vim.gsplit(s, '\n') do
    if line and not is_blank(line) then
      local width = expandtabs(line:match('^%s+') or ''):len()
      if not min_indent or width < min_indent then
        min_indent = width
      end
    end
  end
  return min_indent or 0
end

-- Removes the common indent level, after expanding tabs to 8 spaces.
local function trim_indent(s)
  local indent_size = get_indent(s)
  -- Collect the lines and join once instead of growing a string on every iteration.
  local lines = {}
  for line in vim.gsplit(s, '\n') do
    lines[#lines + 1] = expandtabs(line):sub(indent_size + 1)
  end
  return table.concat(lines, '\n')
end

-- Gets raw buffer text in the node's range (+/- an offset), as a newline-delimited string.
-- @param node    Tree-sitter node; node:range() supplies 0-based start/end rows.
-- @param bufnr   Buffer to read from.
-- @param offset  Number of extra lines of context on each side (default 0).
local function getbuflinestr(node, bufnr, offset)
  offset = offset or 0
  local line1, _, line2, _ = node:range()
  -- Clamp at the first line: getbufline() returns an empty list when the start line is < 1,
  -- which would drop all context for nodes near the top of the buffer.
  line1 = math.max(0, line1 - offset)
  line2 = line2 + offset
  -- +1: convert 0-based rows to the 1-based line numbers getbufline() expects.
  local lines = vim.fn.getbufline(bufnr, line1 + 1, line2 + 1)
  return table.concat(lines, '\n')
end

-- Gets the target of a link node.
--
-- @returns (helppage, tag): `tag` is the node text (wrapped in single quotes for option links);
--          `helppage` is the HTML page of the help file that `tagmap` lists for the tag, or nil
--          if the tag is unknown.
local function get_tagname(node, bufnr)
  local text = vim.treesitter.get_node_text(node, bufnr)
  local is_option = node:type() == 'optionlink' or node:parent():type() == 'optionlink'
  local tag = is_option and ("'%s'"):format(text) or text
  -- Unknown tags have no tagmap entry; don't hand nil to vim.fs.basename().
  local tagfile = tagmap[tag]
  local helpfile = tagfile and vim.fs.basename(tagfile) or nil -- "api.txt"
  local helppage = get_helppage(helpfile) -- "api.html"
  return helppage, tag
end

-- Returns true if the given invalid tagname is a false positive.
local function ignore_invalid(s)
  -- Strings like |~/====| appear in various places and the parser thinks they are links, but they
  -- are just table borders.
  return not not (s:find('===') or exclude_invalid[s])
end

-- Returns true if a parse error on text `s` should be ignored.
local function ignore_parse_error(s)
  -- Ignore parse errors for unclosed codespan/optionlink/tag.
  -- This is common in vimdocs and is treated as plaintext by :help.
  return s:find("^[`'|*]") ~= nil
end

-- Returns true if `node` has an ancestor of type `ancestor_name`.
-- The search stops at (and does not consider) the 'help_file' root node.
local function has_ancestor(node, ancestor_name)
  local p = node:parent()
  while p and p:type() ~= 'help_file' do
    if p:type() == ancestor_name then
      return true
    end
    p = p:parent()
  end
  return false
end

-- Resolves the link in `node` and records it in `invalid_links` (tag => basename of `fname`) if
-- its tag is unknown. Links inside a column_heading, links on nodes with parse errors, and tags
-- accepted by ignore_invalid() are not reported.
--
-- @returns (helppage, tagname) as returned by get_tagname().
local function validate_link(node, bufnr, fname)
  local helppage, tagname = get_tagname(node:child(1), bufnr)
  local reportable = not has_ancestor(node, 'column_heading')
    and not node:has_error()
    and not tagmap[tagname]
    and not ignore_invalid(tagname)
  if reportable then
    invalid_links[tagname] = vim.fs.basename(fname)
  end
  return helppage, tagname
end

-- Traverses the tree at `root` and checks that |tag| links point to valid helptags.
local function visit_validate(root, level, lang_tree, opt, stats) level = level or 0 local node_name = (root.named and root:named()) and root:type() or nil local toplevel = level < 1 local function node_text(node) return vim.treesitter.get_node_text(node or root, opt.buf) end local text = trim(node_text()) if root:child_count() > 0 then for node, _ in root:iter_children() do if node:named() then visit_validate(node, level + 1, lang_tree, opt, stats) end end end if node_name == 'ERROR' then if ignore_parse_error(text) then return end -- Store the raw text to give context to the error report. local sample_text = not toplevel and getbuflinestr(root, opt.buf, 3) or '[top level!]' table.insert(stats.parse_errors, sample_text) elseif node_name == 'word' or node_name == 'uppercase_name' then if spell_dict[text] then if not invalid_spelling[text] then invalid_spelling[text] = { vim.fs.basename(opt.fname) } else table.insert(invalid_spelling[text], vim.fs.basename(opt.fname)) end end elseif node_name == 'url' then if text:find('http%:') then invalid_urls[text] = vim.fs.basename(opt.fname) end elseif node_name == 'taglink' or node_name == 'optionlink' then local _, _ = validate_link(root, opt.buf, opt.fname) end end -- Generates HTML from node `root` recursively. local function visit_node(root, level, lang_tree, headings, opt, stats) level = level or 0 local node_name = (root.named and root:named()) and root:type() or nil -- Previous sibling kind (string). local prev = root:prev_sibling() and (root:prev_sibling().named and root:prev_sibling():named()) and root:prev_sibling():type() or nil -- Next sibling kind (string). local next_ = root:next_sibling() and (root:next_sibling().named and root:next_sibling():named()) and root:next_sibling():type() or nil -- Parent kind (string). 
local parent = root:parent() and root:parent():type() or nil local text = '' local toplevel = level < 1 local function node_text(node) return vim.treesitter.get_node_text(node or root, opt.buf) end -- Gets leading whitespace of the current node. local function ws() return node_text():match('^%s+') or '' end if root:named_child_count() == 0 or node_name == 'ERROR' then text = node_text() else -- Process children and join them with whitespace. for node, _ in root:iter_children() do if node:named() then local r = visit_node(node, level + 1, lang_tree, headings, opt, stats) text = string.format('%s%s', text, r) end end end local trimmed = trim(text) if node_name == 'help_file' then -- root node return text elseif node_name == 'url' then local fixed_url, removed_chars = fix_url(trimmed) return ('%s%s%s'):format(ws(), fixed_url, fixed_url, removed_chars) elseif node_name == 'word' or node_name == 'uppercase_name' then return html_esc(text) elseif node_name == 'h1' or node_name == 'h2' or node_name == 'h3' then if is_noise(text, stats.noise_lines) then return '' -- Discard common "noise" lines. end -- Remove "===" and tags from ToC text. local hname = (node_text():gsub('%-%-%-%-+', ''):gsub('%=%=%=%=+', ''):gsub('%*.*%*', '')) if node_name == 'h1' or #headings == 0 then table.insert(headings, { name = hname, subheadings = {}, }) else table.insert(headings[#headings].subheadings, { name = hname, subheadings = {}, }) end local el = node_name == 'h1' and 'h2' or 'h3' return ('<%s class="help-heading">%s%s>\n'):format(to_heading_tag(hname), el, text, el) elseif node_name == 'column_heading' or node_name == 'column_name' then if root:has_error() then return text end return ('
%s
'):format(ws(), text)
elseif node_name == 'argument' then
return ('%s{%s}
'):format(ws(), text)
elseif node_name == 'codeblock' then
if is_blank(text) then
return ''
end
return ('%s'):format(html_esc(trim(trim_indent(text), 2))) elseif node_name == 'tag' then -- anchor if root:has_error() then return text end local in_heading = (parent == 'h1' or parent == 'h2') local cssclass = (not in_heading and get_indent(node_text()) > 8) and 'help-tag-right' or 'help-tag' local tagname = node_text(root:child(1)) if vim.tbl_count(stats.first_tags) < 2 then -- First 2 tags in the doc will be anchored at the main heading. table.insert(stats.first_tags, tagname) return '' end local s = ('%s%s'):format(ws(), url_encode(tagname), cssclass, trimmed) if in_heading and prev ~= 'tag' then -- Start the container for tags in a heading. -- This makes "justify-content:space-between" right-align the tags. --