neovim/scripts/gen_vimdoc.py
2019-05-19 17:30:20 +02:00

764 lines
25 KiB
Python
Executable File

#!/usr/bin/env python3
"""Generates Nvim help docs from C docstrings, by parsing Doxygen XML.
This would be easier using lxml and XSLT, but:
1. This should avoid needing Python dependencies, especially ones that are
C modules that have library dependencies (lxml requires libxml and
libxslt).
2. I wouldn't know how to deal with nested indentation in <para> tags using
XSLT.
Each function documentation is formatted with the following rules:
- Maximum width of 78 characters (`text_width`).
- Spaces for indentation.
- Function signature and helptag are on the same line.
- Helptag is right aligned.
- Signature and helptag must have a minimum of 8 spaces between them.
- If the signature is too long, it is placed on the line after the
helptag. The signature wraps at `text_width - 8` characters with
subsequent lines indented to the open parenthesis.
- Documentation body will be indented by 16 spaces.
- Subsection bodies are indented an additional 4 spaces.
- Documentation body consists of the function description, parameter details,
return description, and C declaration.
- Parameters are omitted for the `void` and `Error *` types, or if the
parameter is marked as [out].
- Each function documentation is separated by a single line.
The C declaration is added to the end to show actual argument types.
"""
import os
import re
import sys
import shutil
import textwrap
import subprocess
import collections
import pprint
from xml.dom import minidom
if sys.version_info[0] < 3:
print("use Python 3")
sys.exit(1)
DEBUG = ('DEBUG' in os.environ)
INCLUDE_C_DECL = ('INCLUDE_C_DECL' in os.environ)
INCLUDE_DEPRECATED = ('INCLUDE_DEPRECATED' in os.environ)
text_width = 78
script_path = os.path.abspath(__file__)
base_dir = os.path.dirname(os.path.dirname(script_path))
out_dir = os.path.join(base_dir, 'tmp-{mode}-doc')
filter_cmd = '%s %s' % (sys.executable, script_path)
seen_funcs = set()
lua2dox_filter = os.path.join(base_dir, 'scripts', 'lua2dox_filter')
CONFIG = {
'api': {
'filename': 'api.txt',
# String used to find the start of the generated part of the doc.
'section_start_token': '*api-global*',
# Section ordering.
'section_order' : [
'vim.c',
'buffer.c',
'window.c',
'tabpage.c',
'ui.c',
],
# List of files/directories for doxygen to read, separated by blanks
'files': os.path.join(base_dir, 'src/nvim/api'),
# file patterns used by doxygen
'file_patterns': '*.h *.c',
# Only function with this prefix are considered
'func_name_prefix': 'nvim_',
# Section name overrides.
'section_name': {
'vim.c': 'Global',
},
# Module name overrides (for Lua).
'module_override': {},
# Append the docs for these modules, do not start a new section.
'append_only' : [],
},
'lua': {
'filename': 'if_lua.txt',
'section_start_token': '*lua-vim*',
'section_order' : [
'vim.lua',
'shared.lua',
],
'files': ' '.join([
os.path.join(base_dir, 'src/nvim/lua/vim.lua'),
os.path.join(base_dir, 'runtime/lua/vim/shared.lua'),
]),
'file_patterns': '*.lua',
'func_name_prefix': '',
'section_name': {},
'module_override': {
'shared': 'vim', # `shared` functions are exposed on the `vim` module.
},
'append_only' : [
'shared.lua',
],
},
}
param_exclude = (
'channel_id',
)
# Annotations are displayed as line items after API function descriptions.
annotation_map = {
'FUNC_API_ASYNC': '{async}',
}
# Tracks `xrefsect` titles. As of this writing, used only for separating
# deprecated functions.
xrefs = set()
def debug_this(s, n):
o = n if isinstance(n, str) else n.toprettyxml(indent=' ', newl='\n')
name = '' if isinstance(n, str) else n.nodeName
if s in o:
raise RuntimeError('xxx: {}\n{}'.format(name, o))
# XML Parsing Utilities {{{
def find_first(parent, name):
"""Finds the first matching node within parent."""
sub = parent.getElementsByTagName(name)
if not sub:
return None
return sub[0]
def get_children(parent, name):
"""Yield matching child nodes within parent."""
for child in parent.childNodes:
if child.nodeType == child.ELEMENT_NODE and child.nodeName == name:
yield child
def get_child(parent, name):
"""Get the first matching child node."""
for child in get_children(parent, name):
return child
return None
def clean_text(text):
"""Cleans text.
Only cleans superfluous whitespace at the moment.
"""
return ' '.join(text.split()).strip()
def clean_lines(text):
"""Removes superfluous lines.
The beginning and end of the string is trimmed. Empty lines are collapsed.
"""
return re.sub(r'\A\n\s*\n*|\n\s*\n*\Z', '', re.sub(r'(\n\s*\n+)+', '\n\n', text))
def is_blank(text):
return '' == clean_lines(text)
def get_text(parent, preformatted=False):
"""Combine all text in a node."""
if parent.nodeType == parent.TEXT_NODE:
return parent.data
out = ''
for node in parent.childNodes:
if node.nodeType == node.TEXT_NODE:
out += node.data if preformatted else clean_text(node.data)
elif node.nodeType == node.ELEMENT_NODE:
out += ' ' + get_text(node, preformatted)
return out
# Gets the length of the last line in `text`, excluding newline ("\n") char.
def len_lastline(text):
lastnl = text.rfind('\n')
if -1 == lastnl:
return len(text)
if '\n' == text[-1]:
return lastnl - (1+ text.rfind('\n', 0, lastnl))
return len(text) - (1 + lastnl)
def len_lastline_withoutindent(text, indent):
n = len_lastline(text)
return (n - len(indent)) if n > len(indent) else 0
# Returns True if node `n` contains only inline (not block-level) elements.
def is_inline(n):
for c in n.childNodes:
if c.nodeType != c.TEXT_NODE and c.nodeName != 'computeroutput':
return False
if not is_inline(c):
return False
return True
def doc_wrap(text, prefix='', width=70, func=False, indent=None):
"""Wraps text to `width`.
First line is prefixed with `prefix`, subsequent lines are aligned.
If `func` is True, only wrap at commas.
"""
if not width:
# return prefix + text
return text
# Whitespace used to indent all lines except the first line.
indent = ' ' * len(prefix) if indent is None else indent
indent_only = (prefix == '' and indent is not None)
if func:
lines = [prefix]
for part in text.split(', '):
if part[-1] not in ');':
part += ', '
if len(lines[-1]) + len(part) > width:
lines.append(indent)
lines[-1] += part
return '\n'.join(x.rstrip() for x in lines).rstrip()
# XXX: Dummy prefix to force TextWrapper() to wrap the first line.
if indent_only:
prefix = indent
tw = textwrap.TextWrapper(break_long_words = False,
break_on_hyphens = False,
width=width,
initial_indent=prefix,
subsequent_indent=indent)
result = '\n'.join(tw.wrap(text.strip()))
# XXX: Remove the dummy prefix.
if indent_only:
result = result[len(indent):]
return result
def has_nonexcluded_params(nodes):
"""Returns true if any of the given <parameterlist> elements has at least
one non-excluded item."""
for n in nodes:
if render_params(n) != '':
return True
def render_params(parent, width=62):
"""Renders Doxygen <parameterlist> tag as Vim help text."""
name_length = 0
items = []
for node in parent.childNodes:
if node.nodeType == node.TEXT_NODE:
continue
name_node = find_first(node, 'parametername')
if name_node.getAttribute('direction') == 'out':
continue
name = get_text(name_node)
if name in param_exclude:
continue
name = '{%s}' % name
name_length = max(name_length, len(name) + 2)
items.append((name.strip(), node))
out = ''
for name, node in items:
name = ' {}'.format(name.ljust(name_length))
desc = ''
desc_node = get_child(node, 'parameterdescription')
if desc_node:
desc = parse_parblock(desc_node, width=width,
indent=(' ' * len(name)))
out += '{}{}\n'.format(name, desc)
return out.rstrip()
# Renders a node as Vim help text, recursively traversing all descendants.
def render_node(n, text, prefix='', indent='', width=62):
text = ''
# space_preceding = (len(text) > 0 and ' ' == text[-1][-1])
# text += (int(not space_preceding) * ' ')
if n.nodeType == n.TEXT_NODE:
# `prefix` is NOT sent to doc_wrap, it was already handled by now.
text += doc_wrap(n.data, indent=indent, width=width)
elif n.nodeName == 'computeroutput':
text += ' `{}` '.format(get_text(n))
elif n.nodeName == 'preformatted':
o = get_text(n, preformatted=True)
ensure_nl = '' if o[-1] == '\n' else '\n'
text += ' >{}{}\n<'.format(ensure_nl, o)
elif is_inline(n):
for c in n.childNodes:
text += render_node(c, text)
text = doc_wrap(text, indent=indent, width=width)
elif n.nodeName == 'verbatim':
# TODO: currently we don't use this. The "[verbatim]" hint is there as
# a reminder that we must decide how to format this if we do use it.
text += ' [verbatim] {}'.format(get_text(n))
elif n.nodeName == 'listitem':
for c in n.childNodes:
text += indent + prefix + render_node(c, text, indent=indent+(' ' * len(prefix)), width=width)
elif n.nodeName in ('para', 'heading'):
for c in n.childNodes:
text += render_node(c, text, indent=indent, width=width)
if is_inline(n):
text = doc_wrap(text, indent=indent, width=width)
elif n.nodeName == 'itemizedlist':
for c in n.childNodes:
text += '{}\n'.format(render_node(c, text, prefix='- ',
indent=indent, width=width))
elif n.nodeName == 'orderedlist':
i = 1
for c in n.childNodes:
if is_blank(get_text(c)):
text += '\n'
continue
text += '{}\n'.format(render_node(c, text, prefix='{}. '.format(i),
indent=indent, width=width))
i = i + 1
elif n.nodeName == 'simplesect' and 'note' == n.getAttribute('kind'):
text += 'Note:\n '
for c in n.childNodes:
text += render_node(c, text, indent=' ', width=width)
text += '\n'
elif n.nodeName == 'simplesect' and 'warning' == n.getAttribute('kind'):
text += 'Warning:\n '
for c in n.childNodes:
text += render_node(c, text, indent=' ', width=width)
text += '\n'
elif (n.nodeName == 'simplesect'
and n.getAttribute('kind') in ('return', 'see')):
text += ' '
for c in n.childNodes:
text += render_node(c, text, indent=' ', width=width)
else:
raise RuntimeError('unhandled node type: {}\n{}'.format(
n.nodeName, n.toprettyxml(indent=' ', newl='\n')))
return text
def render_para(parent, indent='', width=62):
"""Renders Doxygen <para> containing arbitrary nodes.
NB: Blank lines in a docstring manifest as <para> tags.
"""
if is_inline(parent):
return clean_lines(doc_wrap(render_node(parent, ''),
indent=indent, width=width).strip())
# Ordered dict of ordered lists.
groups = collections.OrderedDict([
('params', []),
('return', []),
('seealso', []),
('xrefs', []),
])
# Gather nodes into groups. Mostly this is because we want "parameterlist"
# nodes to appear together.
text = ''
kind = ''
last = ''
for child in parent.childNodes:
if child.nodeName == 'parameterlist':
groups['params'].append(child)
elif child.nodeName == 'xrefsect':
groups['xrefs'].append(child)
elif child.nodeName == 'simplesect':
last = kind
kind = child.getAttribute('kind')
if kind == 'return' or (kind == 'note' and last == 'return'):
groups['return'].append(child)
elif kind == 'see':
groups['seealso'].append(child)
elif kind in ('note', 'warning'):
text += render_node(child, text, indent=indent, width=width)
else:
raise RuntimeError('unhandled simplesect: {}\n{}'.format(
child.nodeName, child.toprettyxml(indent=' ', newl='\n')))
else:
text += render_node(child, text, indent=indent, width=width)
chunks = [text]
# Generate text from the gathered items.
if len(groups['params']) > 0 and has_nonexcluded_params(groups['params']):
chunks.append('\nParameters: ~')
for child in groups['params']:
chunks.append(render_params(child, width=width))
if len(groups['return']) > 0:
chunks.append('\nReturn: ~')
for child in groups['return']:
chunks.append(render_node(child, chunks[-1][-1], indent=indent, width=width))
if len(groups['seealso']) > 0:
chunks.append('\nSee also: ~')
for child in groups['seealso']:
chunks.append(render_node(child, chunks[-1][-1], indent=indent, width=width))
for child in groups['xrefs']:
title = get_text(get_child(child, 'xreftitle'))
xrefs.add(title)
xrefdesc = render_para(get_child(child, 'xrefdescription'), width=width)
chunks.append(doc_wrap(xrefdesc, prefix='{}: '.format(title),
width=width) + '\n')
return clean_lines('\n'.join(chunks).strip())
def parse_parblock(parent, prefix='', width=62, indent=''):
"""Renders a nested block of <para> tags as Vim help text."""
paragraphs = []
for child in parent.childNodes:
paragraphs.append(render_para(child, width=width, indent=indent))
paragraphs.append('')
return clean_lines('\n'.join(paragraphs).strip())
# }}}
def parse_source_xml(filename, mode):
"""Collects API functions.
Returns two strings:
1. API functions
2. Deprecated API functions
Caller decides what to do with the deprecated documentation.
"""
global xrefs
xrefs = set()
functions = []
deprecated_functions = []
dom = minidom.parse(filename)
compoundname = get_text(dom.getElementsByTagName('compoundname')[0])
for member in dom.getElementsByTagName('memberdef'):
if member.getAttribute('static') == 'yes' or \
member.getAttribute('kind') != 'function' or \
member.getAttribute('prot') == 'private' or \
get_text(get_child(member, 'name')).startswith('_'):
continue
loc = find_first(member, 'location')
if 'private' in loc.getAttribute('file'):
continue
return_type = get_text(get_child(member, 'type'))
if return_type == '':
continue
if return_type.startswith(('ArrayOf', 'DictionaryOf')):
parts = return_type.strip('_').split('_')
return_type = '{}({})'.format(parts[0], ', '.join(parts[1:]))
name = get_text(get_child(member, 'name'))
annotations = get_text(get_child(member, 'argsstring'))
if annotations and ')' in annotations:
annotations = annotations.rsplit(')', 1)[-1].strip()
# XXX: (doxygen 1.8.11) 'argsstring' only includes attributes of
# non-void functions. Special-case void functions here.
if name == 'nvim_get_mode' and len(annotations) == 0:
annotations += 'FUNC_API_ASYNC'
annotations = filter(None, map(lambda x: annotation_map.get(x),
annotations.split()))
if mode == 'lua':
fstem = compoundname.split('.')[0]
fstem = CONFIG[mode]['module_override'].get(fstem, fstem)
vimtag = '*{}.{}()*'.format(fstem, name)
else:
vimtag = '*{}()*'.format(name)
params = []
type_length = 0
for param in get_children(member, 'param'):
param_type = get_text(get_child(param, 'type')).strip()
param_name = ''
declname = get_child(param, 'declname')
if declname:
param_name = get_text(declname).strip()
elif mode == 'lua':
# that's how it comes out of lua2dox
param_name = param_type
param_type = ''
if param_name in param_exclude:
continue
if param_type.endswith('*'):
param_type = param_type.strip('* ')
param_name = '*' + param_name
type_length = max(type_length, len(param_type))
params.append((param_type, param_name))
c_args = []
for param_type, param_name in params:
c_args.append(' ' + (
'%s %s' % (param_type.ljust(type_length), param_name)).strip())
c_decl = textwrap.indent('%s %s(\n%s\n);' % (return_type, name,
',\n'.join(c_args)),
' ')
prefix = '%s(' % name
suffix = '%s)' % ', '.join('{%s}' % a[1] for a in params
if a[0] not in ('void', 'Error'))
# Minimum 8 chars between signature and vimtag
lhs = (text_width - 8) - len(prefix)
if len(prefix) + len(suffix) > lhs:
signature = vimtag.rjust(text_width) + '\n'
signature += doc_wrap(suffix, width=text_width-8, prefix=prefix,
func=True)
else:
signature = prefix + suffix
signature += vimtag.rjust(text_width - len(signature))
doc = ''
desc = find_first(member, 'detaileddescription')
if desc:
doc = parse_parblock(desc)
if DEBUG:
print(textwrap.indent(
re.sub(r'\n\s*\n+', '\n',
desc.toprettyxml(indent=' ', newl='\n')), ' ' * 16))
if not doc:
doc = 'TODO: Documentation'
annotations = '\n'.join(annotations)
if annotations:
annotations = ('\n\nAttributes: ~\n' +
textwrap.indent(annotations, ' '))
i = doc.rfind('Parameters: ~')
if i == -1:
doc += annotations
else:
doc = doc[:i] + annotations + '\n\n' + doc[i:]
if INCLUDE_C_DECL:
doc += '\n\nC Declaration: ~\n>\n'
doc += c_decl
doc += '\n<'
func_doc = signature + '\n'
func_doc += textwrap.indent(clean_lines(doc), ' ' * 16)
func_doc = re.sub(r'^\s+([<>])$', r'\1', func_doc, flags=re.M)
if 'Deprecated' in xrefs:
deprecated_functions.append(func_doc)
elif name.startswith(CONFIG[mode]['func_name_prefix']):
functions.append(func_doc)
xrefs.clear()
return '\n\n'.join(functions), '\n\n'.join(deprecated_functions)
def delete_lines_below(filename, tokenstr):
"""Deletes all lines below the line containing `tokenstr`, the line itself,
and one line above it.
"""
lines = open(filename).readlines()
i = 0
for i, line in enumerate(lines, 1):
if tokenstr in line:
break
i = max(0, i - 2)
with open(filename, 'wt') as fp:
fp.writelines(lines[0:i])
def gen_docs(config):
"""Generate documentation.
Doxygen is called and configured through stdin.
"""
for mode in CONFIG:
output_dir = out_dir.format(mode=mode)
p = subprocess.Popen(['doxygen', '-'], stdin=subprocess.PIPE)
p.communicate(
config.format(
input=CONFIG[mode]['files'],
output=output_dir,
filter=filter_cmd,
file_patterns=CONFIG[mode]['file_patterns'])
.encode('utf8')
)
if p.returncode:
sys.exit(p.returncode)
sections = {}
intros = {}
sep = '=' * text_width
base = os.path.join(output_dir, 'xml')
dom = minidom.parse(os.path.join(base, 'index.xml'))
# generate docs for section intros
for compound in dom.getElementsByTagName('compound'):
if compound.getAttribute('kind') != 'group':
continue
groupname = get_text(find_first(compound, 'name'))
groupxml = os.path.join(base, '%s.xml' % compound.getAttribute('refid'))
desc = find_first(minidom.parse(groupxml), 'detaileddescription')
if desc:
doc = parse_parblock(desc)
if doc:
intros[groupname] = doc
for compound in dom.getElementsByTagName('compound'):
if compound.getAttribute('kind') != 'file':
continue
filename = get_text(find_first(compound, 'name'))
if filename.endswith('.c') or filename.endswith('.lua'):
functions, deprecated = parse_source_xml(
os.path.join(base, '%s.xml' %
compound.getAttribute('refid')), mode)
if not functions and not deprecated:
continue
if functions or deprecated:
name = os.path.splitext(os.path.basename(filename))[0]
if name == 'ui':
name = name.upper()
else:
name = name.title()
doc = ''
intro = intros.get('api-%s' % name.lower())
if intro:
doc += '\n\n' + intro
if functions:
doc += '\n\n' + functions
if INCLUDE_DEPRECATED and deprecated:
doc += '\n\n\nDeprecated %s Functions: ~\n\n' % name
doc += deprecated
if doc:
filename = os.path.basename(filename)
name = CONFIG[mode]['section_name'].get(filename, name)
if mode == 'lua':
title = 'Lua module: {}'.format(name.lower())
helptag = '*lua-{}*'.format(name.lower())
else:
title = '{} Functions'.format(name)
helptag = '*api-{}*'.format(name.lower())
sections[filename] = (title, helptag, doc)
if not sections:
return
docs = ''
i = 0
for filename in CONFIG[mode]['section_order']:
if filename not in sections:
raise RuntimeError('found new module "{}"; update the "section_order" map'.format(filename))
title, helptag, section_doc = sections.pop(filename)
i += 1
if filename not in CONFIG[mode]['append_only']:
docs += sep
docs += '\n%s%s' % (title, helptag.rjust(text_width - len(title)))
docs += section_doc
docs += '\n\n\n'
docs = docs.rstrip() + '\n\n'
docs += ' vim:tw=78:ts=8:ft=help:norl:\n'
doc_file = os.path.join(base_dir, 'runtime', 'doc',
CONFIG[mode]['filename'])
delete_lines_below(doc_file, CONFIG[mode]['section_start_token'])
with open(doc_file, 'ab') as fp:
fp.write(docs.encode('utf8'))
shutil.rmtree(output_dir)
def filter_source(filename):
name, extension = os.path.splitext(filename)
if extension == '.lua':
p = subprocess.run([lua2dox_filter, filename], stdout=subprocess.PIPE)
op = ('?' if 0 != p.returncode else p.stdout.decode('utf-8'))
print(op)
else:
"""Filters the source to fix macros that confuse Doxygen."""
with open(filename, 'rt') as fp:
print(re.sub(r'^(ArrayOf|DictionaryOf)(\(.*?\))',
lambda m: m.group(1)+'_'.join(
re.split(r'[^\w]+', m.group(2))),
fp.read(), flags=re.M))
# Doxygen Config {{{
Doxyfile = '''
OUTPUT_DIRECTORY = {output}
INPUT = {input}
INPUT_ENCODING = UTF-8
FILE_PATTERNS = {file_patterns}
RECURSIVE = YES
INPUT_FILTER = "{filter}"
EXCLUDE =
EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS = */private/*
EXCLUDE_SYMBOLS =
EXTENSION_MAPPING = lua=C
EXTRACT_PRIVATE = NO
GENERATE_HTML = NO
GENERATE_DOCSET = NO
GENERATE_HTMLHELP = NO
GENERATE_QHP = NO
GENERATE_TREEVIEW = NO
GENERATE_LATEX = NO
GENERATE_RTF = NO
GENERATE_MAN = NO
GENERATE_DOCBOOK = NO
GENERATE_AUTOGEN_DEF = NO
GENERATE_XML = YES
XML_OUTPUT = xml
XML_PROGRAMLISTING = NO
ENABLE_PREPROCESSING = YES
MACRO_EXPANSION = YES
EXPAND_ONLY_PREDEF = NO
MARKDOWN_SUPPORT = YES
'''
# }}}
if __name__ == "__main__":
if len(sys.argv) > 1:
filter_source(sys.argv[1])
else:
gen_docs(Doxyfile)
# vim: set ft=python ts=4 sw=4 tw=79 et fdm=marker :