# Converts Vim/Nvim documentation to HTML. # # Adapted from https://github.com/c4rlo/vimhelp/ # License: MIT # # Copyright (c) 2016 Carlo Teubner # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import re, urllib.parse from itertools import chain HEAD = """\ Nvim: {filename} """ HEAD_END = '\n\n' INTRO = """

Nvim help files

HTML export of the Nvim help pages{vers-note}. Updated automatically from the Nvim source repository. Also includes the Vim FAQ, pulled from its source repository.

""" VERSION_NOTE = ", current as of Vim {version}" SITENAVI_LINKS = """ Quick links: help overview · quick reference · user manual toc · reference manual toc · faq """ SITENAVI_LINKS_PLAIN = SITENAVI_LINKS.format(helptxt='help.txt.html') SITENAVI_LINKS_WEB = SITENAVI_LINKS.format(helptxt='/') SITENAVI_PLAIN = '

' + SITENAVI_LINKS_PLAIN + '

' SITENAVI_WEB = '

' + SITENAVI_LINKS_WEB + '

' SITENAVI_SEARCH = '
' + SITENAVI_LINKS_WEB + \ '
' TEXTSTART = """
                                                                                
"""

# ---------------------------------------------------------------------------
# Page trailer strings, tag/markup regexes, and the converter classes.
#
# FOOTER / FOOTER2  - joined after the converted body by VimH2H.to_html(),
#                     with the site-navigation footer between them.
# VIM_FAQ_LINE      - appended by to_html() right after the help.txt line
#                     that matches RE_LOCAL_ADD.
# RE_TAGLINE        - two whitespace-separated fields; VimH2H.__init__ reads
#                     them as (tag, filename) from each line of the tags text.
# RE_STARTAG        - a *tag* anchor preceded by whitespace; add_tags()
#                     registers every match found in a file's contents.
# Link              - __slots__ record: four pre-built link strings (plain or
#                     pipe style, same-file or foreign-file) plus the filename
#                     the tag was registered with.
# VimH2H            - keeps a tag -> Link map in self._urls.  maplink() picks
#                     the same-file variant when the tag's file equals the
#                     current file, and the pipe variant when css_class is
#                     'l'; tags with no entry fall back to html_escape[tag].
#                     to_html() walks the contents line by line and returns
#                     header + body + footer joined into one string.
# HtmlEscCache      - dict whose __missing__ computes a value for an unseen
#                     key, stores it, and returns it (memoised lookups via
#                     html_escape[...]).
#
# NOTE(review): several things these lines depend on are not visible here -
# RE_NEWLINE, RE_HRULE, RE_EG_START, RE_LINKWORD, RE_TAGWORD and the
# part1_same / part1_foreign / part2 values read by mklinks() have no
# assignment in this text, the markup literals passed to out.extend() are
# empty strings, and HtmlEscCache replaces '&', '<' and '>' with themselves.
# Presumably markup was stripped somewhere upstream - TODO confirm against a
# pristine copy before relying on this code.
# ---------------------------------------------------------------------------
FOOTER = '
' FOOTER2 = """
""" VIM_FAQ_LINE = '' \ 'vim_faq.txt Frequently Asked Questions\n' RE_TAGLINE = re.compile(r'(\S+)\s+(\S+)') PAT_WORDCHAR = '[!#-)+-{}~\xC0-\xFF]' PAT_HEADER = r'(^.*~$)' PAT_GRAPHIC = r'(^.* `$)' PAT_PIPEWORD = r'(?|.)?)' PAT_SPECIAL = r'(<.+?>|\{.+?}|' \ r'\[(?:range|line|count|offset|\+?cmd|[-+]?num|\+\+opt|' \ r'arg|arguments|ident|addr|group)]|' \ r'(?<=\s)\[[-a-z^A-Z0-9_]{2,}])' PAT_TITLE = r'(Vim version [0-9.a-z]+|VIM REFERENCE.*)' PAT_NOTE = r'((? \t]+[a-zA-Z0-9/])' PAT_WORD = r'((?$') RE_EG_END = re.compile(r'\S') RE_SECTION = re.compile(r'[-A-Z .][-A-Z0-9 .()]*(?=\s+\*)') RE_STARTAG = re.compile(r'\s\*([^ \t|]+)\*(?:\s|$)') RE_LOCAL_ADD = re.compile(r'LOCAL ADDITIONS:\s+\*local-additions\*$') class Link(object): __slots__ = 'link_plain_same', 'link_pipe_same', \ 'link_plain_foreign', 'link_pipe_foreign', \ 'filename' def __init__(self, link_plain_same, link_plain_foreign, link_pipe_same, link_pipe_foreign, filename): self.link_plain_same = link_plain_same self.link_plain_foreign = link_plain_foreign self.link_pipe_same = link_pipe_same self.link_pipe_foreign = link_pipe_foreign self.filename = filename class VimH2H(object): def __init__(self, tags, version=None, is_web_version=True): self._urls = { } self._version = version self._is_web_version = is_web_version for line in RE_NEWLINE.split(tags): m = RE_TAGLINE.match(line) if m: tag, filename = m.group(1, 2) self.do_add_tag(filename, tag) def add_tags(self, filename, contents): for match in RE_STARTAG.finditer(contents): tag = match.group(1).replace('\\', '\\\\').replace('/', '\\/') self.do_add_tag(str(filename), tag) def do_add_tag(self, filename, tag): tag_quoted = urllib.parse.quote_plus(tag) def mkpart1(doc): return '' + html_escape[tag] + '' def mklinks(cssclass): return (part1_same + cssclass + part2, part1_foreign + cssclass + part2) cssclass_plain = 'd' m = RE_LINKWORD.match(tag) if m: opt, ctrl, special = m.groups() if opt is not None: cssclass_plain = 'o' elif ctrl is not None: cssclass_plain = 
'k' elif special is not None: cssclass_plain = 's' links_plain = mklinks(cssclass_plain) links_pipe = mklinks('l') self._urls[tag] = Link( links_plain[0], links_plain[1], links_pipe[0], links_pipe[1], filename) def maplink(self, tag, curr_filename, css_class=None): links = self._urls.get(tag) if links is not None: if links.filename == curr_filename: if css_class == 'l': return links.link_pipe_same else: return links.link_plain_same else: if css_class == 'l': return links.link_pipe_foreign else: return links.link_plain_foreign elif css_class is not None: return '' + html_escape[tag] + \ '' else: return html_escape[tag] def to_html(self, filename, contents, encoding): out = [ ] inexample = 0 filename = str(filename) is_help_txt = (filename == 'help.txt') faq_line = False for line in RE_NEWLINE.split(contents): line = line.rstrip('\r\n') line_tabs = line line = line.expandtabs() if RE_HRULE.match(line): out.extend(('', line, '\n')) continue if inexample == 2: if RE_EG_END.match(line): inexample = 0 if line[0] == '<': line = line[1:] else: out.extend(('', html_escape[line], '\n')) continue if RE_EG_START.match(line_tabs): inexample = 1 line = line[0:-1] if RE_SECTION.match(line_tabs): m = RE_SECTION.match(line) out.extend((r'', m.group(0), r'')) line = line[m.end():] if is_help_txt and RE_LOCAL_ADD.match(line_tabs): faq_line = True lastpos = 0 for match in RE_TAGWORD.finditer(line): pos = match.start() if pos > lastpos: out.append(html_escape[line[lastpos:pos]]) lastpos = match.end() header, graphic, pipeword, starword, command, opt, ctrl, \ special, title, note, url, word = match.groups() if pipeword is not None: out.append(self.maplink(pipeword, filename, 'l')) elif starword is not None: out.extend(('', html_escape[starword], '')) elif command is not None: out.extend(('', html_escape[command], '')) elif opt is not None: out.append(self.maplink(opt, filename, 'o')) elif ctrl is not None: out.append(self.maplink(ctrl, filename, 'k')) elif special is not None: 
out.append(self.maplink(special, filename, 's')) elif title is not None: out.extend(('', html_escape[title], '')) elif note is not None: out.extend(('', html_escape[note], '')) elif header is not None: out.extend(('', html_escape[header[:-1]], '')) elif graphic is not None: out.append(html_escape[graphic[:-2]]) elif url is not None: out.extend(('' + html_escape[url], '')) elif word is not None: out.append(self.maplink(word, filename)) if lastpos < len(line): out.append(html_escape[line[lastpos:]]) out.append('\n') if inexample == 1: inexample = 2 if faq_line: out.append(VIM_FAQ_LINE) faq_line = False header = [] header.append(HEAD.format(encoding=encoding, filename=filename)) header.append(HEAD_END) if self._is_web_version and is_help_txt: vers_note = VERSION_NOTE.replace('{version}', self._version) \ if self._version else '' header.append(INTRO.replace('{vers-note}', vers_note)) if self._is_web_version: header.append(SITENAVI_SEARCH) sitenavi_footer = SITENAVI_WEB else: header.append(SITENAVI_PLAIN) sitenavi_footer = SITENAVI_PLAIN header.append(TEXTSTART) return ''.join(chain(header, out, (FOOTER, sitenavi_footer, FOOTER2))) class HtmlEscCache(dict): def __missing__(self, key): r = key.replace('&', '&') \ .replace('<', '<') \ .replace('>', '>') self[key] = r return r html_escape = HtmlEscCache() import sys, os, os.path #import cProfile sys.path.append('.') def slurp(filename): try: with open(filename, encoding='UTF-8') as f: return f.read(), 'UTF-8' except UnicodeError: # 'ISO-8859-1' ? 
def slurp(filename):
    """Read a help file as text and report which encoding decoded it.

    The file is first read as UTF-8. If that raises ``UnicodeError`` the
    file is read again as latin-1.

    Args:
        filename: Path of the file to read.

    Returns:
        A ``(text, encoding_name)`` tuple; ``encoding_name`` is ``'UTF-8'``
        or ``'latin-1'``.
    """
    try:
        with open(filename, encoding='UTF-8') as f:
            return f.read(), 'UTF-8'
    except UnicodeError:
        # 'ISO-8859-1' ?
        with open(filename, encoding='latin-1') as f:
            return f.read(), 'latin-1'


def usage():
    """Return the one-line command-line synopsis for this script."""
    return "usage: " + sys.argv[0] + " IN_DIR OUT_DIR [BASENAMES...]"


def main():
    """Convert help files from IN_DIR into ``<basename>.html`` in OUT_DIR.

    Reads ``sys.argv``: IN_DIR, OUT_DIR, then optional basenames. With no
    basenames, every entry of IN_DIR is considered. Entries that are
    neither ``*.txt`` nor ``tags`` are reported and skipped.

    Raises:
        SystemExit: with the usage text when fewer than two arguments
            are given.
    """
    if len(sys.argv) < 3:
        sys.exit(usage())

    in_dir = sys.argv[1]
    out_dir = sys.argv[2]
    basenames = sys.argv[3:]

    print("Processing tags...")
    tags_text = slurp(os.path.join(in_dir, 'tags'))[0]
    h2h = VimH2H(tags_text, is_web_version=False)

    if not basenames:
        basenames = os.listdir(in_dir)

    for basename in basenames:
        if os.path.splitext(basename)[1] != '.txt' and basename != 'tags':
            print("Ignoring " + basename)
            continue
        print("Processing " + basename + "...")
        path = os.path.join(in_dir, basename)
        text, encoding = slurp(path)
        outpath = os.path.join(out_dir, basename + '.html')
        # Write with the encoding that is also handed to to_html() for the
        # page header, so the bytes on disk do not depend on the locale
        # default. The context manager closes the file even if the
        # conversion raises.
        with open(outpath, 'w', encoding=encoding) as of:
            of.write(h2h.to_html(basename, text, encoding))


# Only run the conversion when executed as a script, so importing this
# module does not read sys.argv or touch the filesystem.
if __name__ == '__main__':
    main()
#cProfile.run('main()')