From 11844dde81c41bded54f2383b57f8eef406f2736 Mon Sep 17 00:00:00 2001 From: Christian Clason Date: Sat, 1 Jul 2023 11:08:06 +0200 Subject: [PATCH] feat(treesitter): bundle markdown parser and queries (#22481) * bundle split Markdown parser from https://github.com/MDeiml/tree-sitter-markdown * add queries from https://github.com/nvim-treesitter/nvim-treesitter/tree/main * upstream `#trim!` and `#inject-language!` directives Co-authored-by: dundargoc --- cmake.deps/cmake/BuildTreesitterParsers.cmake | 1 + cmake.deps/cmake/MarkdownParserCMakeLists.txt | 28 ++++++ cmake.deps/deps.txt | 2 + runtime/doc/news.txt | 5 +- runtime/doc/treesitter.txt | 32 +++++++ runtime/lua/vim/treesitter/query.lua | 60 +++++++++++- runtime/queries/markdown/folds.scm | 9 ++ runtime/queries/markdown/highlights.scm | 63 +++++++++++++ runtime/queries/markdown/injections.scm | 26 ++++++ .../queries/markdown_inline/highlights.scm | 92 +++++++++++++++++++ .../queries/markdown_inline/injections.scm | 8 ++ test/functional/treesitter/parser_spec.lua | 2 +- 12 files changed, 325 insertions(+), 3 deletions(-) create mode 100644 cmake.deps/cmake/MarkdownParserCMakeLists.txt create mode 100644 runtime/queries/markdown/folds.scm create mode 100644 runtime/queries/markdown/highlights.scm create mode 100644 runtime/queries/markdown/injections.scm create mode 100644 runtime/queries/markdown_inline/highlights.scm create mode 100644 runtime/queries/markdown_inline/injections.scm diff --git a/cmake.deps/cmake/BuildTreesitterParsers.cmake b/cmake.deps/cmake/BuildTreesitterParsers.cmake index 56c8a5a7c6..89c1ec5cc9 100644 --- a/cmake.deps/cmake/BuildTreesitterParsers.cmake +++ b/cmake.deps/cmake/BuildTreesitterParsers.cmake @@ -37,3 +37,4 @@ endfunction() foreach(lang c lua vim vimdoc query) BuildTSParser(LANG ${lang}) endforeach() +BuildTSParser(LANG markdown CMAKE_FILE MarkdownParserCMakeLists.txt) diff --git a/cmake.deps/cmake/MarkdownParserCMakeLists.txt b/cmake.deps/cmake/MarkdownParserCMakeLists.txt new 
file mode 100644 index 0000000000..a356aaa391 --- /dev/null +++ b/cmake.deps/cmake/MarkdownParserCMakeLists.txt @@ -0,0 +1,28 @@ +cmake_minimum_required(VERSION 3.10) +project(${PARSERLANG} C) + +set(CMAKE_C_STANDARD 99) + +add_library(markdown MODULE +tree-sitter-markdown/src/parser.c +tree-sitter-markdown/src/scanner.c) +target_include_directories(markdown + PRIVATE + tree-sitter-markdown/src) + +add_library(markdown_inline MODULE +tree-sitter-markdown-inline/src/parser.c +tree-sitter-markdown-inline/src/scanner.c) +target_include_directories(markdown_inline + PRIVATE + tree-sitter-markdown-inline/src) + +set_target_properties( + markdown markdown_inline + PROPERTIES + PREFIX "" +) + +install(TARGETS markdown markdown_inline LIBRARY DESTINATION lib/nvim/parser) + +# vim: set ft=cmake: diff --git a/cmake.deps/deps.txt b/cmake.deps/deps.txt index 57b577dc33..fd0b4aa400 100644 --- a/cmake.deps/deps.txt +++ b/cmake.deps/deps.txt @@ -54,5 +54,7 @@ TREESITTER_VIMDOC_URL https://github.com/neovim/tree-sitter-vimdoc/archive/v2.0. TREESITTER_VIMDOC_SHA256 61e165df29778dc0c9277c2a7bc67447cc4e1bed36ca916a2f476dd25ce3260e TREESITTER_QUERY_URL https://github.com/nvim-treesitter/tree-sitter-query/archive/v0.1.0.tar.gz TREESITTER_QUERY_SHA256 e2b806f80e8bf1c4f4e5a96248393fe6622fc1fc6189d6896d269658f67f914c +TREESITTER_MARKDOWN_URL https://github.com/MDeiml/tree-sitter-markdown/archive/936cc84289f6de83c263ae8e659fb342867ceb16.tar.gz +TREESITTER_MARKDOWN_SHA256 4f2315930dc2c1bd42971a0b728cf4dafc57830c61f8abe3e2548cf230968713 TREESITTER_URL https://github.com/tree-sitter/tree-sitter/archive/91e4d940169a0c0b024560632ef53c4f119117ca.tar.gz TREESITTER_SHA256 e15e335d127d38aaa73e727f3169df6015f43de1010d806e69b9e9222ad50fe1 diff --git a/runtime/doc/news.txt b/runtime/doc/news.txt index 0a9b926b7f..5ee9cb7a14 100644 --- a/runtime/doc/news.txt +++ b/runtime/doc/news.txt @@ -101,6 +101,8 @@ The following new APIs and features were added. 
• Implemented LSP inlay hints: |vim.lsp.inlay_hint()| https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_inlayHint +• Bundled Markdown parser for treesitter highlighting and folding. + ============================================================================== CHANGED FEATURES *news-changed* @@ -120,7 +122,8 @@ The following changes to existing APIs or features add new behavior. • Automatic linting of treesitter query files (see |ft-query-plugin|). Can be disabled via: >lua vim.g.query_lint_on = {} -< • Enabled treesitter highlighting for treesitter query files. +< + • Enabled treesitter highlighting for treesitter query files. • The `workspace/didChangeWatchedFiles` LSP client capability is now enabled by default. diff --git a/runtime/doc/treesitter.txt b/runtime/doc/treesitter.txt index 28ab3ecd8e..7539417197 100644 --- a/runtime/doc/treesitter.txt +++ b/runtime/doc/treesitter.txt @@ -292,7 +292,39 @@ The following directives are built in: Example: >query ((identifier) @constant (#offset! @constant 0 1 0 -1)) < + `gsub!` *treesitter-directive-gsub!* + Transforms the content of the node using a Lua pattern. This will set + a new `metadata[capture_id].text`. + Parameters: ~ + {capture_id} + {pattern} + + Example: >query + (#gsub! @_node ".*%.(.*)" "%1") +< + `trim!` *treesitter-directive-trim!* + Trim blank lines from the end of the node. This will set a new + `metadata[capture_id].range`. + + Parameters: ~ + {capture_id} + + Example: >query + (#trim! @fold) +< + `inject-language!` *treesitter-directive-inject-language!* + Set the injection language from the node text, interpreted first as a + language name, then (if a parser is not found) a filetype. Custom + aliases can be added via |vim.treesitter.language.register()|. This + will set a new `metadata['injection.language']`. + + Parameters: ~ + {capture_id} + + Example: >query + (#inject-language!
@_lang) +< Further directives can be added via |vim.treesitter.query.add_directive()|. Use |vim.treesitter.query.list_directives()| to list all available directives. diff --git a/runtime/lua/vim/treesitter/query.lua b/runtime/lua/vim/treesitter/query.lua index 73b561c777..7f90fa10e8 100644 --- a/runtime/lua/vim/treesitter/query.lua +++ b/runtime/lua/vim/treesitter/query.lua @@ -475,7 +475,6 @@ local directive_handlers = { metadata[capture_id].range = range end end, - -- Transform the content of the node -- Example: (#gsub! @_node ".*%.(.*)" "%1") ['gsub!'] = function(match, _, bufnr, pred, metadata) @@ -497,6 +496,65 @@ local directive_handlers = { metadata[id].text = text:gsub(pattern, replacement) end, + -- Trim blank lines from end of the node; sets `metadata[capture_id].range` + -- Example: (#trim! @fold) + -- TODO(clason): generalize to arbitrary whitespace removal + ['trim!'] = function(match, _, bufnr, pred, metadata) + local id = pred[2] + local node = match[id] + if not node then + return + end + + local start_row, start_col, end_row, end_col = node:range() + + -- Don't trim if region ends in middle of a line + if end_col ~= 0 then + return + end + + while end_row >= start_row do + -- As we only care when end_col == 0, always inspect one line above end_row. + local end_line = vim.api.nvim_buf_get_lines(bufnr, end_row - 1, end_row, true)[1] + if end_line ~= '' then + break + end + end_row = end_row - 1 + end + + -- If this produces an invalid range, we just skip it. + if start_row < end_row or (start_row == end_row and start_col <= end_col) then + metadata[id] = metadata[id] or {} + metadata[id].range = { start_row, start_col, end_row, end_col } + end + end, + -- Set injection language from node text, interpreted first as language and then as filetype + -- Example: (#inject-language!
@_lang) + ['inject-language!'] = function(match, _, bufnr, pred, metadata) + local id = pred[2] + local node = match[id] + if not node then + return + end + + -- TODO(clason): replace by refactored `ts.has_parser` API + local has_parser = function(lang) + return vim._ts_has_language(lang) + or #vim.api.nvim_get_runtime_file('parser/' .. lang .. '.*', false) > 0 + end + + local alias = vim.treesitter.get_node_text(node, bufnr, { metadata = metadata[id] }) + if not alias then + return + elseif has_parser(alias) then + metadata['injection.language'] = alias + else + local lang = vim.treesitter.language.get_lang(alias) + if lang and has_parser(lang) then + metadata['injection.language'] = lang + end + end + end, } --- Adds a new predicate to be used in queries diff --git a/runtime/queries/markdown/folds.scm b/runtime/queries/markdown/folds.scm new file mode 100644 index 0000000000..5900f7ffbe --- /dev/null +++ b/runtime/queries/markdown/folds.scm @@ -0,0 +1,9 @@ +( + [ + (fenced_code_block) + (indented_code_block) + (list) + (section) + ] @fold + (#trim! 
@fold) +) diff --git a/runtime/queries/markdown/highlights.scm b/runtime/queries/markdown/highlights.scm new file mode 100644 index 0000000000..e78d233cc6 --- /dev/null +++ b/runtime/queries/markdown/highlights.scm @@ -0,0 +1,63 @@ +;From MDeiml/tree-sitter-markdown & Helix +(setext_heading (paragraph) @text.title.1 (setext_h1_underline) @text.title.1.marker) +(setext_heading (paragraph) @text.title.2 (setext_h2_underline) @text.title.2.marker) + +(atx_heading (atx_h1_marker) @text.title.1.marker (inline) @text.title.1) +(atx_heading (atx_h2_marker) @text.title.2.marker (inline) @text.title.2) +(atx_heading (atx_h3_marker) @text.title.3.marker (inline) @text.title.3) +(atx_heading (atx_h4_marker) @text.title.4.marker (inline) @text.title.4) +(atx_heading (atx_h5_marker) @text.title.5.marker (inline) @text.title.5) +(atx_heading (atx_h6_marker) @text.title.6.marker (inline) @text.title.6) + +(link_title) @text.literal +(indented_code_block) @text.literal.block +((fenced_code_block) @text.literal.block (#set! 
"priority" 90)) + +(info_string) @label + +(pipe_table_header (pipe_table_cell) @text.title) + +(pipe_table_header "|" @punctuation.special) +(pipe_table_row "|" @punctuation.special) +(pipe_table_delimiter_row "|" @punctuation.special) +(pipe_table_delimiter_cell) @punctuation.special + +[ + (fenced_code_block_delimiter) +] @punctuation.delimiter + +(code_fence_content) @none + +[ + (link_destination) +] @text.uri + +[ + (link_label) +] @text.reference + +[ + (list_marker_plus) + (list_marker_minus) + (list_marker_star) + (list_marker_dot) + (list_marker_parenthesis) + (thematic_break) +] @punctuation.special + + +(task_list_marker_unchecked) @text.todo.unchecked +(task_list_marker_checked) @text.todo.checked + +(block_quote) @text.quote + +[ + (block_continuation) + (block_quote_marker) +] @punctuation.special + +[ + (backslash_escape) +] @string.escape + +(inline) @spell diff --git a/runtime/queries/markdown/injections.scm b/runtime/queries/markdown/injections.scm new file mode 100644 index 0000000000..0bead6f4ac --- /dev/null +++ b/runtime/queries/markdown/injections.scm @@ -0,0 +1,26 @@ +(fenced_code_block + (info_string + (language) @_lang) + (code_fence_content) @injection.content + (#inject-language! @_lang)) + +((html_block) @injection.content + (#set! injection.language "html") + (#set! injection.combined) + (#set! injection.include-children)) + +((minus_metadata) @injection.content + (#set! injection.language "yaml") + (#offset! @injection.content 1 0 -1 0) + (#set! injection.include-children)) + +((plus_metadata) @injection.content + (#set! injection.language "toml") + (#offset! @injection.content 1 0 -1 0) + (#set! injection.include-children)) + +([ + (inline) + (pipe_table_cell) + ] @injection.content + (#set! 
injection.language "markdown_inline")) diff --git a/runtime/queries/markdown_inline/highlights.scm b/runtime/queries/markdown_inline/highlights.scm new file mode 100644 index 0000000000..a70e34bb87 --- /dev/null +++ b/runtime/queries/markdown_inline/highlights.scm @@ -0,0 +1,92 @@ +;; From MDeiml/tree-sitter-markdown +[ + (code_span) + (link_title) +] @text.literal @nospell + +[ + (emphasis_delimiter) + (code_span_delimiter) +] @punctuation.delimiter + +(emphasis) @text.emphasis + +(strong_emphasis) @text.strong + +(strikethrough) @text.strike + +[ + (link_destination) + (uri_autolink) +] @text.uri @nospell + +[ + (link_label) + (link_text) + (image_description) +] @text.reference + +[ + (backslash_escape) + (hard_line_break) +] @string.escape + +(image "!" @punctuation.special) +(image ["[" "]" "(" ")"] @punctuation.bracket) +(inline_link ["[" "]" "(" ")"] @punctuation.bracket) +(shortcut_link ["[" "]"] @punctuation.bracket) + +; Conceal codeblock and text style markers +([ + (code_span_delimiter) + (emphasis_delimiter) +] @conceal +(#set! conceal "")) + +; Conceal inline links +(inline_link + [ + "[" + "]" + "(" + (link_destination) + ")" + ] @conceal + (#set! conceal "")) + +; Conceal image links +(image + [ + "!" + "[" + "]" + "(" + (link_destination) + ")" + ] @conceal + (#set! conceal "")) + +; Conceal full reference links +(full_reference_link + [ + "[" + "]" + (link_label) + ] @conceal + (#set! conceal "")) + +; Conceal collapsed reference links +(collapsed_reference_link + [ + "[" + "]" + ] @conceal + (#set! conceal "")) + +; Conceal shortcut links +(shortcut_link + [ + "[" + "]" + ] @conceal + (#set! conceal "")) diff --git a/runtime/queries/markdown_inline/injections.scm b/runtime/queries/markdown_inline/injections.scm new file mode 100644 index 0000000000..f7aa19caff --- /dev/null +++ b/runtime/queries/markdown_inline/injections.scm @@ -0,0 +1,8 @@ +((html_tag) @injection.content + (#set! injection.language "html") + (#set! injection.combined) + (#set! 
injection.include-children)) + +((latex_block) @injection.content + (#set! injection.language "latex") + (#set! injection.include-children)) diff --git a/test/functional/treesitter/parser_spec.lua b/test/functional/treesitter/parser_spec.lua index d2f40bfe00..7cfe5b69de 100644 --- a/test/functional/treesitter/parser_spec.lua +++ b/test/functional/treesitter/parser_spec.lua @@ -783,7 +783,7 @@ int x = INT_MAX; return list ]] - eq({ 'gsub!', 'offset!', 'set!' }, res_list) + eq({ 'gsub!', 'inject-language!', 'offset!', 'set!', 'trim!' }, res_list) end) end) end)