From 245035d6db7399cc077de0eaa8e97e2eecc08a9b Mon Sep 17 00:00:00 2001 From: zeertzjq Date: Fri, 18 Nov 2022 13:25:21 +0800 Subject: [PATCH] vim-patch:8.2.4494: the find_tags() function is much too long Problem: The find_tags() function is much too long. Solution: Refactor the function. (Yegappan Lakshmanan, closes vim/vim#9869) https://github.com/vim/vim/commit/2f87a99b6e9b559d51e130769e7f8377db6749f8 Co-authored-by: Yegappan Lakshmanan --- src/nvim/quickfix.c | 15 +- src/nvim/tag.c | 1527 +++++++++++++++-------------- src/nvim/testdir/test_tagjump.vim | 5 + 3 files changed, 804 insertions(+), 743 deletions(-) diff --git a/src/nvim/quickfix.c b/src/nvim/quickfix.c index d7590f6f57..27a0842016 100644 --- a/src/nvim/quickfix.c +++ b/src/nvim/quickfix.c @@ -4308,10 +4308,17 @@ void ex_make(exarg_T *eap) incr_quickfix_busy(); - int res = qf_init(wp, fname, (eap->cmdidx != CMD_make - && eap->cmdidx != CMD_lmake) ? p_gefm : p_efm, - (eap->cmdidx != CMD_grepadd && eap->cmdidx != CMD_lgrepadd), - qf_cmdtitle(*eap->cmdlinep), enc); + char *errorformat = p_efm; + bool newlist = true; + + if (eap->cmdidx != CMD_make && eap->cmdidx != CMD_lmake) { + errorformat = p_gefm; + } + if (eap->cmdidx == CMD_grepadd || eap->cmdidx == CMD_lgrepadd) { + newlist = false; + } + + int res = qf_init(wp, fname, errorformat, newlist, qf_cmdtitle(*eap->cmdlinep), enc); qf_info_T *qi = &ql_info; if (wp != NULL) { diff --git a/src/nvim/tag.c b/src/nvim/tag.c index 36d6432c06..e309f05f62 100644 --- a/src/nvim/tag.c +++ b/src/nvim/tag.c @@ -110,6 +110,23 @@ static char *mt_names[MT_COUNT/2] = #define NOTAGFILE 99 // return value for jumpto_tag static char *nofile_fname = NULL; // fname for NOTAGFILE error +/// State information used during a tag search +typedef struct { + pat_T orgpat; ///< holds unconverted pattern info + char *help_lang_find; ///< lang to be found + bool is_txt; ///< flag of file extension + bool did_open; ///< did open a tag file + int mincount; ///< MAXCOL: find all matches + 
///< other: minimal number of matches + bool linear; ///< do a linear search + char *lbuf; ///< line buffer + int lbuf_size; ///< length of lbuf + int match_count; ///< number of matches found + garray_T ga_match[MT_COUNT]; ///< stores matches in sequence + hashtab_T ht_match[MT_COUNT]; ///< stores matches by key + bool stop_searching; ///< stop when match found or error +} findtags_state_T; + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "tag.c.generated.h" #endif @@ -1355,6 +1372,740 @@ static int find_tagfunc_tags(char_u *pat, garray_T *ga, int *match_count, int fl return result; } +/// Initialize the state used by find_tags() +static void findtags_state_init(findtags_state_T *st, char *pat, int mincount) +{ + st->orgpat.pat = (char_u *)pat; + st->orgpat.len = (int)strlen(pat); + st->orgpat.regmatch.regprog = NULL; + st->help_lang_find = NULL; + st->is_txt = false; + st->did_open = false; + st->mincount = mincount; + st->lbuf_size = LSIZE; + st->lbuf = xmalloc((size_t)st->lbuf_size); + st->match_count = 0; + st->stop_searching = false; + + for (int mtt = 0; mtt < MT_COUNT; mtt++) { + ga_init(&st->ga_match[mtt], sizeof(char *), 100); + hash_init(&st->ht_match[mtt]); + } +} + +/// Search for tags in the "tag_fname" tags file. +/// Information needed to search for the tags is in the "st" state structure. +/// The matching tags are returned in "st". 
+static void find_tags_in_file(char *tag_fname, findtags_state_T *st, int flags, char *buf_ffname) +{ + FILE *fp; + tagptrs_T tagp; + int is_static; // current tag line is static + int is_current; // file name matches + bool eof = false; // found end-of-file + char *p; + char_u *s; + int i; + int help_pri = 0; + char_u help_lang[3]; // lang of current tags file + int tag_file_sorted = NUL; // !_TAG_FILE_SORTED value + int tagcmp; + off_T offset; + enum { + TS_START, ///< at start of file + TS_LINEAR, ///< linear searching forward, till EOF + TS_BINARY, ///< binary searching + TS_SKIP_BACK, ///< skipping backwards + TS_STEP_FORWARD, ///< stepping forwards + } state; // Current search state + struct tag_search_info { // Binary search file offsets + off_T low_offset; // offset for first char of first line that + // could match + off_T high_offset; // offset of char after last line that could + // match + off_T curr_offset; // Current file offset in search range + off_T curr_offset_used; // curr_offset used when skipping back + off_T match_offset; // Where the binary search found a tag + int low_char; // first char at low_offset + int high_char; // first char at high_offset + } search_info; + + int cmplen; + int match; // matches + int match_no_ic = 0; // matches with rm_ic == false + int match_re; // match with regexp + int matchoff = 0; + + char *mfp; + int mtt; + hash_T hash = 0; + + bool sort_error = false; // tags file not sorted + bool sortic = false; // tag file sorted in nocase + int noic = (flags & TAG_NOIC); + bool line_error = false; // syntax error + int has_re = (flags & TAG_REGEXP); // regexp used + int help_only = (flags & TAG_HELP); + int name_only = (flags & TAG_NAMES); + int get_it_again = false; + vimconv_T vimconv; + + vimconv.vc_type = CONV_NONE; + + // This is only to avoid a compiler warning for using search_info + // uninitialised. + CLEAR_FIELD(search_info); + + // A file that doesn't exist is silently ignored. 
Only when not a + // single file is found, an error message is given (further on). + if (curbuf->b_help) { + // Keep en if the file extension is .txt + if (st->is_txt) { + STRCPY(help_lang, "en"); + } else { + // Prefer help tags according to 'helplang'. Put the + // two-letter language name in help_lang[]. + i = (int)STRLEN(tag_fname); + if (i > 3 && tag_fname[i - 3] == '-') { + STRCPY(help_lang, tag_fname + i - 2); + } else { + STRCPY(help_lang, "en"); + } + } + + // When searching for a specific language skip tags files + // for other languages. + if (st->help_lang_find != NULL + && STRICMP(help_lang, st->help_lang_find) != 0) { + return; + } + + // For CTRL-] in a help file prefer a match with the same + // language. + if ((flags & TAG_KEEP_LANG) + && st->help_lang_find == NULL + && curbuf->b_fname != NULL + && (i = (int)strlen(curbuf->b_fname)) > 4 + && curbuf->b_fname[i - 1] == 'x' + && curbuf->b_fname[i - 4] == '.' + && STRNICMP(curbuf->b_fname + i - 3, help_lang, 2) == 0) { + help_pri = 0; + } else { + help_pri = 1; + for (s = p_hlg; *s != NUL; s++) { + if (STRNICMP(s, help_lang, 2) == 0) { + break; + } + help_pri++; + if ((s = (char_u *)vim_strchr((char *)s, ',')) == NULL) { + break; + } + } + if (s == NULL || *s == NUL) { + // Language not in 'helplang': use last, prefer English, + // unless found already. 
+ help_pri++; + if (STRICMP(help_lang, "en") != 0) { + help_pri++; + } + } + } + } + + if ((fp = os_fopen(tag_fname, "r")) == NULL) { + return; + } + + if (p_verbose >= 5) { + verbose_enter(); + smsg(_("Searching tags file %s"), tag_fname); + verbose_leave(); + } + + st->did_open = true; // remember that we found at least one file + + state = TS_START; // we're at the start of the file + + // Read and parse the lines in the file one by one + for (;;) { + // check for CTRL-C typed, more often when jumping around + if (state == TS_BINARY || state == TS_SKIP_BACK) { + line_breakcheck(); + } else { + fast_breakcheck(); + } + if ((flags & TAG_INS_COMP)) { // Double brackets for gcc + ins_compl_check_keys(30, false); + } + if (got_int || ins_compl_interrupted()) { + st->stop_searching = true; + break; + } + // When mincount is TAG_MANY, stop when enough matches have been + // found (for completion). + if (st->mincount == TAG_MANY && st->match_count >= TAG_MANY) { + st->stop_searching = true; + return; + } + if (get_it_again) { + goto line_read_in; + } + // For binary search: compute the next offset to use. + if (state == TS_BINARY) { + offset = search_info.low_offset + ((search_info.high_offset + - search_info.low_offset) / 2); + if (offset == search_info.curr_offset) { + break; // End the binary search without a match. + } + search_info.curr_offset = offset; + } else if (state == TS_SKIP_BACK) { + // Skipping back (after a match during binary search). + search_info.curr_offset -= st->lbuf_size * 2; + if (search_info.curr_offset < 0) { + search_info.curr_offset = 0; + rewind(fp); + state = TS_STEP_FORWARD; + } + } + + // When jumping around in the file, first read a line to find the + // start of the next line. 
+ if (state == TS_BINARY || state == TS_SKIP_BACK) { + // Adjust the search file offset to the correct position + search_info.curr_offset_used = search_info.curr_offset; + vim_fseek(fp, search_info.curr_offset, SEEK_SET); + eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, fp); + if (!eof && search_info.curr_offset != 0) { + search_info.curr_offset = vim_ftell(fp); + if (search_info.curr_offset == search_info.high_offset) { + // oops, gone a bit too far; try from low offset + vim_fseek(fp, search_info.low_offset, SEEK_SET); + search_info.curr_offset = search_info.low_offset; + } + eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, fp); + } + // skip empty and blank lines + while (!eof && vim_isblankline(st->lbuf)) { + search_info.curr_offset = vim_ftell(fp); + eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, fp); + } + if (eof) { + // Hit end of file. Skip backwards. + state = TS_SKIP_BACK; + search_info.match_offset = vim_ftell(fp); + search_info.curr_offset = search_info.curr_offset_used; + continue; + } + } else { + // Not jumping around in the file: Read the next line. + + // skip empty and blank lines + do { + search_info.curr_offset = vim_ftell(fp); + eof = vim_fgets((char_u *)st->lbuf, st->lbuf_size, fp); + } while (!eof && vim_isblankline(st->lbuf)); + + if (eof) { + break; // end of file + } + } +line_read_in: + + if (vimconv.vc_type != CONV_NONE) { + char *conv_line; + int len; + + // Convert every line. Converting the pattern from 'enc' to + // the tags file encoding doesn't work, because characters are + // not recognized. + conv_line = string_convert(&vimconv, st->lbuf, NULL); + if (conv_line != NULL) { + // Copy or swap lbuf and conv_line. + len = (int)strlen(conv_line) + 1; + if (len > st->lbuf_size) { + xfree(st->lbuf); + st->lbuf = conv_line; + st->lbuf_size = len; + } else { + STRCPY(st->lbuf, conv_line); + xfree(conv_line); + } + } + } + + // When still at the start of the file, check for Emacs tags file + // format, and for "not sorted" flag. 
+ if (state == TS_START) { + // The header ends when the line sorts below "!_TAG_". When + // case is folded lower case letters sort before "_". + if (strncmp(st->lbuf, "!_TAG_", 6) <= 0 + || (st->lbuf[0] == '!' && ASCII_ISLOWER(st->lbuf[1]))) { + if (strncmp(st->lbuf, "!_TAG_", 6) != 0) { + // Non-header item before the header, e.g. "!" itself. + goto parse_line; + } + + // Read header line. + if (strncmp(st->lbuf, "!_TAG_FILE_SORTED\t", 18) == 0) { + tag_file_sorted = (uint8_t)st->lbuf[18]; + } + if (strncmp(st->lbuf, "!_TAG_FILE_ENCODING\t", 20) == 0) { + // Prepare to convert every line from the specified + // encoding to 'encoding'. + for (p = st->lbuf + 20; *p > ' ' && *p < 127; p++) {} + *p = NUL; + convert_setup(&vimconv, st->lbuf + 20, p_enc); + } + + // Read the next line. Unrecognized flags are ignored. + continue; + } + + // Headers ends. + + // When there is no tag head, or ignoring case, need to do a + // linear search. + // When no "!_TAG_" is found, default to binary search. If + // the tag file isn't sorted, the second loop will find it. + // When "!_TAG_FILE_SORTED" found: start binary search if + // flag set. + if (st->linear) { + state = TS_LINEAR; + } else if (tag_file_sorted == NUL) { + state = TS_BINARY; + } else if (tag_file_sorted == '1') { + state = TS_BINARY; + } else if (tag_file_sorted == '2') { + state = TS_BINARY; + sortic = true; + st->orgpat.regmatch.rm_ic = (p_ic || !noic); + } else { + state = TS_LINEAR; + } + + if (state == TS_BINARY && st->orgpat.regmatch.rm_ic && !sortic) { + // Binary search won't work for ignoring case, use linear + // search. + st->linear = true; + state = TS_LINEAR; + } + + // When starting a binary search, get the size of the file and + // compute the first offset. + if (state == TS_BINARY) { + if (vim_fseek(fp, 0, SEEK_END) != 0) { + // can't seek, don't use binary search + state = TS_LINEAR; + } else { + // Get the tag file size. + // Don't use lseek(), it doesn't work + // properly on MacOS Catalina. 
+ const off_T filesize = vim_ftell(fp); + vim_fseek(fp, 0, SEEK_SET); + + // Calculate the first read offset in the file. Start + // the search in the middle of the file. + search_info.low_offset = 0; + search_info.low_char = 0; + search_info.high_offset = filesize; + search_info.curr_offset = 0; + search_info.high_char = 0xff; + } + continue; + } + } + +parse_line: + // When the line is too long the NUL will not be in the + // last-but-one byte (see vim_fgets()). + // Has been reported for Mozilla JS with extremely long names. + // In that case we need to increase lbuf_size. + if (st->lbuf[st->lbuf_size - 2] != NUL) { + st->lbuf_size *= 2; + xfree(st->lbuf); + st->lbuf = xmalloc((size_t)st->lbuf_size); + + if (state == TS_STEP_FORWARD) { + // Seek to the same position to read the same line again + vim_fseek(fp, search_info.curr_offset, SEEK_SET); + } + // this will try the same thing again, make sure the offset is + // different + search_info.curr_offset = 0; + continue; + } + + // Figure out where the different strings are in this line. + // For "normal" tags: Do a quick check if the tag matches. + // This speeds up tag searching a lot! + if (st->orgpat.headlen) { + CLEAR_FIELD(tagp); + tagp.tagname = st->lbuf; + tagp.tagname_end = (char_u *)vim_strchr(st->lbuf, TAB); + if (tagp.tagname_end == NULL) { + // Corrupted tag line. + line_error = true; + break; + } + + // Skip this line if the length of the tag is different and + // there is no regexp, or the tag is too short. + cmplen = (int)(tagp.tagname_end - (char_u *)tagp.tagname); + if (p_tl != 0 && cmplen > p_tl) { // adjust for 'taglength' + cmplen = (int)p_tl; + } + if (has_re && st->orgpat.headlen < cmplen) { + cmplen = st->orgpat.headlen; + } else if (state == TS_LINEAR && st->orgpat.headlen != cmplen) { + continue; + } + + if (state == TS_BINARY) { + // Simplistic check for unsorted tags file. 
+ i = (int)tagp.tagname[0]; + if (sortic) { + i = TOUPPER_ASC(tagp.tagname[0]); + } + if (i < search_info.low_char || i > search_info.high_char) { + sort_error = true; + } + + // Compare the current tag with the searched tag. + if (sortic) { + tagcmp = tag_strnicmp((char_u *)tagp.tagname, st->orgpat.head, + (size_t)cmplen); + } else { + tagcmp = STRNCMP(tagp.tagname, st->orgpat.head, cmplen); + } + + // A match with a shorter tag means to search forward. + // A match with a longer tag means to search backward. + if (tagcmp == 0) { + if (cmplen < st->orgpat.headlen) { + tagcmp = -1; + } else if (cmplen > st->orgpat.headlen) { + tagcmp = 1; + } + } + + if (tagcmp == 0) { + // We've located the tag, now skip back and search + // forward until the first matching tag is found. + state = TS_SKIP_BACK; + search_info.match_offset = search_info.curr_offset; + continue; + } + if (tagcmp < 0) { + search_info.curr_offset = vim_ftell(fp); + if (search_info.curr_offset < search_info.high_offset) { + search_info.low_offset = search_info.curr_offset; + if (sortic) { + search_info.low_char = + TOUPPER_ASC(tagp.tagname[0]); + } else { + search_info.low_char = (uint8_t)tagp.tagname[0]; + } + continue; + } + } + if (tagcmp > 0 + && search_info.curr_offset != search_info.high_offset) { + search_info.high_offset = search_info.curr_offset; + if (sortic) { + search_info.high_char = + TOUPPER_ASC(tagp.tagname[0]); + } else { + search_info.high_char = (uint8_t)tagp.tagname[0]; + } + continue; + } + + // No match yet and are at the end of the binary search. + break; + } else if (state == TS_SKIP_BACK) { + assert(cmplen >= 0); + if (mb_strnicmp(tagp.tagname, (char *)st->orgpat.head, (size_t)cmplen) != 0) { + state = TS_STEP_FORWARD; + } else { + // Have to skip back more. Restore the curr_offset + // used, otherwise we get stuck at a long line. 
+ search_info.curr_offset = search_info.curr_offset_used; + } + continue; + } else if (state == TS_STEP_FORWARD) { + assert(cmplen >= 0); + if (mb_strnicmp(tagp.tagname, (char *)st->orgpat.head, (size_t)cmplen) != 0) { + if ((off_T)vim_ftell(fp) > search_info.match_offset) { + break; // past last match + } else { + continue; // before first match + } + } + } else { + // skip this match if it can't match + assert(cmplen >= 0); + if (mb_strnicmp(tagp.tagname, (char *)st->orgpat.head, (size_t)cmplen) != 0) { + continue; + } + } + + // Can be a matching tag, isolate the file name and command. + tagp.fname = tagp.tagname_end + 1; + tagp.fname_end = (char_u *)vim_strchr((char *)tagp.fname, TAB); + tagp.command = tagp.fname_end + 1; + if (tagp.fname_end == NULL) { + i = FAIL; + } else { + i = OK; + } + } else { + i = parse_tag_line((char_u *)st->lbuf, &tagp); + } + if (i == FAIL) { + line_error = true; + break; + } + + // First try matching with the pattern literally (also when it is + // a regexp). + cmplen = (int)(tagp.tagname_end - (char_u *)tagp.tagname); + if (p_tl != 0 && cmplen > p_tl) { // adjust for 'taglength' + cmplen = (int)p_tl; + } + // if tag length does not match, don't try comparing + if (st->orgpat.len != cmplen) { + match = false; + } else { + if (st->orgpat.regmatch.rm_ic) { + assert(cmplen >= 0); + match = mb_strnicmp(tagp.tagname, (char *)st->orgpat.pat, (size_t)cmplen) == 0; + if (match) { + match_no_ic = (STRNCMP(tagp.tagname, st->orgpat.pat, + cmplen) == 0); + } + } else { + match = (STRNCMP(tagp.tagname, st->orgpat.pat, cmplen) == 0); + } + } + + // Has a regexp: Also find tags matching regexp. 
+ match_re = false; + if (!match && st->orgpat.regmatch.regprog != NULL) { + int cc; + + cc = *tagp.tagname_end; + *tagp.tagname_end = NUL; + match = vim_regexec(&st->orgpat.regmatch, tagp.tagname, (colnr_T)0); + if (match) { + matchoff = (int)(st->orgpat.regmatch.startp[0] - tagp.tagname); + if (st->orgpat.regmatch.rm_ic) { + st->orgpat.regmatch.rm_ic = false; + match_no_ic = vim_regexec(&st->orgpat.regmatch, tagp.tagname, (colnr_T)0); + st->orgpat.regmatch.rm_ic = true; + } + } + *tagp.tagname_end = (char_u)cc; + match_re = true; + } + + // If a match is found, add it to ht_match[] and ga_match[]. + if (match) { + size_t len = 0; + + // Decide in which array to store this match. + is_current = test_for_current((char *)tagp.fname, (char *)tagp.fname_end, + tag_fname, + buf_ffname); + is_static = test_for_static(&tagp); + + // Decide in which of the sixteen tables to store this match. + if (is_static) { + if (is_current) { + mtt = MT_ST_CUR; + } else { + mtt = MT_ST_OTH; + } + } else { + if (is_current) { + mtt = MT_GL_CUR; + } else { + mtt = MT_GL_OTH; + } + } + if (st->orgpat.regmatch.rm_ic && !match_no_ic) { + mtt += MT_IC_OFF; + } + if (match_re) { + mtt += MT_RE_OFF; + } + + // Add the found match in ht_match[mtt] and ga_match[mtt]. + // Store the info we need later, which depends on the kind of + // tags we are dealing with. + if (help_only) { +#define ML_EXTRA 3 + // Append the help-heuristic number after the tagname, for + // sorting it later. The heuristic is ignored for + // detecting duplicates. + // The format is {tagname}@{lang}NUL{heuristic}NUL + *tagp.tagname_end = NUL; + len = (size_t)(tagp.tagname_end - (char_u *)tagp.tagname); + mfp = xmalloc(sizeof(char) + len + 10 + ML_EXTRA + 1); + + p = mfp; + STRCPY(p, tagp.tagname); + p[len] = '@'; + STRCPY(p + len + 1, help_lang); + snprintf(p + len + 1 + ML_EXTRA, strlen(p) + len + 1 + ML_EXTRA, "%06d", + help_heuristic(tagp.tagname, + match_re ? 
matchoff : 0, !match_no_ic) + help_pri); + + *tagp.tagname_end = TAB; + } else if (name_only) { + if (get_it_again) { + char_u *temp_end = tagp.command; + + if (*temp_end == '/') { + while (*temp_end && *temp_end != '\r' + && *temp_end != '\n' + && *temp_end != '$') { + temp_end++; + } + } + + if (tagp.command + 2 < temp_end) { + len = (size_t)(temp_end - tagp.command - 2); + mfp = xmalloc(len + 2); + STRLCPY(mfp, tagp.command + 2, len + 1); + } else { + mfp = NULL; + } + get_it_again = false; + } else { + len = (size_t)(tagp.tagname_end - (char_u *)tagp.tagname); + mfp = xmalloc(sizeof(char) + len + 1); + STRLCPY(mfp, tagp.tagname, len + 1); + + // if wanted, re-read line to get long form too + if (State & MODE_INSERT) { + get_it_again = p_sft; + } + } + } else { + size_t tag_fname_len = strlen(tag_fname); + // Save the tag in a buffer. + // Use 0x02 to separate fields (Can't use NUL, because the + // hash key is terminated by NUL). + // Emacs tag: <0x02><0x02> + // other tag: <0x02><0x02> + // without Emacs tags: <0x02> + // Here is the "mtt" value plus 1 to avoid NUL. + len = tag_fname_len + strlen(st->lbuf) + 3; + mfp = xmalloc(sizeof(char) + len + 1); + p = mfp; + p[0] = (char)(mtt + 1); + STRCPY(p + 1, tag_fname); +#ifdef BACKSLASH_IN_FILENAME + // Ignore differences in slashes, avoid adding + // both path/file and path\file. + slash_adjust(p + 1); +#endif + p[tag_fname_len + 1] = TAG_SEP; + s = (char_u *)p + 1 + tag_fname_len + 1; + STRCPY(s, st->lbuf); + } + + if (mfp != NULL) { + hashitem_T *hi; + + // Don't add identical matches. + // "mfp" is used as a hash key, there is a NUL byte to end + // the part that matters for comparing, more bytes may + // follow after it. E.g. help tags store the priority + // after the NUL. 
+ hash = hash_hash((char_u *)mfp); + hi = hash_lookup(&st->ht_match[mtt], (const char *)mfp, strlen(mfp), hash); + if (HASHITEM_EMPTY(hi)) { + hash_add_item(&st->ht_match[mtt], hi, (char_u *)mfp, hash); + GA_APPEND(char *, &st->ga_match[mtt], mfp); + st->match_count++; + } else { + // duplicate tag, drop it + xfree(mfp); + } + } + } + } // forever + + if (line_error) { + semsg(_("E431: Format error in tags file \"%s\""), tag_fname); + semsg(_("Before byte %" PRId64), (int64_t)vim_ftell(fp)); + st->stop_searching = true; + line_error = false; + } + + fclose(fp); + if (vimconv.vc_type != CONV_NONE) { + convert_setup(&vimconv, NULL, NULL); + } + + tag_file_sorted = NUL; + if (sort_error) { + semsg(_("E432: Tags file not sorted: %s"), tag_fname); + sort_error = false; + } + + // Stop searching if sufficient tags have been found. + if (st->match_count >= st->mincount) { + st->stop_searching = true; + } +} + +/// Copy the tags found by find_tags() to "matchesp". +static void findtags_copy_matches(findtags_state_T *st, char ***matchesp, int *num_matches, + int name_only) +{ + char **matches; + int mtt; + int i; + char *mfp; + char *p; + + if (st->match_count > 0) { + matches = xmalloc((size_t)st->match_count * sizeof(char *)); + } else { + matches = NULL; + } + st->match_count = 0; + for (mtt = 0; mtt < MT_COUNT; mtt++) { + for (i = 0; i < st->ga_match[mtt].ga_len; i++) { + mfp = ((char **)(st->ga_match[mtt].ga_data))[i]; + if (matches == NULL) { + xfree(mfp); + } else { + if (!name_only) { + // Change mtt back to zero-based. 
+ *mfp = (char)(*mfp - 1); + + // change the TAG_SEP back to NUL + for (p = mfp + 1; *p != NUL; p++) { + if (*p == TAG_SEP) { + *p = NUL; + } + } + } + matches[st->match_count++] = mfp; + } + } + + ga_clear(&st->ga_match[mtt]); + hash_clear(&st->ht_match[mtt]); + } + + *matchesp = matches; + *num_matches = st->match_count; +} + /// find_tags() - search for tags in tags files /// /// Return FAIL if search completely failed (*num_matches will be 0, *matchesp @@ -1383,85 +2134,30 @@ static int find_tagfunc_tags(char_u *pat, garray_T *ga, int *match_count, int fl /// @param pat pattern to search for /// @param num_matches return: number of matches found /// @param matchesp return: array of matches found -/// @param mincount MAXCOL: find all matches other: minimal number of matches */ +/// @param mincount MAXCOL: find all matches +/// other: minimal number of matches /// @param buf_ffname name of buffer for priority int find_tags(char *pat, int *num_matches, char ***matchesp, int flags, int mincount, char *buf_ffname) { - FILE *fp; - char *lbuf; // line buffer - int lbuf_size = LSIZE; // length of lbuf + findtags_state_T st; char *tag_fname; // name of tag file tagname_T tn; // info for get_tagfname() int first_file; // trying first tag file - tagptrs_T tagp; - bool did_open = false; // did open a tag file - bool stop_searching = false; // stop when match found or error int retval = FAIL; // return value - int is_static; // current tag line is static - int is_current; // file name matches - bool eof = false; // found end-of-file - char *p; - char_u *s; - int i; - int tag_file_sorted = NUL; // !_TAG_FILE_SORTED value - struct tag_search_info { // Binary search file offsets - off_T low_offset; // offset for first char of first line that - // could match - off_T high_offset; // offset of char after last line that could - // match - off_T curr_offset; // Current file offset in search range - off_T curr_offset_used; // curr_offset used when skipping back - off_T 
match_offset; // Where the binary search found a tag - int low_char; // first char at low_offset - int high_char; // first char at high_offset - } search_info; - int tagcmp; - off_T offset; int round; - enum { - TS_START, // at start of file - TS_LINEAR, // linear searching forward, till EOF - TS_BINARY, // binary searching - TS_SKIP_BACK, // skipping backwards - TS_STEP_FORWARD, // stepping forwards - } state; // Current search state - int cmplen; - int match; // matches - int match_no_ic = 0; // matches with rm_ic == false - int match_re; // match with regexp - int matchoff = 0; int save_emsg_off; - char *mfp; - garray_T ga_match[MT_COUNT]; // stores matches in sequence - hashtab_T ht_match[MT_COUNT]; // stores matches by key - hash_T hash = 0; - int match_count = 0; // number of matches found - char **matches; - int mtt; int help_save; - int help_pri = 0; - char_u *help_lang_find = NULL; // lang to be found - char_u help_lang[3]; // lang of current tags file + int i; char *saved_pat = NULL; // copy of pat[] - bool is_txt = false; - pat_T orgpat; // holds unconverted pattern info - vimconv_T vimconv; - - int findall = (mincount == MAXCOL || mincount == TAG_MANY); - // find all matching tags - bool sort_error = false; // tags file not sorted - int linear; // do a linear search - bool sortic = false; // tag file sorted in nocase - bool line_error = false; // syntax error + int findall = (mincount == MAXCOL || mincount == TAG_MANY); // find all matching tags int has_re = (flags & TAG_REGEXP); // regexp used int help_only = (flags & TAG_HELP); int name_only = (flags & TAG_NAMES); int noic = (flags & TAG_NOIC); - int get_it_again = false; int verbose = (flags & TAG_VERBOSE); int use_tfu = ((flags & TAG_NO_TAGFUNC) == 0); int save_p_ic = p_ic; @@ -1488,55 +2184,44 @@ int find_tags(char *pat, int *num_matches, char ***matchesp, int flags, int minc } help_save = curbuf->b_help; - orgpat.pat = (char_u *)pat; - orgpat.regmatch.regprog = NULL; - vimconv.vc_type = CONV_NONE; 
// Allocate memory for the buffers that are used - lbuf = xmalloc((size_t)lbuf_size); tag_fname = xmalloc(MAXPATHL + 1); - for (mtt = 0; mtt < MT_COUNT; mtt++) { - ga_init(&ga_match[mtt], sizeof(char *), 100); - hash_init(&ht_match[mtt]); - } + + findtags_state_init(&st, pat, mincount); // Initialize a few variables if (help_only) { // want tags from help file curbuf->b_help = true; // will be restored later } - orgpat.len = (int)strlen(pat); if (curbuf->b_help) { // When "@ab" is specified use only the "ab" language, otherwise // search all languages. - if (orgpat.len > 3 && pat[orgpat.len - 3] == '@' - && ASCII_ISALPHA(pat[orgpat.len - 2]) - && ASCII_ISALPHA(pat[orgpat.len - 1])) { - saved_pat = xstrnsave(pat, (size_t)orgpat.len - 3); - help_lang_find = (char_u *)&pat[orgpat.len - 2]; - orgpat.pat = (char_u *)saved_pat; - orgpat.len -= 3; + if (st.orgpat.len > 3 && pat[st.orgpat.len - 3] == '@' + && ASCII_ISALPHA(pat[st.orgpat.len - 2]) + && ASCII_ISALPHA(pat[st.orgpat.len - 1])) { + saved_pat = xstrnsave(pat, (size_t)st.orgpat.len - 3); + st.help_lang_find = &pat[st.orgpat.len - 2]; + st.orgpat.pat = (char_u *)saved_pat; + st.orgpat.len -= 3; } } - if (p_tl != 0 && orgpat.len > p_tl) { // adjust for 'taglength' - orgpat.len = (int)p_tl; + if (p_tl != 0 && st.orgpat.len > p_tl) { // adjust for 'taglength' + st.orgpat.len = (int)p_tl; } save_emsg_off = emsg_off; emsg_off = true; // don't want error for invalid RE here - prepare_pats(&orgpat, has_re); + prepare_pats(&st.orgpat, has_re); emsg_off = save_emsg_off; - if (has_re && orgpat.regmatch.regprog == NULL) { + if (has_re && st.orgpat.regmatch.regprog == NULL) { goto findtag_end; } - // This is only to avoid a compiler warning for using search_info - // uninitialised. 
- CLEAR_FIELD(search_info); - if (*curbuf->b_p_tfu != NUL && use_tfu && !tfu_in_use) { tfu_in_use = true; - retval = find_tagfunc_tags((char_u *)pat, &ga_match[0], &match_count, flags, + retval = find_tagfunc_tags((char_u *)pat, &st.ga_match[0], &st.match_count, flags, (char_u *)buf_ffname); tfu_in_use = false; if (retval != NOTDONE) { @@ -1555,631 +2240,24 @@ int find_tags(char *pat, int *num_matches, char ***matchesp, int flags, int minc // Set a flag if the file extension is .txt if ((flags & TAG_KEEP_LANG) - && help_lang_find == NULL + && st.help_lang_find == NULL && curbuf->b_fname != NULL && (i = (int)strlen(curbuf->b_fname)) > 4 && STRICMP(curbuf->b_fname + i - 4, ".txt") == 0) { - is_txt = true; + st.is_txt = true; } - orgpat.regmatch.rm_ic = ((p_ic || !noic) - && (findall || orgpat.headlen == 0 || !p_tbs)); + st.orgpat.regmatch.rm_ic = ((p_ic || !noic) + && (findall || st.orgpat.headlen == 0 || !p_tbs)); for (round = 1; round <= 2; round++) { - linear = (orgpat.headlen == 0 || !p_tbs || round == 2); + st.linear = (st.orgpat.headlen == 0 || !p_tbs || round == 2); // Try tag file names from tags option one by one. for (first_file = true; get_tagfname(&tn, first_file, tag_fname) == OK; first_file = false) { - // A file that doesn't exist is silently ignored. Only when not a - // single file is found, an error message is given (further on). - if (curbuf->b_help) { - // Keep en if the file extension is .txt - if (is_txt) { - STRCPY(help_lang, "en"); - } else { - // Prefer help tags according to 'helplang'. Put the - // two-letter language name in help_lang[]. - i = (int)STRLEN(tag_fname); - if (i > 3 && tag_fname[i - 3] == '-') { - STRCPY(help_lang, tag_fname + i - 2); - } else { - STRCPY(help_lang, "en"); - } - } - - // When searching for a specific language skip tags files - // for other languages. 
- if (help_lang_find != NULL - && STRICMP(help_lang, help_lang_find) != 0) { - continue; - } - - // For CTRL-] in a help file prefer a match with the same - // language. - if ((flags & TAG_KEEP_LANG) - && help_lang_find == NULL - && curbuf->b_fname != NULL - && (i = (int)strlen(curbuf->b_fname)) > 4 - && curbuf->b_fname[i - 1] == 'x' - && curbuf->b_fname[i - 4] == '.' - && STRNICMP(curbuf->b_fname + i - 3, help_lang, 2) == 0) { - help_pri = 0; - } else { - help_pri = 1; - for (s = p_hlg; *s != NUL; s++) { - if (STRNICMP(s, help_lang, 2) == 0) { - break; - } - help_pri++; - if ((s = (char_u *)vim_strchr((char *)s, ',')) == NULL) { - break; - } - } - if (s == NULL || *s == NUL) { - // Language not in 'helplang': use last, prefer English, - // unless found already. - help_pri++; - if (STRICMP(help_lang, "en") != 0) { - help_pri++; - } - } - } - } - - if ((fp = os_fopen(tag_fname, "r")) == NULL) { - continue; - } - - if (p_verbose >= 5) { - verbose_enter(); - smsg(_("Searching tags file %s"), tag_fname); - verbose_leave(); - } - - did_open = true; // remember that we found at least one file - - state = TS_START; // we're at the start of the file - - // Read and parse the lines in the file one by one - for (;;) { - // check for CTRL-C typed, more often when jumping around - if (state == TS_BINARY || state == TS_SKIP_BACK) { - line_breakcheck(); - } else { - fast_breakcheck(); - } - if ((flags & TAG_INS_COMP)) { // Double brackets for gcc - ins_compl_check_keys(30, false); - } - if (got_int || ins_compl_interrupted()) { - stop_searching = true; - break; - } - // When mincount is TAG_MANY, stop when enough matches have been - // found (for completion). - if (mincount == TAG_MANY && match_count >= TAG_MANY) { - stop_searching = true; - retval = OK; - break; - } - if (get_it_again) { - goto line_read_in; - } - // For binary search: compute the next offset to use. 
- if (state == TS_BINARY) { - offset = search_info.low_offset + ((search_info.high_offset - - search_info.low_offset) / 2); - if (offset == search_info.curr_offset) { - break; // End the binary search without a match. - } - search_info.curr_offset = offset; - } else if (state == TS_SKIP_BACK) { - // Skipping back (after a match during binary search). - search_info.curr_offset -= lbuf_size * 2; - if (search_info.curr_offset < 0) { - search_info.curr_offset = 0; - rewind(fp); - state = TS_STEP_FORWARD; - } - } - - // When jumping around in the file, first read a line to find the - // start of the next line. - if (state == TS_BINARY || state == TS_SKIP_BACK) { - // Adjust the search file offset to the correct position - search_info.curr_offset_used = search_info.curr_offset; - vim_fseek(fp, search_info.curr_offset, SEEK_SET); - eof = vim_fgets((char_u *)lbuf, lbuf_size, fp); - if (!eof && search_info.curr_offset != 0) { - search_info.curr_offset = vim_ftell(fp); - if (search_info.curr_offset == search_info.high_offset) { - // oops, gone a bit too far; try from low offset - vim_fseek(fp, search_info.low_offset, SEEK_SET); - search_info.curr_offset = search_info.low_offset; - } - eof = vim_fgets((char_u *)lbuf, lbuf_size, fp); - } - // skip empty and blank lines - while (!eof && vim_isblankline(lbuf)) { - search_info.curr_offset = vim_ftell(fp); - eof = vim_fgets((char_u *)lbuf, lbuf_size, fp); - } - if (eof) { - // Hit end of file. Skip backwards. - state = TS_SKIP_BACK; - search_info.match_offset = vim_ftell(fp); - search_info.curr_offset = search_info.curr_offset_used; - continue; - } - } else { - // Not jumping around in the file: Read the next line. 
- - // skip empty and blank lines - do { - search_info.curr_offset = vim_ftell(fp); - eof = vim_fgets((char_u *)lbuf, lbuf_size, fp); - } while (!eof && vim_isblankline(lbuf)); - - if (eof) { - break; // end of file - } - } -line_read_in: - - if (vimconv.vc_type != CONV_NONE) { - char *conv_line; - int len; - - // Convert every line. Converting the pattern from 'enc' to - // the tags file encoding doesn't work, because characters are - // not recognized. - conv_line = string_convert(&vimconv, lbuf, NULL); - if (conv_line != NULL) { - // Copy or swap lbuf and conv_line. - len = (int)strlen(conv_line) + 1; - if (len > lbuf_size) { - xfree(lbuf); - lbuf = conv_line; - lbuf_size = len; - } else { - STRCPY(lbuf, conv_line); - xfree(conv_line); - } - } - } - - // When still at the start of the file, check for Emacs tags file - // format, and for "not sorted" flag. - if (state == TS_START) { - // The header ends when the line sorts below "!_TAG_". When - // case is folded lower case letters sort before "_". - if (STRNCMP(lbuf, "!_TAG_", 6) <= 0 - || (lbuf[0] == '!' && ASCII_ISLOWER(lbuf[1]))) { - if (STRNCMP(lbuf, "!_TAG_", 6) != 0) { - // Non-header item before the header, e.g. "!" itself. - goto parse_line; - } - - // Read header line. - if (STRNCMP(lbuf, "!_TAG_FILE_SORTED\t", 18) == 0) { - tag_file_sorted = (uint8_t)lbuf[18]; - } - if (STRNCMP(lbuf, "!_TAG_FILE_ENCODING\t", 20) == 0) { - // Prepare to convert every line from the specified - // encoding to 'encoding'. - for (p = lbuf + 20; *p > ' ' && *p < 127; p++) {} - *p = NUL; - convert_setup(&vimconv, lbuf + 20, p_enc); - } - - // Read the next line. Unrecognized flags are ignored. - continue; - } - - // Headers ends. - - // When there is no tag head, or ignoring case, need to do a - // linear search. - // When no "!_TAG_" is found, default to binary search. If - // the tag file isn't sorted, the second loop will find it. - // When "!_TAG_FILE_SORTED" found: start binary search if - // flag set. 
- if (linear) { - state = TS_LINEAR; - } else if (tag_file_sorted == NUL) { - state = TS_BINARY; - } else if (tag_file_sorted == '1') { - state = TS_BINARY; - } else if (tag_file_sorted == '2') { - state = TS_BINARY; - sortic = true; - orgpat.regmatch.rm_ic = (p_ic || !noic); - } else { - state = TS_LINEAR; - } - - if (state == TS_BINARY && orgpat.regmatch.rm_ic && !sortic) { - // Binary search won't work for ignoring case, use linear - // search. - linear = true; - state = TS_LINEAR; - } - - // When starting a binary search, get the size of the file and - // compute the first offset. - if (state == TS_BINARY) { - if (vim_fseek(fp, 0, SEEK_END) != 0) { - // can't seek, don't use binary search - state = TS_LINEAR; - } else { - // Get the tag file size. - // Don't use lseek(), it doesn't work - // properly on MacOS Catalina. - const off_T filesize = vim_ftell(fp); - vim_fseek(fp, 0, SEEK_SET); - - // Calculate the first read offset in the file. Start - // the search in the middle of the file. - search_info.low_offset = 0; - search_info.low_char = 0; - search_info.high_offset = filesize; - search_info.curr_offset = 0; - search_info.high_char = 0xff; - } - continue; - } - } - -parse_line: - // When the line is too long the NUL will not be in the - // last-but-one byte (see vim_fgets()). - // Has been reported for Mozilla JS with extremely long names. - // In that case we need to increase lbuf_size. - if (lbuf[lbuf_size - 2] != NUL) { - lbuf_size *= 2; - xfree(lbuf); - lbuf = xmalloc((size_t)lbuf_size); - - if (state == TS_STEP_FORWARD) { - // Seek to the same position to read the same line again - vim_fseek(fp, search_info.curr_offset, SEEK_SET); - } - // this will try the same thing again, make sure the offset is - // different - search_info.curr_offset = 0; - continue; - } - - // Figure out where the different strings are in this line. - // For "normal" tags: Do a quick check if the tag matches. - // This speeds up tag searching a lot! 
- if (orgpat.headlen) { - CLEAR_FIELD(tagp); - tagp.tagname = lbuf; - tagp.tagname_end = (char_u *)vim_strchr(lbuf, TAB); - if (tagp.tagname_end == NULL) { - // Corrupted tag line. - line_error = true; - break; - } - - // Skip this line if the length of the tag is different and - // there is no regexp, or the tag is too short. - cmplen = (int)(tagp.tagname_end - (char_u *)tagp.tagname); - if (p_tl != 0 && cmplen > p_tl) { // adjust for 'taglength' - cmplen = (int)p_tl; - } - if (has_re && orgpat.headlen < cmplen) { - cmplen = orgpat.headlen; - } else if (state == TS_LINEAR && orgpat.headlen != cmplen) { - continue; - } - - if (state == TS_BINARY) { - // Simplistic check for unsorted tags file. - i = (int)tagp.tagname[0]; - if (sortic) { - i = TOUPPER_ASC(tagp.tagname[0]); - } - if (i < search_info.low_char || i > search_info.high_char) { - sort_error = true; - } - - // Compare the current tag with the searched tag. - if (sortic) { - tagcmp = tag_strnicmp((char_u *)tagp.tagname, orgpat.head, - (size_t)cmplen); - } else { - tagcmp = STRNCMP(tagp.tagname, orgpat.head, cmplen); - } - - // A match with a shorter tag means to search forward. - // A match with a longer tag means to search backward. - if (tagcmp == 0) { - if (cmplen < orgpat.headlen) { - tagcmp = -1; - } else if (cmplen > orgpat.headlen) { - tagcmp = 1; - } - } - - if (tagcmp == 0) { - // We've located the tag, now skip back and search - // forward until the first matching tag is found. 
- state = TS_SKIP_BACK; - search_info.match_offset = search_info.curr_offset; - continue; - } - if (tagcmp < 0) { - search_info.curr_offset = vim_ftell(fp); - if (search_info.curr_offset < search_info.high_offset) { - search_info.low_offset = search_info.curr_offset; - if (sortic) { - search_info.low_char = - TOUPPER_ASC(tagp.tagname[0]); - } else { - search_info.low_char = (uint8_t)tagp.tagname[0]; - } - continue; - } - } - if (tagcmp > 0 - && search_info.curr_offset != search_info.high_offset) { - search_info.high_offset = search_info.curr_offset; - if (sortic) { - search_info.high_char = - TOUPPER_ASC(tagp.tagname[0]); - } else { - search_info.high_char = (uint8_t)tagp.tagname[0]; - } - continue; - } - - // No match yet and are at the end of the binary search. - break; - } else if (state == TS_SKIP_BACK) { - assert(cmplen >= 0); - if (mb_strnicmp(tagp.tagname, (char *)orgpat.head, (size_t)cmplen) != 0) { - state = TS_STEP_FORWARD; - } else { - // Have to skip back more. Restore the curr_offset - // used, otherwise we get stuck at a long line. - search_info.curr_offset = search_info.curr_offset_used; - } - continue; - } else if (state == TS_STEP_FORWARD) { - assert(cmplen >= 0); - if (mb_strnicmp(tagp.tagname, (char *)orgpat.head, (size_t)cmplen) != 0) { - if ((off_T)vim_ftell(fp) > search_info.match_offset) { - break; // past last match - } else { - continue; // before first match - } - } - } else { - // skip this match if it can't match - assert(cmplen >= 0); - } - if (mb_strnicmp(tagp.tagname, (char *)orgpat.head, (size_t)cmplen) != 0) { - continue; - } - - // Can be a matching tag, isolate the file name and command. 
- tagp.fname = tagp.tagname_end + 1; - tagp.fname_end = (char_u *)vim_strchr((char *)tagp.fname, TAB); - tagp.command = tagp.fname_end + 1; - if (tagp.fname_end == NULL) { - i = FAIL; - } else { - i = OK; - } - } else { - i = parse_tag_line((char_u *)lbuf, &tagp); - } - if (i == FAIL) { - line_error = true; - break; - } - - // First try matching with the pattern literally (also when it is - // a regexp). - cmplen = (int)(tagp.tagname_end - (char_u *)tagp.tagname); - if (p_tl != 0 && cmplen > p_tl) { // adjust for 'taglength' - cmplen = (int)p_tl; - } - // if tag length does not match, don't try comparing - if (orgpat.len != cmplen) { - match = false; - } else { - if (orgpat.regmatch.rm_ic) { - assert(cmplen >= 0); - match = mb_strnicmp(tagp.tagname, (char *)orgpat.pat, (size_t)cmplen) == 0; - if (match) { - match_no_ic = (STRNCMP(tagp.tagname, orgpat.pat, - cmplen) == 0); - } - } else { - match = (STRNCMP(tagp.tagname, orgpat.pat, cmplen) == 0); - } - } - - // Has a regexp: Also find tags matching regexp. - match_re = false; - if (!match && orgpat.regmatch.regprog != NULL) { - int cc; - - cc = *tagp.tagname_end; - *tagp.tagname_end = NUL; - match = vim_regexec(&orgpat.regmatch, tagp.tagname, (colnr_T)0); - if (match) { - matchoff = (int)(orgpat.regmatch.startp[0] - tagp.tagname); - if (orgpat.regmatch.rm_ic) { - orgpat.regmatch.rm_ic = false; - match_no_ic = vim_regexec(&orgpat.regmatch, tagp.tagname, (colnr_T)0); - orgpat.regmatch.rm_ic = true; - } - } - *tagp.tagname_end = (char_u)cc; - match_re = true; - } - - // If a match is found, add it to ht_match[] and ga_match[]. - if (match) { - size_t len = 0; - - // Decide in which array to store this match. - is_current = test_for_current((char *)tagp.fname, (char *)tagp.fname_end, - tag_fname, - buf_ffname); - is_static = test_for_static(&tagp); - - // Decide in which of the sixteen tables to store this match. 
- if (is_static) { - if (is_current) { - mtt = MT_ST_CUR; - } else { - mtt = MT_ST_OTH; - } - } else { - if (is_current) { - mtt = MT_GL_CUR; - } else { - mtt = MT_GL_OTH; - } - } - if (orgpat.regmatch.rm_ic && !match_no_ic) { - mtt += MT_IC_OFF; - } - if (match_re) { - mtt += MT_RE_OFF; - } - - // Add the found match in ht_match[mtt] and ga_match[mtt]. - // Store the info we need later, which depends on the kind of - // tags we are dealing with. - if (help_only) { -#define ML_EXTRA 3 - // Append the help-heuristic number after the tagname, for - // sorting it later. The heuristic is ignored for - // detecting duplicates. - // The format is {tagname}@{lang}NUL{heuristic}NUL - *tagp.tagname_end = NUL; - len = (size_t)(tagp.tagname_end - (char_u *)tagp.tagname); - mfp = xmalloc(sizeof(char) + len + 10 + ML_EXTRA + 1); - - p = mfp; - STRCPY(p, tagp.tagname); - p[len] = '@'; - STRCPY(p + len + 1, help_lang); - snprintf(p + len + 1 + ML_EXTRA, strlen(p) + len + 1 + ML_EXTRA, "%06d", - help_heuristic(tagp.tagname, - match_re ? matchoff : 0, !match_no_ic) - + help_pri); - - *tagp.tagname_end = TAB; - } else if (name_only) { - if (get_it_again) { - char_u *temp_end = tagp.command; - - if (*temp_end == '/') { - while (*temp_end && *temp_end != '\r' - && *temp_end != '\n' - && *temp_end != '$') { - temp_end++; - } - } - - if (tagp.command + 2 < temp_end) { - len = (size_t)(temp_end - tagp.command - 2); - mfp = xmalloc(len + 2); - STRLCPY(mfp, tagp.command + 2, len + 1); - } else { - mfp = NULL; - } - get_it_again = false; - } else { - len = (size_t)(tagp.tagname_end - (char_u *)tagp.tagname); - mfp = xmalloc(sizeof(char) + len + 1); - STRLCPY(mfp, tagp.tagname, len + 1); - - // if wanted, re-read line to get long form too - if (State & MODE_INSERT) { - get_it_again = p_sft; - } - } - } else { - size_t tag_fname_len = strlen(tag_fname); - // Save the tag in a buffer. - // Use 0x02 to separate fields (Can't use NUL, because the - // hash key is terminated by NUL). 
// Emacs tag: <mtt><tag_fname><0x02><ebuf><0x02><lbuf> - // other tag: <mtt><tag_fname><0x02><0x02><lbuf> - // without Emacs tags: <mtt><tag_fname><0x02><lbuf> - // Here <mtt> is the "mtt" value plus 1 to avoid NUL. - len = tag_fname_len + strlen(lbuf) + 3; - mfp = xmalloc(sizeof(char) + len + 1); - p = mfp; - p[0] = (char)(mtt + 1); - STRCPY(p + 1, tag_fname); -#ifdef BACKSLASH_IN_FILENAME - // Ignore differences in slashes, avoid adding - // both path/file and path\file. - slash_adjust(p + 1); -#endif - p[tag_fname_len + 1] = TAG_SEP; - s = (char_u *)p + 1 + tag_fname_len + 1; - STRCPY(s, lbuf); - } - - if (mfp != NULL) { - hashitem_T *hi; - - // Don't add identical matches. - // "mfp" is used as a hash key, there is a NUL byte to end - // the part that matters for comparing, more bytes may - // follow after it. E.g. help tags store the priority - // after the NUL. - hash = hash_hash((char_u *)mfp); - hi = hash_lookup(&ht_match[mtt], (const char *)mfp, - strlen(mfp), hash); - if (HASHITEM_EMPTY(hi)) { - hash_add_item(&ht_match[mtt], hi, (char_u *)mfp, hash); - ga_grow(&ga_match[mtt], 1); - ((char **)(ga_match[mtt].ga_data))[ga_match[mtt].ga_len++] = mfp; - match_count++; - } else { - // duplicate tag, drop it - xfree(mfp); - } - } - } - } // forever - - if (line_error) { - semsg(_("E431: Format error in tags file \"%s\""), tag_fname); - semsg(_("Before byte %" PRId64), (int64_t)vim_ftell(fp)); - stop_searching = true; - line_error = false; - } - - fclose(fp); - if (vimconv.vc_type != CONV_NONE) { - convert_setup(&vimconv, NULL, NULL); - } - - tag_file_sorted = NUL; - if (sort_error) { - semsg(_("E432: Tags file not sorted: %s"), tag_fname); - sort_error = false; - } - - // Stop searching if sufficient tags have been found.
- if (match_count >= mincount) { + find_tags_in_file(tag_fname, &st, flags, buf_ffname); + if (st.stop_searching) { retval = OK; - stop_searching = true; - } - - if (stop_searching) { break; } } // end of for-each-file loop @@ -2188,63 +2266,34 @@ parse_line: // stop searching when already did a linear search, or when TAG_NOIC // used, and 'ignorecase' not set or already did case-ignore search - if (stop_searching || linear || (!p_ic && noic) || orgpat.regmatch.rm_ic) { + if (st.stop_searching || st.linear || (!p_ic && noic) + || st.orgpat.regmatch.rm_ic) { break; } - orgpat.regmatch.rm_ic = true; // try another time while ignoring case + + // try another time while ignoring case + st.orgpat.regmatch.rm_ic = true; } - if (!stop_searching) { - if (!did_open && verbose) { // never opened any tags file + if (!st.stop_searching) { + if (!st.did_open && verbose) { // never opened any tags file emsg(_("E433: No tags file")); } retval = OK; // It's OK even when no tag found } findtag_end: - xfree(lbuf); - vim_regfree(orgpat.regmatch.regprog); + xfree(st.lbuf); + vim_regfree(st.orgpat.regmatch.regprog); xfree(tag_fname); // Move the matches from the ga_match[] arrays into one list of // matches. When retval == FAIL, free the matches. if (retval == FAIL) { - match_count = 0; + st.match_count = 0; } - if (match_count > 0) { - matches = xmalloc((size_t)match_count * sizeof(char *)); - } else { - matches = NULL; - } - match_count = 0; - for (mtt = 0; mtt < MT_COUNT; mtt++) { - for (i = 0; i < ga_match[mtt].ga_len; i++) { - mfp = ((char **)(ga_match[mtt].ga_data))[i]; - if (matches == NULL) { - xfree(mfp); - } else { - if (!name_only) { - // Change mtt back to zero-based. 
- *mfp = (char)(*mfp - 1); - - // change the TAG_SEP back to NUL - for (p = mfp + 1; *p != NUL; p++) { - if (*p == TAG_SEP) { - *p = NUL; - } - } - } - matches[match_count++] = mfp; - } - } - - ga_clear(&ga_match[mtt]); - hash_clear(&ht_match[mtt]); - } - - *matchesp = matches; - *num_matches = match_count; + findtags_copy_matches(&st, matchesp, num_matches, name_only); curbuf->b_help = help_save; xfree(saved_pat); diff --git a/src/nvim/testdir/test_tagjump.vim b/src/nvim/testdir/test_tagjump.vim index bfc61e7b48..361aa23291 100644 --- a/src/nvim/testdir/test_tagjump.vim +++ b/src/nvim/testdir/test_tagjump.vim @@ -1448,6 +1448,11 @@ func Test_tagfile_errors() endtry call assert_equal(v:true, caught_431) + " tag name and file name are not separated by a tab + call writefile(["!_TAG_FILE_ENCODING\tutf-8\t//", + \ "foo Xfile 1"], 'Xtags') + call assert_fails('tag foo', 'E431:') + call delete('Xtags') call delete('Xfile') set tags&