Merge #6460 from ZyX-I/1476-changes

Refactor functions that find a character in a string
2024-12-24 21:25:04 -07:00 · 2017-05-08 13:45:14 +02:00 · 2017-05-08 13:45:14 +02:00 · a9605bb4af
commit a9605bb4af
parent 631d55ada0 22fb9d8d25
6 changed files with 71 additions and 142 deletions
--- a/src/nvim/ex_cmds.c
+++ b/src/nvim/ex_cmds.c
@ -5101,14 +5101,13 @@ static void helptags_one(char_u *dir, char_u *ext, char_u *tagfname,
      }
      p1 = vim_strchr(IObuff, '*');             /* find first '*' */
      while (p1 != NULL) {
-        /* Use vim_strbyte() instead of vim_strchr() so that when
+        p2 = (char_u *)strchr((const char *)p1 + 1, '*');  // Find second '*'.
-         * 'encoding' is dbcs it still works, don't find '*' in the
+        if (p2 != NULL && p2 > p1 + 1) {  // Skip "*" and "**".
-         * second byte. */
+          for (s = p1 + 1; s < p2; s++) {
-        p2 = vim_strbyte(p1 + 1, '*');          /* find second '*' */
+            if (*s == ' ' || *s == '\t' || *s == '|') {
        if (p2 != NULL && p2 > p1 + 1) {        /* skip "*" and "**" */
          for (s = p1 + 1; s < p2; ++s)
            if (*s == ' ' || *s == '\t' || *s == '|')
              break;
            }
          }
          /*
           * Only accept a *tag* when it consists of valid
--- a/src/nvim/getchar.c
+++ b/src/nvim/getchar.c
@ -3596,8 +3596,8 @@ int check_abbr(int c, char_u *ptr, int col, int mincol)
      char_u *q = mp->m_keys;
      int match;
-      if (vim_strbyte(mp->m_keys, K_SPECIAL) != NULL) {
+      if (strchr((const char *)mp->m_keys, K_SPECIAL) != NULL) {
-        /* might have CSI escaped mp->m_keys */
+        // Might have CSI escaped mp->m_keys.
        q = vim_strsave(mp->m_keys);
        vim_unescape_csi(q);
        qlen = (int)STRLEN(q);
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@ -359,10 +359,10 @@ int bomb_size(void)
 */
 void remove_bom(char_u *s)
 {
-  char_u *p = s;
+  char *p = (char *)s;
-  while ((p = vim_strbyte(p, 0xef)) != NULL) {
+  while ((p = strchr(p, 0xef)) != NULL) {
-    if (p[1] == 0xbb && p[2] == 0xbf) {
+    if ((uint8_t)p[1] == 0xbb && (uint8_t)p[2] == 0xbf) {
      STRMOVE(p, p + 3);
    } else {
      p++;
--- a/src/nvim/regexp.c
+++ b/src/nvim/regexp.c
@ -3427,32 +3427,26 @@ static long bt_regexec_both(char_u *line,
      c = *prog->regmust;
    s = line + col;
-    /*
+    // This is used very often, esp. for ":global".  Use two versions of
-     * This is used very often, esp. for ":global".  Use three versions of
+    // the loop to avoid overhead of conditions.
-     * the loop to avoid overhead of conditions.
+    if (!ireg_ic) {
     */
    if (!ireg_ic
        && !has_mbyte
        )
      while ((s = vim_strbyte(s, c)) != NULL) {
        if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
          break;                        /* Found it. */
        ++s;
      }
    else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
      while ((s = vim_strchr(s, c)) != NULL) {
-        if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
+        if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) {
-          break;                        /* Found it. */
+          break;  // Found it.
        }
        mb_ptr_adv(s);
      }
-    else
+    } else {
      while ((s = cstrchr(s, c)) != NULL) {
-        if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
+        if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) {
-          break;                        /* Found it. */
+          break;  // Found it.
        }
        mb_ptr_adv(s);
      }
-    if (s == NULL)              /* Not present. */
+    }
    if (s == NULL) {  // Not present.
      goto theend;
    }
  }
  regline = line;
@ -3482,14 +3476,8 @@ static long bt_regexec_both(char_u *line,
    /* Messy cases:  unanchored match. */
    while (!got_int) {
      if (prog->regstart != NUL) {
-        /* Skip until the char we know it must start with.
+        // Skip until the char we know it must start with.
-         * Used often, do some work to avoid call overhead. */
+        s = cstrchr(regline + col, prog->regstart);
        if (!ireg_ic
            && !has_mbyte
            )
          s = vim_strbyte(regline + col, prog->regstart);
        else
          s = cstrchr(regline + col, prog->regstart);
        if (s == NULL) {
          retval = 0;
          break;
@ -6299,44 +6287,37 @@ static int cstrncmp(char_u *s1, char_u *s2, int *n)
 /*
 * cstrchr: This function is used a lot for simple searches, keep it fast!
 */
-static char_u *cstrchr(char_u *s, int c)
+static inline char_u *cstrchr(const char_u *const s, const int c)
  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
  FUNC_ATTR_ALWAYS_INLINE
 {
-  char_u      *p;
+  if (!ireg_ic) {
  int cc;
  if (!ireg_ic
      || (!enc_utf8 && mb_char2len(c) > 1)
      )
    return vim_strchr(s, c);
  }
-  /* tolower() and toupper() can be slow, comparing twice should be a lot
+  // Use folded case for UTF-8, slow! For ASCII use libc strpbrk which is
-   * faster (esp. when using MS Visual C++!).
+  // expected to be highly optimized.
   * For UTF-8 need to use folded case. */
  if (c > 0x80) {
-    cc = utf_fold(c);
+    const int folded_c = utf_fold(c);
-  } else if (mb_isupper(c)) {
+    for (const char_u *p = s; *p != NUL; p += utfc_ptr2len(p)) {
-    cc = mb_tolower(c);
+      if (utf_fold(utf_ptr2char(p)) == folded_c) {
-  } else if (mb_islower(c)) {
+        return (char_u *)p;
-    cc = mb_toupper(c);
+      }
    }
    return NULL;
  }
  int cc;
  if (ASCII_ISUPPER(c)) {
    cc = TOLOWER_ASC(c);
  } else if (ASCII_ISLOWER(c)) {
    cc = TOUPPER_ASC(c);
  } else {
    return vim_strchr(s, c);
  }
-  if (has_mbyte) {
+  char tofind[] = { (char)c, (char)cc, NUL };
-    for (p = s; *p != NUL; p += (*mb_ptr2len)(p)) {
+  return (char_u *)strpbrk((const char *)s, tofind);
      if (enc_utf8 && c > 0x80) {
        if (utf_fold(utf_ptr2char(p)) == cc)
          return p;
      } else if (*p == c || *p == cc)
        return p;
    }
  } else
    /* Faster version for when there are no multi-byte characters. */
    for (p = s; *p != NUL; ++p)
      if (*p == c || *p == cc)
        return p;
  return NULL;
 }
 /***************************************************************
--- a/src/nvim/regexp_nfa.c
+++ b/src/nvim/regexp_nfa.c
@ -4855,17 +4855,10 @@ static int failure_chance(nfa_state_T *state, int depth)
 */
 static int skip_to_start(int c, colnr_T *colp)
 {
-  char_u *s;
+  const char_u *const s = cstrchr(regline + *colp, c);
-
+  if (s == NULL) {
  /* Used often, do some work to avoid call overhead. */
  if (!ireg_ic
      && !has_mbyte
      )
    s = vim_strbyte(regline + *colp, c);
  else
    s = cstrchr(regline + *colp, c);
  if (s == NULL)
    return FAIL;
  }
  *colp = (int)(s - regline);
  return OK;
 }
--- a/src/nvim/strings.c
+++ b/src/nvim/strings.c
@ -425,72 +425,28 @@ int vim_strnicmp(const char *s1, const char *s2, size_t len)
 }
 #endif
-/*
+/// strchr() version which handles multibyte strings
- * Version of strchr() and strrchr() that handle unsigned char strings
+///
- * with characters from 128 to 255 correctly.  It also doesn't return a
+/// @param[in]  string  String to search in.
- * pointer to the NUL at the end of the string.
+/// @param[in]  c  Character to search for. Must be a valid character.
- */
+///
-char_u *vim_strchr(const char_u *string, int c)
+/// @return Pointer to the first byte of the found character in string or NULL
-  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
+///         if it was not found. NUL character is never found, use `strlen()`
 ///         instead.
 char_u *vim_strchr(const char_u *const string, const int c)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
 {
-  int b;
+  assert(c >= 0);
-
+  if (c == 0) {
  const char_u *p = string;
  if (enc_utf8 && c >= 0x80) {
    while (*p != NUL) {
      int l = (*mb_ptr2len)(p);
      // Avoid matching an illegal byte here.
      if (l > 1 && utf_ptr2char(p) == c) {
        return (char_u *) p;
      }
      p += l;
    }
    return NULL;
  } else if (c < 0x80) {
    return (char_u *)strchr((const char *)string, c);
  } else {
    char u8char[MB_MAXBYTES + 1];
    const int len = utf_char2bytes(c, (char_u *)u8char);
    u8char[len] = NUL;
    return (char_u *)strstr((const char *)string, u8char);
  }
  if (enc_dbcs != 0 && c > 255) {
    int n2 = c & 0xff;
    c = ((unsigned)c >> 8) & 0xff;
    while ((b = *p) != NUL) {
      if (b == c && p[1] == n2)
        return (char_u *) p;
      p += (*mb_ptr2len)(p);
    }
    return NULL;
  }
  if (has_mbyte) {
    while ((b = *p) != NUL) {
      if (b == c)
        return (char_u *) p;
      p += (*mb_ptr2len)(p);
    }
    return NULL;
  }
  while ((b = *p) != NUL) {
    if (b == c)
      return (char_u *) p;
    ++p;
  }
  return NULL;
 }
 /*
 * Byte-wise version of strchr(): returns a pointer to the first byte in
 * "string" that equals "c", or NULL when no byte before the terminating NUL
 * matches.  Meant for unsigned char strings, so bytes of 128 and above are
 * compared correctly.  Unlike strchr(), searching for NUL yields NULL rather
 * than a pointer to the NUL at the end of the string.
 */
 char_u *vim_strbyte(const char_u *string, int c)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE
 {
  const char_u *p = string;
  // The loop stops at NUL before comparing, so NUL itself is never matched.
  while (*p != NUL) {
    if (*p == c)
      // The cast drops the const qualifier of the input pointer.
      return (char_u *) p;
    ++p;
  }
  return NULL;
 }
 /*