vim-patch:9.0.1810: camel-case spelling has issues with digits (#24909)

Problem:  camel-case spelling has issues with digits
Solution: Improve the camCase spell checking by taking digits
          and caps into account

Rewrite the conditions to check for word boundaries by taking into
account the presence of digits and all-caps sequences such as acronyms.

closes: vim/vim#12644
closes: vim/vim#12933

d08745040b

Co-authored-by: LemonBoy <thatlemon@gmail.com>
This commit is contained in:
zeertzjq 2023-08-28 07:16:15 +08:00 committed by GitHub
parent 6b12eda220
commit 656be8a591
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 102 additions and 23 deletions

View File

@ -184,12 +184,21 @@ int did_set_spelltab;
# include "spell.c.generated.h"
#endif
// mode values for find_word
#define FIND_FOLDWORD 0 // find word case-folded
#define FIND_KEEPWORD 1 // find keep-case word
#define FIND_PREFIX 2 // find word after prefix
#define FIND_COMPOUND 3 // find case-folded compound word
#define FIND_KEEPCOMPOUND 4 // find keep-case compound word
/// mode values for find_word
enum {
FIND_FOLDWORD = 0, ///< find word case-folded
FIND_KEEPWORD = 1, ///< find keep-case word
FIND_PREFIX = 2, ///< find word after prefix
FIND_COMPOUND = 3, ///< find case-folded compound word
FIND_KEEPCOMPOUND = 4, ///< find keep-case compound word
};
/// type values for get_char_type
enum {
CHAR_OTHER = 0,
CHAR_UPPER = 1,
CHAR_DIGIT = 2,
};
char *e_format = N_("E759: Format error in spell file");
@ -222,7 +231,7 @@ size_t spell_check(win_T *wp, char *ptr, hlf_T *attrp, int *capcol, bool docount
size_t wrongcaplen = 0;
bool count_word = docount;
bool use_camel_case = (wp->w_s->b_p_spo_flags & SPO_CAMEL) != 0;
bool camel_case = false;
bool is_camel_case = false;
// A word never starts at a space or a control character. Return quickly
// then, skipping over the character.
@ -255,24 +264,14 @@ size_t spell_check(win_T *wp, char *ptr, hlf_T *attrp, int *capcol, bool docount
mi.mi_word = ptr;
mi.mi_fend = ptr;
if (spell_iswordp(mi.mi_fend, wp)) {
bool this_upper = false; // init for gcc
if (use_camel_case) {
int c = utf_ptr2char(mi.mi_fend);
this_upper = SPELL_ISUPPER(c);
mi.mi_fend = advance_camelcase_word(ptr, wp, &is_camel_case);
} else {
do {
MB_PTR_ADV(mi.mi_fend);
} while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp));
}
do {
MB_PTR_ADV(mi.mi_fend);
if (use_camel_case) {
const bool prev_upper = this_upper;
int c = utf_ptr2char(mi.mi_fend);
this_upper = SPELL_ISUPPER(c);
camel_case = !prev_upper && this_upper;
}
} while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp)
&& !camel_case);
if (capcol != NULL && *capcol == 0 && wp->w_s->b_cap_prog != NULL) {
// Check word starting with capital letter.
int c = utf_ptr2char(ptr);
@ -304,7 +303,7 @@ size_t spell_check(win_T *wp, char *ptr, hlf_T *attrp, int *capcol, bool docount
MAXWLEN + 1);
mi.mi_fwordlen = (int)strlen(mi.mi_fword);
if (camel_case && mi.mi_fwordlen > 0) {
if (is_camel_case && mi.mi_fwordlen > 0) {
// introduce a fake word end space into the folded word.
mi.mi_fword[mi.mi_fwordlen - 1] = ' ';
}
@ -424,6 +423,66 @@ size_t spell_check(win_T *wp, char *ptr, hlf_T *attrp, int *capcol, bool docount
return (size_t)(mi.mi_end - ptr);
}
/// Determine the type of character "c".
static int get_char_type(int c)
{
if (ascii_isdigit(c)) {
return CHAR_DIGIT;
}
if (SPELL_ISUPPER(c)) {
return CHAR_UPPER;
}
return CHAR_OTHER;
}
/// Returns a pointer to the end of the word starting at "str".
/// Supports camelCase words.
static char *advance_camelcase_word(char *str, win_T *wp, bool *is_camel_case)
{
char *end = str;
*is_camel_case = false;
if (*str == NUL) {
return str;
}
int c = utf_ptr2char(end);
MB_PTR_ADV(end);
// We need at most the types of the type of the last two chars.
int last_last_type = -1;
int last_type = get_char_type(c);
while (*end != NUL && spell_iswordp(end, wp)) {
c = utf_ptr2char(end);
int this_type = get_char_type(c);
if (last_last_type == CHAR_UPPER && last_type == CHAR_UPPER
&& this_type == CHAR_OTHER) {
// Handle the following cases:
// UpperUpperLower
*is_camel_case = true;
// Back up by one char.
MB_PTR_BACK(str, end);
break;
} else if ((this_type == CHAR_UPPER && last_type == CHAR_OTHER)
|| (this_type != last_type
&& (this_type == CHAR_DIGIT || last_type == CHAR_DIGIT))) {
// Handle the following cases:
// LowerUpper LowerDigit UpperDigit DigitUpper DigitLower
*is_camel_case = true;
break;
}
last_last_type = last_type;
last_type = this_type;
MB_PTR_ADV(end);
}
return end;
}
// Check if the word at "mip->mi_word" is in the tree.
// When "mode" is FIND_FOLDWORD check in fold-case word tree.
// When "mode" is FIND_KEEPWORD check in keep-case word tree.

View File

@ -131,6 +131,26 @@ foobar/?
set spell&
endfunc
func Test_spell_camelcase()
set spell spelloptions=camel
let words = [
\ 'UPPER',
\ 'lower',
\ 'mixedCase',
\ 'HTML',
\ 'XMLHttpRequest',
\ 'foo123bar',
\ '12345678',
\ 'HELLO123world',
\]
for word in words
call assert_equal(['', ''], spellbadword(word))
endfor
set spell& spelloptions&
endfunc
func Test_spell_file_missing()
let s:spell_file_missing = 0
augroup TestSpellFileMissing