neovim/test/old/testdir/test_regexp_utf8.vim

" Tests for regexp in utf8 encoding

source shared.vim

func s:equivalence_test()
  " Every whitespace-separated word lists the members of one equivalence
  " class: a base letter followed by its accented variants.
  let str = "AÀÁÂÃÄÅĀĂĄǍǞǠǺȂȦȺḀẠẢẤẦẨẪẬẮẰẲẴẶ BƁɃḂḄḆ CÇĆĈĊČƇȻḈꞒ DĎĐƊḊḌḎḐḒ EÈÉÊËĒĔĖĘĚȄȆȨɆḔḖḘḚḜẸẺẼẾỀỂỄỆ FƑḞꞘ GĜĞĠĢƓǤǦǴḠꞠ HĤĦȞḢḤḦḨḪⱧ IÌÍÎÏĨĪĬĮİƗǏȈȊḬḮỈỊ JĴɈ KĶƘǨḰḲḴⱩꝀ LĹĻĽĿŁȽḶḸḺḼⱠ MḾṀṂ NÑŃŅŇǸṄṆṈṊꞤ OÒÓÔÕÖØŌŎŐƟƠǑǪǬǾȌȎȪȬȮȰṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢ PƤṔṖⱣ QɊ RŔŖŘȐȒɌṘṚṜṞⱤꞦ SŚŜŞŠȘṠṢṤṦṨⱾꞨ TŢŤŦƬƮȚȾṪṬṮṰ UÙÚÛÜŨŪŬŮŰƯǕǙǛǓǗȔȖɄṲṴṶṸṺỤỦỨỪỬỮỰ  VƲṼṾ WŴẀẂẄẆẈ XẊẌ YÝŶŸƳȲɎẎỲỴỶỸ ZŹŻŽƵẐẒẔⱫ aàáâãäåāăąǎǟǡǻȃȧᶏḁẚạảấầẩẫậắằẳẵặⱥ bƀɓᵬᶀḃḅḇ cçćĉċčƈȼḉꞓꞔ dďđɗᵭᶁᶑḋḍḏḑḓ eèéêëēĕėęěȅȇȩɇᶒḕḗḙḛḝẹẻẽếềểễệ fƒᵮᶂḟꞙ gĝğġģǥǧǵɠᶃḡꞡ hĥħȟḣḥḧḩḫẖⱨꞕ iìíîïĩīĭįǐȉȋɨᶖḭḯỉị jĵǰɉ kķƙǩᶄḱḳḵⱪꝁ lĺļľŀłƚḷḹḻḽⱡ mᵯḿṁṃ nñńņňŉǹᵰᶇṅṇṉṋꞥ oòóôõöøōŏőơǒǫǭǿȍȏȫȭȯȱɵṍṏṑṓọỏốồổỗộớờởỡợ pƥᵱᵽᶈṕṗ qɋʠ rŕŗřȑȓɍɽᵲᵳᶉṛṝṟꞧ sśŝşšșȿᵴᶊṡṣṥṧṩꞩ tţťŧƫƭțʈᵵṫṭṯṱẗⱦ uùúûüũūŭůűųǚǖưǔǘǜȕȗʉᵾᶙṳṵṷṹṻụủứừửữự vʋᶌṽṿ wŵẁẃẅẇẉẘ xẋẍ yýÿŷƴȳɏẏẙỳỵỷỹ zźżžƶᵶᶎẑẓẕⱬ"
  let classes = split(str)
  for members in classes
    for ch in split(members, '\zs')
      let collection = '[[=' .. ch .. '=]]'
      " The equivalence class of any member must match every character of
      " the group that member belongs to ...
      call assert_match('^' .. collection .. '*$', members)
      " ... and must not match a single character of any other group.
      for other in classes
        if other != members
          call assert_equal(-1, match(other, collection), ch)
        endif
      endfor
    endfor
  endfor
endfunc

" Run the equivalence class checks with 'regexpengine' set to 1.
func Test_equivalence_re1()
  let &regexpengine = 1
  call s:equivalence_test()
  let &regexpengine = 0
endfunc

" Run the equivalence class checks with 'regexpengine' set to 2.
func Test_equivalence_re2()
  let &regexpengine = 2
  call s:equivalence_test()
  let &regexpengine = 0
endfunc

" Check which of the characters 1..255 are matched by each character class
" item ([[:alpha:]], [[:ident:]], \i, ...).
"
" 'isprint' (and, on MS-Windows, 'iskeyword') are overridden while the checks
" run and are put back afterwards, so the values do not leak into other tests.
func s:classes_test()
  let save_iskeyword = &iskeyword
  let save_isprint = &isprint
  try
    if has('win32')
      set iskeyword=@,48-57,_,192-255
    endif
    set isprint=@,161-255
    call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))

    " Every pattern that gets probed against each single character.
    let patterns = [
          \ '[[:alpha:]]', '[[:alnum:]]', '[[:backspace:]]', '[[:blank:]]',
          \ '[[:cntrl:]]', '[[:digit:]]', '[[:escape:]]', '[[:graph:]]',
          \ '[[:lower:]]', '[[:print:]]', '[[:punct:]]', '[[:return:]]',
          \ '[[:space:]]', '[[:tab:]]', '[[:upper:]]', '[[:xdigit:]]',
          \ '[[:ident:]]', '\i', '[[:keyword:]]', '\k', '[[:fname:]]', '\f'
          \ ]

    " seen[pattern] accumulates, in ascending order, the characters that the
    " pattern matched.
    let seen = {}
    for pat in patterns
      let seen[pat] = ''
    endfor
    for nr in range(1, 255)
      let c = nr2char(nr)
      for pat in patterns
        if c =~ pat
          let seen[pat] = seen[pat] . c
        endif
      endfor
    endfor

    call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', seen['[[:alpha:]]'])
    call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', seen['[[:alnum:]]'])
    call assert_equal("\b", seen['[[:backspace:]]'])
    call assert_equal("\t ", seen['[[:blank:]]'])
    call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", seen['[[:cntrl:]]'])
    call assert_equal("0123456789", seen['[[:digit:]]'])
    call assert_equal("\<Esc>", seen['[[:escape:]]'])
    call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', seen['[[:graph:]]'])
    call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', seen['[[:lower:]]'])
    call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', seen['[[:print:]]'])
    call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', seen['[[:punct:]]'])
    call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', seen['[[:upper:]]'])
    call assert_equal("\r", seen['[[:return:]]'])
    call assert_equal("\t\n\x0b\f\r ", seen['[[:space:]]'])
    call assert_equal("\t", seen['[[:tab:]]'])
    call assert_equal('0123456789ABCDEFabcdef', seen['[[:xdigit:]]'])

    " The expected identifier, keyword and file name characters differ per
    " platform.
    if has('win32')
      let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz ¡¢£¤¥¦§µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
      let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    else
      let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
      let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    endif

    if has('win32')
      let fnamechars_ok = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    elseif has('amiga')
      let fnamechars_ok = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    elseif has('vms')
      let fnamechars_ok = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    else
      let fnamechars_ok = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    endif

    call assert_equal(identchars_ok, seen['[[:ident:]]'])
    call assert_equal(kwordchars_ok, seen['[[:keyword:]]'])
    call assert_equal(fnamechars_ok, seen['[[:fname:]]'])

    " The verbose class names must agree with their short atoms.
    call assert_equal(seen['\i'], seen['[[:ident:]]'])
    call assert_equal(seen['\k'], seen['[[:keyword:]]'])
    call assert_equal(seen['\f'], seen['[[:fname:]]'])
  finally
    let &iskeyword = save_iskeyword
    let &isprint = save_isprint
  endtry
endfunc

" Run the character class checks with 'regexpengine' set to 1.
func Test_classes_re1()
  let &regexpengine = 1
  call s:classes_test()
  let &regexpengine = 0
endfunc

" Run the character class checks with 'regexpengine' set to 2.
func Test_classes_re2()
  let &regexpengine = 2
  call s:classes_test()
  let &regexpengine = 0
endfunc

" A collection range whose end precedes its start ("[c-a]") must fail with
" E944 for every 'regexpengine' value.
func Test_reversed_range()
  for engine in [0, 1, 2]
    let &regexpengine = engine
    call assert_fails('call match("abc def", "[c-a]")', 'E944:', engine)
  endfor
  let &regexpengine = 0
endfunc

" A large collection range: expected to fail with E945 when 'regexpengine'
" is 1 and to match normally when it is 2.
func Test_large_class()
  let &regexpengine = 1
  call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')

  let &regexpengine = 2
  let wide_range = '[\u3000-\u4000]'
  call assert_equal(0, 'abc def' =~# wide_range)
  call assert_equal(1, "\u3042" =~# wide_range)

  let &regexpengine = 0
endfunc

" An over-long \%[] optional sequence must be rejected with E339 when
" 'regexpengine' is 1.
func Test_optmatch_toolong()
  let &regexpengine = 1
  " Can only handle about 8000 characters.
  let optional = '\\%[' .. repeat('x', 9000) .. ']'
  let cmd = printf('call match("abc def", "%s")', optional)
  call assert_fails(cmd, 'E339:')
  let &regexpengine = 0
endfunc

" Test for regexp patterns with multi-byte support, using utf-8.
func Test_multibyte_chars()
  " tl is a List of Lists with:
  "    2: test auto/old/new  0: test auto/old  1: test auto/new
  "    regexp pattern
  "    text to test the pattern on
  "    expected match (optional)
  "    expected submatch 1 (optional)
  "    expected submatch 2 (optional)
  "    etc.
  "  When there is no match use only the first two items.
  let tl = []

  " Multi-byte character tests.
  call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
  call add(tl, [2, '[[=a=]]\+', 'ddaãâbcd', 'aãâ'])								" equivalence classes
  call add(tl, [2, '[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
  call add(tl, [2, ' [^ ]\+', 'start มabcdม ', ' มabcdม'])
  call add(tl, [2, '[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])

  " this is not a normal "i" but 0xec
  call add(tl, [2, '\p\+', 'ìa', 'ìa'])
  call add(tl, [2, '\p*', 'aあ', 'aあ'])

  " Test recognition of some character classes
  call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
  call add(tl, [2, '\f\+', '&*fname ', 'fname'])

  " Test composing character matching
  call add(tl, [2, '.ม', 'xม่x yมy', 'yม'])
  call add(tl, [2, '.ม่', 'xม่x yมy', 'xม่'])
  call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "a", "ca\u0300t"])
  call add(tl, [2, "ca", "ca\u0300t"])
  call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
  call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])

  " Test \Z
  call add(tl, [2, 'ú\Z', 'x'])
  call add(tl, [2, 'יהוה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יְהוָה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יהוה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְהוָה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְ\Z', 'וְיַ', 'יַ'])
  call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
  call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
  call add(tl, [2, "ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
  call add(tl, [2, "ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
  call add(tl, [2, "\u05b9\\Z", "xyz"])
  call add(tl, [2, "\\Z\u05b9", "xyz"])
  call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
  call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
  call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
  call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])

  " Combining different tests and features
  call add(tl, [2, '[^[=a=]]\+', 'ddaãâbcd', 'dd'])

  " Run the tests
  for t in tl
    let re = t[0]
    let pat = t[1]
    let text = t[2]
    " Index in t of the expected whole match; the expected submatches follow.
    let matchidx = 3
    for engine in [0, 1, 2]
      " Skip the engine this entry is not meant for (see the legend above).
      if (engine == 2 && re == 0) || (engine == 1 && re == 1)
        continue
      endif
      let &regexpengine = engine
      " Common prefix of every failure message for this pattern/text/engine.
      let where = 'Error ' . engine . ': pat: \"' . pat . '\", text: \"' . text
      try
        let l = matchlist(text, pat)
      catch
        call assert_report(where . '\", caused an exception: \"' . v:exception . '\"')
        " There is no result for this engine: "l" would be stale (from the
        " previous iteration) or undefined, so do not check it.
        continue
      endtry

      " check the match itself
      if len(l) == 0 && len(t) > matchidx
        call assert_report(where . '\", did not match, expected: \"' . t[matchidx] . '\"')
      elseif len(l) > 0 && len(t) == matchidx
        call assert_report(where . '\", match: \"' . l[0] . '\", expected no match')
      elseif len(t) > matchidx && l[0] != t[matchidx]
        call assert_report(where . '\", match: \"' . l[0] . '\", expected: \"' . t[matchidx] . '\"')
      endif

      if len(l) > 0
        " check all the nine submatches; a submatch that is not listed in
        " the table is expected to be empty
        for i in range(1, 9)
          let e = get(t, matchidx + i, '')
          if l[i] != e
            call assert_report(where . '\", submatch ' . i . ': \"' . l[i] . '\", expected: \"' . e . '\"')
          endif
        endfor
      endif
    endfor
  endfor
  set regexpengine&
endfunc

" check that 'ambiwidth' does not change the meaning of \p
func Test_regexp_ambiwidth()
  " U+00EC must match \p for every combination of 'regexpengine' 1/2 and
  " 'ambiwidth' single/double.
  for engine in [1, 2]
    for width in ['single', 'double']
      let &regexpengine = engine
      let &ambiwidth = width
      call assert_equal(0, match("\u00EC", '\p'))
    endfor
  endfor
  set regexpengine& ambiwidth&
endfunc

" Substitute in 'iIİ' with several patterns, most of them using \c, and
" compare the results.  The caller selects the regexp engine.
func Run_regexp_ignore_case()
  " Each entry: [expected result, pattern, replacement]
  let cases = [
        \ ['iIİ', '\([iIİ]\)', '\1'],
        \ ['iIx', '\c\([İ]\)', 'x'],
        \ ['xxİ', '\(i\c\)', 'x'],
        \ ['iIx', '\(İ\c\)', 'x'],
        \ ['iIx', '\c\(\%u0130\)', 'x'],
        \ ['iIx', '\c\([\u0130]\)', 'x'],
        \ ['iIx', '\c\([\u012f-\u0131]\)', 'x']
        \ ]
  for [expected, pat, sub] in cases
    call assert_equal(expected, substitute('iIİ', pat, sub, 'g'))
  endfor
endfunc

" Run the ignore-case checks with 'regexpengine' 1 and then 2.
func Test_regexp_ignore_case()
  for engine in [1, 2]
    let &regexpengine = engine
    call Run_regexp_ignore_case()
  endfor
  set regexpengine&
endfunc

" Tests for regexp with multi-byte encoding and various magic settings
func Run_regexp_multibyte_magic()
  " Fills a scratch window with the lines below and then runs a chain of
  " Normal-mode searches.  Each search is followed by "x", which deletes the
  " character under the cursor, so the assert after it shows where the search
  " ended up.  The steps depend on each other: every search starts from the
  " cursor position left by the previous one.
  "
  " NOTE(review): lines "e" and "f" contain the literal text "<20><><EFBFBD>...",
  " which looks like residue of raw bytes that were lost when this file was
  " re-encoded.  The \%U12345678 and [\U1234abcd...] searches further down
  " expect a single character there - confirm against the upstream test.
  let text =<< trim END
    1 a aa abb abbccc
    2 d dd dee deefff
    3 g gg ghh ghhiii
    4 j jj jkk jkklll
    5 m mm mnn mnnooo
    6 x ^aa$ x
    7 (a)(b) abbaa
    8 axx [ab]xx
    9 หม่x อมx
    a อมx หม่x
    b ちカヨは
    c x ¬€x
    d 天使x
    e <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>y
    f <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>z
    g a啷bb
    j 0123❤x
    k combinations
    l äö üᾱ̆́
  END

  new
  call setline(1, text)
  " Lines 1-5: the same kind of pattern written for 'magic', \M, 'nomagic',
  " \m and \v.  The /e offset leaves the cursor on the last matched
  " character, which "x" then deletes.
  exe 'normal /a*b\{2}c\+/e' .. "\<CR>x"
  call assert_equal('1 a aa abb abbcc', getline('.'))
  exe 'normal /\Md\*e\{2}f\+/e' .. "\<CR>x"
  call assert_equal('2 d dd dee deeff', getline('.'))
  set nomagic
  exe 'normal /g\*h\{2}i\+/e' .. "\<CR>x"
  call assert_equal('3 g gg ghh ghhii', getline('.'))
  exe 'normal /\mj*k\{2}l\+/e' .. "\<CR>x"
  call assert_equal('4 j jj jkk jkkll', getline('.'))
  exe 'normal /\vm*n{2}o+/e' .. "\<CR>x"
  call assert_equal('5 m mm mnn mnnoo', getline('.'))
  " Line 6: with \V the "^" is matched as a literal character and deleted.
  exe 'normal /\V^aa$/' .. "\<CR>x"
  call assert_equal('6 x aa$ x', getline('.'))
  set magic
  " Line 7: back references with \v.
  exe 'normal /\v(a)(b)\2\1\1/e' .. "\<CR>x"
  call assert_equal('7 (a)(b) abba', getline('.'))
  " Line 8: with \V "[ab]" is literal text while "\[xy]" is a collection.
  exe 'normal /\V[ab]\(\[xy]\)\1' .. "\<CR>x"
  call assert_equal('8 axx ab]xx', getline('.'))

  " search for multi-byte without composing char
  exe 'normal /ม' .. "\<CR>x"
  call assert_equal('9 หม่x อx', getline('.'))

  " search for multi-byte with composing char
  exe 'normal /ม่' .. "\<CR>x"
  call assert_equal('a อมx หx', getline('.'))

  " find word by change of word class
  exe 'normal /ち\<カヨ\>は' .. "\<CR>x"
  call assert_equal('b カヨは', getline('.'))

  " Test \%u, [\u] and friends
  " c: \%u with a four-digit code point (U+20AC)
  exe 'normal /\%u20ac' .. "\<CR>x"
  call assert_equal('c x ¬x', getline('.'))
  " d: \u items inside a collection
  exe 'normal /[\u4f7f\u5929]\+' .. "\<CR>x"
  call assert_equal('d 使x', getline('.'))
  " e: \%U with eight hex digits
  exe 'normal /\%U12345678' .. "\<CR>x"
  call assert_equal('e y', getline('.'))
  " f: \U and \u items inside a collection
  exe 'normal /[\U1234abcd\u1234\uabcd]' .. "\<CR>x"
  call assert_equal('f z', getline('.'))
  " g: \%d with a decimal character number, followed by a literal "b"
  exe 'normal /\%d21879b' .. "\<CR>x"
  call assert_equal('g abb', getline('.'))

  " j Test backwards search from a multi-byte char
  exe "normal /x\<CR>x?.\<CR>x"
  call assert_equal('j 012❤', getline('.'))
  " k: run a :substitute whose replacement holds composing characters,
  " executed from register w
  let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g'
  @w
  call assert_equal('k œ̄ṣ́m̥̄ᾱ̆́', getline(18))

  close!
endfunc

" Run the multi-byte/magic checks with 'regexpengine' 1 and then 2.
func Test_regexp_multibyte_magic()
  for engine in [1, 2]
    let &regexpengine = engine
    call Run_regexp_multibyte_magic()
  endfor
  set regexpengine&
endfunc

" Test for 7.3.192
" command ":s/ \?/ /g" splits multi-byte characters into bytes
func Test_split_multibyte_to_bytes()
  new
  call setline(1, 'l äö üᾱ̆́')
  " A space must be inserted in front of every character, never in between
  " the bytes or composing characters of one character.
  s/ \?/ /g
  let spaced = getline(1)
  close!
  call assert_equal(' l ä ö ü ᾱ̆́', spaced)
endfunc

" Test for matchstr() with multibyte characters
func Test_matchstr_multibyte()
  new
  let hebrew = "אבגד"
  " Each entry: [expected match, pattern, {start} argument, {count} argument]
  let cases = [
        \ ['ב', ".", 0, 2],
        \ ['בג', "..", 0, 2],
        \ ['א', ".", 0, 0],
        \ ['ג', ".", 4, -1]
        \ ]
  for [expected, pat, start, nth] in cases
    call assert_equal(expected, matchstr(hebrew, pat, start, nth))
  endfor
  close!
endfunc

" Test for 7.4.636
" A search with end offset gets stuck at end of file.
func Test_search_with_end_offset()
  new
  call setline(1, ['', 'dog(a', 'cat('])
  " Search for "(" with an end offset of one character, repeat the search
  " and then yank up to the following match into register a.
  exe "normal /(/e+\<CR>"
  normal n"ayn
  let yanked = getreg('a')
  close!
  call assert_equal("a\ncat(", yanked)
endfunc

" Check that "^" matches even when the line starts with a combining char
func Test_match_start_of_line_combining()
  new
  " Line 2 holds nothing but the character U+05AE.
  call setline(1, ['', "\u05ae", ''])
  " Searching for "^" from line 1 must stop on line 2.
  exe "normal gg/^\<CR>"
  call assert_equal(2, line('.'))
  bwipe!
endfunc

" Check that [[:upper:]] matches for automatic engine
func Test_match_char_class_upper()
  new
  call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
  call cursor(1,1)

  " Each entry: [assert message, pattern,
  "              expected searchcount() totals for 'regexpengine' 0, 1 and 2]
  let cases = [
        \ ['TEST 1', '\<[[:upper:]]\{2,\}\>', [4, 2, 4]],
        \ ['TEST 2', '\<[[:upper:]].\+\>', [2, 1, 2]],
        \ ['TEST 3 lower', '\<[[:lower:]]\+\>', [4, 2, 4]]
        \ ]
  for [msg, pat, totals] in cases
    let search_cmd = 'norm /' .. pat .. "\<CR>"
    for engine in [0, 1, 2]
      let &regexpengine = engine
      exe search_cmd
      call assert_equal(totals[engine], searchcount().total, msg)
    endfor
  endfor

  " clean up
  set regexpengine=0
  bwipe!
endfunc

" Write a script from raw bytes (it includes the byte 0xaa) and source it in
" a new window.  Nothing is asserted; the script just has to run through.
func Test_match_invalid_byte()
  let script = 'Xinvalid'
  call writefile(0z630a.765d30aa0a.2e0a.790a.4030, script)
  new
  exe 'source ' .. script
  bwipe!
  call delete(script)
endfunc

" Run a small script in a separate Vim process.  Nothing is asserted here;
" the commands just have to be executed by the child process.
func Test_match_illegal_byte()
  let script = 'Xregexp'
  let lines = [
        \ 'silent! buffer ÿ\c',
        \ 'next ÿ',
        \ '0scriptnames',
        \ 'source'
        \ ]
  call writefile(lines, script)
  call system(GetVimCommand() .. ' -X -Z -e -s -S ' .. script .. ' -c qa!')

  call delete(script)
endfunc

" With 'regexpengine' 1, open a buffer whose name is made of the bytes
" eb db 99 and try to find a buffer with a pattern.  Nothing is asserted;
" the commands just have to run through.
func Test_match_too_complicated()
  let &regexpengine = 1
  exe "noswapfile vsplit \xeb\xdb\x99"
  silent! buf \&\zs*\zs*0
  bwipe!
  let &regexpengine = 0
endfunc

" A collection holding a base character plus a combining character must only
" match that exact combination, for every 'regexpengine' value.
func Test_combining_chars_in_collection()
  new
  for engine in [0, 1, 2]
    let &regexpengine = engine
    put =['ɔ̃', 'ɔ',  '̃  ã', 'abcd']
    :%s/[ɔ̃]//
    " Only the first put line is emptied; the others are left alone.
    call assert_equal(['', '', 'ɔ', '̃  ã', 'abcd'], getline(1,'$'))
    %d
  endfor
  bw!
endfunc

" Searching for U+017F (long s) with and without ignoring case.
func Test_search_multibyte_match_ascii()
  new
  " Returns the text of every match of {pat} in the current buffer.
  let Texts = {pat -> matchbufline('%', pat, 1, '$')->mapnew({idx, item -> item.text})}

  " Match single 'ſ' and 's'
  call setline(1,  'das abc heraus abc ſich abc ſind')
  for engine in [0, 1, 2]
    let &regexpengine = engine
    call assert_equal(['s', 's', 'ſ','ſ'], Texts('\c\%u17f'), "Ignorecase Regex-engine: " .. &re)
    call assert_equal(['ſ','ſ'], Texts('\C\%u17f'), "No-Ignorecase Regex-engine: " .. &re)
  endfor

  " Match several 'ſſ' and 'ss'
  call setline(1,  'das abc herauss abc ſſich abc ſind')
  for engine in [0, 1, 2]
    let &regexpengine = engine
    " Collect all results first, then compare them.
    let pair_ic = Texts('\c\%u17f\%u17f')
    let pair_noic = Texts('\C\%u17f\%u17f')
    let multi_ic = Texts('\c\%u17f\+')
    let multi_noic = Texts('\C\%u17f\+')
    let coll_ic = Texts('\c[\u17f]\+')
    let coll_noic = Texts('\C[\u17f]\+')

    call assert_equal(['ss', 'ſſ'], pair_ic, "Ignorecase Regex-engine: " .. &re)
    call assert_equal(['ſſ'], pair_noic, "No-Ignorecase Regex-engine: " .. &re)
    call assert_equal(['s', 'ss', 'ſſ', 'ſ'], multi_ic, "Ignorecase Regex-engine: " .. &re)
    call assert_equal(['ſſ','ſ'], multi_noic, "No-Ignorecase Regex-engine: " .. &re)
    call assert_equal(['s', 'ss', 'ſſ', 'ſ'], coll_ic, "Ignorecase Collection Regex-engine: " .. &re)
    call assert_equal(['ſſ','ſ'], coll_noic, "No-Ignorecase Collection Regex-engine: " .. &re)
  endfor
  bw!
endfunc

" vim: shiftwidth=2 sts=2 expandtab
-												vim-patch:7.4.1700

Problem:    Equivalence classes are not properly tested.
Solution:   Add tests for multi-byte and latin1. Fix an error. (Owen Leibman)

https://github.com/vim/vim/commit/22e421549d54147d003f6444de007cb1d73f1d27

src/regexp.c changes weren't applied because they're specific to EBCDIC
handling, which has been dropped from nvim.

The latin1-specific tests were also removed since neovim intends to
remove the ability to have 'encoding' set to anything other than utf8.

											
										
										
											2016-08-16 18:47:54 -07:00
+								" Tests for regexp in utf8 encoding
-												vim-patch:9.0.0105: illegal memory access when pattern starts with illegal byte

Problem:    Illegal memory access when pattern starts with illegal byte.
Solution:   Do not match a character with an illegal byte.

https://github.com/vim/vim/commit/f50940531dd57135fe60aa393ac9d3281f352d88

Co-authored-by: Bram Moolenaar <Bram@vim.org>

											
										
										
											2024-07-30 15:37:58 -07:00
+								source shared.vim
-												vim-patch:7.4.1700

Problem:    Equivalence classes are not properly tested.
Solution:   Add tests for multi-byte and latin1. Fix an error. (Owen Leibman)

https://github.com/vim/vim/commit/22e421549d54147d003f6444de007cb1d73f1d27

src/regexp.c changes weren't applied because they're specific to EBCDIC
handling, which has been dropped from nvim.

The latin1-specific tests were also removed since neovim intends to
remove the ability to have 'encoding' set to anything other than utf8.

											
										
										
											2016-08-16 18:47:54 -07:00
+								func s:equivalence_test()
-												vim-patch:8.2.2716: the equivalent class regexp is missing some characters

Problem:    The equivalent class regexp is missing some characters.
Solution:   Update the list of equivalent characters. (Dominique Pellé,
            closes vim/vim#8029)
https://github.com/vim/vim/commit/0b94e297afd072c51bf2eed12c7ffe3978d93399

Match upstream's indent in s:equivalence_class().

											
										
										
											2022-03-19 00:50:25 -07:00
+								  let str = "AÀÁÂÃÄÅĀĂĄǍǞǠǺȂȦȺḀẠẢẤẦẨẪẬẮẰẲẴẶ BƁɃḂḄḆ CÇĆĈĊČƇȻḈꞒ DĎĐƊḊḌḎḐḒ EÈÉÊËĒĔĖĘĚȄȆȨɆḔḖḘḚḜẸẺẼẾỀỂỄỆ FƑḞꞘ GĜĞĠĢƓǤǦǴḠꞠ HĤĦȞḢḤḦḨḪⱧ IÌÍÎÏĨĪĬĮİƗǏȈȊḬḮỈỊ JĴɈ KĶƘǨḰḲḴⱩꝀ LĹĻĽĿŁȽḶḸḺḼⱠ MḾṀṂ NÑŃŅŇǸṄṆṈṊꞤ OÒÓÔÕÖØŌŎŐƟƠǑǪǬǾȌȎȪȬȮȰṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢ PƤṔṖⱣ QɊ RŔŖŘȐȒɌṘṚṜṞⱤꞦ SŚŜŞŠȘṠṢṤṦṨⱾꞨ TŢŤŦƬƮȚȾṪṬṮṰ UÙÚÛÜŨŪŬŮŰƯǕǙǛǓǗȔȖɄṲṴṶṸṺỤỦỨỪỬỮỰ  VƲṼṾ WŴẀẂẄẆẈ XẊẌ YÝŶŸƳȲɎẎỲỴỶỸ ZŹŻŽƵẐẒẔⱫ aàáâãäåāăąǎǟǡǻȃȧᶏḁẚạảấầẩẫậắằẳẵặⱥ bƀɓᵬᶀḃḅḇ cçćĉċčƈȼḉꞓꞔ dďđɗᵭᶁᶑḋḍḏḑḓ eèéêëēĕėęěȅȇȩɇᶒḕḗḙḛḝẹẻẽếềểễệ fƒᵮᶂḟꞙ gĝğġģǥǧǵɠᶃḡꞡ hĥħȟḣḥḧḩḫẖⱨꞕ iìíîïĩīĭįǐȉȋɨᶖḭḯỉị jĵǰɉ kķƙǩᶄḱḳḵⱪꝁ lĺļľŀłƚḷḹḻḽⱡ mᵯḿṁṃ nñńņňŉǹᵰᶇṅṇṉṋꞥ oòóôõöøōŏőơǒǫǭǿȍȏȫȭȯȱɵṍṏṑṓọỏốồổỗộớờởỡợ pƥᵱᵽᶈṕṗ qɋʠ rŕŗřȑȓɍɽᵲᵳᶉṛṝṟꞧ sśŝşšșȿᵴᶊṡṣṥṧṩꞩ tţťŧƫƭțʈᵵṫṭṯṱẗⱦ uùúûüũūŭůűųǚǖưǔǘǜȕȗʉᵾᶙṳṵṷṹṻụủứừửữự vʋᶌṽṿ wŵẁẃẅẇẉẘ xẋẍ yýÿŷƴȳɏẏẙỳỵỷỹ zźżžƶᵶᶎẑẓẕⱬ"
-												vim-patch:7.4.1700

Problem:    Equivalence classes are not properly tested.
Solution:   Add tests for multi-byte and latin1. Fix an error. (Owen Leibman)

https://github.com/vim/vim/commit/22e421549d54147d003f6444de007cb1d73f1d27

src/regexp.c changes weren't applied because they're specific to EBCDIC
handling, which has been dropped from nvim.

The latin1-specific tests were also removed since neovim intends to
remove the ability to have 'encoding' set to anything other than utf8.

											
										
										
											2016-08-16 18:47:54 -07:00
+								  let groups = split(str)
 								  for group1 in groups
-												vim-patch:8.2.2716: the equivalent class regexp is missing some characters

Problem:    The equivalent class regexp is missing some characters.
Solution:   Update the list of equivalent characters. (Dominique Pellé,
            closes vim/vim#8029)
https://github.com/vim/vim/commit/0b94e297afd072c51bf2eed12c7ffe3978d93399

Match upstream's indent in s:equivalence_class().

											
										
										
											2022-03-19 00:50:25 -07:00
+								      for c in split(group1, '\zs')
 									" next statement confirms that equivalence class matches every
 									" character in group
 								        call assert_match('^[[=' .. c .. '=]]*$', group1)
 								        for group2 in groups
 								          if group2 != group1
 									    " next statement converts that equivalence class doesn't match
 									    " character in any other group
 								            call assert_equal(-1, match(group2, '[[=' .. c .. '=]]'), c)
 								          endif
 								        endfor
-												vim-patch:7.4.1700

Problem:    Equivalence classes are not properly tested.
Solution:   Add tests for multi-byte and latin1. Fix an error. (Owen Leibman)

https://github.com/vim/vim/commit/22e421549d54147d003f6444de007cb1d73f1d27

src/regexp.c changes weren't applied because they're specific to EBCDIC
handling, which has been dropped from nvim.

The latin1-specific tests were also removed since neovim intends to
remove the ability to have 'encoding' set to anything other than utf8.

											
										
										
											2016-08-16 18:47:54 -07:00
+								      endfor
 								  endfor
 								endfunc
 								func Test_equivalence_re1()
 								  set re=1
 								  call s:equivalence_test()
 								  set re=0
 								endfunc
 								func Test_equivalence_re2()
 								  set re=2
 								  call s:equivalence_test()
 								  set re=0
 								endfunc
-												vim-patch:7.4.2219

Problem:    Recursive call to substitute gets stuck in sandbox. (Nikolai
            Pavlov)
Solution:   Handle the recursive call. (Christian Brabandt, closes vim/vim#950)
            Add a test.

https://github.com/vim/vim/commit/f5a39447a8ebe162ee62caa2ee502cd0e65eecaa

Closes #5118

											
										
										
											2016-08-16 13:07:15 -07:00
-												vim-patch:7.4.2222

Problem:    Sourcing a script where a character has 0x80 as a second byte does
            not work. (Filipe L B Correia)
Solution:   Turn 0x80 into K_SPECIAL KS_SPECIAL KE_FILLER. (Christian
            Brabandt, closes vim/vim#728)  Add a test case.

https://github.com/vim/vim/commit/6bff02eb530aa29aafa2cb5627399837be7a5dd5

											
										
										
											2017-03-10 19:22:42 -07:00
+								func s:classes_test()
-												vim-patch:8.2.1254: MS-Windows: regexp test may fail if 'iskeyword' set wrongly

Problem:    MS-Windows: regexp test may fail if 'iskeyword' set wrongly.
Solution:   Override the 'iskeyword' value. (Taro Muraoka, closes vim/vim#6502)
https://github.com/vim/vim/commit/470adb827f3d9e6cf62f685738d2db216daf3738

											
										
										
											2020-07-20 17:22:00 -07:00
+								  if has('win32')
 								    set iskeyword=@,48-57,_,192-255
 								  endif
-												vim-patch:7.4.2222

Problem:    Sourcing a script where a character has 0x80 as a second byte does
            not work. (Filipe L B Correia)
Solution:   Turn 0x80 into K_SPECIAL KS_SPECIAL KE_FILLER. (Christian
            Brabandt, closes vim/vim#728)  Add a test case.

https://github.com/vim/vim/commit/6bff02eb530aa29aafa2cb5627399837be7a5dd5

											
										
										
											2017-03-10 19:22:42 -07:00
+								  set isprint=@,161-255
 								  call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))
-												vim-patch:8.0.0519: character classes not well tested (#8460)

Problem:    Character classes are not well tested. They can differ between
            platforms.
Solution:   Add tests.  In the documentation make clear which classes depend
            on what library function.  Only use :cntrl: and :graph: for ASCII.
            (Kazunobu Kuriyama, Dominique Pelle, closes vim/vim#1560)
            Update the documentation.
https://github.com/vim/vim/commit/0c078fc7db2902d4ccba04506db082ddbef45a8c
											
										
										
											2018-06-01 10:57:22 -07:00
+								  let alnumchars = ''
-												vim-patch:7.4.2222

Problem:    Sourcing a script where a character has 0x80 as a second byte does
            not work. (Filipe L B Correia)
Solution:   Turn 0x80 into K_SPECIAL KS_SPECIAL KE_FILLER. (Christian
            Brabandt, closes vim/vim#728)  Add a test case.

https://github.com/vim/vim/commit/6bff02eb530aa29aafa2cb5627399837be7a5dd5

											
										
										
											2017-03-10 19:22:42 -07:00
+								  let alphachars = ''
-												vim-patch:8.0.0519: character classes not well tested (#8460)

Problem:    Character classes are not well tested. They can differ between
            platforms.
Solution:   Add tests.  In the documentation make clear which classes depend
            on what library function.  Only use :cntrl: and :graph: for ASCII.
            (Kazunobu Kuriyama, Dominique Pelle, closes vim/vim#1560)
            Update the documentation.
https://github.com/vim/vim/commit/0c078fc7db2902d4ccba04506db082ddbef45a8c
											
										
										
											2018-06-01 10:57:22 -07:00
+								  let backspacechar = ''
 								  let blankchars = ''
 								  let cntrlchars = ''
 								  let digitchars = ''
 								  let escapechar = ''
 								  let graphchars = ''
-												vim-patch:7.4.2222

Problem:    Sourcing a script where a character has 0x80 as a second byte does
            not work. (Filipe L B Correia)
Solution:   Turn 0x80 into K_SPECIAL KS_SPECIAL KE_FILLER. (Christian
            Brabandt, closes vim/vim#728)  Add a test case.

https://github.com/vim/vim/commit/6bff02eb530aa29aafa2cb5627399837be7a5dd5

											
										
										
											2017-03-10 19:22:42 -07:00
+								  let lowerchars = ''
 								  let printchars = ''
 								  let punctchars = ''
-												vim-patch:8.0.0519: character classes not well tested (#8460)

Problem:    Character classes are not well tested. They can differ between
            platforms.
Solution:   Add tests.  In the documentation make clear which classes depend
            on what library function.  Only use :cntrl: and :graph: for ASCII.
            (Kazunobu Kuriyama, Dominique Pelle, closes vim/vim#1560)
            Update the documentation.
https://github.com/vim/vim/commit/0c078fc7db2902d4ccba04506db082ddbef45a8c
											
										
										
											2018-06-01 10:57:22 -07:00
+								  let returnchar = ''
 								  let spacechars = ''
 								  let tabchar = ''
 								  let upperchars = ''
-												vim-patch:7.4.2222

Problem:    Sourcing a script where a character has 0x80 as a second byte does
            not work. (Filipe L B Correia)
Solution:   Turn 0x80 into K_SPECIAL KS_SPECIAL KE_FILLER. (Christian
            Brabandt, closes vim/vim#728)  Add a test case.

https://github.com/vim/vim/commit/6bff02eb530aa29aafa2cb5627399837be7a5dd5

											
										
										
											2017-03-10 19:22:42 -07:00
+								  let xdigitchars = ''
-												vim-patch:8.1.0862: no verbose version of character classes

Problem:    No verbose version of character classes.
Solution:   Add [:ident:], [:keyword:] and [:fname:]. (Ozaki Kiichi,
            closes vim/vim#1373)
https://github.com/vim/vim/commit/221cd9f4dd866503777b2fffa721c1403716ad63

											
										
										
											2020-07-24 16:53:07 -07:00
+								  let identchars = ''
 								  let identchars1 = ''
 								  let kwordchars = ''
 								  let kwordchars1 = ''
 								  let fnamechars = ''
 								  let fnamechars1 = ''
-												vim-patch:7.4.2222

Problem:    Sourcing a script where a character has 0x80 as a second byte does
            not work. (Filipe L B Correia)
Solution:   Turn 0x80 into K_SPECIAL KS_SPECIAL KE_FILLER. (Christian
            Brabandt, closes vim/vim#728)  Add a test case.

https://github.com/vim/vim/commit/6bff02eb530aa29aafa2cb5627399837be7a5dd5

											
										
										
											2017-03-10 19:22:42 -07:00
+								  let i = 1
 								  while i <= 255
 								    let c = nr2char(i)
 								    if c =~ '[[:alpha:]]'
 								      let alphachars .= c
 								    endif
 								    if c =~ '[[:alnum:]]'
 								      let alnumchars .= c
 								    endif
-												vim-patch:8.0.0519: character classes not well tested (#8460)

Problem:    Character classes are not well tested. They can differ between
            platforms.
Solution:   Add tests.  In the documentation make clear which classes depend
            on what library function.  Only use :cntrl: and :graph: for ASCII.
            (Kazunobu Kuriyama, Dominique Pelle, closes vim/vim#1560)
            Update the documentation.
https://github.com/vim/vim/commit/0c078fc7db2902d4ccba04506db082ddbef45a8c
											
										
										
											2018-06-01 10:57:22 -07:00
+								    if c =~ '[[:backspace:]]'
 								      let backspacechar .= c
 								    endif
 								    if c =~ '[[:blank:]]'
 								      let blankchars .= c
 								    endif
 								    if c =~ '[[:cntrl:]]'
 								      let cntrlchars .= c
 								    endif
 								    if c =~ '[[:digit:]]'
 								      let digitchars .= c
 								    endif
 								    if c =~ '[[:escape:]]'
 								      let escapechar .= c
 								    endif
 								    if c =~ '[[:graph:]]'
 								      let graphchars .= c
 								    endif
 								    if c =~ '[[:lower:]]'
 								      let lowerchars .= c
 								    endif
-												vim-patch:7.4.2222

Problem:    Sourcing a script where a character has 0x80 as a second byte does
            not work. (Filipe L B Correia)
Solution:   Turn 0x80 into K_SPECIAL KS_SPECIAL KE_FILLER. (Christian
            Brabandt, closes vim/vim#728)  Add a test case.

https://github.com/vim/vim/commit/6bff02eb530aa29aafa2cb5627399837be7a5dd5

											
										
										
											2017-03-10 19:22:42 -07:00
+								    if c =~ '[[:print:]]'
 								      let printchars .= c
 								    endif
 								    if c =~ '[[:punct:]]'
 								      let punctchars .= c
 								    endif
-												vim-patch:8.0.0519: character classes not well tested (#8460)

Problem:    Character classes are not well tested. They can differ between
            platforms.
Solution:   Add tests.  In the documentation make clear which classes depend
            on what library function.  Only use :cntrl: and :graph: for ASCII.
            (Kazunobu Kuriyama, Dominique Pelle, closes vim/vim#1560)
            Update the documentation.
https://github.com/vim/vim/commit/0c078fc7db2902d4ccba04506db082ddbef45a8c
											
										
										
											2018-06-01 10:57:22 -07:00
+								    if c =~ '[[:return:]]'
 								      let returnchar .= c
 								    endif
 								    if c =~ '[[:space:]]'
 								      let spacechars .= c
 								    endif
 								    if c =~ '[[:tab:]]'
 								      let tabchar .= c
 								    endif
 								    if c =~ '[[:upper:]]'
 								      let upperchars .= c
 								    endif
-												vim-patch:7.4.2222

Problem:    Sourcing a script where a character has 0x80 as a second byte does
            not work. (Filipe L B Correia)
Solution:   Turn 0x80 into K_SPECIAL KS_SPECIAL KE_FILLER. (Christian
            Brabandt, closes vim/vim#728)  Add a test case.

https://github.com/vim/vim/commit/6bff02eb530aa29aafa2cb5627399837be7a5dd5

											
										
										
											2017-03-10 19:22:42 -07:00
+								    if c =~ '[[:xdigit:]]'
 								      let xdigitchars .= c
 								    endif
-												vim-patch:8.1.0862: no verbose version of character classes

Problem:    No verbose version of character classes.
Solution:   Add [:ident:], [:keyword:] and [:fname:]. (Ozaki Kiichi,
            closes vim/vim#1373)
https://github.com/vim/vim/commit/221cd9f4dd866503777b2fffa721c1403716ad63

											
										
										
											2020-07-24 16:53:07 -07:00
+								    if c =~ '[[:ident:]]'
 								      let identchars .= c
 								    endif
 								    if c =~ '\i'
 								      let identchars1 .= c
 								    endif
 								    if c =~ '[[:keyword:]]'
 								      let kwordchars .= c
 								    endif
 								    if c =~ '\k'
 								      let kwordchars1 .= c
 								    endif
 								    if c =~ '[[:fname:]]'
 								      let fnamechars .= c
 								    endif
 								    if c =~ '\f'
 								      let fnamechars1 .= c
 								    endif
-												vim-patch:7.4.2222

Problem:    Sourcing a script where a character has 0x80 as a second byte does
            not work. (Filipe L B Correia)
Solution:   Turn 0x80 into K_SPECIAL KS_SPECIAL KE_FILLER. (Christian
            Brabandt, closes vim/vim#728)  Add a test case.

https://github.com/vim/vim/commit/6bff02eb530aa29aafa2cb5627399837be7a5dd5

											
										
										
											2017-03-10 19:22:42 -07:00
+								    let i += 1
 								  endwhile
 								  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars)
 								  call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars)
-												vim-patch:8.0.0519: character classes not well tested (#8460)

Problem:    Character classes are not well tested. They can differ between
            platforms.
Solution:   Add tests.  In the documentation make clear which classes depend
            on what library function.  Only use :cntrl: and :graph: for ASCII.
            (Kazunobu Kuriyama, Dominique Pelle, closes vim/vim#1560)
            Update the documentation.
https://github.com/vim/vim/commit/0c078fc7db2902d4ccba04506db082ddbef45a8c
											
										
										
											2018-06-01 10:57:22 -07:00
+								  call assert_equal("\b", backspacechar)
 								  call assert_equal("\t ", blankchars)
-												vim-patch:8.0.0529: line in test commented out

Problem:    Line in test commented out.
Solution:   Uncomment the lines for character classes that were failing before
            8.0.0519. (Dominique Pelle, closes vim/vim#1599)
https://github.com/vim/vim/commit/13489b9c41922b9ecb16fa47564ec76641515c08

											
										
										
											2018-06-04 05:36:50 -07:00
+								  call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", cntrlchars)
-												vim-patch:8.0.0519: character classes not well tested (#8460)

Problem:    Character classes are not well tested. They can differ between
            platforms.
Solution:   Add tests.  In the documentation make clear which classes depend
            on what library function.  Only use :cntrl: and :graph: for ASCII.
            (Kazunobu Kuriyama, Dominique Pelle, closes vim/vim#1560)
            Update the documentation.
https://github.com/vim/vim/commit/0c078fc7db2902d4ccba04506db082ddbef45a8c
											
										
										
											2018-06-01 10:57:22 -07:00
+								  call assert_equal("0123456789", digitchars)
 								  call assert_equal("\<Esc>", escapechar)
-												vim-patch:8.0.0529: line in test commented out

Problem:    Line in test commented out.
Solution:   Uncomment the lines for character classes that were failing before
            8.0.0519. (Dominique Pelle, closes vim/vim#1599)
https://github.com/vim/vim/commit/13489b9c41922b9ecb16fa47564ec76641515c08

											
										
										
											2018-06-04 05:36:50 -07:00
+								  call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', graphchars)
-												vim-patch:8.0.0519: character classes not well tested (#8460)

Problem:    Character classes are not well tested. They can differ between
            platforms.
Solution:   Add tests.  In the documentation make clear which classes depend
            on what library function.  Only use :cntrl: and :graph: for ASCII.
            (Kazunobu Kuriyama, Dominique Pelle, closes vim/vim#1560)
            Update the documentation.
https://github.com/vim/vim/commit/0c078fc7db2902d4ccba04506db082ddbef45a8c
											
										
										
											2018-06-01 10:57:22 -07:00
+								  call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars)
-												vim-patch:7.4.2222

Problem:    Sourcing a script where a character has 0x80 as a second byte does
            not work. (Filipe L B Correia)
Solution:   Turn 0x80 into K_SPECIAL KS_SPECIAL KE_FILLER. (Christian
            Brabandt, closes vim/vim#728)  Add a test case.

https://github.com/vim/vim/commit/6bff02eb530aa29aafa2cb5627399837be7a5dd5

											
										
										
											2017-03-10 19:22:42 -07:00
+								  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', printchars)
 								  call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
-												vim-patch:8.0.0519: character classes not well tested (#8460)

Problem:    Character classes are not well tested. They can differ between
            platforms.
Solution:   Add tests.  In the documentation make clear which classes depend
            on what library function.  Only use :cntrl: and :graph: for ASCII.
            (Kazunobu Kuriyama, Dominique Pelle, closes vim/vim#1560)
            Update the documentation.
https://github.com/vim/vim/commit/0c078fc7db2902d4ccba04506db082ddbef45a8c
											
										
										
											2018-06-01 10:57:22 -07:00
+								  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars)
 								  call assert_equal("\r", returnchar)
 								  call assert_equal("\t\n\x0b\f\r ", spacechars)
 								  call assert_equal("\t", tabchar)
-												vim-patch:7.4.2222

Problem:    Sourcing a script where a character has 0x80 as a second byte does
            not work. (Filipe L B Correia)
Solution:   Turn 0x80 into K_SPECIAL KS_SPECIAL KE_FILLER. (Christian
            Brabandt, closes vim/vim#728)  Add a test case.

https://github.com/vim/vim/commit/6bff02eb530aa29aafa2cb5627399837be7a5dd5

											
										
										
											2017-03-10 19:22:42 -07:00
+								  call assert_equal('0123456789ABCDEFabcdef', xdigitchars)
-												vim-patch:8.1.0862: no verbose version of character classes

Problem:    No verbose version of character classes.
Solution:   Add [:ident:], [:keyword:] and [:fname:]. (Ozaki Kiichi,
            closes vim/vim#1373)
https://github.com/vim/vim/commit/221cd9f4dd866503777b2fffa721c1403716ad63

											
										
										
											2020-07-24 16:53:07 -07:00
 								  if has('win32')
 								    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz ¡¢£¤¥¦§µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
 								    let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
 								  else
 								    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
 								    let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
 								  endif
 								  if has('win32')
 								    let fnamechars_ok = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
 								  elseif has('amiga')
 								    let fnamechars_ok = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
 								  elseif has('vms')
 								    let fnamechars_ok = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
 								  else
 								    let fnamechars_ok = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
 								  endif
 								  call assert_equal(identchars_ok, identchars)
 								  call assert_equal(kwordchars_ok, kwordchars)
 								  call assert_equal(fnamechars_ok, fnamechars)
 								  call assert_equal(identchars1, identchars)
 								  call assert_equal(kwordchars1, kwordchars)
 								  call assert_equal(fnamechars1, fnamechars)
-												vim-patch:7.4.2222

Problem:    Sourcing a script where a character has 0x80 as a second byte does
            not work. (Filipe L B Correia)
Solution:   Turn 0x80 into K_SPECIAL KS_SPECIAL KE_FILLER. (Christian
            Brabandt, closes vim/vim#728)  Add a test case.

https://github.com/vim/vim/commit/6bff02eb530aa29aafa2cb5627399837be7a5dd5

											
										
										
											2017-03-10 19:22:42 -07:00
+								endfunc
 								func Test_classes_re1()
 								  " Run the shared character-class checks with 'regexpengine' forced to 1,
 								  " then switch the option back to 0.
 								  let &regexpengine = 1
 								  call s:classes_test()
 								  let &regexpengine = 0
 								endfunc
 								func Test_classes_re2()
 								  " Same checks as Test_classes_re1(), but with 'regexpengine' forced to 2.
 								  " The option is put back to 0 once the checks have run.
 								  set regexpengine=2
 								  call s:classes_test()
 								  set regexpengine=0
 								endfunc
-												vim-patch:8.0.0623: error for invalid regexp is not very informative

Problem:    The message "Invalid range" is used for multiple errors.
Solution:   Add two more specific error messages. (Itchyny, Ken Hamada)
https://github.com/vim/vim/commit/966e58e413ffa88af8d748e697aa2999571fcd7b

											
										
										
											2018-06-04 05:40:54 -07:00
+								func Test_reversed_range()
 								  for re in range(0, 2)
 								    exe 'set re=' . re
-												vim-patch:8.2.2716: the equivalent class regexp is missing some characters

Problem:    The equivalent class regexp is missing some characters.
Solution:   Update the list of equivalent characters. (Dominique Pellé,
            closes vim/vim#8029)
https://github.com/vim/vim/commit/0b94e297afd072c51bf2eed12c7ffe3978d93399

Match upstream's indent in s:equivalence_class().

											
										
										
											2022-03-19 00:50:25 -07:00
+								    call assert_fails('call match("abc def", "[c-a]")', 'E944:', re)
-												vim-patch:8.0.0623: error for invalid regexp is not very informative

Problem:    The message "Invalid range" is used for multiple errors.
Solution:   Add two more specific error messages. (Itchyny, Ken Hamada)
https://github.com/vim/vim/commit/966e58e413ffa88af8d748e697aa2999571fcd7b

											
										
										
											2018-06-04 05:40:54 -07:00
+								  endfor
 								  set re=0
 								endfunc
 								func Test_large_class()
 								  " The collection [\u3000-\u4000] must be refused with E945 when
 								  " 'regexpengine' is 1.  With 'regexpengine' set to 2 it has to be
 								  " accepted, matching U+3042 but nothing in plain ASCII text.
 								  let wide_range = '[\u3000-\u4000]'
 								  let &regexpengine = 1
 								  call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')
 								  let &regexpengine = 2
 								  call assert_equal(0, 'abc def' =~# wide_range)
 								  call assert_equal(1, "\u3042" =~# wide_range)
 								  let &regexpengine = 0
 								endfunc
-												vim-patch:8.1.1720: crash with very long %[] pattern

Problem:    Crash with very long %[] pattern. (Reza Mirzazade farkhani)
Solution:   Check for reg_toolong. (closes vim/vim#4703)
https://github.com/vim/vim/commit/2a5b52758bb327b89d22660cc28c157ec29782e5

											
										
										
											2019-07-20 14:22:17 -07:00
 								func Test_optmatch_toolong()
 								  " With 'regexpengine' set to 1 only about 8000 characters can be
 								  " handled, so a \%[] item holding 9000 characters must fail with E339.
 								  let &regexpengine = 1
 								  let cmd = printf('call match("abc def", "\\%%[%s]")', repeat('x', 9000))
 								  call assert_fails(cmd, 'E339:')
 								  let &regexpengine = 0
 								endfunc
-												vim-patch:8.2.0014: test69 and test95 are old style

Problem:    Test69 and test95 are old style.
Solution:   Convert to new style tests. (Yegappan Lakshmanan, closes vim/vim#5365)
https://github.com/vim/vim/commit/afc13bd8271819c7871ff2ae2cfebb22190a0d39

											
										
										
											2020-01-30 19:28:51 -07:00
+								" Test for regexp patterns with multi-byte support, using utf-8.
 								func Test_multibyte_chars()
 								  " tl is a List of Lists with:
 								  "    2: test auto/old/new  0: test auto/old  1: test auto/new
 								  "    regexp pattern
 								  "    text to test the pattern on
 								  "    expected match (optional)
 								  "    expected submatch 1 (optional)
 								  "    expected submatch 2 (optional)
 								  "    etc.
 								  "  When there is no match use only the first two items.
 								  let tl = []

 								  " Multi-byte character tests.
 								  call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
 								  call add(tl, [2, '[[=a=]]\+', 'ddaãâbcd', 'aãâ'])  " equivalence classes
 								  call add(tl, [2, '[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
 								  call add(tl, [2, ' [^ ]\+', 'start มabcdม ', ' มabcdม'])
 								  call add(tl, [2, '[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])

 								  " this is not a normal "i" but 0xec
 								  call add(tl, [2, '\p\+', 'ìa', 'ìa'])
 								  call add(tl, [2, '\p*', 'aあ', 'aあ'])

 								  " Test recognition of some character classes
 								  call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
 								  call add(tl, [2, '\f\+', '&*fname ', 'fname'])

 								  " Test composing character matching
 								  call add(tl, [2, '.ม', 'xม่x yมy', 'yม'])
 								  call add(tl, [2, '.ม่', 'xม่x yมy', 'xม่'])
 								  call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
 								  call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
 								  call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
 								  call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
 								  call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
 								  call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
 								  call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
 								  call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
 								  call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
 								  call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
 								  call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
 								  call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
 								  call add(tl, [2, "a", "ca\u0300t"])
 								  call add(tl, [2, "ca", "ca\u0300t"])
 								  call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
 								  call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
 								  call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
 								  call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])

 								  " Test \Z
 								  call add(tl, [2, 'ú\Z', 'x'])
 								  call add(tl, [2, 'יהוה\Z', 'יהוה', 'יהוה'])
 								  call add(tl, [2, 'יְהוָה\Z', 'יהוה', 'יהוה'])
 								  call add(tl, [2, 'יהוה\Z', 'יְהוָה', 'יְהוָה'])
 								  call add(tl, [2, 'יְהוָה\Z', 'יְהוָה', 'יְהוָה'])
 								  call add(tl, [2, 'יְ\Z', 'וְיַ', 'יַ'])
 								  call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
 								  call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
 								  call add(tl, [2, "ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
 								  call add(tl, [2, "ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
 								  call add(tl, [2, "\u05b9\\Z", "xyz"])
 								  call add(tl, [2, "\\Z\u05b9", "xyz"])
 								  call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
 								  call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
 								  call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
 								  call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])

 								  " Combining different tests and features
 								  call add(tl, [2, '[^[=a=]]\+', 'ddaãâbcd', 'dd'])

 								  " Run the tests
 								  for t in tl
 								    let re = t[0]
 								    let pat = t[1]
 								    let text = t[2]
 								    let matchidx = 3
 								    for engine in [0, 1, 2]
 								      " re == 0 excludes engine 2, re == 1 excludes engine 1.
 								      if engine == 2 && re == 0 || engine == 1 && re == 1
 								        continue
 								      endif
 								      let &regexpengine = engine
 								      try
 								        let l = matchlist(text, pat)
 								      catch
 								        call assert_report('Error ' . engine . ': pat: \"' . pat .
 								              \ '\", text: \"' . text .
 								              \ '\", caused an exception: \"' . v:exception . '\"')
 								        " No result was produced for this engine; "l" is either
 								        " undefined or left over from an earlier run, so skip the
 								        " comparisons below instead of reporting bogus mismatches.
 								        continue
 								      endtry

 								      " check the match itself
 								      if len(l) == 0 && len(t) > matchidx
 								        call assert_report('Error ' . engine . ': pat: \"' . pat .
 								              \ '\", text: \"' . text .
 								              \ '\", did not match, expected: \"' . t[matchidx] . '\"')
 								      elseif len(l) > 0 && len(t) == matchidx
 								        call assert_report('Error ' . engine . ': pat: \"' . pat .
 								              \ '\", text: \"' . text . '\", match: \"' . l[0] .
 								              \ '\", expected no match')
 								      elseif len(t) > matchidx && l[0] != t[matchidx]
 								        call assert_report('Error ' . engine . ': pat: \"' . pat .
 								              \ '\", text: \"' . text . '\", match: \"' . l[0] .
 								              \ '\", expected: \"' . t[matchidx] . '\"')
 								      else
 								        " Test passed
 								      endif

 								      if len(l) > 0
 								        " check all the nine submatches; entries missing from the
 								        " test item are expected to be empty
 								        for i in range(1, 9)
 								          if len(t) <= matchidx + i
 								            let e = ''
 								          else
 								            let e = t[matchidx + i]
 								          endif
 								          if l[i] != e
 								            call assert_report('Error ' . engine . ': pat: \"' . pat .
 								                  \ '\", text: \"' . text . '\", submatch ' . i .
 								                  \ ': \"' . l[i] . '\", expected: \"' . e . '\"')
 								          endif
 								        endfor
 								        unlet i
 								      endif
 								    endfor
 								  endfor
 								  set regexpengine&
 								endfunc
 								" check that 'ambiwidth' does not change the meaning of \p
-												vim-patch:9.1.0334: No test for highlight behavior with 'ambiwidth' (#28354)

Problem:  No test for highlight behavior with 'ambiwidth'.
Solution: Add a screendump test for 'ambiwidth' with 'cursorline'.
          (zeertzjq)

closes: vim/vim#14554

https://github.com/vim/vim/commit/a59e031aa0bdc5cc3d1f4ed719126bf1a1b858ce
											
										
										
											2024-04-15 15:50:23 -07:00
+								func Test_regexp_ambiwidth()
-												vim-patch:8.2.0014: test69 and test95 are old style

Problem:    Test69 and test95 are old style.
Solution:   Convert to new style tests. (Yegappan Lakshmanan, closes vim/vim#5365)
https://github.com/vim/vim/commit/afc13bd8271819c7871ff2ae2cfebb22190a0d39

											
										
										
											2020-01-30 19:28:51 -07:00
+								  set regexpengine=1 ambiwidth=single
 								  call assert_equal(0, match("\u00EC", '\p'))
 								  set regexpengine=1 ambiwidth=double
 								  call assert_equal(0, match("\u00EC", '\p'))
 								  set regexpengine=2 ambiwidth=single
 								  call assert_equal(0, match("\u00EC", '\p'))
 								  set regexpengine=2 ambiwidth=double
 								  call assert_equal(0, match("\u00EC", '\p'))
 								  set regexpengine& ambiwidth&
 								endfunc
-												vim-patch:8.2.0938: NFA regexp uses tolower() to compare ignore-case

Problem:    NFA regexp uses tolower() to compare ignore-case. (Thayne McCombs)
Solution:   Use utf_fold() when possible. (ref. neovim vim/vim#12456)
https://github.com/vim/vim/commit/59de417b904bbd204e313f015839317b577bd124

											
										
										
											2020-06-09 15:17:47 -07:00
+								func Run_regexp_ignore_case()
 								  call assert_equal('iIİ', substitute('iIİ', '\([iIİ]\)', '\1', 'g'))
 								  call assert_equal('iIx', substitute('iIİ', '\c\([İ]\)', 'x', 'g'))
 								  call assert_equal('xxİ', substitute('iIİ', '\(i\c\)', 'x', 'g'))
 								  call assert_equal('iIx', substitute('iIİ', '\(İ\c\)', 'x', 'g'))
 								  call assert_equal('iIx', substitute('iIİ', '\c\(\%u0130\)', 'x', 'g'))
 								  call assert_equal('iIx', substitute('iIİ', '\c\([\u0130]\)', 'x', 'g'))
 								  call assert_equal('iIx', substitute('iIİ', '\c\([\u012f-\u0131]\)', 'x', 'g'))
 								endfunc
 								func Test_regexp_ignore_case()
 								  " Run the ignore-case checks once with 'regexpengine' 1 and once with 2,
 								  " then reset the option to its default.
 								  for engine in [1, 2]
 								    let &regexpengine = engine
 								    call Run_regexp_ignore_case()
 								  endfor
 								  set regexpengine&
 								endfunc
-												vim-patch:8.2.1295: tests 44 and 99 are old style

Problem:    Tests 44 and 99 are old style.
Solution:   Convert to new style tests. (Yegappan Lakshmanan, closes vim/vim#6536)
https://github.com/vim/vim/commit/7d40b8a532fa2ce768f200ab118870d17875a7fe

											
										
										
											2020-07-26 10:43:02 -07:00
+								" Tests for regexp with multi-byte encoding and various magic settings
 								func Run_regexp_multibyte_magic()
 								  " Each fixture line starts with a one-character label ("1".."9",
 								  " "a".."g", "j".."l").  Every step below searches for a pattern, deletes
 								  " the character under the cursor with "x" and compares the resulting
 								  " line, label included.
 								  let text =<< trim END
 								    1 a aa abb abbccc
 								    2 d dd dee deefff
 								    3 g gg ghh ghhiii
 								    4 j jj jkk jkklll
 								    5 m mm mnn mnnooo
 								    6 x ^aa$ x
 								    7 (a)(b) abbaa
 								    8 axx [ab]xx
 								    9 หม่x อมx
 								    a อมx หม่x
 								    b ちカヨは
 								    c x ¬€x
 								    d 天使x
 								    g a啷bb
 								    j 0123❤x
 								    k combinations
 								    l äö üᾱ̆́
 								  END
 								  " Lines "e" and "f" hold U+12345678 and U+1234ABCD, the characters the
 								  " \%U12345678 and [\U1234abcd...] searches below look for.  They are
 								  " beyond U+10FFFF and cannot be stored as valid UTF-8 text, so they are
 								  " built from string escapes and inserted between lines "d" and "g".
 								  call extend(text, ["e \U12345678y", "f \U1234abcdz"], 13)
 								  new
 								  call setline(1, text)

 								  " default 'magic' setting
 								  exe 'normal /a*b\{2}c\+/e' .. "\<CR>x"
 								  call assert_equal('1 a aa abb abbcc', getline('.'))
 								  exe 'normal /\Md\*e\{2}f\+/e' .. "\<CR>x"
 								  call assert_equal('2 d dd dee deeff', getline('.'))

 								  " 'nomagic', combined with \m, \v and \V inside the pattern
 								  set nomagic
 								  exe 'normal /g\*h\{2}i\+/e' .. "\<CR>x"
 								  call assert_equal('3 g gg ghh ghhii', getline('.'))
 								  exe 'normal /\mj*k\{2}l\+/e' .. "\<CR>x"
 								  call assert_equal('4 j jj jkk jkkll', getline('.'))
 								  exe 'normal /\vm*n{2}o+/e' .. "\<CR>x"
 								  call assert_equal('5 m mm mnn mnnoo', getline('.'))
 								  exe 'normal /\V^aa$/' .. "\<CR>x"
 								  call assert_equal('6 x aa$ x', getline('.'))

 								  " back to 'magic': back references and a literal "[ab]"
 								  set magic
 								  exe 'normal /\v(a)(b)\2\1\1/e' .. "\<CR>x"
 								  call assert_equal('7 (a)(b) abba', getline('.'))
 								  exe 'normal /\V[ab]\(\[xy]\)\1' .. "\<CR>x"
 								  call assert_equal('8 axx ab]xx', getline('.'))

 								  " search for multi-byte without composing char
 								  exe 'normal /ม' .. "\<CR>x"
 								  call assert_equal('9 หม่x อx', getline('.'))

 								  " search for multi-byte with composing char
 								  exe 'normal /ม่' .. "\<CR>x"
 								  call assert_equal('a อมx หx', getline('.'))

 								  " find word by change of word class
 								  exe 'normal /ち\<カヨ\>は' .. "\<CR>x"
 								  call assert_equal('b カヨは', getline('.'))

 								  " Test \%u, [\u] and friends
 								  " c
 								  exe 'normal /\%u20ac' .. "\<CR>x"
 								  call assert_equal('c x ¬x', getline('.'))
 								  " d
 								  exe 'normal /[\u4f7f\u5929]\+' .. "\<CR>x"
 								  call assert_equal('d 使x', getline('.'))
 								  " e
 								  exe 'normal /\%U12345678' .. "\<CR>x"
 								  call assert_equal('e y', getline('.'))
 								  " f
 								  exe 'normal /[\U1234abcd\u1234\uabcd]' .. "\<CR>x"
 								  call assert_equal('f z', getline('.'))
 								  " g
 								  exe 'normal /\%d21879b' .. "\<CR>x"
 								  call assert_equal('g abb', getline('.'))

 								  " j Test backwards search from a multi-byte char
 								  exe "normal /x\<CR>x?.\<CR>x"
 								  call assert_equal('j 012❤', getline('.'))

 								  " k: run a :substitute stored in register w whose replacement text
 								  " contains composing characters
 								  let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g'
 								  @w
 								  call assert_equal('k œ̄ṣ́m̥̄ᾱ̆́', getline(18))

 								  close!
 								endfunc
 								func Test_regexp_multibyte_magic()
 								  " Run the magic/nomagic multi-byte checks with 'regexpengine' 1 and then
 								  " 2, and reset the option to its default afterwards.
 								  for engine in [1, 2]
 								    let &regexpengine = engine
 								    call Run_regexp_multibyte_magic()
 								  endfor
 								  set regexpengine&
 								endfunc
 								" Test for 7.3.192
 								" command ":s/ \?/ /g" splits multi-byte characters into bytes
 								func Test_split_multibyte_to_bytes()
 								  " Replacing every optional space with a space must put one space in
 								  " front of each character and leave the multi-byte characters whole,
 								  " including the last one with its composing characters.
 								  let expected = ' l ä ö ü ᾱ̆́'
 								  new
 								  call setline(1, 'l äö üᾱ̆́')
 								  s/ \?/ /g
 								  let actual = getline(1)
 								  call assert_equal(expected, actual)
 								  close!
 								endfunc
 								" Test for matchstr() with multibyte characters
 								func Test_matchstr_multibyte()
 								  " Each entry: [expected result, pattern, start argument, count argument]
 								  " for matchstr() on a four-character Hebrew string.
 								  let cases = [
 								        \ ['ב', ".", 0, 2],
 								        \ ['בג', "..", 0, 2],
 								        \ ['א', ".", 0, 0],
 								        \ ['ג', ".", 4, -1],
 								        \ ]
 								  new
 								  for [want, pat, start, nth] in cases
 								    call assert_equal(want, matchstr("אבגד", pat, start, nth))
 								  endfor
 								  close!
 								endfunc
 								" Test for 7.4.636
 								" A search with end offset gets stuck at end of file.
 								func Test_search_with_end_offset()
 								  new
 								  call setline(1, ['', 'dog(a', 'cat('])
 								  " Search for "(" with the end offset "e+".
 								  exe "normal /(/e+\<CR>"
 								  " Repeat the search, then yank up to the following match into
 								  " register a.
 								  normal n"ayn
 								  call assert_equal("a\ncat(", @a)
 								  close!
 								endfunc
-												vim-patch:8.2.2177: pattern "^" does not match if first character is combining

Problem:    Pattern "^" does not match if the first character in the line is
            combining. (Rene Kita)
Solution:   Do accept a match at the start of the line. (closes vim/vim#6963)
https://github.com/vim/vim/commit/ef2dff52de52c17fe1bd7c06cbb32d8955901f5a

											
										
										
											2020-12-21 17:11:09 -07:00
+								" Check that "^" matches even when the line starts with a combining char
 								func Test_match_start_of_line_combining()
 								  " Three-line buffer whose middle line consists only of U+05AE.  Searching
 								  " for "^" from the first line must stop on line 2.
 								  new
 								  let buflines = ['', "\u05ae", '']
 								  call setline(1, buflines)
 								  normal gg
 								  exe "normal /^\<CR>"
 								  call assert_equal(2, line('.'))
 								  bwipe!
 								endfunc
-												vim-patch:8.2.2278: falling back to old regexp engine can some patterns

Problem:    Falling back to old regexp engine can some patterns.
Solution:   Do not fall back once [[:lower:]] or [[:upper:]] is used.
            (Christian Brabandt, closes vim/vim#7572)
https://github.com/vim/vim/commit/66c50c565321d4d49d8d5620912e5e8fe4825644

											
										
										
											2021-05-25 21:21:09 -07:00
+								" Check that [[:upper:]] matches for automatic engine
 								func Test_match_char_class_upper()
 								  " Runs three searches using [[:upper:]] / [[:lower:]] over one Cyrillic
 								  " line and its Latin transliteration, once per 'regexpengine' value.
 								  " The automatic engine (0) is expected to give the same match counts as
 								  " engine 2; engine 1 is expected to give the smaller counts asserted below.
 								  new
 								  " Test 1: [[:upper:]]\{2,\}
 								  set regexpengine=0
 								  call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
 								  call cursor(1,1)
 								  let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>"
 								  exe search_cmd
 								  call assert_equal(4, searchcount().total, 'TEST 1')
 								  set regexpengine=1
 								  exe search_cmd
 								  call assert_equal(2, searchcount().total, 'TEST 1')
 								  set regexpengine=2
 								  exe search_cmd
 								  call assert_equal(4, searchcount().total, 'TEST 1')
 								  " Test 2: [[:upper:]].\+
 								  let search_cmd='norm /\<[[:upper:]].\+\>' .. "\<CR>"
 								  set regexpengine=0
 								  exe search_cmd
 								  call assert_equal(2, searchcount().total, 'TEST 2')
 								  set regexpengine=1
 								  exe search_cmd
 								  call assert_equal(1, searchcount().total, 'TEST 2')
 								  set regexpengine=2
 								  exe search_cmd
 								  call assert_equal(2, searchcount().total, 'TEST 2')
 								  " Test 3: [[:lower:]]\+
 								  let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>"
 								  set regexpengine=0
 								  exe search_cmd
 								  call assert_equal(4, searchcount().total, 'TEST 3 lower')
 								  set regexpengine=1
 								  exe search_cmd
 								  call assert_equal(2, searchcount().total, 'TEST 3 lower')
 								  set regexpengine=2
 								  exe search_cmd
 								  call assert_equal(4, searchcount().total, 'TEST 3 lower')
 								  " clean up
 								  set regexpengine=0
 								  bwipe!
 								endfunc
-												vim-patch:8.2.2177: pattern "^" does not match if first character is combining

Problem:    Pattern "^" does not match if the first character in the line is
            combining. (Rene Kita)
Solution:   Do accept a match at the start of the line. (closes vim/vim#6963)
https://github.com/vim/vim/commit/ef2dff52de52c17fe1bd7c06cbb32d8955901f5a

											
										
										
											2020-12-21 17:11:09 -07:00
-												vim-patch:8.2.3409: reading beyond end of line with invalid utf-8 character

Problem:    Reading beyond end of line with invalid utf-8 character.
Solution:   Check for NUL when advancing.
https://github.com/vim/vim/commit/65b605665997fad54ef39a93199e305af2fe4d7f

Dropped the non-test part, since it is only non-utf8 related.

											
										
										
											2022-01-06 04:53:47 -07:00
+								func Test_match_invalid_byte()
 								  " Writes a fixed blob to the file 'Xinvalid' and sources it as a Vim
 								  " script in a new window.  The blob includes the byte 0xaa, so the sourced
 								  " text is not plain ASCII.  There are no assertions: the test only needs
 								  " the :source to return.
 								  call writefile(0z630a.765d30aa0a.2e0a.790a.4030, 'Xinvalid')
 								  new
 								  source Xinvalid
 								  bwipe!
 								  " Remove the temporary script file.
 								  call delete('Xinvalid')
 								endfunc
-												vim-patch:9.0.0105: illegal memory access when pattern starts with illegal byte

Problem:    Illegal memory access when pattern starts with illegal byte.
Solution:   Do not match a character with an illegal byte.

https://github.com/vim/vim/commit/f50940531dd57135fe60aa393ac9d3281f352d88

Co-authored-by: Bram Moolenaar <Bram@vim.org>

											
										
										
											2024-07-30 15:37:58 -07:00
+								func Test_match_illegal_byte()
 								  " Writes a short Ex script to 'Xregexp' and runs it through the command
 								  " returned by GetVimCommand() via system().  There are no assertions: the
 								  " test only needs that run to finish so the temporary file can be deleted.
 								  "
 								  " NOTE(review): the "scriptnames" line in the heredoc is indented
 								  " differently from its neighbours, so "trim" will not strip it the same
 								  " way -- confirm against upstream that this line is intact.
 								  let lines =<< trim END
 								      silent! buffer ÿ\c
 								      next ÿ
 scriptnames
 								      source
 								  END
 								  call writefile(lines, 'Xregexp')
 								  call system(GetVimCommand() .. ' -X -Z -e -s -S Xregexp -c qa!')
 								  " Remove the temporary script file.
 								  call delete('Xregexp')
 								endfunc
-												vim-patch:8.2.4440: crash with specific regexp pattern and string

Problem:    Crash with specific regexp pattern and string.
Solution:   Stop at the start of the string.
https://github.com/vim/vim/commit/6456fae9ba8e72c74b2c0c499eaf09974604ff30

											
										
										
											2022-03-11 07:59:26 -07:00
+								func Test_match_too_complicated()
 								  " With 'regexpengine' set to 1, opens a buffer whose name is the three
 								  " raw bytes eb db 99 and then uses a pattern as the :buf argument.  There
 								  " are no assertions: the test only needs these commands to return.
 								  set regexpengine=1
 								  " noswapfile: do not create a swap file for the oddly named buffer.
 								  exe "noswapfile vsplit \xeb\xdb\x99"
 								  " silent!: an error from :buf is acceptable here.
 								  silent! buf \&\zs*\zs*0
 								  bwipe!
 								  " Back to automatic engine selection.
 								  set regexpengine=0
 								endfunc
-												vim-patch:9.1.0011: regexp cannot match combining chars in collection (#26992)

Problem:  regexp cannot match combining chars in collection
Solution: Check for combining characters in regex collections for the
          NFA and BT Regex Engine

Also, while at it, make debug mode work again.

fixes vim/vim#10286
closes: vim/vim#12871

https://github.com/vim/vim/commit/d2cc51f9a1a5a30ef5d2e732f49d7f495cae24cf

Co-authored-by: Christian Brabandt <cb@256bit.org>
											
										
										
											2024-01-11 23:09:10 -07:00
+								func Test_combining_chars_in_collection()
 								  " For each 'regexpengine' value 0, 1 and 2: a collection holding a base
 								  " character plus combining character must remove that composed character
 								  " from the first line where it occurs and leave the other lines alone.
 								  new
 								  for i in range(0,2)
 								    exe "set re=".i
 								    " :put appends below the initial empty line, hence the leading ''.
 								    put =['ɔ̃', 'ɔ',  '̃  ã', 'abcd']
 								    :%s/[ɔ̃]//
 								    call assert_equal(['', '', 'ɔ', '̃  ã', 'abcd'], getline(1,'$'))
 								    " Empty the buffer for the next engine.
 								    %d
 								  endfor
 								  " clean up: the loop leaves 'regexpengine' at 2; do not leak that into
 								  " later tests.
 								  set regexpengine&
 								  bw!
 								endfunc
-												vim-patch:8.2.4440: crash with specific regexp pattern and string

Problem:    Crash with specific regexp pattern and string.
Solution:   Stop at the start of the string.
https://github.com/vim/vim/commit/6456fae9ba8e72c74b2c0c499eaf09974604ff30

											
										
										
											2022-03-11 07:59:26 -07:00
-												vim-patch:9.1.0645: regex: wrong match when searching multi-byte char case-insensitive

Problem:  regex: wrong match when searching multi-byte char
          case-insensitive (diffsetter)
Solution: Apply proper case-folding for characters and search-string

This patch does the following 4 things:

1) When the regexp engine compares two utf-8 codepoints case
   insensitive it may match an adjacent character, because it assumes
   it can step over as many bytes as the pattern contains.

   This however is not necessarily true because of case-folding, a
   multi-byte UTF-8 character can be considered equal to some
   single-byte value.

   Let's consider the pattern 'ſ' and the string 's'. When comparing and
   ignoring case, the single character 's' matches, and since it matches
   Vim will try to step over the match (by the amount of bytes of the
   pattern), assuming that since it matches, the length of both strings is
   the same.

   However in that case, it should only step over the single byte value
   's' by 1 byte and try to start matching after it again. So for the
   backtracking engine we need to ensure:
   * we try to match the correct length for the pattern and the text
   * in case of a match, we step over it correctly

   There is one tricky thing for the backtracking engine. We also need to
   calculate correctly the number of bytes to compare the 2 different
   utf-8 strings s1 and s2. So we will count the number of characters in
   s1 that the byte len specified. Then we count the number of bytes to
   step over the same number of characters in string s2 and then we can
   correctly compare the 2 utf-8 strings.

2) A similar thing can happen for the NFA engine, when skipping to the
   next character to test for a match. We are skipping over the regstart
   pointer, however we do not consider the case that because of
   case-folding we may need to adjust the number of bytes to skip over.
   So this needs to be adjusted in find_match_text() as well.

3) A related issue turned out, when prog->match_text is actually empty.
   In that case we should try to find the next match and skip this
   condition.

4) When comparing characters using collections, we must also apply case
   folding to each character in the collection and not just to the
   current character from the search string.  This doesn't apply to the
   NFA engine, because internally it converts collections to branches
   [abc] -> a\|b\|c

fixes: vim/vim#14294
closes: vim/vim#14756

https://github.com/vim/vim/commit/22e8e12d9f5034e1984db0c567b281fda4de8dd7

N/A patches:
vim-patch:9.0.1771: regex: combining chars in collections not handled
vim-patch:9.0.1777: patch 9.0.1771 causes problems

Co-authored-by: Christian Brabandt <cb@256bit.org>

											
										
										
											2024-07-30 15:06:09 -07:00
+								func Test_search_multibyte_match_ascii()
 								  " Searches for U+017F (ſ) with every 'regexpengine' value.  With \c the
 								  " pattern must also match ASCII 's'; with \C it must match only 'ſ'.
 								  " The second half repeats this for a doubled character, a \+ multi and a
 								  " collection.
 								  new
 								  " Match single 'ſ' and 's'
 								  call setline(1, 'das abc heraus abc ſich abc ſind')
 								  for i in range(0, 2)
 								    exe "set re="..i
 								    let ic_match = matchbufline('%', '\c\%u17f', 1, '$')->mapnew({idx, val -> val.text})
 								    let noic_match = matchbufline('%', '\C\%u17f', 1, '$')->mapnew({idx, val -> val.text})
 								    call assert_equal(['s', 's', 'ſ','ſ'], ic_match, "Ignorecase Regex-engine: " .. &re)
 								    call assert_equal(['ſ','ſ'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
 								  endfor
 								  " Match several 'ſſ' and 'ss'
 								  call setline(1, 'das abc herauss abc ſſich abc ſind')
 								  for i in range(0, 2)
 								    exe "set re="..i
 								    let ic_match = matchbufline('%', '\c\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
 								    let noic_match = matchbufline('%', '\C\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
 								    let ic_match2 = matchbufline('%', '\c\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
 								    let noic_match2 = matchbufline('%', '\C\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
 								    let ic_match3 = matchbufline('%', '\c[\u17f]\+', 1, '$')->mapnew({idx, val -> val.text})
 								    let noic_match3 = matchbufline('%', '\C[\u17f]\+', 1, '$')->mapnew({idx, val -> val.text})
 								    call assert_equal(['ss', 'ſſ'], ic_match, "Ignorecase Regex-engine: " .. &re)
 								    call assert_equal(['ſſ'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
 								    call assert_equal(['s', 'ss', 'ſſ', 'ſ'], ic_match2, "Ignorecase Regex-engine: " .. &re)
 								    call assert_equal(['ſſ','ſ'], noic_match2, "No-Ignorecase Regex-engine: " .. &re)
 								    call assert_equal(['s', 'ss', 'ſſ', 'ſ'], ic_match3, "Ignorecase Collection Regex-engine: " .. &re)
 								    call assert_equal(['ſſ','ſ'], noic_match3, "No-Ignorecase Collection Regex-engine: " .. &re)
 								  endfor
 								  " clean up: the loops leave 'regexpengine' at 2; do not leak that into
 								  " later tests.
 								  set regexpengine&
 								  bw!
 								endfunc
-												vim-patch:8.2.0014: test69 and test95 are old style

Problem:    Test69 and test95 are old style.
Solution:   Convert to new style tests. (Yegappan Lakshmanan, closes vim/vim#5365)
https://github.com/vim/vim/commit/afc13bd8271819c7871ff2ae2cfebb22190a0d39

											
										
										
											2020-01-30 19:28:51 -07:00
+								" vim: shiftwidth=2 sts=2 expandtab