To: vim_dev@googlegroups.com Subject: Patch 7.4.001 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 7.4.001 Problem: Character classes such as [a-z] to not react to 'ignorecase'. Breaks man page highlighting. (Mario Grgic) Solution: Add separate items for classes that react to 'ignorecase'. Clean up logic handling character classes. Add more tests. Files: src/regexp_nfa.c, src/testdir/test64.in, src/testdir/test64.ok *** ../vim-7.4.000/src/regexp_nfa.c 2013-08-01 18:27:51.000000000 +0200 --- src/regexp_nfa.c 2013-08-14 11:49:50.000000000 +0200 *************** *** 29,34 **** --- 29,37 ---- # define NFA_REGEXP_DEBUG_LOG "nfa_regexp_debug.log" #endif + /* Added to NFA_ANY - NFA_NUPPER_IC to include a NL. */ + #define NFA_ADD_NL 31 + enum { NFA_SPLIT = -1024, *************** *** 183,188 **** --- 186,198 ---- NFA_NLOWER, /* Match non-lowercase char */ NFA_UPPER, /* Match uppercase char */ NFA_NUPPER, /* Match non-uppercase char */ + NFA_LOWER_IC, /* Match [a-z] */ + NFA_NLOWER_IC, /* Match [^a-z] */ + NFA_UPPER_IC, /* Match [A-Z] */ + NFA_NUPPER_IC, /* Match [^A-Z] */ + + NFA_FIRST_NL = NFA_ANY + NFA_ADD_NL, + NFA_LAST_NL = NFA_NUPPER_IC + NFA_ADD_NL, NFA_CURSOR, /* Match cursor pos */ NFA_LNUM, /* Match line number */ *************** *** 199,207 **** NFA_MARK_LT, /* Match < mark */ NFA_VISUAL, /* Match Visual area */ - NFA_FIRST_NL = NFA_ANY + ADD_NL, - NFA_LAST_NL = NFA_NUPPER + ADD_NL, - /* Character classes [:alnum:] etc */ NFA_CLASS_ALNUM, NFA_CLASS_ALPHA, --- 209,214 ---- *************** *** 578,583 **** --- 585,592 ---- * On failure, return 0 (=FAIL) * Start points to the first char of the range, while end should point * to the closing brace. + * Keep in mind that 'ignorecase' applies at execution time, thus [a-z] may + * need to be interpreted as [a-zA-Z]. */ static int nfa_recognize_char_class(start, end, extra_newl) *************** *** 681,687 **** return FAIL; if (newl == TRUE) ! extra_newl = ADD_NL; switch (config) { --- 690,696 ---- return FAIL; if (newl == TRUE) ! extra_newl = NFA_ADD_NL; switch (config) { *************** *** 710,722 **** case CLASS_not | CLASS_az | CLASS_AZ: return extra_newl + NFA_NALPHA; case CLASS_az: ! return extra_newl + NFA_LOWER; case CLASS_not | CLASS_az: ! return extra_newl + NFA_NLOWER; case CLASS_AZ: ! return extra_newl + NFA_UPPER; case CLASS_not | CLASS_AZ: ! return extra_newl + NFA_NUPPER; } return FAIL; } --- 719,731 ---- case CLASS_not | CLASS_az | CLASS_AZ: return extra_newl + NFA_NALPHA; case CLASS_az: ! return extra_newl + NFA_LOWER_IC; case CLASS_not | CLASS_az: ! return extra_newl + NFA_NLOWER_IC; case CLASS_AZ: ! return extra_newl + NFA_UPPER_IC; case CLASS_not | CLASS_AZ: ! return extra_newl + NFA_NUPPER_IC; } return FAIL; } *************** *** 914,920 **** break; } ! extra = ADD_NL; /* "\_[" is collection plus newline */ if (c == '[') --- 923,929 ---- break; } ! extra = NFA_ADD_NL; /* "\_[" is collection plus newline */ if (c == '[') *************** *** 970,976 **** } #endif EMIT(nfa_classcodes[p - classchars]); ! if (extra == ADD_NL) { EMIT(NFA_NEWL); EMIT(NFA_OR); --- 979,985 ---- } #endif EMIT(nfa_classcodes[p - classchars]); ! if (extra == NFA_ADD_NL) { EMIT(NFA_NEWL); EMIT(NFA_OR); *************** *** 1240,1260 **** { /* * Try to reverse engineer character classes. For example, ! * recognize that [0-9] stands for \d and [A-Za-z_] with \h, * and perform the necessary substitutions in the NFA. */ result = nfa_recognize_char_class(regparse, endp, ! extra == ADD_NL); if (result != FAIL) { ! if (result >= NFA_DIGIT && result <= NFA_NUPPER) ! EMIT(result); ! else /* must be char class + newline */ { ! EMIT(result - ADD_NL); EMIT(NFA_NEWL); EMIT(NFA_OR); } regparse = endp; mb_ptr_adv(regparse); return OK; --- 1249,1269 ---- { /* * Try to reverse engineer character classes. For example, ! * recognize that [0-9] stands for \d and [A-Za-z_] for \h, * and perform the necessary substitutions in the NFA. */ result = nfa_recognize_char_class(regparse, endp, ! extra == NFA_ADD_NL); if (result != FAIL) { ! if (result >= NFA_FIRST_NL && result <= NFA_LAST_NL) { ! EMIT(result - NFA_ADD_NL); EMIT(NFA_NEWL); EMIT(NFA_OR); } + else + EMIT(result); regparse = endp; mb_ptr_adv(regparse); return OK; *************** *** 1504,1510 **** * collection, add an OR below. But not for negated * range. */ if (!negated) ! extra = ADD_NL; } else { --- 1513,1519 ---- * collection, add an OR below. But not for negated * range. */ if (!negated) ! extra = NFA_ADD_NL; } else { *************** *** 1537,1543 **** EMIT(NFA_END_COLL); /* \_[] also matches \n but it's not negated */ ! if (extra == ADD_NL) { EMIT(reg_string ? NL : NFA_NEWL); EMIT(NFA_OR); --- 1546,1552 ---- EMIT(NFA_END_COLL); /* \_[] also matches \n but it's not negated */ ! if (extra == NFA_ADD_NL) { EMIT(reg_string ? NL : NFA_NEWL); EMIT(NFA_OR); *************** *** 2011,2017 **** if (c >= NFA_FIRST_NL && c <= NFA_LAST_NL) { addnl = TRUE; ! c -= ADD_NL; } STRCPY(code, ""); --- 2020,2026 ---- if (c >= NFA_FIRST_NL && c <= NFA_LAST_NL) { addnl = TRUE; ! c -= NFA_ADD_NL; } STRCPY(code, ""); *************** *** 2217,2222 **** --- 2226,2235 ---- case NFA_NLOWER:STRCPY(code, "NFA_NLOWER"); break; case NFA_UPPER: STRCPY(code, "NFA_UPPER"); break; case NFA_NUPPER:STRCPY(code, "NFA_NUPPER"); break; + case NFA_LOWER_IC: STRCPY(code, "NFA_LOWER_IC"); break; + case NFA_NLOWER_IC: STRCPY(code, "NFA_NLOWER_IC"); break; + case NFA_UPPER_IC: STRCPY(code, "NFA_UPPER_IC"); break; + case NFA_NUPPER_IC: STRCPY(code, "NFA_NUPPER_IC"); break; default: STRCPY(code, "CHAR(x)"); *************** *** 2687,2692 **** --- 2700,2709 ---- case NFA_NLOWER: case NFA_UPPER: case NFA_NUPPER: + case NFA_LOWER_IC: + case NFA_NLOWER_IC: + case NFA_UPPER_IC: + case NFA_NUPPER_IC: /* possibly non-ascii */ #ifdef FEAT_MBYTE if (has_mbyte) *************** *** 3841,3846 **** --- 3858,3867 ---- case NFA_NLOWER: case NFA_UPPER: case NFA_NUPPER: + case NFA_LOWER_IC: + case NFA_NLOWER_IC: + case NFA_UPPER_IC: + case NFA_NUPPER_IC: case NFA_START_COLL: case NFA_START_NEG_COLL: case NFA_NEWL: *************** *** 5872,5877 **** --- 5893,5920 ---- ADD_STATE_IF_MATCH(t->state); break; + case NFA_LOWER_IC: /* [a-z] */ + result = ri_lower(curc) || (ireg_ic && ri_upper(curc)); + ADD_STATE_IF_MATCH(t->state); + break; + + case NFA_NLOWER_IC: /* [^a-z] */ + result = curc != NUL + && !(ri_lower(curc) || (ireg_ic && ri_upper(curc))); + ADD_STATE_IF_MATCH(t->state); + break; + + case NFA_UPPER_IC: /* [A-Z] */ + result = ri_upper(curc) || (ireg_ic && ri_lower(curc)); + ADD_STATE_IF_MATCH(t->state); + break; + + case NFA_NUPPER_IC: /* ^[A-Z] */ + result = curc != NUL + && !(ri_upper(curc) || (ireg_ic && ri_lower(curc))); + ADD_STATE_IF_MATCH(t->state); + break; + case NFA_BACKREF1: case NFA_BACKREF2: case NFA_BACKREF3: *** ../vim-7.4.000/src/testdir/test64.in 2013-08-01 17:45:33.000000000 +0200 --- src/testdir/test64.in 2013-08-14 11:50:11.000000000 +0200 *************** *** 289,303 **** :call add(tl, [2, '.a\%$', " a\n "]) :call add(tl, [2, '.a\%$', " a\n_a", "_a"]) :" ! :"""" Test recognition of some character classes ! :call add(tl, [2, '[0-9]', '8', '8']) ! :call add(tl, [2, '[^0-9]', '8']) ! :call add(tl, [2, '[0-9a-fA-F]*', '0a7', '0a7']) ! :call add(tl, [2, '[^0-9A-Fa-f]\+', '0a7']) ! :call add(tl, [2, '[a-z_A-Z0-9]\+', 'aso_sfoij', 'aso_sfoij']) ! :call add(tl, [2, '[a-z]', 'a', 'a']) ! :call add(tl, [2, '[a-zA-Z]', 'a', 'a']) ! :call add(tl, [2, '[A-Z]', 'a']) :call add(tl, [2, '\C[^A-Z]\+', 'ABCOIJDEOIFNSD jsfoij sa', ' jsfoij sa']) :" :"""" Tests for \z features --- 289,317 ---- :call add(tl, [2, '.a\%$', " a\n "]) :call add(tl, [2, '.a\%$', " a\n_a", "_a"]) :" ! :"""" Test recognition of character classes ! :call add(tl, [2, '[0-7]\+', 'x0123456789x', '01234567']) ! :call add(tl, [2, '[^0-7]\+', '0a;X+% 897', 'a;X+% 89']) ! :call add(tl, [2, '[0-9]\+', 'x0123456789x', '0123456789']) ! :call add(tl, [2, '[^0-9]\+', '0a;X+% 9', 'a;X+% ']) ! :call add(tl, [2, '[0-9a-fA-F]\+', 'x0189abcdefg', '0189abcdef']) ! :call add(tl, [2, '[^0-9A-Fa-f]\+', '0189g;X+% ab', 'g;X+% ']) ! :call add(tl, [2, '[a-z_A-Z0-9]\+', ';+aso_SfOij ', 'aso_SfOij']) ! :call add(tl, [2, '[^a-z_A-Z0-9]\+', 'aSo_;+% sfOij', ';+% ']) ! :call add(tl, [2, '[a-z_A-Z]\+', '0abyz_ABYZ;', 'abyz_ABYZ']) ! :call add(tl, [2, '[^a-z_A-Z]\+', 'abAB_09;+% yzYZ', '09;+% ']) ! :call add(tl, [2, '[a-z]\+', '0abcxyz1', 'abcxyz']) ! :call add(tl, [2, '[a-z]\+', 'AabxyzZ', 'abxyz']) ! :call add(tl, [2, '[^a-z]\+', 'a;X09+% x', ';X09+% ']) ! :call add(tl, [2, '[^a-z]\+', 'abX0;%yz', 'X0;%']) ! :call add(tl, [2, '[a-zA-Z]\+', '0abABxzXZ9', 'abABxzXZ']) ! :call add(tl, [2, '[^a-zA-Z]\+', 'ab09_;+ XZ', '09_;+ ']) ! :call add(tl, [2, '[A-Z]\+', 'aABXYZz', 'ABXYZ']) ! :call add(tl, [2, '[^A-Z]\+', 'ABx0;%YZ', 'x0;%']) ! :call add(tl, [2, '[a-z]\+\c', '0abxyzABXYZ;', 'abxyzABXYZ']) ! :call add(tl, [2, '[A-Z]\+\c', '0abABxzXZ9', 'abABxzXZ']) ! :call add(tl, [2, '\c[^a-z]\+', 'ab09_;+ XZ', '09_;+ ']) ! :call add(tl, [2, '\c[^A-Z]\+', 'ab09_;+ XZ', '09_;+ ']) :call add(tl, [2, '\C[^A-Z]\+', 'ABCOIJDEOIFNSD jsfoij sa', ' jsfoij sa']) :" :"""" Tests for \z features *** ../vim-7.4.000/src/testdir/test64.ok 2013-08-01 18:28:56.000000000 +0200 --- src/testdir/test64.ok 2013-08-14 11:50:37.000000000 +0200 *************** *** 650,679 **** OK 0 - .a\%$ OK 1 - .a\%$ OK 2 - .a\%$ ! OK 0 - [0-9] ! OK 1 - [0-9] ! OK 2 - [0-9] ! OK 0 - [^0-9] ! OK 1 - [^0-9] ! OK 2 - [^0-9] ! OK 0 - [0-9a-fA-F]* ! OK 1 - [0-9a-fA-F]* ! OK 2 - [0-9a-fA-F]* OK 0 - [^0-9A-Fa-f]\+ OK 1 - [^0-9A-Fa-f]\+ OK 2 - [^0-9A-Fa-f]\+ OK 0 - [a-z_A-Z0-9]\+ OK 1 - [a-z_A-Z0-9]\+ OK 2 - [a-z_A-Z0-9]\+ ! OK 0 - [a-z] ! OK 1 - [a-z] ! OK 2 - [a-z] ! OK 0 - [a-zA-Z] ! OK 1 - [a-zA-Z] ! OK 2 - [a-zA-Z] ! OK 0 - [A-Z] ! OK 1 - [A-Z] ! OK 2 - [A-Z] OK 0 - \C[^A-Z]\+ OK 1 - \C[^A-Z]\+ OK 2 - \C[^A-Z]\+ --- 650,721 ---- OK 0 - .a\%$ OK 1 - .a\%$ OK 2 - .a\%$ ! OK 0 - [0-7]\+ ! OK 1 - [0-7]\+ ! OK 2 - [0-7]\+ ! OK 0 - [^0-7]\+ ! OK 1 - [^0-7]\+ ! OK 2 - [^0-7]\+ ! OK 0 - [0-9]\+ ! OK 1 - [0-9]\+ ! OK 2 - [0-9]\+ ! OK 0 - [^0-9]\+ ! OK 1 - [^0-9]\+ ! OK 2 - [^0-9]\+ ! OK 0 - [0-9a-fA-F]\+ ! OK 1 - [0-9a-fA-F]\+ ! OK 2 - [0-9a-fA-F]\+ OK 0 - [^0-9A-Fa-f]\+ OK 1 - [^0-9A-Fa-f]\+ OK 2 - [^0-9A-Fa-f]\+ OK 0 - [a-z_A-Z0-9]\+ OK 1 - [a-z_A-Z0-9]\+ OK 2 - [a-z_A-Z0-9]\+ ! OK 0 - [^a-z_A-Z0-9]\+ ! OK 1 - [^a-z_A-Z0-9]\+ ! OK 2 - [^a-z_A-Z0-9]\+ ! OK 0 - [a-z_A-Z]\+ ! OK 1 - [a-z_A-Z]\+ ! OK 2 - [a-z_A-Z]\+ ! OK 0 - [^a-z_A-Z]\+ ! OK 1 - [^a-z_A-Z]\+ ! OK 2 - [^a-z_A-Z]\+ ! OK 0 - [a-z]\+ ! OK 1 - [a-z]\+ ! OK 2 - [a-z]\+ ! OK 0 - [a-z]\+ ! OK 1 - [a-z]\+ ! OK 2 - [a-z]\+ ! OK 0 - [^a-z]\+ ! OK 1 - [^a-z]\+ ! OK 2 - [^a-z]\+ ! OK 0 - [^a-z]\+ ! OK 1 - [^a-z]\+ ! OK 2 - [^a-z]\+ ! OK 0 - [a-zA-Z]\+ ! OK 1 - [a-zA-Z]\+ ! OK 2 - [a-zA-Z]\+ ! OK 0 - [^a-zA-Z]\+ ! OK 1 - [^a-zA-Z]\+ ! OK 2 - [^a-zA-Z]\+ ! OK 0 - [A-Z]\+ ! OK 1 - [A-Z]\+ ! OK 2 - [A-Z]\+ ! OK 0 - [^A-Z]\+ ! OK 1 - [^A-Z]\+ ! OK 2 - [^A-Z]\+ ! OK 0 - [a-z]\+\c ! OK 1 - [a-z]\+\c ! OK 2 - [a-z]\+\c ! OK 0 - [A-Z]\+\c ! OK 1 - [A-Z]\+\c ! OK 2 - [A-Z]\+\c ! OK 0 - \c[^a-z]\+ ! OK 1 - \c[^a-z]\+ ! OK 2 - \c[^a-z]\+ ! OK 0 - \c[^A-Z]\+ ! OK 1 - \c[^A-Z]\+ ! OK 2 - \c[^A-Z]\+ OK 0 - \C[^A-Z]\+ OK 1 - \C[^A-Z]\+ OK 2 - \C[^A-Z]\+ *** ../vim-7.4.000/src/version.c 2013-08-10 13:29:20.000000000 +0200 --- src/version.c 2013-08-14 11:54:57.000000000 +0200 *************** *** 729,730 **** --- 729,732 ---- { /* Add new patch number below this line */ + /**/ + 1, /**/ -- How many light bulbs does it take to change a person? /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///