diff --git a/basic.c b/basic.c index 2d31f45..7bca596 100644 --- a/basic.c +++ b/basic.c @@ -980,7 +980,7 @@ gotoeosent(int f, int n) exp = b_val_rexp(curbp, VAL_SENTENCES)->reg; /* if we're on the end of a sentence now, don't bother scanning further, or we'll miss the immediately following sentence */ - if (!(lregexec(exp, DOT.l, DOT.o, llength(DOT.l)) && + if (!(lregexec(exp, DOT.l, DOT.o, llength(DOT.l), FALSE) && exp->startp[0] - lvalue(DOT.l) == DOT.o)) { if (findpat(f, n, exp, FORWARD) != TRUE) { DOT = curbp->b_line; diff --git a/buffer.c b/buffer.c index 06c7932..d921fbd 100644 --- a/buffer.c +++ b/buffer.c @@ -1187,7 +1187,7 @@ found_modeline(LINE *lp, int *first, int *last) for (n = 0; n < TABLESIZE(mls_patterns); ++n) { regexp *prog = mls_regcomp((int) n); - if (lregexec(prog, lp, 0, limit)) { + if (lregexec(prog, lp, 0, limit, FALSE)) { int j = mls_patterns[n].mark; *first = (int) (prog->startp[j] - prog->startp[0]); *last = (int) (prog->endp[j] - prog->startp[0]); @@ -1564,11 +1564,9 @@ int has_C_suffix(BUFFER *bp) { int s; - int save = ignorecase; - ignorecase = global_g_val(GMDFILENAME_IC); s = nregexec(global_g_val_rexp(GVAL_CSUFFIXES)->reg, - bp->b_fname, (char *) 0, 0, -1); - ignorecase = save; + bp->b_fname, (char *) 0, 0, -1, + global_g_val(GMDFILENAME_IC)); return s; } #endif @@ -1642,7 +1640,7 @@ make_buffer_list(char *bufn) if ((exp = regcomp(bufn, strlen(bufn), TRUE)) != 0) { for_each_buffer(bp) { - if (nregexec(exp, bp->b_bname, (char *) 0, 0, -1)) { + if (nregexec(exp, bp->b_bname, (char *) 0, 0, -1, FALSE)) { result[count++] = strmalloc(bp->b_bname); } } diff --git a/doc/vile-hlp.html b/doc/vile-hlp.html index 21a26fc..c40a22a 100644 --- a/doc/vile-hlp.html +++ b/doc/vile-hlp.html @@ -3279,6 +3279,12 @@ set-rs-crlf or set-dos-mode If your keyboard repeats really fast and you have smoothscroll enabled, it may take a while for vile to catch up. (U) +
smartcase + (scs)
+ +
Overrides the setting of ignorecase + when the search pattern contains uppercase characters, making that search case-sensitive. (B)
+
spaces-after-sentence (sas)
diff --git a/eightbit.c b/eightbit.c index 04c59bd..9830cc4 100644 --- a/eightbit.c +++ b/eightbit.c @@ -297,7 +297,7 @@ vl_narrowed(const char *wide) if ((result = malloc(strlen(wide) + 2 + strlen(on_right))) != 0) { strcpy(result, wide); for (n = 0; n < len; ++n) { - found = regexec(exp, result, result + len, n, len); + found = regexec(exp, result, result + len, n, len, FALSE); if (found) break; } @@ -587,7 +587,7 @@ vl_get_encoding(char **target, const char *locale) exp = regcomp(tb_values(latin1_expr), (size_t) tb_length0(latin1_expr), TRUE); if (exp != 0) { - if (nregexec(exp, mylocale, (char *) 0, 0, -1)) { + if (nregexec(exp, mylocale, (char *) 0, 0, -1, FALSE)) { TRACE(("... found match in $latin1-expr\n")); result = iso_latin1; } diff --git a/eval.c b/eval.c index 58bcedd..4a5487d 100644 --- a/eval.c +++ b/eval.c @@ -294,7 +294,7 @@ match_charclass_regexp(int ch, REGEXVAL * exp) char temp[2]; temp[0] = (char) ch; - return nregexec(exp->reg, temp, temp + 1, 0, 0); + return nregexec(exp->reg, temp, temp + 1, 0, 0, FALSE); } static int @@ -1469,15 +1469,12 @@ run_func(int fnum) break; case UFCMATCH: if ((exp = new_regexval(arg[0], TRUE)) != 0) { - int save_flag = ignorecase; - ignorecase = TRUE; - value = nregexec(exp->reg, arg[1], (char *) 0, 0, -1); - ignorecase = save_flag; + value = nregexec(exp->reg, arg[1], (char *) 0, 0, -1, TRUE); } break; case UFMATCH: if ((exp = new_regexval(arg[0], TRUE)) != 0) - value = nregexec(exp->reg, arg[1], (char *) 0, 0, -1); + value = nregexec(exp->reg, arg[1], (char *) 0, 0, -1, FALSE); break; case UFRANDOM: /* FALLTHRU */ case UFRND: diff --git a/exec.c b/exec.c index 426abb8..4dafd44 100644 --- a/exec.c +++ b/exec.c @@ -246,6 +246,7 @@ parse_linespec(const char *s, LINE **markptr) last_srch_direc, (DOT.o == 0), FALSE, + FALSE, (int *) 0)) { lp = DOT.l; found = TRUE; diff --git a/fences.c b/fences.c index 8096bc7..2c63724 100644 --- a/fences.c +++ b/fences.c @@ -149,7 +149,7 @@ typeof_complex(int code) static int 
match_complex(TRACEARG(int group) - LINE *lp, struct VAL *vals) + LINE *lp, struct VAL *vals, int ic) { static int modes[] = {CPP_IF, CPP_ELIF, CPP_ELSE, CPP_ENDIF}; @@ -176,7 +176,7 @@ match_complex(TRACEARG(int group) default: continue; } - if (lregexec(any_rexp(vals, k)->reg, lp, 0, llength(lp))) { + if (lregexec(any_rexp(vals, k)->reg, lp, 0, llength(lp), ic)) { code = modes[j]; TRACE(("match_complex(%d) %s\n", group, typeof_complex(code))); break; @@ -198,7 +198,7 @@ match_simple(void) TRACE(("match_simple %d:%s\n", line_no(curbp, DOT.l), lp_visible(DOT.l))); for (first = 0; first < last; first = S_COL(BlkBegin) + 1) { - if (!lregexec(BlkBegin, DOT.l, first, last)) + if (!lregexec(BlkBegin, DOT.l, first, last, FALSE)) break; if ((S_COL(BlkBegin) <= DOT.o) && (E_COL(BlkBegin) > DOT.o)) { @@ -208,7 +208,7 @@ match_simple(void) } for (first = 0; first < last && DOT.o <= last; last = E_COL(BlkEnd) - 1) { - if (!lregexec(BlkEnd, DOT.l, first, last)) + if (!lregexec(BlkEnd, DOT.l, first, last, FALSE)) break; if ((S_COL(BlkEnd) <= DOT.o) && (E_COL(BlkEnd) > DOT.o)) { @@ -276,7 +276,7 @@ complex_fence(int sdir, int key, int group, int level, int *newkey) for_each_modegroup(curbp, result, group, vals) { DOT = savedot; count = savecount; - if (((that = match_complex(TRACEARG(result) DOT.l, vals)) != CPP_UNKNOWN)) { + if (((that = match_complex(TRACEARG(result) DOT.l, vals, FALSE)) != CPP_UNKNOWN)) { int done = FALSE; TRACE(("for_each_modegroup:%d:%d (line %d, count %d)\n", @@ -400,7 +400,6 @@ find_complex(int sdir, int *newkey) int rc = FALSE; int key; int group = -1; - int save_ic = ignorecase; MARK oldpos, oldpre; struct VAL *vals; @@ -410,8 +409,8 @@ find_complex(int sdir, int *newkey) TRACE(("find_complex %4d:%s\n", line_no(curbp, DOT.l), lp_visible(DOT.l))); limit_iterations(); for_each_modegroup(curbp, group, 0, vals) { - ignorecase = any_mode(vals, MDIGNCASE); - if ((key = match_complex(TRACEARG(group) DOT.l, vals)) != CPP_UNKNOWN) { + int ic = any_mode(vals, 
MDIGNCASE); + if ((key = match_complex(TRACEARG(group) DOT.l, vals, ic)) != CPP_UNKNOWN) { start_fence_op2(sdir, oldpos, oldpre); sdir = ((key == CPP_ENDIF) ? REVERSE @@ -427,7 +426,6 @@ find_complex(int sdir, int *newkey) #endif } } - ignorecase = save_ic; #if OPT_MAJORMODE TRACE(("...find_complex %d (iterations %ld)\n", rc, iterations)); #endif @@ -451,7 +449,7 @@ find_one_complex(int sdir, int level, int group, int *newkey) * Iterate over the complex fence groups */ TRACE(("find_one_complex %4d:%s\n", line_no(curbp, DOT.l), lp_visible(DOT.l))); - if ((key = match_complex(TRACEARG(group) DOT.l, vals)) != CPP_UNKNOWN) { + if ((key = match_complex(TRACEARG(group) DOT.l, vals, FALSE)) != CPP_UNKNOWN) { start_fence_op2(sdir, oldpos, oldpre); if (level == 0) sdir = ((key == CPP_ENDIF) @@ -543,7 +541,7 @@ comment_fence(int sdir) scanboundry(FALSE, DOT, sdir); if (scanner((sdir == FORWARD) ? BlkEnd : BlkBegin, - sdir, (DOT.o == 0), FALSE, (int *) 0)) { + sdir, (DOT.o == 0), FALSE, FALSE, (int *) 0)) { if (!doingopcmd || doingsweep) { sweephack = TRUE; if (sdir == FORWARD && (BlkEnd->mlen > 1)) diff --git a/finderr.c b/finderr.c index fa97281..854da63 100644 --- a/finderr.c +++ b/finderr.c @@ -717,7 +717,7 @@ finderr(int f GCC_UNUSED, int n GCC_UNUSED) count = 0; while ((exp = next_pattern(count++)) != 0) { if (exp->words[W_VERB] > 0) - if (lregexec(exp->exp_comp, tdotp, 0, llength(tdotp))) + if (lregexec(exp->exp_comp, tdotp, 0, llength(tdotp), FALSE)) break; } @@ -750,7 +750,7 @@ finderr(int f GCC_UNUSED, int n GCC_UNUSED) if (lisreal(dotp)) { count = 0; while ((exp = next_pattern(count++)) != 0 - && !lregexec(exp->exp_comp, dotp, 0, llength(dotp))) { + && !lregexec(exp->exp_comp, dotp, 0, llength(dotp), FALSE)) { ; } diff --git a/input.c b/input.c index fc09a2e..7c71268 100644 --- a/input.c +++ b/input.c @@ -952,7 +952,7 @@ vl_regex2tbuff_best(TBUFF **result, regexp * exp) vl_get_offset = -1; vl_get_length = -1; while (given >= 0) { - if (lregexec(exp, DOT.l, given, 
llength(DOT.l))) { + if (lregexec(exp, DOT.l, given, llength(DOT.l), FALSE)) { offset = (C_NUM) (exp->startp[0] - line_text); length = (C_NUM) (exp->endp[0] - exp->startp[0]); if ((length > vl_get_length) @@ -982,7 +982,7 @@ vl_regex2tbuff_dot(TBUFF **result, regexp * exp) vl_get_offset = -1; vl_get_length = -1; while (given >= 0) { - if (lregexec(exp, DOT.l, given, llength(DOT.l))) { + if (lregexec(exp, DOT.l, given, llength(DOT.l), FALSE)) { offset = (C_NUM) (exp->startp[0] - line_text); length = (C_NUM) (exp->endp[0] - exp->startp[0]); if (offset <= DOT.o) { diff --git a/isearch.c b/isearch.c index ee2cbd0..14417d9 100644 --- a/isearch.c +++ b/isearch.c @@ -42,7 +42,8 @@ scanmore( /* search forward or back for a pattern */ FreeIfNeeded(gregexp); gregexp = regcomp(tb_values(patrn), tb_length(patrn), b_val(curbp, MDMAGIC)); if (gregexp != 0) { - ignorecase = window_b_val(curwp, MDIGNCASE); + int ic = window_b_val(curwp, MDIGNCASE) && + !(window_b_val(curwp, MDSMARTCASE) && gregexp->uppercase); /* smartcase: uppercase in the pattern forces a case-sensitive search */ if (curwp != 0) { sts = scanner(gregexp, @@ -51,6 +52,7 @@ scanmore( /* search forward or back for a pattern */ : FORWARD), FALSE, (DOT.o == 0), + ic, (int *) 0); } if (!sts) { diff --git a/line.c b/line.c index ae4b517..273fc82 100644 --- a/line.c +++ b/line.c @@ -976,7 +976,7 @@ lrepl_regex(REGEXVAL * rexp, const char *np, int length) mayneedundo(); - if (lregexec(exp, DOT.l, DOT.o, llength(DOT.l))) { + if (lregexec(exp, DOT.l, DOT.o, llength(DOT.l), FALSE)) { int old = (int) (exp->endp[0] - exp->startp[0]); if (old > 0) { regionshape = rgn_EXACT; diff --git a/modes.c b/modes.c index 8f06569..4877a49 100644 --- a/modes.c +++ b/modes.c @@ -4692,7 +4692,6 @@ test_by_suffix(int n, BUFFER *bp) if (my_majormodes[n].flag) { regexp *exp; - int savecase = ignorecase; char *pathname = bp->b_fname; char *filename; char *suffix; @@ -4701,7 +4700,7 @@ test_by_suffix(int n, BUFFER *bp) TBUFF *stripname = 0; #endif - ignorecase = global_g_val(GMDFILENAME_IC) || get_sm_b_val(n, 
MDIGNCASE); + int ic = global_g_val(GMDFILENAME_IC) || get_sm_b_val(n, MDIGNCASE); #if OPT_VMS_PATH tb_scopy(&stripname, pathname); pathname = tb_values(stripname); @@ -4710,7 +4709,7 @@ test_by_suffix(int n, BUFFER *bp) if (((exp = get_sm_rexp(n, VAL_STRIPSUFFIX)) != 0 || (exp = b_val_rexp(bp, VAL_STRIPSUFFIX)->reg) != 0) - && nregexec(exp, pathname, (char *) 0, 0, -1)) { + && nregexec(exp, pathname, (char *) 0, 0, -1, ic)) { if (tb_scopy(&savename, pathname) != 0) { strcpy(tb_values(savename) + (exp->startp[0] - pathname), exp->endp[0]); @@ -4727,13 +4726,13 @@ test_by_suffix(int n, BUFFER *bp) #endif if ((exp = get_mm_rexp(n, MVAL_MODE_PATHNAME)) != 0 - && nregexec(exp, pathname, (char *) 0, 0, -1)) { + && nregexec(exp, pathname, (char *) 0, 0, -1, ic)) { TRACE(("test_by_pathname(%s) %s\n", pathname, my_majormodes[n].shortname)); result = n; } else if ((exp = get_mm_rexp(n, MVAL_MODE_FILENAME)) != 0 - && nregexec(exp, filename, (char *) 0, 0, -1)) { + && nregexec(exp, filename, (char *) 0, 0, -1, ic)) { TRACE(("test_by_filename(%s) %s %s\n", pathname, filename, @@ -4742,14 +4741,13 @@ test_by_suffix(int n, BUFFER *bp) } else if (!isShellOrPipe(pathname) && suffix != 0 && (exp = get_mm_rexp(n, MVAL_MODE_SUFFIXES)) != 0 - && nregexec(exp, suffix, (char *) 0, 0, -1)) { + && nregexec(exp, suffix, (char *) 0, 0, -1, ic)) { TRACE(("test_by_suffixes(%s) %s %s\n", pathname, suffix, my_majormodes[n].shortname)); result = n; } - ignorecase = savecase; tb_free(&savename); #if OPT_VMS_PATH tb_free(&stripname); @@ -4780,16 +4778,14 @@ test_by_preamble(int n, BUFFER *bp GCC_UNUSED, LINE *lp) if (lp != 0 && my_majormodes[n].flag) { regexp *exp = get_mm_rexp(n, MVAL_PREAMBLE); - int savecase = ignorecase; - ignorecase = global_g_val(GMDFILENAME_IC) || get_sm_b_val(n, MDIGNCASE); + int ic = global_g_val(GMDFILENAME_IC) || get_sm_b_val(n, MDIGNCASE); if (exp != 0 - && lregexec(exp, lp, 0, llength(lp))) { + && lregexec(exp, lp, 0, llength(lp), ic)) { TRACE(("test_by_preamble(%s) 
%s\n", bp->b_fname, my_majormodes[n].shortname)); result = n; } - ignorecase = savecase; } return result; } diff --git a/modetbl b/modetbl index a97e5ac..4084aed 100644 --- a/modetbl +++ b/modetbl @@ -121,6 +121,7 @@ bool # MD prefix "reuse-position" REUSE_POS 0 OPT_UPBUFF # reuse position when (re)creating scratch-buffers "ShowMatch" SHOWMAT 0 # show matching braces during insert "ShowMoDe" SHOWMODE chgd_status # show insert/replace/command mode + "SmartCaSe" SMARTCASE chgd_hilite # override "ignorecase" if search contains uppercase letters "Spaces-After-Sentence" SPACESENT 0 # add two spaces after a "TabInsert" TABINSERT 0 # okay to insert tab chars "TagignoreCase" TAGIGNORECASE 0 !SMALLER # ignore upper/lower case in tags diff --git a/oneliner.c b/oneliner.c index e629004..007f0c4 100644 --- a/oneliner.c +++ b/oneliner.c @@ -317,6 +317,7 @@ substline(regexp * exp, int nth_occur, int printit, int globally, int *confirmp) int s; int which_occur = 0; int at_bol = (DOT.o <= b_left_margin(curbp)); + int ic; int matched_at_eol = FALSE; int yes, c, skipped; @@ -332,18 +333,18 @@ substline(regexp * exp, int nth_occur, int printit, int globally, int *confirmp) returnCode(FALSE); } - ignorecase = window_b_val(curwp, MDIGNCASE); - if (curwp == 0) returnCode(FALSE); + ic = window_b_val(curwp, MDIGNCASE); + foundit = FALSE; scanboundpos.l = DOT.l; scanbound_is_header = FALSE; DOT.o = b_left_margin(curbp); do { scanboundpos.o = llength(DOT.l); - s = scanner(exp, FORWARD, FALSE, at_bol, (int *) 0); + s = scanner(exp, FORWARD, FALSE, at_bol, ic, (int *) 0); if (s != TRUE) break; diff --git a/proto.h b/proto.h index 91c95e9..81d81bd 100644 --- a/proto.h +++ b/proto.h @@ -1169,9 +1169,9 @@ extern void update_dos_drv_dir (char * cwd); /* regexp.c */ #define lregexec vl_lregexec -extern int cregexec (regexp *prog, LINE *lp, int startoff, int endoff, int at_bol); -extern int lregexec (regexp *prog, LINE *lp, int startoff, int endoff); -extern int nregexec (regexp *prog, char 
*string, char *stringend, int startoff, int endoff); +extern int cregexec (regexp *prog, LINE *lp, int startoff, int endoff, int at_bol, int ic); +extern int lregexec (regexp *prog, LINE *lp, int startoff, int endoff, int ic); +extern int nregexec (regexp *prog, char *string, char *stringend, int startoff, int endoff, int ic); /* region.c */ typedef int (*DORGNLINES)(int (*)(REGN_ARGS), void *, int); @@ -1210,7 +1210,7 @@ extern TBUFF * encode_attributes (LINE *lp, BUFFER *bp, REGION * top_region); extern int findpat (int f, int n, regexp *exp, int direc); extern int fsearch (int f, int n, int marking, int fromscreen); extern int readpattern (const char *prompt, TBUFF **apat, regexp **srchexpp, int c, int fromscreen); -extern int scanner (regexp *exp, int direct, int wrapok, int at_bol, int *wrappedp); +extern int scanner (regexp *exp, int direct, int wrapok, int at_bol, int ic, int *wrappedp); extern void attrib_matches (void); extern void scanboundry (int wrapok, MARK dot, int dir); diff --git a/regexp.c b/regexp.c index 03d0f6e..07101fa 100644 --- a/regexp.c +++ b/regexp.c @@ -187,19 +187,15 @@ typedef ULONG B_COUNT; /* byte-count */ #undef min #undef max -#if !OPT_VILE_CTYPE -int ignorecase = FALSE; -#endif - static char *reg(int paren, int *flagp); static char *regatom(int *flagp, int at_bop); static char *regbranch(int *flagp); static char *regnode(int op); static char *regpiece(int *flagp, int at_bop); static inline char *regnext(char *p); -static int regmatch(char *prog, int plevel); -static int regrepeat(const char *p); -static int regtry(regexp * prog, char *string, char *stringend, int plevel); +static int regmatch(char *prog, int plevel, int ic); +static int regrepeat(const char *p, int ic); +static int regtry(regexp * prog, char *string, char *stringend, int plevel, int ic); static void regc(int b); static void regninsert(int n, char *opnd); static void regopinsert(int op, char *opnd); @@ -501,7 +497,7 @@ typedef enum { #define RR_MIN(p) ((p)[3]) 
#define RR_MAX(p) ((p)[3 + RR_BYTES]) -#define SAME(a,b) (ignorecase ? nocase_eq(a,b) : (CharOf(a) == CharOf(b))) +#define SAME(a,b,ic) (ic ? nocase_eq(a,b) : (CharOf(a) == CharOf(b))) #define STRSKIP(s) ((s) + strlen(s)) /* @@ -689,27 +685,27 @@ use_system_ctype(UINT * target, const char *source) : (int) toUpper(ch)) /* - * Check if 'p' (from pattern) and 'q' (from actual data) are the "same". - * This is where ignorecase is evaluated. + * Check if 'p' (from pattern) and 'q' (from actual data) are the + * "same", ignoring case if 'ic' is true. */ static inline int -same_char(int p, int q) +same_char(int p, int q, int ic) { int rc; if (reg_utf8flag) { /* both parameters are Unicode */ - if (ignorecase) { + if (ic) { rc = (vl_toupper(p) == vl_toupper(q)); } else { rc = (p == q); } } else { - rc = SAME(p, q); + rc = SAME(p, q, ic); } return rc; } -#define EQ_CHARS(p,q) (reg_utf8flag ? same_char(p, q) : SAME(p,q)) +#define EQ_CHARS(p,q,ic) (reg_utf8flag ? same_char(p, q, ic) : SAME(p,q,ic)) /* * Evaluate a character-class expression, using vile's ctype (if possible), @@ -757,7 +753,7 @@ reg_CTYPE(XDIGIT, sys_isxdigit((sys_WINT_T) target)) #define WCHAR_AT(source, last) UCHAR_AT(source) #define WCHAR_BEFORE(source, first) UCHAR_AT(source[-1]) #define PUT_REGC(c) regc(c) -#define EQ_CHARS(p,q) SAME(p,q) +#define EQ_CHARS(p,q,ic) SAME(p,q,ic) #define set_utf8flag(bp) /* nothing */ #endif /* @@ -799,6 +795,7 @@ regmassage(const char *in_text, size_t in_size, char *out_text, size_t *out_size, + size_t *out_uppercase, int magic) { const char *metas = ((magic > 0) @@ -808,8 +805,11 @@ regmassage(const char *in_text, : NOMAGICMETA); char *nxt = out_text; size_t n; + size_t uc = 0; for (n = 0; n < in_size; ++n) { + /* count uppercase chars as a side-effect for smartcase */ + if (is_CLASS(UPPER, in_text + n)) uc++; if (in_text[n] == BACKSLASH) { /* remove \ from these metas */ if ((n + 1) >= in_size) { #ifdef FAIL_TRAILING_BS @@ -832,6 +832,7 @@ regmassage(const char *in_text, 
} *nxt = EOS; *out_size = (size_t) (nxt - out_text); + *out_uppercase = uc; return TRUE; } @@ -859,6 +860,7 @@ regcomp(const char *exp_text, size_t exp_len, int magic) char *longest; size_t len; size_t parsed_len; + size_t uppercase = 0; int flags; static char *exp; static size_t explen; @@ -890,7 +892,7 @@ regcomp(const char *exp_text, size_t exp_len, int magic) explen = 2 * len + 20; } - if (!regmassage(exp_text, exp_len, exp, &parsed_len, magic)) + if (!regmassage(exp_text, exp_len, exp, &parsed_len, &uppercase, magic)) return NULL; TRACE(("after regmassage: '%s'\n", visible_buff(exp, (int) strlen(exp), 0))); @@ -922,6 +924,7 @@ regcomp(const char *exp_text, size_t exp_len, int magic) /* how big is it? (vile addition) */ r->size = sizeof(regexp) + (size_t) regsize; + r->uppercase = uppercase; /* Second pass: emit code. */ REGTRACE(("Second pass: emit code\n")); @@ -1753,12 +1756,14 @@ regoptail(char *p, char *val) * * txt_b holds data from the regular expression. * txt_a holds the data within which we're searching. + * ic is true when a caseless (case-insensitive) match is required. */ static int regstrncmp(const char *txt_a, const char *txt_b, size_t len_b, - const char *end_a) + const char *end_a, + int ic) { int chr_a = 0, chr_b = 0; int rc; @@ -1774,7 +1779,7 @@ regstrncmp(const char *txt_a, while (txt_a < end_a) { chr_a = WCHAR_AT(txt_a, end_a); chr_b = WCHAR_AT(txt_b, txt_b + len_b); - if (!EQ_CHARS(chr_a, chr_b)) { + if (!EQ_CHARS(chr_a, chr_b, ic)) { diff = TRUE; break; } @@ -1791,12 +1796,12 @@ regstrncmp(const char *txt_a, } static char * -regstrchr(char *s, int c, const char *e) +regstrchr(char *s, int c, const char *e, int ic) { char *result = 0; while (s < e) { - if (EQ_CHARS(WCHAR_AT(s, e), c)) { + if (EQ_CHARS(WCHAR_AT(s, e), c, ic)) { result = s; break; } @@ -1810,7 +1815,7 @@ regstrchr(char *s, int c, const char *e) * escaped. The 's' argument is always null-terminated. 
*/ static int -RegStrChr2(const char *s, unsigned length, const char *cs) +RegStrChr2(const char *s, unsigned length, const char *cs, int ic) { int matched = 0; int compare = WCHAR_AT(cs, regnomore); @@ -1841,7 +1846,7 @@ RegStrChr2(const char *s, unsigned length, const char *cs) } } else { pattern = WCHAR_AT(s, reglimit); - matched = EQ_CHARS(pattern, compare); + matched = EQ_CHARS(pattern, compare, ic); } s += BYTES_AT(s, regnomore); } @@ -1860,7 +1865,8 @@ regexec2(regexp * prog, char *stringend, /* pointer to the null, if there were one */ int startoff, int endoff, - int at_bol) + int at_bol, + int ic) { char *s, *endsrch; int skip; @@ -1900,9 +1906,9 @@ regexec2(regexp * prog, char *prog_must = &(prog->program[prog->regmust]); int char_must = WCHAR_AT(prog_must, regnomore); s = &string[startoff]; - while ((s = regstrchr(s, char_must, stringend)) + while ((s = regstrchr(s, char_must, stringend, ic)) != NULL && s < endsrch) { - if (regstrncmp(s, prog_must, prog->regmlen, stringend) == 0) + if (regstrncmp(s, prog_must, prog->regmlen, stringend, ic) == 0) break; /* Found it. */ s += BYTES_AT(s, stringend); } @@ -1917,7 +1923,7 @@ regexec2(regexp * prog, /* Simplest case: anchored match need be tried only once. */ if (startoff == 0 && prog->reganch) { - return (regtry(prog, string, stringend, 0)); + return (regtry(prog, string, stringend, 0, ic)); } /* Messy cases: unanchored match. */ @@ -1926,9 +1932,9 @@ regexec2(regexp * prog, /* We know what char it must start with. */ skip = 1; while (skip > 0 && - (s = regstrchr(s, prog->regstart, stringend)) != NULL && + (s = regstrchr(s, prog->regstart, stringend, ic)) != NULL && s < endsrch) { - if (regtry(prog, s, stringend, 0)) + if (regtry(prog, s, stringend, 0, ic)) return (1); skip = BYTES_AT(s, stringend); s += skip; @@ -1936,7 +1942,7 @@ regexec2(regexp * prog, } else { /* We don't -- general case. 
*/ do { - if (regtry(prog, s, stringend, 0)) + if (regtry(prog, s, stringend, 0, ic)) return (1); skip = ((s < stringend) ? BYTES_AT(s, stringend) @@ -1953,9 +1959,10 @@ regexec(regexp * prog, char *string, char *stringend, /* pointer to the null, if there were one */ int startoff, - int endoff) + int endoff, + int ic) { - return regexec2(prog, string, stringend, startoff, endoff, TRUE); + return regexec2(prog, string, stringend, startoff, endoff, TRUE, ic); } /* @@ -1965,7 +1972,8 @@ static int /* 0 failure, 1 success */ regtry(regexp * prog, char *string, char *stringend, - int plevel) + int plevel, + int ic) { int i; char **sp; @@ -1986,7 +1994,7 @@ regtry(regexp * prog, *sp++ = NULL; *ep++ = NULL; } - if (regmatch(prog->program + 1, plevel)) { + if (regmatch(prog->program + 1, plevel, ic)) { prog->startp[0] = string; prog->endp[0] = reginput; prog->mlen = (size_t) (reginput - string); @@ -2076,7 +2084,7 @@ regtry(regexp * prog, * by recursion. */ static int /* 0 failure, 1 success */ -regmatch(char *prog, int plevel) +regmatch(char *prog, int plevel, int ic) { char *scan; /* Current node. */ char *next; /* Next node. */ @@ -2166,7 +2174,7 @@ regmatch(char *prog, int plevel) opnd = OPERAND(scan); /* Inline the first character, for speed. 
*/ if (!EQ_CHARS(WCHAR_AT(opnd, regnext(scan)), - WCHAR_AT(reginput, regnomore))) { + WCHAR_AT(reginput, regnomore), ic)) { returnReg(0); } len = OPSIZE(scan); @@ -2174,20 +2182,20 @@ regmatch(char *prog, int plevel) && regstrncmp(reginput, opnd, (size_t) len, - regnomore) != 0) + regnomore, ic) != 0) returnReg(0); reginput += len; } break; case ANYOF: if (reginput >= regnomore - || RegStrChr2(OPERAND(scan), OPSIZE(scan), reginput) == 0) + || RegStrChr2(OPERAND(scan), OPSIZE(scan), reginput, ic) == 0) returnReg(0); reginput += BYTES_AT(reginput, regnomore); break; case ANYBUT: if (reginput >= regnomore - || RegStrChr2(OPERAND(scan), OPSIZE(scan), reginput) != 0) + || RegStrChr2(OPERAND(scan), OPSIZE(scan), reginput, ic) != 0) returnReg(0); reginput += BYTES_AT(reginput, regnomore); break; @@ -2213,7 +2221,7 @@ regmatch(char *prog, int plevel) regstartp[no] = save_input; REGTRACE(("match atom%d:\n", no)); } - if (regmatch(next, plevel + 1)) { + if (regmatch(next, plevel + 1, ic)) { returnReg(1); } else { restore_state1(); @@ -2232,7 +2240,7 @@ regmatch(char *prog, int plevel) reg_cnts[plevel] += 1; if ((plevel + 1) < NSUBEXP) reg_cnts[plevel + 1] = 0; - if (regmatch(next, plevel - 1)) { + if (regmatch(next, plevel - 1, ic)) { /* * Don't set endp if some earlier * invocation of the same parentheses @@ -2260,7 +2268,7 @@ regmatch(char *prog, int plevel) save_state2(); do { - if (regmatch(OPERAND(scan), plevel)) { + if (regmatch(OPERAND(scan), plevel, ic)) { update_greedy(); } restore_state2(); @@ -2289,7 +2297,7 @@ regmatch(char *prog, int plevel) if ((max == 0 || reg_cnts[plevel + 1] < max)) { - (void) regmatch(next, plevel); + (void) regmatch(next, plevel, ic); } REGTRACE(("compare %d vs \\{%d,%d\\}\n", @@ -2303,9 +2311,9 @@ regmatch(char *prog, int plevel) } else if (is_CLOSEn(OP(next))) { if ((plevel + 1) < NSUBEXP) reg_cnts[plevel + 1] = 0; - success = regmatch(next, plevel); + success = regmatch(next, plevel, ic); } else { - success = regmatch(next, plevel); + 
success = regmatch(next, plevel, ic); } if (first) { firstok = success; @@ -2373,7 +2381,7 @@ regmatch(char *prog, int plevel) } save_state(); - no = regrepeat(rpt); + no = regrepeat(rpt, ic); if (max > 0 && no > max) { no = max; @@ -2408,8 +2416,8 @@ regmatch(char *prog, int plevel) /* If it could work, try it. */ if ((nxtch == -1 || reginput >= regnomore - || EQ_CHARS(WCHAR_AT(reginput, regnomore), nxtch))) { - if (regmatch(next, plevel)) { + || EQ_CHARS(WCHAR_AT(reginput, regnomore), nxtch, ic))) { + if (regmatch(next, plevel, ic)) { #if OPT_MULTIBYTE if (rpts && (rpts != repeats)) free(rpts); @@ -2461,7 +2469,7 @@ regmatch(char *prog, int plevel) - regrepeat - repeatedly match something simple, report how many */ static int -regrepeat(const char *p) +regrepeat(const char *p, int ic) { int count = 0; char *scan = reginput; @@ -2476,20 +2484,20 @@ regrepeat(const char *p) break; case EXACTLY: while (scan < regnomore) { - if (!EQ_CHARS(data, WCHAR_AT(scan, regnomore))) + if (!EQ_CHARS(data, WCHAR_AT(scan, regnomore), ic)) break; count++; scan += BYTES_AT(scan, regnomore); } break; case ANYOF: - while (scan < regnomore && RegStrChr2(opnd, size, scan) != 0) { + while (scan < regnomore && RegStrChr2(opnd, size, scan, ic) != 0) { count++; scan += BYTES_AT(scan, regnomore); } break; case ANYBUT: - while (scan < regnomore && RegStrChr2(opnd, size, scan) == 0) { + while (scan < regnomore && RegStrChr2(opnd, size, scan, ic) == 0) { count++; scan += BYTES_AT(scan, regnomore); } @@ -2698,7 +2706,8 @@ cregexec(regexp * prog, LINE *lp, int startoff, int endoff, - int at_bol) + int at_bol, + int ic) { int s = 0; @@ -2711,7 +2720,7 @@ cregexec(regexp * prog, if (endoff >= startoff) { if (lvalue(lp)) { s = regexec2(prog, lvalue(lp), &(lvalue(lp)[llength(lp)]), - startoff, endoff, at_bol); + startoff, endoff, at_bol, ic); } else { /* the prog might be ^$, or something legal on a null string */ @@ -2720,7 +2729,7 @@ cregexec(regexp * prog, if (startoff > 0) { s = 0; } else { - s = 
regexec2(prog, nullstr, nullstr, 0, 0, at_bol); + s = regexec2(prog, nullstr, nullstr, 0, 0, at_bol, ic); } if (s) { if (prog->mlen > 0) { @@ -2742,9 +2751,10 @@ int lregexec(regexp * prog, LINE *lp, int startoff, - int endoff) + int endoff, + int ic) { - return cregexec(prog, lp, startoff, endoff, (startoff == 0)); + return cregexec(prog, lp, startoff, endoff, (startoff == 0), ic); } /* non-LINE regexec calls for vile */ @@ -2753,13 +2763,14 @@ nregexec(regexp * prog, char *string, char *stringend, /* pointer to the null, if there were one */ int startoff, - int endoff) + int endoff, + int ic) { int s; REGTRACE((T_CALLED "nregexec %d..%d\n", startoff, endoff)); set_utf8flag(0); - s = regexec(prog, string, stringend, startoff, endoff); + s = regexec(prog, string, stringend, startoff, endoff, ic); returnReg(s); } #endif /* VILE LINE */ diff --git a/search.c b/search.c index 7ac25f1..981c319 100644 --- a/search.c +++ b/search.c @@ -67,6 +67,7 @@ fsearch(int f, int n, int marking, int fromscreen) MARK curpos; int didmark = FALSE; int didwrap; + int ic; assert(curwp != 0); @@ -92,17 +93,18 @@ fsearch(int f, int n, int marking, int fromscreen) return status; } - ignorecase = window_b_val(curwp, MDIGNCASE); - if (curwp == 0) return FALSE; curpos = DOT; scanboundry(wrapok, curpos, FORWARD); didwrap = FALSE; + ic = window_b_val(curwp, MDIGNCASE) && + !(window_b_val(curwp, MDSMARTCASE) && gregexp->uppercase); + while (marking || n--) { movenext(&(DOT), FORWARD); - status = scanner(gregexp, FORWARD, wrapok, TRUE, &didwrap); + status = scanner(gregexp, FORWARD, wrapok, TRUE, ic, &didwrap); if (status == ABORT) { mlwarn("[Aborted]"); DOT = curpos; @@ -162,6 +164,7 @@ forwhunt(int f, int n) int wrapok; MARK curpos; int didwrap; + int ic; assert(curwp != 0); @@ -179,8 +182,6 @@ forwhunt(int f, int n) return FALSE; } - ignorecase = window_b_val(curwp, MDIGNCASE); - if (curwp == 0) return FALSE; @@ -189,9 +190,12 @@ forwhunt(int f, int n) curpos = DOT; scanboundry(wrapok, DOT, 
FORWARD); didwrap = FALSE; + ic = window_b_val(curwp, MDIGNCASE) && + !(window_b_val(curwp, MDSMARTCASE) && gregexp->uppercase); + while (n--) { movenext(&(DOT), FORWARD); - status = scanner(gregexp, FORWARD, wrapok, TRUE, &didwrap); + status = scanner(gregexp, FORWARD, wrapok, TRUE, ic, &didwrap); if (didwrap) { mlwrite("[Search wrapped past end of buffer]"); didwrap = FALSE; @@ -242,6 +246,7 @@ rsearch(int f, int n, int dummy GCC_UNUSED, int fromscreen) int wrapok; MARK curpos; int didwrap; + int ic; assert(curwp != 0); @@ -263,17 +268,18 @@ rsearch(int f, int n, int dummy GCC_UNUSED, int fromscreen) if (status != TRUE) return status; - ignorecase = window_b_val(curwp, MDIGNCASE); - if (curwp == 0) return FALSE; curpos = DOT; scanboundry(wrapok, DOT, REVERSE); didwrap = FALSE; + ic = window_b_val(curwp, MDIGNCASE) && + !(window_b_val(curwp, MDSMARTCASE) && gregexp->uppercase); + while (n--) { movenext(&(DOT), REVERSE); - status = scanner(gregexp, REVERSE, wrapok, TRUE, &didwrap); + status = scanner(gregexp, REVERSE, wrapok, TRUE, ic, &didwrap); if (didwrap) { mlwrite( "[Search wrapped past start of buffer]"); @@ -310,6 +316,7 @@ backhunt(int f, int n) int wrapok; MARK curpos; int didwrap; + int ic; assert(curwp != 0); @@ -327,8 +334,6 @@ backhunt(int f, int n) return FALSE; } - ignorecase = window_b_val(curwp, MDIGNCASE); - if (curwp == 0) return FALSE; @@ -337,9 +342,12 @@ backhunt(int f, int n) curpos = DOT; scanboundry(wrapok, DOT, REVERSE); didwrap = FALSE; + ic = window_b_val(curwp, MDIGNCASE) && + !(window_b_val(curwp, MDSMARTCASE) && gregexp->uppercase); + while (n--) { movenext(&(DOT), REVERSE); - status = scanner(gregexp, REVERSE, wrapok, TRUE, &didwrap); + status = scanner(gregexp, REVERSE, wrapok, TRUE, ic, &didwrap); if (didwrap) { mlwrite("[Search wrapped past start of buffer]"); didwrap = FALSE; @@ -387,14 +395,14 @@ revsearch(int f, int n) } static int -testit(LINE *lp, regexp * exp, int *end, int srchlim) +testit(LINE *lp, regexp * exp, int *end, 
int srchlim, int ic) { char *txt = lvalue(lp); C_NUM col = (C_NUM) (exp->startp[0] - txt) + 1; if (col > llength(lp)) col = llength(lp); - if (lregexec(exp, lp, col, srchlim)) { + if (lregexec(exp, lp, col, srchlim, ic)) { col = (C_NUM) (exp->startp[0] - txt) + 1; if (col > llength(lp) && !*end) { col = llength(lp); @@ -416,6 +424,7 @@ scanner( int direct, /* up or down */ int wrapok, /* ok to wrap around end of buffer? */ int at_bol, /* ok to match "^" ? */ + int ic, /* ignore case */ int *wrappedp) /* in/out: tells if we wrapped around */ { MARK curpos; @@ -474,7 +483,7 @@ scanner( srchlim = scanboundpos.o + 1; } } - found = cregexec(exp, curpos.l, startoff, srchlim, at_bol); + found = cregexec(exp, curpos.l, startoff, srchlim, at_bol, ic); } } else { if (direct == FORWARD) { @@ -486,7 +495,7 @@ scanner( if (srchlim > llength(curpos.l)) srchlim = llength(curpos.l); } - found = cregexec(exp, curpos.l, startoff, srchlim, at_bol); + found = cregexec(exp, curpos.l, startoff, srchlim, at_bol, ic); } if (found) { char *txt = lvalue(curpos.l); @@ -499,7 +508,7 @@ scanner( char *tst = 0; last++; - while (testit(curpos.l, exp, &end, srchlim)) { + while (testit(curpos.l, exp, &end, srchlim, ic)) { got = exp->startp[0]; /* guard against infinite loop: "?$" * or "?.*" @@ -510,7 +519,7 @@ scanner( } if (end) last++; - if (!cregexec(exp, curpos.l, (int) (got - txt), srchlim, at_bol)) { + if (!cregexec(exp, curpos.l, (int) (got - txt), srchlim, at_bol, ic)) { mlforce("BUG: prev. 
match no good"); returnCode(FALSE); } @@ -583,7 +592,7 @@ clobber_save_curbp(BUFFER *bp) /* keep track of enough state to give us a hint as to whether we need to redo the visual matches */ static int -need_to_rehilite(void) +need_to_rehilite(int ic) { /* save static copies of state that affects the search */ @@ -595,12 +604,12 @@ need_to_rehilite(void) memcmp(tb_values(searchpat), tb_values(savepat), tb_length(savepat))) || - save_igncase != ignorecase || + save_igncase != ic || save_vattr != b_val(curbp, VAL_HILITEMATCH) || save_magic != b_val(curbp, MDMAGIC) || (!hilite_suppressed && save_curbp != curbp)) { tb_copy(&savepat, searchpat); - save_igncase = ignorecase; + save_igncase = ic; save_vattr = (VIDEO_ATTR) b_val(curbp, VAL_HILITEMATCH); save_magic = b_val(curbp, MDMAGIC); save_curbp = curbp; @@ -650,15 +659,17 @@ attrib_matches(void) int status; REGIONSHAPE oregionshape = regionshape; VIDEO_ATTR vattr; + int ic; assert(curwp != 0); - ignorecase = window_b_val(curwp, MDIGNCASE); - - if (!need_to_rehilite()) + if (tb_length(searchpat) == 0 || gregexp == NULL) return; - if (tb_length(searchpat) == 0 || gregexp == NULL) + ic = window_b_val(curwp, MDIGNCASE) && + !(window_b_val(curwp, MDSMARTCASE) && gregexp->uppercase); + + if (!need_to_rehilite(ic)) return; /* #define track_hilite 1 */ @@ -688,7 +699,7 @@ attrib_matches(void) movenext(&nextdot, FORWARD); DOT = nextdot; } - status = scanner(gregexp, FORWARD, FALSE, TRUE, (int *) 0); + status = scanner(gregexp, FORWARD, FALSE, TRUE, ic, (int *) 0); if (status != TRUE) break; if (vattr != VACOLOR) @@ -893,7 +904,7 @@ findpat(int f, int n, regexp * exp, int direc) ? 
forwchar(TRUE, 1) : backchar(TRUE, 1)); if (s == TRUE) - s = scanner(exp, direc, FALSE, TRUE, (int *) 0); + s = scanner(exp, direc, FALSE, TRUE, FALSE, (int *) 0); } if (s != TRUE) DOT = savepos; diff --git a/tags.c b/tags.c index 40d962c..b5d70bd 100644 --- a/tags.c +++ b/tags.c @@ -604,12 +604,11 @@ cheap_buffer_scan(BUFFER *bp, char *patrn, int dir) LINE *lp; LINE *result = NULL; regexp *exp; + int ic = FALSE; if ((exp = regcomp(patrn, strlen(patrn), FALSE)) != NULL) { #ifdef MDTAGIGNORECASE - int savecase = ignorecase; - if (b_val(bp, MDTAGIGNORECASE)) - ignorecase = TRUE; + ic = b_val(bp, MDTAGIGNORECASE); #endif TRACE(("cheap_buffer_scan '%s' %s\n", @@ -619,7 +618,7 @@ cheap_buffer_scan(BUFFER *bp, char *patrn, int dir) for (lp = dir == FORWARD ? lforw(buf_head(bp)) : lback(buf_head(bp)); lp != buf_head(bp); lp = dir == FORWARD ? lforw(lp) : lback(lp)) { - if (lregexec(exp, lp, 0, llength(lp))) { + if (lregexec(exp, lp, 0, llength(lp), ic)) { result = lp; break; } @@ -628,10 +627,6 @@ cheap_buffer_scan(BUFFER *bp, char *patrn, int dir) beginDisplay(); free(TYPECAST(char, exp)); endofDisplay(); - -#ifdef MDTAGIGNORECASE - ignorecase = savecase; -#endif } return (result); } diff --git a/vl_regex.h b/vl_regex.h index 665b093..3ba639d 100644 --- a/vl_regex.h +++ b/vl_regex.h @@ -53,6 +53,7 @@ typedef struct regexp { int regmust; /* Internal use only. */ size_t regmlen; /* Internal use only. */ size_t size; /* vile addition -- how big is this */ + size_t uppercase; /* vile addition -- uppercase chars in pattern */ char program[1]; /* Unwarranted chumminess with compiler. */ } regexp; @@ -84,14 +85,10 @@ extern void mlforce(const char *fmt,...) 
GCC_PRINTFLIKE(1,2); /* *INDENT-OFF* */ extern void regerror (const char *s); extern regexp * regcomp (const char *origexp, size_t exp_len, int magic); -extern int regexec (regexp *prog, char *string, char *stringend, int startoff, int endoff); -extern int regexec2 (regexp *prog, char *string, char *stringend, int startoff, int endoff, int at_bol); +extern int regexec (regexp *prog, char *string, char *stringend, int startoff, int endoff, int ic); +extern int regexec2 (regexp *prog, char *string, char *stringend, int startoff, int endoff, int at_bol, int ic); extern void regfree (regexp *prog); extern char *regparser (const char **s); /* *INDENT-ON* */ -#if !OPT_VILE_CTYPE -extern int ignorecase; -#endif - #endif /* VL_REGEX_H_incl */ diff --git a/word.c b/word.c index ff9307f..6d35140 100644 --- a/word.c +++ b/word.c @@ -392,8 +392,8 @@ dot_at_section_break(void) regexp *expP = b_val_rexp(curbp, VAL_PARAGRAPHS)->reg; regexp *expC = b_val_rexp(curbp, VAL_COMMENTS)->reg; - return (lregexec(expP, DOT.l, 0, llength(DOT.l)) || - lregexec(expC, DOT.l, 0, llength(DOT.l))); + return (lregexec(expP, DOT.l, 0, llength(DOT.l), FALSE) || + lregexec(expC, DOT.l, 0, llength(DOT.l), FALSE)); } /* returns the length of the comment-prefix, if it matches, otherwise -1 */ @@ -402,7 +402,7 @@ comment_prefix(void) { regexp *expP = b_val_rexp(curbp, VAL_CMT_PREFIX)->reg; int result = -1; - if (lregexec(expP, DOT.l, 0, llength(DOT.l))) { + if (lregexec(expP, DOT.l, 0, llength(DOT.l), FALSE)) { result = (int) (expP->endp[0] - lvalue(DOT.l)); } return result;