diff --git a/lib/regcomp.c b/lib/regcomp.c index 4cbb1b2..3b5c68d 100644 --- a/lib/regcomp.c +++ b/lib/regcomp.c @@ -129,7 +129,7 @@ static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node); POSIX doesn't require that we do anything for REG_NOERROR, but why not be nice? */ -static const char __re_error_msgid[] = +static const char __re_error_msgid[] attribute_hidden = { #define REG_NOERROR_IDX 0 gettext_noop ("Success") /* REG_NOERROR */ @@ -153,9 +153,9 @@ static const char __re_error_msgid[] = gettext_noop ("Invalid back reference") /* REG_ESUBREG */ "\0" #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") - gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ + gettext_noop ("Unmatched [, [^, [:, [., or [=") /* REG_EBRACK */ "\0" -#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") +#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [, [^, [:, [., or [=") gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ "\0" #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") @@ -183,7 +183,7 @@ static const char __re_error_msgid[] = gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ }; -static const size_t __re_error_msgid_idx[] = +static const size_t __re_error_msgid_idx[] attribute_hidden = { REG_NOERROR_IDX, REG_NOMATCH_IDX, @@ -277,7 +277,7 @@ int re_compile_fastmap (bufp) struct re_pattern_buffer *bufp; { - re_dfa_t *dfa = bufp->buffer; + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; char *fastmap = bufp->fastmap; memset (fastmap, '\0', sizeof (char) * SBC_MAX); @@ -311,7 +311,7 @@ static void re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, char *fastmap) { - re_dfa_t *dfa = bufp->buffer; + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; Idx node_cnt; bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) @@ -665,7 +665,7 @@ void regfree (preg) regex_t *preg; { - re_dfa_t *dfa = preg->buffer; + 
re_dfa_t *dfa = (re_dfa_t *) preg->buffer; if (BE (dfa != NULL, 1)) { lock_fini (dfa->lock); @@ -775,7 +775,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length, preg->regs_allocated = REGS_UNALLOCATED; /* Initialize the dfa. */ - dfa = preg->buffer; + dfa = (re_dfa_t *) preg->buffer; if (BE (preg->allocated < sizeof (re_dfa_t), 0)) { /* If zero allocated, but buffer is non-null, try to realloc @@ -786,7 +786,7 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length, if (dfa == NULL) return REG_ESPACE; preg->allocated = sizeof (re_dfa_t); - preg->buffer = dfa; + preg->buffer = (unsigned char *) dfa; } preg->used = sizeof (re_dfa_t); @@ -1009,7 +1009,7 @@ init_word_char (re_dfa_t *dfa) static void free_workarea_compile (regex_t *preg) { - re_dfa_t *dfa = preg->buffer; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_storage_t *storage, *next; for (storage = dfa->str_tree_storage; storage; storage = next) { @@ -1193,7 +1193,7 @@ optimize_utf8 (re_dfa_t *dfa) static reg_errcode_t analyze (regex_t *preg) { - re_dfa_t *dfa = preg->buffer; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; reg_errcode_t ret; /* Allocate arrays. 
*/ @@ -1205,7 +1205,12 @@ analyze (regex_t *preg) || dfa->eclosures == NULL, 0)) return REG_ESPACE; - dfa->subexp_map = re_malloc (Idx, preg->re_nsub); + /* Some malloc()-checkers don't like zero allocations, so skip the call when there are no subexpressions. */ + if (preg->re_nsub > 0) + dfa->subexp_map = re_malloc (Idx, preg->re_nsub); + else + dfa->subexp_map = NULL; + if (dfa->subexp_map != NULL) { Idx i; @@ -1374,7 +1379,7 @@ lower_subexps (void *extra, bin_tree_t *node) static bin_tree_t * lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) { - re_dfa_t *dfa = preg->buffer; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *body = node->left; bin_tree_t *op, *cls, *tree1, *tree; @@ -2155,7 +2160,7 @@ static bin_tree_t * parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, reg_errcode_t *err) { - re_dfa_t *dfa = preg->buffer; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree, *eor, *root; re_token_t current_token; dfa->syntax = syntax; @@ -2189,7 +2194,7 @@ static bin_tree_t * parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, reg_syntax_t syntax, Idx nest, reg_errcode_t *err) { - re_dfa_t *dfa = preg->buffer; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree, *branch = NULL; bitset_word_t initial_bkref_map = dfa->completed_bkref_map; tree = parse_branch (regexp, preg, token, syntax, nest, err); @@ -2239,7 +2244,7 @@ parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, reg_syntax_t syntax, Idx nest, reg_errcode_t *err) { bin_tree_t *tree, *expr; - re_dfa_t *dfa = preg->buffer; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; tree = parse_expression (regexp, preg, token, syntax, nest, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; @@ -2283,7 +2288,7 @@ static bin_tree_t * parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, reg_syntax_t syntax, Idx nest, reg_errcode_t *err) { - re_dfa_t *dfa = preg->buffer; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree; switch 
(token->type) { @@ -2507,7 +2512,7 @@ static bin_tree_t * parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, reg_syntax_t syntax, Idx nest, reg_errcode_t *err) { - re_dfa_t *dfa = preg->buffer; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree; size_t cur_nsub; cur_nsub = preg->re_nsub++; @@ -2746,10 +2751,15 @@ build_range_exp (const reg_syntax_t syntax, end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] : 0)); + /* + * Fedora Core 2, maybe others, have broken `btowc' that returns -1 + * for any value > 127. Sigh. Note that `start_ch' and `end_ch' are + * unsigned, so we don't have sign extension problems. + */ start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) - ? __btowc (start_ch) : start_elem->opr.wch); + ? start_ch : start_elem->opr.wch); end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) - ? __btowc (end_ch) : end_elem->opr.wch); + ? end_ch : end_elem->opr.wch); if (start_wc == WEOF || end_wc == WEOF) return REG_ECOLLATE; else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0)) @@ -2779,7 +2789,14 @@ build_range_exp (const reg_syntax_t syntax, new_nranges); if (BE (new_array_start == NULL || new_array_end == NULL, 0)) - return REG_ESPACE; + { + /* if one is not NULL, free it to avoid leaks */ + if (new_array_start != NULL) + re_free(new_array_start); + if (new_array_end != NULL) + re_free(new_array_end); + return REG_ESPACE; + } mbcset->range_starts = new_array_start; mbcset->range_ends = new_array_end; @@ -3517,6 +3534,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) return REG_ECOLLATE; /* Build single byte matching table for this equivalence class. 
*/ + char_buf[1] = (unsigned char) '\0'; len = weights[idx1 & 0xffffff]; for (ch = 0; ch < SBC_MAX; ++ch) { @@ -3686,6 +3704,13 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, if (BE (sbcset == NULL, 0)) #endif /* not RE_ENABLE_I18N */ { + /* if one is not NULL, free it to avoid leaks */ + if (sbcset != NULL) + free(sbcset); +#ifdef RE_ENABLE_I18N + if (mbcset != NULL) + free(mbcset); +#endif *err = REG_ESPACE; return NULL; } @@ -3728,6 +3753,7 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, #endif /* Build a tree for simple bracket. */ + memset(& br_token, 0, sizeof(br_token)); /* silence "not initialized" errors from static checkers */ br_token.type = SIMPLE_BRACKET; br_token.opr.sbcset = sbcset; tree = create_token_tree (dfa, NULL, NULL, &br_token); @@ -3822,6 +3848,7 @@ create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, re_token_type_t type) { re_token_t t; + memset(& t, 0, sizeof(t)); /* silence "not initialized" errors from static checkers */ t.type = type; return create_token_tree (dfa, left, right, &t); } diff --git a/lib/regex_internal.c b/lib/regex_internal.c index 93d7ee9..c299759 100644 --- a/lib/regex_internal.c +++ b/lib/regex_internal.c @@ -512,11 +512,11 @@ re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc) pstr->cur_state = prev_st; } else - wc = wc2; + wc = (wint_t) wc2; /* Then proceed the next character. 
*/ rawbuf_idx += mbclen; } - *last_wc = wc; + *last_wc = (wint_t) wc; return rawbuf_idx; } #endif /* RE_ENABLE_I18N */ @@ -536,7 +536,10 @@ build_upper_buffer (re_string_t *pstr) int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; if (BE (pstr->trans != NULL, 0)) ch = pstr->trans[ch]; - pstr->mbs[char_idx] = toupper (ch); + if (islower (ch)) + pstr->mbs[char_idx] = toupper (ch); + else + pstr->mbs[char_idx] = ch; } pstr->valid_len = char_idx; pstr->valid_raw_len = char_idx; @@ -966,6 +969,16 @@ static reg_errcode_t internal_function __attribute_warn_unused_result__ re_node_set_alloc (re_node_set *set, Idx size) { + /* + * ADR: valgrind says size can be 0, which then doesn't + * free the block of size 0. Harumph. This seems + * to work ok, though. + */ + if (size == 0) + { + memset(set, 0, sizeof(*set)); + return REG_NOERROR; + } set->alloc = size; set->nelem = 0; set->elems = re_malloc (Idx, size); @@ -1428,7 +1441,18 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token) new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); if (BE (new_nexts == NULL || new_indices == NULL || new_edests == NULL || new_eclosures == NULL, 0)) - return REG_MISSING; + { + /* if any are not NULL, free them, avoid leaks */ + if (new_nexts != NULL) + re_free(new_nexts); + if (new_indices != NULL) + re_free(new_indices); + if (new_edests != NULL) + re_free(new_edests); + if (new_eclosures != NULL) + re_free(new_eclosures); + return REG_MISSING; + } dfa->nexts = new_nexts; dfa->org_indices = new_indices; dfa->edests = new_edests; diff --git a/lib/regex_internal.h b/lib/regex_internal.h index 0307a34..c634a00 100644 --- a/lib/regex_internal.h +++ b/lib/regex_internal.h @@ -117,6 +117,10 @@ # define BE(expr, val) __builtin_expect (expr, val) #else # define BE(expr, val) (expr) +# ifdef inline +# undef inline +# endif +# define inline #endif /* Number of ASCII characters. */