Add a -z ERE / --ignore-equivalent-strings=ERE option to GNU diff (diffutils 3.0).
Strings matching the extended regular expression ERE in both files are ignored
when lines are hashed (src/io.c) and when lines are compared (src/util.c).

diff -u -d -x '*.[oea]*' -x '[.M]*' diffutils-3.0/src/diff.c diffutils-3.0.ZZZ/src/diff.c --- diffutils-3.0/src/diff.c 2010-04-15 20:53:08.000000000 +0100 +++ diffutils-3.0.ZZZ/src/diff.c 2010-09-27 11:14:46.906250000 +0100 @@ -64,6 +64,8 @@ }; static int compare_files (struct comparison const *, char const *, char const *); +static void add_ignore_string (struct ignore_strings *, char *); +static void compile_ignore_string (struct ignore_strings *); static void add_regexp (struct regexp_list *, char const *); static void summarize_regexp_list (struct regexp_list *); static void specify_style (enum output_style); @@ -106,7 +108,7 @@ static bool report_identical_files; static char const shortopts[] = -"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y"; +"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yz:"; /* Values for long options that do not have single-letter equivalents. */ enum @@ -174,6 +176,7 @@ {"ignore-blank-lines", 0, 0, 'B'}, {"ignore-case", 0, 0, 'i'}, {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION}, + {"ignore-equivalent-strings", 1, 0, 'z'}, {"ignore-matching-lines", 1, 0, 'I'}, {"ignore-space-change", 0, 0, 'b'}, {"ignore-tab-expansion", 0, 0, 'E'}, @@ -281,6 +284,8 @@ c_stack_action (0); function_regexp_list.buf = &function_regexp; ignore_regexp_list.buf = &ignore_regexp; + ignore_string_list.count = 0; + ignore_string_list.alloc = 0; re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING); excluded = new_exclude (); @@ -514,6 +519,10 @@ } break; + case 'z': + add_ignore_string (&ignore_string_list, optarg); + break; + case BINARY_OPTION: #if O_BINARY binary = true; @@ -659,7 +668,6 @@ tabsize = 8; if (! 
width) width = 130; - { /* Maximize first the half line width, and then the gutter width, according to the following constraints: @@ -683,6 +691,7 @@ if (horizon_lines < context) horizon_lines = context; + compile_ignore_string (&ignore_string_list); summarize_regexp_list (&function_regexp_list); summarize_regexp_list (&ignore_regexp_list); @@ -714,7 +723,8 @@ files_can_be_treated_as_binary = (brief & binary & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr - | (ignore_regexp_list.regexps || ignore_white_space))); + | (ignore_regexp_list.regexps || ignore_white_space || + ignore_string_list.count))); switch_string = option_list (argv + 1, optind - 1); @@ -761,6 +771,58 @@ return exit_status; } +/* Allocate space and add to IGNLIST the regexps for + ignoring strings specified by PATTERN. */ + +static void +add_ignore_string (struct ignore_strings *ignlist, char *pattern) +{ + /* Allocate space for data structures, growing the array only when it is full. */ + if (ignlist->count == ignlist->alloc) + ignlist->regexp = (struct ignore_string *) x2nrealloc (ignlist->regexp, + &ignlist->alloc, sizeof (struct ignore_string)); + + size_t i = ignlist->count; + ignlist->regexp[i].regs = (struct re_registers *) + xmalloc (sizeof (struct re_registers)); + ignlist->regexp[i].buf = (struct re_pattern_buffer *) + xcalloc (1, sizeof (struct re_pattern_buffer)); + ignlist->regexp[i].buf->fastmap = xmalloc (1 << CHAR_BIT); + + /* Save the pattern, rather than compile it here, + since IGNORE_CASE may change. */ + ignlist->regexp[i].pattern = pattern; + ignlist->count++; +} + +/* Compile the regexps in IGNLIST. */ + +static void +compile_ignore_string (struct ignore_strings *ignlist) +{ + if (!ignlist->count) return; + + reg_syntax_t toggle_re_syntax = re_set_syntax (re_syntax_options | + RE_SYNTAX_EGREP | (ignore_case ? 
RE_ICASE : 0)); + + size_t i; + for (i = 0; i < ignlist->count; i++) + { + size_t patlen = strlen (ignlist->regexp[i].pattern); + char const *m = re_compile_pattern (ignlist->regexp[i].pattern, + patlen, ignlist->regexp[i].buf); + if (m != 0) + error (EXIT_TROUBLE, 0, "%s: %s", ignlist->regexp[i].pattern, m); + + /* Any regexp that can match an empty string (e.g. -z a|) + must not update the hash value. */ + ignlist->regexp[i].update_hash = + re_search (ignlist->regexp[i].buf, "", 0, 0, 0, 0) == 0 ? false : true; + } + + toggle_re_syntax=re_set_syntax (toggle_re_syntax); +} + /* Append to REGLIST the regexp PATTERN. */ static void @@ -849,6 +911,8 @@ N_("-E --ignore-tab-expansion Ignore changes due to tab expansion."), N_("-b --ignore-space-change Ignore changes in the amount of white space."), N_("-w --ignore-all-space Ignore all white space."), + N_("-z ERE --ignore-equivalent-strings=ERE"), + N_(" Ignore strings matching ERE in both files."), N_("-B --ignore-blank-lines Ignore changes whose lines are all blank."), N_("-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE."), N_("--strip-trailing-cr Strip trailing carriage return on input."), @@ -1310,7 +1374,7 @@ if (status == EXIT_SUCCESS) { - if (report_identical_files && !DIR_P (0)) + if (report_identical_files && !DIR_P (0) && !DIR_P (1)) message ("Files %s and %s are identical\n", file_label[0] ? file_label[0] : cmp.file[0].name, file_label[1] ? file_label[1] : cmp.file[1].name); diff -u -d -x '*.[oea]*' -x '[.M]*' diffutils-3.0/src/diff.h diffutils-3.0.ZZZ/src/diff.h --- diffutils-3.0/src/diff.h 2010-04-15 20:53:08.000000000 +0100 +++ diffutils-3.0.ZZZ/src/diff.h 2010-09-27 11:14:46.906250000 +0100 @@ -135,6 +135,26 @@ /* Ignore changes that affect only lines matching this regexp (-I). */ XTERN struct re_pattern_buffer ignore_regexp; +/* Ignore strings that match a regexp in both files (-z). 
*/ +struct ignore_string +{ + struct re_pattern_buffer *buf; + struct re_registers *regs; + char *pattern; + int begin1; + int begin2; + int size1; + int size2; + bool update_hash; +}; +struct ignore_strings +{ + struct ignore_string *regexp; + size_t alloc; + size_t count; +}; +XTERN struct ignore_strings ignore_string_list; + /* Say only whether files differ, not how (-q). */ XTERN bool brief; @@ -350,7 +370,7 @@ extern char const pr_program[]; char *concat (char const *, char const *, char const *); char *dir_file_pathname (char const *, char const *); -bool lines_differ (char const *, char const *); +bool lines_differ (char const *, size_t, char const *, size_t); lin translate_line_number (struct file_data const *, lin); struct change *find_change (struct change *); struct change *find_reverse_change (struct change *); diff -u -d -x '*.[oea]*' -x '[.M]*' diffutils-3.0/src/io.c diffutils-3.0.ZZZ/src/io.c --- diffutils-3.0/src/io.c 2010-04-17 07:15:46.000000000 +0100 +++ diffutils-3.0.ZZZ/src/io.c 2010-09-27 11:14:46.906250000 +0100 @@ -216,7 +216,7 @@ char const *suffix_begin = current->suffix_begin; char const *bufend = FILE_BUFFER (current) + current->buffered; bool diff_length_compare_anyway = - ignore_white_space != IGNORE_NO_WHITE_SPACE; + ignore_white_space != IGNORE_NO_WHITE_SPACE || ignore_string_list.count; bool same_length_diff_contents_compare_anyway = diff_length_compare_anyway | ignore_case; @@ -227,6 +227,7 @@ h = 0; /* Hash this line until we find a newline. */ + if (!ignore_string_list.count) if (ignore_case) switch (ignore_white_space) { @@ -263,20 +264,10 @@ switch (c) { - case '\b': - column -= 0 < column; - break; - case '\t': c = ' '; repetitions = tabsize - column % tabsize; - column = (column + repetitions < column - ? 
0 - : column + repetitions); - break; - - case '\r': - column = 0; + column += repetitions; break; default: @@ -333,20 +324,10 @@ switch (c) { - case '\b': - column -= 0 < column; - break; - case '\t': c = ' '; repetitions = tabsize - column % tabsize; - column = (column + repetitions < column - ? 0 - : column + repetitions); - break; - - case '\r': - column = 0; + column += repetitions; break; default: @@ -366,6 +347,116 @@ h = HASH (h, c); break; } + else + { + size_t len = (char const *) memchr (p, '\n', bufend - p) - p; /* memchr, not strchr: a line may contain NUL bytes. */ + size_t column = 0; + size_t i; + for (i = 0 ; i < ignore_string_list.count; i++) + ignore_string_list.regexp[i].begin1 = -1; + + while (1) + { + start_loop : + for (i = 0; i < ignore_string_list.count; i++) + { + struct ignore_string *ign = &ignore_string_list.regexp[i]; + if (ign->begin1 == -2) continue; + if (ip + ign->begin1 < p) + { + if ((ign->begin1 = re_search (ign->buf, ip, len, p - ip, len, ign->regs)) < 0) + ign->begin1 = -2; + else + ign->size1 = ign->regs->end[0] - ign->regs->start[0]; + } + if (ip + ign->begin1 == p) + { + ign->begin1 = -1; + if (ign->size1 == 0) + continue; /* Proceed to the next regexp. */ + /* Update the hash value if the regexps + can only match non-empty strings. */ + if (ignore_string_list.regexp[i].update_hash) + { + char* pp; + for (pp = ign->pattern; *pp; pp++) + h = HASH (h, *pp); + } + p += ign->size1; + column += ign->size1; + goto start_loop; + } + } + + c = *p++; + char const *pp = p; + + switch (ignore_white_space) + { + case IGNORE_ALL_SPACE: + /* For -w, just skip past any white space. */ + while (isspace (c) && c != '\n') + c = *p++; + break; + + case IGNORE_SPACE_CHANGE: + /* For -b, advance past any sequence of white space + and consider it just one space, or nothing at + all if it is at the end of the line. */ + if (isspace (c)) + { + while (c != '\n') + { + c = *p++; + if (! 
isspace (c)) + { + h = HASH (h, ' '); + break; + } + } + } + break; + + case IGNORE_TAB_EXPANSION: + { + if (c == ' ' || c == '\t') + { + for (;; c = *p++) + { + if (c == ' ') + column++; + else if (c == '\t') + column += tabsize - column % tabsize; + else + break; + } + /* With the -z option, we may need different 'lengths' of + white space to realign to the same column. Therefore we cannot + hash the whole length - so just use one space. */ + h = HASH (h, ' '); + } + } + break; + + case IGNORE_NO_WHITE_SPACE: + break; + } + + if (p != pp) + { + --p; + continue; + } + + if (c != '\n') + { + column++; + h = HASH (h, ignore_case ? tolower (c) : c); + } + else + break; + } + } hashing_done:; @@ -423,7 +514,7 @@ else if (!diff_length_compare_anyway) continue; - if (! lines_differ (eqline, ip)) + if (! lines_differ (eqline, eqs[i].length, ip, length)) break; } Only in diffutils-3.0.ZZZ/src: paths.h diff -u -d -x '*.[oea]*' -x '[.M]*' diffutils-3.0/src/util.c diffutils-3.0.ZZZ/src/util.c --- diffutils-3.0/src/util.c 2010-04-15 20:53:08.000000000 +0100 +++ diffutils-3.0.ZZZ/src/util.c 2010-09-27 11:14:46.906250000 +0100 @@ -317,16 +317,61 @@ Return nonzero if the lines differ. 
*/ bool -lines_differ (char const *s1, char const *s2) +lines_differ (char const *s1, size_t len1, char const *s2, size_t len2) { register char const *t1 = s1; register char const *t2 = s2; - size_t column = 0; + size_t i; + for (i = 0; i < ignore_string_list.count; i++) { + ignore_string_list.regexp[i].begin1 = -1; + ignore_string_list.regexp[i].begin2 = -1; + } + size_t column1 = 0; + size_t column2 = 0; while (1) { + start_loop : + for (i = 0; i < ignore_string_list.count; i++) + { + struct ignore_string *ign = &ignore_string_list.regexp[i]; + if (ign->begin1 == -2 || ign->begin2 == -2) continue; + + if (s1 + ign->begin1 < t1) + { + if ((ign->begin1 = re_search (ign->buf, s1, len1, t1 - s1, len1, ign->regs)) < 0) + ign->begin1 = -2; + else + ign->size1 = ign->regs->end[0] - ign->regs->start[0]; + } + if (s1 + ign->begin1 == t1) + ign->begin1 = -1; + + if (s2 + ign->begin2 < t2) + { + if ((ign->begin2 = re_search (ign->buf, s2, len2, t2 - s2, len2, ign->regs)) <0 ) + ign->begin2 = -2; + else + ign->size2 = ign->regs->end[0] - ign->regs->start[0]; + } + if (s2 + ign->begin2 == t2) + ign->begin2 = -1; + + if (ign->begin1 == -1 && ign->begin2 == -1) + { + if (ign->size1 + ign->size2 == 0) + continue; /* Proceed to the next regexp, if matched strings are both empty. */ + t1 += ign->size1; + t2 += ign->size2; + column1 += ign->size1; + column2 += ign->size2; + goto start_loop; + } + } register unsigned char c1 = *t1++; register unsigned char c2 = *t2++; + char const *tt1 = t1; + char const *tt2 = t2; /* Test for exact char equality first, since it's a common case. 
*/ if (c1 != c2) @@ -399,13 +444,12 @@ if ((c1 == ' ' && c2 == '\t') || (c1 == '\t' && c2 == ' ')) { - size_t column2 = column; for (;; c1 = *t1++) { if (c1 == ' ') - column++; + column1++; else if (c1 == '\t') - column += tabsize - column % tabsize; + column1 += tabsize - column1 % tabsize; else break; } @@ -418,7 +462,7 @@ else break; } - if (column != column2) + if (column1 != column2) return true; } break; @@ -426,6 +470,10 @@ case IGNORE_NO_WHITE_SPACE: break; } + if (ignore_string_list.count&&(t1!=tt1||t2!=tt2)) { + --t1;--t2; + continue; + } /* Lowercase all letters if -i is specified. */ @@ -441,7 +489,8 @@ if (c1 == '\n') return false; - column += c1 == '\t' ? tabsize - column % tabsize : 1; + column1 += c1 == '\t' ? tabsize - column1 % tabsize : 1; + column2 += c2 == '\t' ? tabsize - column2 % tabsize : 1; } return true;