[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[SCM] gawk branch, gawk-5.2-stable, updated. gawk-4.1.0-5076-gc85749da
From: Arnold Robbins
Subject: [SCM] gawk branch, gawk-5.2-stable, updated. gawk-4.1.0-5076-gc85749da
Date: Fri, 1 Sep 2023 19:28:55 -0400 (EDT)
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".
The branch, gawk-5.2-stable has been updated
via c85749daba596ba2b827bcea239db74fc5321665 (commit)
from 2874b94d73ff766b0f41ff7f259e7c8a0dab458c (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=c85749daba596ba2b827bcea239db74fc5321665
commit c85749daba596ba2b827bcea239db74fc5321665
Author: Arnold D. Robbins <arnold@skeeve.com>
Date: Fri Sep 1 16:28:03 2023 -0700
Fix for match with multibyte chars and new tests.
diff --git a/ChangeLog b/ChangeLog
index 15a32135..bbdc869b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2023-09-01 Miguel Pineiro Jr <mpj@pineiro.cc>
+
+ Fix the handling of zero-length matches in multibyte locales.
+ Thanks to Ed Morton <mortoneccc@comcast.net> for the report.
+
+ * builtin.c (do_match): Translate rstart (byte idx to char idx)
+ even when rlength is zero. For this we tweak the conversion of
+ rlength to keep it in bounds when rstart and rlength are both 0.
+ * node.c (str2wstr): Add an entry to the indices array for the
+ terminating null. It facilitates the tweak above and is needed
+ to translate the idx of a zero-width match at the end of the
+ string.
+
2023-07-09 Arnold D. Robbins <arnold@skeeve.com>
* re.c (make_regexp): In error message, use the original text
diff --git a/builtin.c b/builtin.c
index e394cc34..2bc0aaa3 100644
--- a/builtin.c
+++ b/builtin.c
@@ -2791,9 +2791,9 @@ do_match(int nargs)
size_t *wc_indices = NULL;
rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr); /* byte length */
- if (rlength > 0 && gawk_mb_cur_max > 1) {
+ if (gawk_mb_cur_max > 1) {
t1 = str2wstr(t1, & wc_indices);
- rlength = wc_indices[rstart + rlength - 1] - wc_indices[rstart] + 1;
+ rlength = wc_indices[rstart + rlength] - wc_indices[rstart];
rstart = wc_indices[rstart];
}
@@ -2816,9 +2816,9 @@ do_match(int nargs)
start = t1->stptr + s;
subpat_start = s;
subpat_len = len = SUBPATEND(rp, t1->stptr, ii) - s;
- if (len > 0 && gawk_mb_cur_max > 1) {
+ if (gawk_mb_cur_max > 1) {
subpat_start = wc_indices[s];
- subpat_len = wc_indices[s + len - 1] - subpat_start + 1;
+ subpat_len = wc_indices[s + len] - subpat_start;
}
it = make_string(start, len);
diff --git a/node.c b/node.c
index 6c9a7306..fa120b10 100644
--- a/node.c
+++ b/node.c
@@ -757,7 +757,7 @@ str2wstr(NODE *n, size_t **ptr)
* Create the array.
*/
if (ptr != NULL) {
- ezalloc(*ptr, size_t *, sizeof(size_t) * n->stlen, "str2wstr");
+ ezalloc(*ptr, size_t *, sizeof(size_t) * (n->stlen + 1), "str2wstr");
}
sp = n->stptr;
@@ -829,6 +829,11 @@ str2wstr(NODE *n, size_t **ptr)
}
}
+ /* Needed for zero-length matches at the end of a string */
+ assert(sp - n->stptr == n->stlen);
+ if (ptr != NULL)
+ (*ptr)[sp - n->stptr] = i;
+
*wsp = L'\0';
n->wstlen = wsp - n->wstptr;
n->flags |= WSTRCUR;
diff --git a/pc/ChangeLog b/pc/ChangeLog
index 6f5620f3..2eea265d 100644
--- a/pc/ChangeLog
+++ b/pc/ChangeLog
@@ -1,3 +1,7 @@
+2023-09-01 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.tst: Regenerated.
+
2023-07-09 Arnold D. Robbins <arnold@skeeve.com>
* Makefile.tst: Regenerated.
diff --git a/pc/Makefile.tst b/pc/Makefile.tst
index c21c5fe1..d1d7c856 100644
--- a/pc/Makefile.tst
+++ b/pc/Makefile.tst
@@ -234,7 +234,7 @@ LOCALE_CHARSET_TESTS = \
asort asorti backbigs1 backsmalls1 backsmalls2 \
fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \
mbprintf1 mbprintf2 mbprintf3 mbprintf4 mbprintf5 \
- mtchi18n nlstringtest rebt8b2 rtlenmb sort1 sprintfc
+ mtchi18n mtchi18n2 nlstringtest rebt8b2 rtlenmb sort1 sprintfc
SHLIB_TESTS = \
apiterm \
@@ -311,7 +311,8 @@ NEED_LOCALE_C = \
NEED_LOCALE_EN = \
backbigs1 backsmalls1 backsmalls2 commas concat4 dfamb1 ignrcas2 lc_num1 \
- mbfw1 mbprintf1 mbprintf3 mbprintf4 mbstr1 mbstr2 posix_compare \
+ mbfw1 mbprintf1 mbprintf3 mbprintf4 mbstr1 mbstr2 \
+ mtchi18n2 posix_compare \
printhuge reint2 rri1 subamp subi18n wideidx wideidx2 \
widesub widesub2 widesub3 widesub4
@@ -3700,6 +3701,12 @@ mtchi18n:
AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+mtchi18n2:
+ @echo $@
+ @-[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=ENU_USA.1252; export GAWKLOCALE; \
+ AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
rebt8b2:
@echo $@ $(ZOS_FAIL)
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/ChangeLog b/test/ChangeLog
index 9608f5de..7ed0832b 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,9 @@
+2023-09-01 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (EXTRA_DIST): New test, mtchi18n2.
+ * mtchi18n2.awk, mtchi18n2.ok: New files.
+ Thanks to Miguel Pineiro Jr <mpj@pineiro.cc> for the tests.
+
2023-07-09 Arnold D. Robbins <arnold@skeeve.com>
* Makefile.am (EXTRA_DIST): New test, regexpbad.
diff --git a/test/Makefile.am b/test/Makefile.am
index 36b302eb..fe37d58f 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -820,6 +820,8 @@ EXTRA_DIST = \
mtchi18n.awk \
mtchi18n.in \
mtchi18n.ok \
+ mtchi18n2.awk \
+ mtchi18n2.ok \
nasty.awk \
nasty.ok \
nasty2.awk \
@@ -1560,7 +1562,7 @@ LOCALE_CHARSET_TESTS = \
asort asorti backbigs1 backsmalls1 backsmalls2 \
fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \
mbprintf1 mbprintf2 mbprintf3 mbprintf4 mbprintf5 \
- mtchi18n nlstringtest rebt8b2 rtlenmb sort1 sprintfc
+ mtchi18n mtchi18n2 nlstringtest rebt8b2 rtlenmb sort1 sprintfc
SHLIB_TESTS = \
apiterm \
@@ -1632,7 +1634,8 @@ NEED_LOCALE_C = \
NEED_LOCALE_EN = \
backbigs1 backsmalls1 backsmalls2 commas concat4 dfamb1 ignrcas2 lc_num1 \
- mbfw1 mbprintf1 mbprintf3 mbprintf4 mbstr1 mbstr2 posix_compare \
+ mbfw1 mbprintf1 mbprintf3 mbprintf4 mbstr1 mbstr2 \
+ mtchi18n2 posix_compare \
printhuge reint2 rri1 subamp subi18n wideidx wideidx2 \
widesub widesub2 widesub3 widesub4
diff --git a/test/Makefile.in b/test/Makefile.in
index a5981131..7fe4e7a1 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -1088,6 +1088,8 @@ EXTRA_DIST = \
mtchi18n.awk \
mtchi18n.in \
mtchi18n.ok \
+ mtchi18n2.awk \
+ mtchi18n2.ok \
nasty.awk \
nasty.ok \
nasty2.awk \
@@ -1824,7 +1826,7 @@ LOCALE_CHARSET_TESTS = \
asort asorti backbigs1 backsmalls1 backsmalls2 \
fmttest fnarydel fnparydl jarebug lc_num1 mbfw1 \
mbprintf1 mbprintf2 mbprintf3 mbprintf4 mbprintf5 \
- mtchi18n nlstringtest rebt8b2 rtlenmb sort1 sprintfc
+ mtchi18n mtchi18n2 nlstringtest rebt8b2 rtlenmb sort1 sprintfc
SHLIB_TESTS = \
apiterm \
@@ -1901,7 +1903,8 @@ NEED_LOCALE_C = \
NEED_LOCALE_EN = \
backbigs1 backsmalls1 backsmalls2 commas concat4 dfamb1 ignrcas2 lc_num1 \
- mbfw1 mbprintf1 mbprintf3 mbprintf4 mbstr1 mbstr2 posix_compare \
+ mbfw1 mbprintf1 mbprintf3 mbprintf4 mbstr1 mbstr2 \
+ mtchi18n2 posix_compare \
printhuge reint2 rri1 subamp subi18n wideidx wideidx2 \
widesub widesub2 widesub3 widesub4
@@ -5463,6 +5466,12 @@ mtchi18n:
AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+mtchi18n2:
+ @echo $@
+ @-[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; export GAWKLOCALE; \
+ AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
rebt8b2:
@echo $@ $(ZOS_FAIL)
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/Maketests b/test/Maketests
index f3d4c494..8284e165 100644
--- a/test/Maketests
+++ b/test/Maketests
@@ -2376,6 +2376,12 @@ mtchi18n:
AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+mtchi18n2:
+ @echo $@
+ @-[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; export GAWKLOCALE; \
+ AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
rebt8b2:
@echo $@ $(ZOS_FAIL)
@-AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/mtchi18n2.awk b/test/mtchi18n2.awk
new file mode 100755
index 00000000..70433862
--- /dev/null
+++ b/test/mtchi18n2.awk
@@ -0,0 +1,14 @@
+BEGIN {
+ match("\342\200\257", /^/, m)
+ print RSTART, RLENGTH
+
+ #match("\342\200\257", /^(a?)\u202F(b?)$/, m)
+ match("\342\200\257", /^(a?)\342\200\257(b?)$/, m)
+ print RSTART, RLENGTH, m[1,"start"], m[1,"length"], m[2, "start"], m[2, "length"]
+
+ match("\342\200\257", /$/, m)
+ print RSTART, RLENGTH
+
+ match("\342\200\257ac", /a(b?)c/, m)
+ print RSTART, RLENGTH, m[1,"start"], m[1,"length"]
+}
diff --git a/test/mtchi18n2.ok b/test/mtchi18n2.ok
new file mode 100755
index 00000000..10648642
--- /dev/null
+++ b/test/mtchi18n2.ok
@@ -0,0 +1,4 @@
+1 0
+1 1 1 0 2 0
+2 0
+2 2 3 0
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 13 +++++++++++++
builtin.c | 8 ++++----
node.c | 7 ++++++-
pc/ChangeLog | 4 ++++
pc/Makefile.tst | 11 +++++++++--
test/ChangeLog | 6 ++++++
test/Makefile.am | 7 +++++--
test/Makefile.in | 13 +++++++++++--
test/Maketests | 6 ++++++
test/mtchi18n2.awk | 14 ++++++++++++++
test/mtchi18n2.ok | 4 ++++
11 files changed, 82 insertions(+), 11 deletions(-)
create mode 100755 test/mtchi18n2.awk
create mode 100755 test/mtchi18n2.ok
hooks/post-receive
--
gawk
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [SCM] gawk branch, gawk-5.2-stable, updated. gawk-4.1.0-5076-gc85749da,
Arnold Robbins <=