[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [bug-gettext] [PATCH] its: Add new preserveSpaceRule "paragraph"
From: Bruno Haible
Subject: Re: [bug-gettext] [PATCH] its: Add new preserveSpaceRule "paragraph"
Date: Tue, 26 Mar 2019 23:17:45 +0100
User-agent: KMail/5.1.3 (Linux/4.4.0-141-generic; KDE/5.18.0; x86_64; ; )
Hi Daiki,
> I have rewritten the
> loop with the parallel pointers instead of memmove as attached. As a
> bonus, the behavior becomes closer to intltool: now it can normalize
> paragraph boundaries with "\n\n".
Sorry, I did not review the rewrite in time. (With variables named 'p'
and 'pp' and state variables like 'last_ws', the code was frightening.)
But the test xgettext-its-1 fails. The reason is that in these two lines
    pp += strspn (pp, " \t\n");
    if (*pp == '\n')
the condition (*pp == '\n') is always false: strspn() skips over every
character in " \t\n", newlines included, so the character it stops at is
never a newline.
I fixed this by replacing this condition with a memchr() invocation that
looks for a newline within the whitespace that was just skipped.
While at it, I also removed the state variable 'last_ws' and the
initial trim() call, and renamed the variables to identifiers that
are more descriptive.
Bruno
index 7f6a1c0..f67c05e 100755
diff --git a/autogen.sh b/autogen.sh
*** a/autogen.sh
--- b/autogen.sh
***************
*** 226,231 ****
--- 226,232 ----
locale
localename
lock
+ memchr
memmove
memset
minmax
index c542db4..8b72e1e 100644
diff --git a/gettext-tools/src/its.c b/gettext-tools/src/its.c
*** a/gettext-tools/src/its.c
--- b/gettext-tools/src/its.c
***************
*** 403,463 ****
/* Normalize whitespaces within the text, keeping paragraph
boundaries. */
{
! char *result, *p, *out;
!
! result = trim (text);
! for (p = out = result; *p != '\0';)
{
! char *pp, *pend = NULL, *next = NULL;
! bool last_ws = false;
! /* Find a paragraph boundary. */
! for (pp = p; *pp != '\0';)
! {
! char *nl = strchrnul (pp, '\n');
! if (*nl == '\0')
! {
! pend = nl;
! next = pend;
! break;
! }
! pp = nl + 1;
! pp += strspn (pp, " \t\n");
! if (*pp == '\n')
! {
! pend = nl;
! next = pp + 1;
! break;
! }
! }
! /* Normalize whitespaces in the paragraph. */
! assert (pend != NULL);
! for (pp = p; pp < pend; pp++)
! if (!(*pp == ' ' || *pp == '\t' || *pp == '\n'))
! break;
! for (; pp < pend; pp++)
! {
! if (*pp == ' ' || *pp == '\t' || *pp == '\n')
{
! if (!last_ws)
{
! *out++ = ' ';
! last_ws = true;
}
}
! else
! {
! *out++ = *pp;
! last_ws = false;
! }
! }
! if (*pend != '\0')
{
memcpy (out, "\n\n", 2);
out += 2;
}
! p = next;
}
*out = '\0';
return result;
--- 403,480 ----
/* Normalize whitespaces within the text, keeping paragraph
boundaries. */
{
! char *result = xstrdup (text);
! /* Go through the string, shrinking it, reading from *p++
! and writing to *out++. (result <= out <= p.) */
! const char *start_of_paragraph;
! char *out;
!
! out = result;
! for (start_of_paragraph = result; *start_of_paragraph != '\0';)
{
! const char *end_of_paragraph;
! const char *next_paragraph;
! /* Find the next paragraph boundary. */
! {
! const char *p;
! for (p = start_of_paragraph;;)
! {
! const char *nl = strchrnul (p, '\n');
! if (*nl == '\0')
! {
! end_of_paragraph = nl;
! next_paragraph = end_of_paragraph;
! break;
! }
! p = nl + 1;
{
! const char *past_whitespace = p + strspn (p, " \t\n");
! if (memchr (p, '\n', past_whitespace - p) != NULL)
{
! end_of_paragraph = nl;
! next_paragraph = past_whitespace;
! break;
}
+ p = past_whitespace;
}
! }
! }
!
! /* Normalize whitespaces in the paragraph. */
! {
! const char *p;
!
! /* Remove whitespace at the beginning of the paragraph. */
! for (p = start_of_paragraph; p < end_of_paragraph; p++)
! if (!(*p == ' ' || *p == '\t' || *p == '\n'))
! break;
!
! for (; p < end_of_paragraph;)
! {
! if (*p == ' ' || *p == '\t' || *p == '\n')
! {
! /* Normalize whitespace inside the paragraph, and
! remove whitespace at the end of the paragraph. */
! do
! p++;
! while (p < end_of_paragraph
! && (*p == ' ' || *p == '\t' || *p == '\n'));
! if (p < end_of_paragraph)
! *out++ = ' ';
! }
! else
! *out++ = *p++;
! }
! }
!
! if (*next_paragraph != '\0')
{
memcpy (out, "\n\n", 2);
out += 2;
}
! start_of_paragraph = next_paragraph;
}
*out = '\0';
return result;
index 975a547..22e9163 100755
diff --git a/gettext-tools/tests/xgettext-its-1 b/gettext-tools/tests/xgettext-its-1
*** a/gettext-tools/tests/xgettext-its-1
--- b/gettext-tools/tests/xgettext-its-1
***************
*** 176,185 ****
This is the first paragraph with
a newline.
! This is the second paragprah with spaces.
! This is the last paragraph.</p>
</message>
<message>
<p xml:space="paragraph">This is the only one paragraph</p>
--- 176,185 ----
This is the first paragraph with
a newline.
! This is the second paragraph with spaces.
! This is the last paragraph. </p>
</message>
<message>
<p xml:space="paragraph">This is the only one paragraph</p>
***************
*** 277,283 ****
msgid ""
"This is the first paragraph with a newline.\n"
"\n"
! "This is the second paragprah with spaces.\n"
"\n"
"This is the last paragraph."
msgstr ""
--- 277,283 ----
msgid ""
"This is the first paragraph with a newline.\n"
"\n"
! "This is the second paragraph with spaces.\n"
"\n"
"This is the last paragraph."
msgstr ""
- Re: [bug-gettext] [PATCH] its: Add new preserveSpaceRule "paragraph",
Bruno Haible <=