[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[bug-gettext] [PATCH] its: Add new preserveSpaceRule "paragraph"
From: Daiki Ueno
Subject: [bug-gettext] [PATCH] its: Add new preserveSpaceRule "paragraph"
Date: Mon, 11 Feb 2019 12:26:26 +0100
This implements a new text extraction rule preserving paragraph
boundaries, as mentioned in:
https://gitlab.gnome.org/GNOME/glib/issues/1350
* gettext-tools/doc/gettext.texi (Preparing ITS Rules): Mention "paragraph".
* gettext-tools/src/its.c (its_rule_list_extract_text): Accept "paragraph".
(its_merge_context_merge_node): Likewise.
(normalize_whitespace): Handle "paragraph" rule.
* gettext-tools/src/its.h (ITS_WHITESPACE_NORMALIZE_PARAGRAPH): New enum value.
* gettext-tools/tests/xgettext-its-1: Add test for "paragraph" rule.
---
gettext-tools/doc/gettext.texi | 12 +++---
gettext-tools/src/its.c | 64 +++++++++++++++++++++++++++++-
gettext-tools/src/its.h | 1 +
gettext-tools/tests/xgettext-its-1 | 50 +++++++++++++++++++++++
4 files changed, 121 insertions(+), 6 deletions(-)
diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi
index 892854c75..306605842 100644
--- a/gettext-tools/doc/gettext.texi
+++ b/gettext-tools/doc/gettext.texi
@@ -12354,10 +12354,12 @@ A required @code{escape} attribute with the value
@code{yes} or @code{no}.
@item Extended Preserve Space
This data category extends the standard @samp{Preserve Space} data
-category with the additional value @samp{trim}. The value means to
-remove the leading and trailing whitespaces of the content, but not to
-normalize whitespaces in the middle. In the global rule, the
-@code{preserveSpaceRule} element contains the following:
+category with the additional values @samp{trim} and @samp{paragraph}.
+@samp{trim} means to remove the leading and trailing whitespaces of the
+content, but not to normalize whitespaces in the middle.
+@samp{paragraph} means to normalize the content but keep the paragraph
+boundaries. In the global
+rule, the @code{preserveSpaceRule} element contains the following:
@itemize
@item
@@ -12366,7 +12368,7 @@ that selects the nodes to which this rule applies.
@item
A required @code{space} attribute with the value @code{default},
-@code{preserve}, or @code{trim}.
+@code{preserve}, @code{trim}, or @code{paragraph}.
@end itemize
@end table
diff --git a/gettext-tools/src/its.c b/gettext-tools/src/its.c
index 9b4b397e5..445c0d9b5 100644
--- a/gettext-tools/src/its.c
+++ b/gettext-tools/src/its.c
@@ -399,6 +399,60 @@ normalize_whitespace (const char *text, enum its_whitespace_type_ty whitespace)
case ITS_WHITESPACE_TRIM:
return trim (text);
+ case ITS_WHITESPACE_NORMALIZE_PARAGRAPH:
+ /* Normalize whitespaces within the text, keeping paragraph
+ boundaries. */
+ {
+ char *result, *p, *end;
+
+ result = xstrdup (text);
+ end = result + strlen (result);
+ for (p = result; *p != '\0';)
+ {
+ char *pp, *pend = NULL;
+ int c;
+
+ /* Find a paragraph boundary. */
+ for (pp = p; *pp != '\0';)
+ {
+ char *nl = strchrnul (pp, '\n');
+ if (*nl == '\0')
+ {
+ pend = nl;
+ break;
+ }
+ else if (*(nl + 1) == '\n')
+ {
+ pend = nl;
+ break;
+ }
+ pp = nl + 1;
+ }
+
+ /* Normalize whitespaces in the paragraph. */
+ assert (pend != NULL);
+ c = *pend;
+ *pend = '\0';
+ for (pp = p; *pp != '\0';)
+ {
+ size_t len = strspn (pp, " \t\n");
+ if (len > 0)
+ {
+ *pp = ' ';
+ memmove (pp + 1, pp + len, end - (pp + len));
+ end -= len - 1;
+ *end = '\0';
+ pend -= len - 1;
+ *pend = '\0';
+ pp++;
+ }
+ pp += strcspn (pp, " \t\n");
+ }
+ *pend = c;
+ p = pp + strspn (pend, "\n");
+ }
+ return result;
+ }
default:
/* Normalize whitespaces within the text, but not at the beginning
nor the end of the text. */
@@ -996,7 +1050,11 @@ its_preserve_space_rule_constructor (struct its_rule_ty *pop,
|| strcmp (prop, "default") == 0
/* gettext extension: remove leading/trailing whitespaces only. */
|| (node->ns && xmlStrEqual (node->ns->href, BAD_CAST GT_NS)
- && strcmp (prop, "trim") == 0)))
+ && strcmp (prop, "trim") == 0)
+ /* gettext extension: same as default except keeping
+ paragraph boundaries. */
+ || (node->ns && xmlStrEqual (node->ns->href, BAD_CAST GT_NS)
+ && strcmp (prop, "paragraph") == 0)))
{
error (0, 0, _("invalid attribute value \"%s\" for \"%s\""),
prop, "space");
@@ -1715,6 +1773,8 @@ its_rule_list_extract_text (its_rule_list_ty *rules,
whitespace = ITS_WHITESPACE_PRESERVE;
else if (value && strcmp (value, "trim") == 0)
whitespace = ITS_WHITESPACE_TRIM;
+ else if (value && strcmp (value, "paragraph") == 0)
+ whitespace = ITS_WHITESPACE_NORMALIZE_PARAGRAPH;
else
whitespace = ITS_WHITESPACE_NORMALIZE;
@@ -1842,6 +1902,8 @@ its_merge_context_merge_node (struct its_merge_context_ty *context,
whitespace = ITS_WHITESPACE_PRESERVE;
else if (value && strcmp (value, "trim") == 0)
whitespace = ITS_WHITESPACE_TRIM;
+ else if (value && strcmp (value, "paragraph") == 0)
+ whitespace = ITS_WHITESPACE_NORMALIZE_PARAGRAPH;
else
whitespace = ITS_WHITESPACE_NORMALIZE;
diff --git a/gettext-tools/src/its.h b/gettext-tools/src/its.h
index 72c30c992..49af5cec5 100644
--- a/gettext-tools/src/its.h
+++ b/gettext-tools/src/its.h
@@ -33,6 +33,7 @@ enum its_whitespace_type_ty
{
ITS_WHITESPACE_PRESERVE,
ITS_WHITESPACE_NORMALIZE,
+ ITS_WHITESPACE_NORMALIZE_PARAGRAPH,
ITS_WHITESPACE_TRIM
};
diff --git a/gettext-tools/tests/xgettext-its-1 b/gettext-tools/tests/xgettext-its-1
index 125f3e682..5fe70b366 100755
--- a/gettext-tools/tests/xgettext-its-1
+++ b/gettext-tools/tests/xgettext-its-1
@@ -171,6 +171,29 @@ cat <<\EOF >messages.xml
<message unescaped="This is an unescaped attribute &lt;&gt;&amp;&quot;">
<p></p>
</message>
+ <message>
+ <p xml:space="paragraph">
+ This is the first paragraph with
+a newline.
+
+ This is the second paragprah with spaces.
+
+ This is the last paragraph.</p>
+ </message>
+ <message>
+ <p xml:space="paragraph">This is the only one paragraph</p>
+ </message>
+ <message>
+ <p xml:space="paragraph">This is the only one paragraph with a boundary
+
+</p>
+ </message>
+ <message>
+ <p xml:space="paragraph"></p>
+ </message>
+ <message>
+ <p xml:space="paragraph"> </p>
+ </message>
</messages>
EOF
@@ -247,6 +270,33 @@ msgstr ""
#: messages.xml:61
msgid "This is an unescaped attribute <>&\""
msgstr ""
+
+#. (itstool) path: message/p
+#: messages.xml:65
+msgid ""
+" This is the first paragraph with a newline.\n"
+"\n"
+" This is the second paragprah with spaces.\n"
+"\n"
+" This is the last paragraph."
+msgstr ""
+
+#. (itstool) path: message/p
+#: messages.xml:74
+msgid "This is the only one paragraph"
+msgstr ""
+
+#. (itstool) path: message/p
+#: messages.xml:77
+msgid ""
+"This is the only one paragraph with a boundary\n"
+"\n"
+msgstr ""
+
+#. (itstool) path: message/p
+#: messages.xml:85
+msgid " "
+msgstr ""
EOF
: ${DIFF=diff}
--
2.20.1
- [bug-gettext] [PATCH] its: Add new preserveSpaceRule "paragraph",
Daiki Ueno <=