[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] python-brace-format: support conversion specifier and unnamed ar
From: |
Terence D. Honles |
Subject: |
[PATCH] python-brace-format: support conversion specifier and unnamed arguments |
Date: |
Wed, 19 Apr 2023 00:37:49 +0200 |
This change updates the `python-brace-format` to detect a format string
using unnamed arguments (supported since Python 2.7 & 3.1), and also
allows specifying a conversion specifier which was previously overlooked
in the parser.
x-ref: https://docs.python.org/3/library/string.html#format-string-syntax
---
gettext-tools/src/format-python-brace.c | 255 +++++++++++++++++-----
gettext-tools/src/format.h | 4 +
gettext-tools/src/xg-message.c | 4 +-
gettext-tools/tests/format-python-brace-1 | 34 ++-
gettext-tools/tests/format-python-brace-2 | 15 ++
5 files changed, 249 insertions(+), 63 deletions(-)
diff --git a/gettext-tools/src/format-python-brace.c
b/gettext-tools/src/format-python-brace.c
index cd5307721..4d0175522 100644
--- a/gettext-tools/src/format-python-brace.c
+++ b/gettext-tools/src/format-python-brace.c
@@ -39,7 +39,7 @@
https://docs.python.org/3/library/string.html#formatstrings
A format string directive here consists of
- an opening brace '{',
- - an identifier [_A-Za-z][_0-9A-Za-z]*|[0-9]+,
+ - an optional identifier [_A-Za-z][_0-9A-Za-z]*|[0-9]+,
- an optional sequence of
- getattr ('.' identifier) or
- getitem ('[' identifier ']')
@@ -61,6 +61,10 @@
- 'e', 'E', 'f', 'F', 'g', 'G', 'n', '%' for floating-point
values,
- a closing brace '}'.
Brace characters '{' and '}' can be escaped by doubling them: '{{' and '}}'.
+
+ When specifying a directive without an identifier it will be auto named
+ sequentially, and mixing named and unnamed arguments is not supported in
+ Python.
*/
struct named_arg
@@ -72,10 +76,13 @@ struct spec
{
unsigned int directives;
unsigned int named_arg_count;
+ unsigned int unnamed_arg_count;
unsigned int allocated;
struct named_arg *named;
};
+#define INVALID_MIXES_NAMED_UNNAMED() \
+ xstrdup (_("The string refers to arguments both through argument names and
through unnamed argument specifications."))
/* Forward declaration of local functions. */
static void free_named_args (struct spec *spec);
@@ -143,6 +150,7 @@ parse_directive (struct spec *spec,
const char *const format_start = format;
const char *name_start;
char c;
+ bool is_named = true;
c = *++format;
if (c == '{')
@@ -156,15 +164,20 @@ parse_directive (struct spec *spec,
if (!parse_named_field (spec, &format, translated, fdi, invalid_reason)
&& !parse_numeric_field (spec, &format, translated, fdi, invalid_reason))
{
- *invalid_reason =
- xasprintf (_("In the directive number %u, '%c' cannot start a field
name."),
- spec->directives, *format);
- FDI_SET (format, FMTDIR_ERROR);
- return false;
+ if (!is_toplevel)
+ {
+ *invalid_reason =
+ xasprintf (_("In the directive number %u, '%c' cannot start a
field name."),
+ spec->directives, *format);
+ FDI_SET (format, FMTDIR_ERROR);
+ return false;
+ }
+ else
+ is_named = false;
}
/* Parse '.' (getattr) or '[..]' (getitem) operators followed by a
- name. If must not recurse, but can be specifed in a chain, such
+ name. It must not recurse, but can be specifed in a chain, such
as "foo.bar.baz[0]". */
for (;;)
{
@@ -213,7 +226,31 @@ parse_directive (struct spec *spec,
}
/* Here c == *format. */
- if (c == ':')
+ if (c == '!')
+ {
+ if (!is_toplevel)
+ {
+ *invalid_reason =
+ xasprintf (_("In the directive number %u, no more nesting is
allowed in a format specifier."),
+ spec->directives);
+ FDI_SET (format, FMTDIR_ERROR);
+ return false;
+ }
+
+ c = *++format;
+ if (c != 'r' && c != 's' && c != 'a')
+ {
+ *invalid_reason =
+ xasprintf (_("In the directive number %u, there is an invalid
conversion for a format specifier."),
+ spec->directives);
+ FDI_SET (format, FMTDIR_ERROR);
+ return false;
+ }
+
+ format++;
+ }
+
+ if (*format == ':')
{
if (!is_toplevel)
{
@@ -318,26 +355,54 @@ parse_directive (struct spec *spec,
if (is_toplevel)
{
- char *name;
- size_t n = format - name_start;
+ if (is_named)
+ {
+ /* Named and unnamed specifications are exclusive. */
+ if (spec->unnamed_arg_count > 0)
+ {
+ *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
+ FDI_SET (name_start - 1, FMTDIR_ERROR);
+ return false;
+ }
- FDI_SET (name_start - 1, FMTDIR_START);
+ char *name;
+ size_t n = format - name_start;
- name = XNMALLOC (n + 1, char);
- memcpy (name, name_start, n);
- name[n] = '\0';
+ FDI_SET (name_start - 1, FMTDIR_START);
- spec->directives++;
+ name = XNMALLOC (n + 1, char);
+ memcpy (name, name_start, n);
+ name[n] = '\0';
- if (spec->allocated == spec->named_arg_count)
- {
- spec->allocated = 2 * spec->allocated + 1;
- spec->named = (struct named_arg *) xrealloc (spec->named,
spec->allocated * sizeof (struct named_arg));
+ spec->directives++;
+
+ if (spec->allocated == spec->named_arg_count)
+ {
+ spec->allocated = 2 * spec->allocated + 1;
+ spec->named = (struct named_arg *) xrealloc (spec->named,
spec->allocated * sizeof (struct named_arg));
+ }
+ spec->named[spec->named_arg_count].name = name;
+ spec->named_arg_count++;
+
+ FDI_SET (format, FMTDIR_END);
}
- spec->named[spec->named_arg_count].name = name;
- spec->named_arg_count++;
+ else
+ {
+ /* Named and unnamed specifications are exclusive. */
+ if (spec->named_arg_count > 0)
+ {
+ *invalid_reason = INVALID_MIXES_NAMED_UNNAMED ();
+ FDI_SET (name_start - 1, FMTDIR_ERROR);
+ return false;
+ }
- FDI_SET (format, FMTDIR_END);
+ FDI_SET (name_start - 1, FMTDIR_START);
+
+ spec->directives++;
+ spec->unnamed_arg_count++;
+
+ FDI_SET (format, FMTDIR_END);
+ }
}
*formatp = ++format;
@@ -383,6 +448,7 @@ format_parse (const char *format, bool translated, char
*fdi,
spec.directives = 0;
spec.named_arg_count = 0;
+ spec.unnamed_arg_count = 0;
spec.allocated = 0;
spec.named = NULL;
@@ -456,49 +522,78 @@ format_check (void *msgid_descr, void *msgstr_descr, bool
equality,
struct spec *spec2 = (struct spec *) msgstr_descr;
bool err = false;
- if (spec1->named_arg_count + spec2->named_arg_count > 0)
+ if (spec1->named_arg_count > 0 && spec2->unnamed_arg_count > 0)
{
- unsigned int i, j;
- unsigned int n1 = spec1->named_arg_count;
- unsigned int n2 = spec2->named_arg_count;
-
- /* Check the argument names in spec1 are contained in those of spec2.
- Both arrays are sorted. We search for the differences. */
- for (i = 0, j = 0; i < n1 || j < n2; )
+ if (error_logger)
+ error_logger (_("format specifications in '%s' expect a mapping, those
in '%s' expect a tuple"),
+ pretty_msgid, pretty_msgstr);
+ err = true;
+ }
+ else if (spec1->unnamed_arg_count > 0 && spec2->named_arg_count > 0)
+ {
+ if (error_logger)
+ error_logger (_("format specifications in '%s' expect a tuple, those
in '%s' expect a mapping"),
+ pretty_msgid, pretty_msgstr);
+ err = true;
+ }
+ else
+ {
+ if (spec1->named_arg_count + spec2->named_arg_count > 0)
{
- int cmp = (i >= n1 ? 1 :
- j >= n2 ? -1 :
- strcmp (spec1->named[i].name, spec2->named[j].name));
+ unsigned int i, j;
+ unsigned int n1 = spec1->named_arg_count;
+ unsigned int n2 = spec2->named_arg_count;
- if (cmp > 0)
+ /* Check the argument names in spec1 are contained in those of spec2.
+ Both arrays are sorted. We search for the differences. */
+ for (i = 0, j = 0; i < n1 || j < n2; )
{
- if (equality)
+ int cmp = (i >= n1 ? 1 :
+ j >= n2 ? -1 :
+ strcmp (spec1->named[i].name, spec2->named[j].name));
+
+ if (cmp > 0)
{
- if (error_logger)
- error_logger (_("a format specification for argument '%s'
doesn't exist in '%s'"),
- spec2->named[i].name, pretty_msgid);
- err = true;
- break;
+ if (equality)
+ {
+ if (error_logger)
+ error_logger (_("a format specification for argument
'%s' doesn't exist in '%s'"),
+ spec2->named[i].name, pretty_msgid);
+ err = true;
+ break;
+ }
+ else
+ j++;
}
- else
- j++;
- }
- else if (cmp < 0)
- {
- if (equality)
+ else if (cmp < 0)
{
- if (error_logger)
- error_logger (_("a format specification for argument '%s'
doesn't exist in '%s'"),
- spec1->named[i].name, pretty_msgstr);
- err = true;
- break;
+ if (equality)
+ {
+ if (error_logger)
+ error_logger (_("a format specification for argument
'%s' doesn't exist in '%s'"),
+ spec1->named[i].name, pretty_msgstr);
+ err = true;
+ break;
+ }
+ else
+ i++;
}
else
- i++;
+ j++, i++;
}
- else
- j++, i++;
}
+
+ if (spec1->unnamed_arg_count + spec2->unnamed_arg_count > 0)
+ {
+ /* Check the argument types are the same. */
+ if (spec1->unnamed_arg_count != spec2->unnamed_arg_count)
+ {
+ if (error_logger)
+ error_logger (_("number of format specifications in '%s' and
'%s' does not match"),
+ pretty_msgid, pretty_msgstr);
+ err = true;
+ }
+ }
}
return err;
@@ -515,6 +610,29 @@ struct formatstring_parser formatstring_python_brace =
};
+unsigned int
+get_python_brace_format_unnamed_arg_count (const char *string)
+{
+ /* Parse the format string. */
+ char *invalid_reason = NULL;
+ struct spec *descr =
+ (struct spec *) format_parse (string, false, NULL, &invalid_reason);
+
+ if (descr != NULL)
+ {
+ unsigned int result = descr->unnamed_arg_count;
+
+ format_free (descr);
+ return result;
+ }
+ else
+ {
+ free (invalid_reason);
+ return 0;
+ }
+}
+
+
#ifdef TEST
/* Test program: Print the argument list specification returned by
@@ -534,14 +652,31 @@ format_print (void *descr)
return;
}
- printf ("{");
- for (i = 0; i < spec->named_arg_count; i++)
+ if (spec->named_arg_count > 0)
+ {
+ if (spec->unnamed_arg_count > 0)
+ abort ();
+
+ printf ("{");
+ for (i = 0; i < spec->named_arg_count; i++)
+ {
+ if (i > 0)
+ printf (", ");
+ printf ("'%s'", spec->named[i].name);
+ }
+ printf ("}");
+ }
+ else
{
- if (i > 0)
- printf (", ");
- printf ("'%s'", spec->named[i].name);
+ printf ("(");
+ for (i = 0; i < spec->unnamed_arg_count; i++)
+ {
+ if (i > 0)
+ printf (" ");
+ printf ("*");
+ }
+ printf (")");
}
- printf ("}");
}
int
diff --git a/gettext-tools/src/format.h b/gettext-tools/src/format.h
index 4cd318654..cad2579af 100644
--- a/gettext-tools/src/format.h
+++ b/gettext-tools/src/format.h
@@ -148,6 +148,10 @@ extern void
string. */
extern unsigned int get_python_format_unnamed_arg_count (const char *string);
+/* Returns the number of unnamed arguments consumed by a Python brace format
+ string. */
+extern unsigned int get_python_brace_format_unnamed_arg_count (const char
*string);
+
/* Check whether both formats strings contain compatible format
specifications for format type i (0 <= i < NFORMATS).
Return the number of errors that were seen. */
diff --git a/gettext-tools/src/xg-message.c b/gettext-tools/src/xg-message.c
index 7b053798c..c888bb1a5 100644
--- a/gettext-tools/src/xg-message.c
+++ b/gettext-tools/src/xg-message.c
@@ -214,8 +214,10 @@ static void
warn_format_string (enum is_format is_format[NFORMATS], const char *string,
lex_pos_ty *pos, const char *pretty_msgstr)
{
- if (possible_format_p (is_format[format_python])
+ if ((possible_format_p (is_format[format_python])
&& get_python_format_unnamed_arg_count (string) > 1)
+ || (possible_format_p(is_format[format_python_brace])
+ && get_python_brace_format_unnamed_arg_count (string) > 1))
{
char buffer[21];
diff --git a/gettext-tools/tests/format-python-brace-1
b/gettext-tools/tests/format-python-brace-1
index 0028f7fdb..6b4b17380 100755
--- a/gettext-tools/tests/format-python-brace-1
+++ b/gettext-tools/tests/format-python-brace-1
@@ -12,18 +12,32 @@ cat <<\EOF > f-pyb-1.data
"abc{0}"
# Valid: a named argument
"abc{value}"
-# Invalid: an empty name
+# Valid: an unnamed argument
"abc{}"
# Invalid: unterminated name
"abc{value"
# Valid: three arguments, two with equal names
"abc{addr},{char},{addr}"
+# Valid: three unnamed arguments
+"abc{},{},{}"
+# Invalid: mixing of named and unnamed arguments
+"abc{addr},{}"
# Valid: getattr operator
"abc{value.name}"
+# Valid: getattr operator on unnamed argument
+"abc{.name}"
# Invalid: getattr operator with numeric field name
"abc{value.0}"
+# Invalid: getattr operator with numeric field name on unnamed argument
+"abc{.0}"
# Valid: getitem operator
"abc{value[name]}"
+# Valid: getitem operator on unnamed argument
+"abc{[name]}"
+# Valid: numeric getitem operator
+"abc{value[0]}"
+# Valid: numeric getitem operator on unnamed argument
+"abc{[0]}"
# Invalid: unterminated getitem operator
"abc{value[name}"
# Invalid: unterminated getitem operator
@@ -32,14 +46,28 @@ cat <<\EOF > f-pyb-1.data
"abc{value[!]}"
# Valid: use of both getattr and getitem operators
"abc{value.v[name]}"
+# Valid: use of both getattr and getitem operators on unnamed argument
+"abc{.v[name]}"
# Valid: use of both getitem and getattr operators
"abc{value[name].v}"
+# Valid: use of both getitem and getattr operators on unnamed argument
+"abc{[name].v}"
+# Valid: conversion specifiers
+"abc{value!s},{value!r},{value!a}"
+# Invalid: conversion specifier
+"abc{value!x}"
# Valid: format specifier
"abc{value:0}"
+# Valid: conversion and format specifier
+"abc{value!s:0}"
# Valid: format specifier after getattr operator
"abc{value.name:0}"
+# Valid: conversion specifier after getattr operator
+"abc{value.name!r}"
# Valid: format specifier after getitem operator
"abc{value[name]:0}"
+# Valid: conversion specifier after getitem operator
+"abc{value[name]!r}"
# Valid: standard format specifier
"abc{value:<<-#012.34e}"
# Invalid: empty precision
@@ -48,10 +76,12 @@ cat <<\EOF > f-pyb-1.data
"abc{value:<c>}"
# Valid: nested format specifier
"abc{value:{foo}}"
-# Invalid: too many nesting of format specifier
+# Invalid: too much nesting of format specifier
"abc{value:{foo:0}}"
# Invalid: nested format specifier, in the middle of other format specifiers
"abc{value:0{foo}0}"
+# Invalid: nested conversion specifier
+"abc{value:{foo!r}}"
EOF
: ${XGETTEXT=xgettext}
diff --git a/gettext-tools/tests/format-python-brace-2
b/gettext-tools/tests/format-python-brace-2
index 376ad266e..4fb5ce0f1 100755
--- a/gettext-tools/tests/format-python-brace-2
+++ b/gettext-tools/tests/format-python-brace-2
@@ -7,6 +7,15 @@ cat <<\EOF > f-pyb-2.data
# Valid: same named arguments
msgid "abc{date}{time}"
msgstr "xyz{date}{time}"
+# Valid: same unnamed arguments
+msgid "abc{}{}"
+msgstr "xyz{}{}"
+# Invalid: too few arguments
+msgid "abc{}def{}"
+msgstr "xyz{}"
+# Invalid: too many arguments
+msgid "abc{}def"
+msgstr "xyz{}vw{}"
# Valid: permutation
msgid "abc{x3}{x1}{x2}def"
msgstr "xyz{x2}{x1}{x3}"
@@ -19,6 +28,12 @@ msgstr "xyz{x2}"
# Invalid: added argument
msgid "abc{foo}def"
msgstr "xyz{foo}uvw{zoo}"
+# Invalid: unnamed vs. named arguments
+msgid "abc{}def"
+msgstr "xyz{value}"
+# Invalid: named vs. unnamed arguments
+msgid "abc{value}def"
+msgstr "xyz{}"
# Valid: multiple reuse of same argument
msgid "{foo} {bar} {baz}"
msgstr "{baz} {bar} {foo} {bar}"
--
2.40.0
- [PATCH] python-brace-format: support conversion specifier and unnamed arguments,
Terence D. Honles <=