[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: php concatenation unknown to xgettext
From: Bruno Haible
Subject: Re: php concatenation unknown to xgettext
Date: Thu, 5 Jul 2007 02:09:39 +0200
User-agent: KMail/1.5.4
Hello,
Jan Engelhardt wrote:
> the following minimal php source is not correctly parsed by xgettext:
>
> <?php
> _("foo"."bar");
> ?>
>
> ran through `xgettext -o /dev/stdout test.php` gives:
>
> msgid "foo"
> msgstr ""
>
> expected result:
>
> msgid "foobar"
> msgstr ""
>
> The dot is the concatenation operator and is very handy when dealing
> with longer strings (esp when they take up more than 80 columns).
>
>
> $ xgettext --version
> xgettext (GNU gettext-tools) 0.15
Thank you for the precise report. I am applying this fix (relative to the
gettext CVS).
2007-07-04 Bruno Haible <address@hidden>
Recognize the PHP string concatenation operator.
* x-php.c (enum token_type_ty): New elements token_type_dot,
token_type_operator1, token_type_operator2.
(struct token_ty): Add comment field.
(free_token): Drop reference to comment field.
(phase4_pushback, phase4_pushback_length): New variables.
(phase4_get): Renamed from x_php_lex. Return last pushed-back token if
available. Recognize tokens '.', '+', '-', '*', '/', '%', '++', '--',
'!', '~', '@'. Fill in tp->comment.
(phase4_unget): New function.
(phase5_last): New variable.
(x_php_lex): New function.
(extract_balanced): Handle the new token types. Pass token's comment
to remember_a_message.
(extract_php): Initialize phase5_last.
Reported by Jan Engelhardt <address@hidden>.
*** gettext-tools/src/x-php.c 17 Mar 2007 12:00:22 -0000 1.23
--- gettext-tools/src/x-php.c 4 Jul 2007 23:55:36 -0000
***************
*** 42,48 ****
/* The PHP syntax is defined in phpdoc/manual/langref.html.
! See also php-4.1.0/Zend/zend_language_scanner.l.
Note that variable and function names can contain bytes in the range
0x7f..0xff; see
http://www.php.net/manual/en/language.variables.php
--- 42,49 ----
/* The PHP syntax is defined in phpdoc/manual/langref.html.
! See also php-4.1.0/Zend/zend_language_scanner.l
! and php-4.1.0/Zend/zend_language_parser.y.
Note that variable and function names can contain bytes in the range
0x7f..0xff; see
http://www.php.net/manual/en/language.variables.php
***************
*** 741,746 ****
--- 742,750 ----
token_type_comma, /* , */
token_type_lbracket, /* [ */
token_type_rbracket, /* ] */
+ token_type_dot, /* . */
+ token_type_operator1, /* * / % ++ -- */
+ token_type_operator2, /* + - ! ~ @ */
token_type_string_literal, /* "abc" */
token_type_symbol, /* symbol, number */
token_type_other /* misc. operator */
***************
*** 752,757 ****
--- 756,762 ----
{
token_type_ty type;
char *string; /* for token_type_string_literal,
token_type_symbol */
+ refcounted_string_list_ty *comment; /* for token_type_string_literal */
int line_number;
};
***************
*** 762,780 ****
{
if (tp->type == token_type_string_literal || tp->type == token_type_symbol)
free (tp->string);
}
/* 4. Combine characters into tokens. Discard whitespace. */
static void
! x_php_lex (token_ty *tp)
{
static char *buffer;
static int bufmax;
int bufpos;
int c;
tp->string = NULL;
for (;;)
--- 767,795 ----
{
if (tp->type == token_type_string_literal || tp->type == token_type_symbol)
free (tp->string);
+ if (tp->type == token_type_string_literal)
+ drop_reference (tp->comment);
}
/* 4. Combine characters into tokens. Discard whitespace. */
+ static token_ty phase4_pushback[3];
+ static int phase4_pushback_length;
+
static void
! phase4_get (token_ty *tp)
{
static char *buffer;
static int bufmax;
int bufpos;
int c;
+ if (phase4_pushback_length)
+ {
+ *tp = phase4_pushback[--phase4_pushback_length];
+ return;
+ }
tp->string = NULL;
for (;;)
***************
*** 927,932 ****
--- 942,948 ----
buffer[bufpos] = 0;
tp->type = token_type_string_literal;
tp->string = xstrdup (buffer);
+ tp->comment = add_reference (savable_comment);
return;
case '"':
***************
*** 1063,1069 ****
}
buffer[bufpos] = 0;
if (tp->type == token_type_string_literal)
! tp->string = xstrdup (buffer);
return;
case '?':
--- 1079,1088 ----
}
buffer[bufpos] = 0;
if (tp->type == token_type_string_literal)
! {
! tp->string = xstrdup (buffer);
! tp->comment = add_reference (savable_comment);
! }
return;
case '?':
***************
*** 1075,1084 ****
/* ?> and %> terminate PHP mode and switch back to HTML
mode. */
skip_html ();
}
else
! phase1_ungetc (c2);
! tp->type = token_type_other;
return;
}
--- 1094,1106 ----
/* ?> and %> terminate PHP mode and switch back to HTML
mode. */
skip_html ();
+ tp->type = token_type_other;
}
else
! {
! phase1_ungetc (c2);
! tp->type = (c == '%' ? token_type_operator1 : token_type_other);
! }
return;
}
***************
*** 1102,1107 ****
--- 1124,1160 ----
tp->type = token_type_rbracket;
return;
+ case '.':
+ tp->type = token_type_dot;
+ return;
+
+ case '*':
+ case '/':
+ tp->type = token_type_operator1;
+ return;
+
+ case '+':
+ case '-':
+ {
+ int c2 = phase1_getc ();
+ if (c2 == c)
+ /* ++ or -- */
+ tp->type = token_type_operator1;
+ else
+ /* + or - */
+ {
+ phase1_ungetc (c2);
+ tp->type = token_type_operator2;
+ }
+ return;
+ }
+
+ case '!':
+ case '~':
+ case '@':
+ tp->type = token_type_operator2;
+ return;
+
case '<':
{
int c2 = phase1_getc ();
***************
*** 1248,1253 ****
--- 1301,1388 ----
}
}
+ /* Supports 3 tokens of pushback. */
+ static void
+ phase4_unget (token_ty *tp)
+ {
+ if (tp->type != token_type_eof)
+ {
+ if (phase4_pushback_length == SIZEOF (phase4_pushback))
+ abort ();
+ phase4_pushback[phase4_pushback_length++] = *tp;
+ }
+ }
+
+
+ /* 5. Compile-time optimization of string literal concatenation.
+ Combine "string1" . ... . "stringN" to the concatenated string if
+ - the token before this expression is none of
+ '+' '-' '.' '*' '/' '%' '!' '~' '++' '--' ')' '@'
+ (because then the first string could be part of an expression with
+ a precedence equal to or higher than that of '.', such as an additive,
+ multiplicative, negation, preincrement, or cast expression),
+ - the token after this expression is none of
+ '*' '/' '%' '++' '--'
+ (because then the last string could be part of an expression with
+ higher precedence than '.', such as a multiplicative or postincrement
+ expression). */
+
+ static token_type_ty phase5_last;
+
+ static void
+ x_php_lex (token_ty *tp)
+ {
+ phase4_get (tp);
+ if (tp->type == token_type_string_literal
+ && !(phase5_last == token_type_dot
+ || phase5_last == token_type_operator1
+ || phase5_last == token_type_operator2
+ || phase5_last == token_type_rparen))
+ {
+ char *sum = tp->string;
+ size_t sum_len = strlen (sum);
+
+ for (;;)
+ {
+ token_ty token2;
+
+ phase4_get (&token2);
+ if (token2.type == token_type_dot)
+ {
+ token_ty token3;
+
+ phase4_get (&token3);
+ if (token3.type == token_type_string_literal)
+ {
+ token_ty token_after;
+
+ phase4_get (&token_after);
+ if (token_after.type != token_type_operator1)
+ {
+ char *addend = token3.string;
+ size_t addend_len = strlen (addend);
+
+ sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
+ memcpy (sum + sum_len, addend, addend_len + 1);
+ sum_len += addend_len;
+
+ phase4_unget (&token_after);
+ free_token (&token3);
+ free_token (&token2);
+ continue;
+ }
+ phase4_unget (&token_after);
+ }
+ phase4_unget (&token3);
+ }
+ phase4_unget (&token2);
+ break;
+ }
+ tp->string = sum;
+ }
+ phase5_last = tp->type;
+ }
+
/* ========================= Extracting strings. ==========================
*/
***************
*** 1389,1405 ****
if (extract_all)
remember_a_message (mlp, NULL, token.string, inner_context,
! &pos, savable_comment);
else
arglist_parser_remember (argparser, arg, token.string,
inner_context,
pos.file_name, pos.line_number,
! savable_comment);
}
next_context_iter = null_context_list_iterator;
state = 0;
continue;
case token_type_other:
next_context_iter = null_context_list_iterator;
state = 0;
--- 1524,1544 ----
if (extract_all)
remember_a_message (mlp, NULL, token.string, inner_context,
! &pos, token.comment);
else
arglist_parser_remember (argparser, arg, token.string,
inner_context,
pos.file_name, pos.line_number,
! token.comment);
! drop_reference (token.comment);
}
next_context_iter = null_context_list_iterator;
state = 0;
continue;
+ case token_type_dot:
+ case token_type_operator1:
+ case token_type_operator2:
case token_type_other:
next_context_iter = null_context_list_iterator;
state = 0;
***************
*** 1432,1437 ****
--- 1571,1578 ----
last_comment_line = -1;
last_non_comment_line = -1;
+ phase5_last = token_type_eof;
+
flag_context_list_table = flag_table;
init_keywords ();