[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] parse-datetime: accept ISO 8601 date and time rep with "T" separator
From: Jim Meyering
Subject: [PATCH] parse-datetime: accept ISO 8601 date and time rep with "T" separator
Date: Wed, 31 Aug 2011 16:25:31 +0200
I've just pushed this.
Documentation coming in the next few days, one way or another.
From c2ecbc9a8262595b27f741e41375d06213a30fb6 Mon Sep 17 00:00:00 2001
From: "J.T. Conklin" <address@hidden>
Date: Wed, 17 Aug 2011 16:40:49 -0700
Subject: [PATCH] parse-datetime: accept ISO 8601 date and time rep with "T"
separator
The parser now accepts ISO 8601 date-time strings with "T" as the
separator. It has long parsed dates like "2004-02-29 16:21:42"
with a space between the date and time strings. Now it also parses
"2004-02-29T16:21:42" and fractional-second and time-zone-annotated
variants like "2004-02-29T16:21:42.333-07:00"
* lib/parse-datetime.y: Parse ISO 8601 extended date and time
of day representation using the 'T' separator character.
* doc/parse-datetime.texi (General date syntax): replace use of
deprecated --iso-8601 option with --rfc-3339 in example of date
command output formats that can be parsed.
* tests/test-parse-datetime.c (tm_diff): New function, taken from
lib/parse-datetime.y.
(gmt_offset): New function.
(main): Add additional test cases to validate ISO8601 extended
date and time of day format parsing.
---
ChangeLog | 19 +++++
doc/parse-datetime.texi | 4 +-
lib/parse-datetime.y | 95 ++++++++++++++++++--------
tests/test-parse-datetime.c | 154 +++++++++++++++++++++++++++++++++++++++++++
4 files changed, 240 insertions(+), 32 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 6821057..b112f74 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2011-08-17 J.T. Conklin <address@hidden>
+
+ parse-datetime: accept ISO 8601 date and time rep with "T" separator
+ The parser now accepts ISO 8601 date-time strings with "T" as the
+ separator. It has long parsed dates like "2004-02-29 16:21:42"
+ with a space between the date and time strings. Now it also parses
+ "2004-02-29T16:21:42" and fractional-second and time-zone-annotated
+ variants like "2004-02-29T16:21:42.333-07:00"
+ * lib/parse-datetime.y: Parse ISO 8601 extended date and time
+ of day representation using the 'T' separator character.
+ * doc/parse-datetime.texi (General date syntax): replace use of
+ deprecated --iso-8601 option with --rfc-3339 in example of date
+ command output formats that can be parsed.
+ * tests/test-parse-datetime.c (tm_diff): New function, taken from
+ lib/parse-datetime.y.
+ (gmt_offset): New function.
+ (main): Add additional test cases to validate ISO8601 extended
+ date and time of day parsing.
+
2011-08-31 Bruno Haible <address@hidden>
freopen: Documentation.
diff --git a/doc/parse-datetime.texi b/doc/parse-datetime.texi
index 2f1ab34..b159f96 100644
--- a/doc/parse-datetime.texi
+++ b/doc/parse-datetime.texi
@@ -126,8 +126,8 @@ General date syntax
Mon Mar 1 00:21:42 UTC 2004
$ TZ=UTC0 date +'%Y-%m-%d %H:%M:%SZ'
2004-03-01 00:21:42Z
-$ date --iso-8601=ns | tr T ' ' # --iso-8601 is a GNU extension.
-2004-02-29 16:21:42,692722128-0800
+$ date --rfc-3339=ns # --rfc-3339 is a GNU extension.
+2004-02-29 16:21:42.692722128-08:00
$ date --rfc-2822 # a GNU extension
Sun, 29 Feb 2004 16:21:42 -0800
$ date +'%Y-%m-%d %H:%M:%S %z' # %z is a GNU extension.
diff --git a/lib/parse-datetime.y b/lib/parse-datetime.y
index 23a9a41..027f797 100644
--- a/lib/parse-datetime.y
+++ b/lib/parse-datetime.y
@@ -285,8 +285,8 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes,
%parse-param { parser_control *pc }
%lex-param { parser_control *pc }
-/* This grammar has 20 shift/reduce conflicts. */
-%expect 20
+/* This grammar has 31 shift/reduce conflicts. */
+%expect 31
%union
{
@@ -307,7 +307,7 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes,
%token <textintval> tSNUMBER tUNUMBER
%token <timespec> tSDECIMAL_NUMBER tUDECIMAL_NUMBER
-%type <intval> o_colon_minutes o_merid
+%type <intval> o_colon_minutes
%type <timespec> seconds signed_seconds unsigned_seconds
%type <rel> relunit relunit_snumber dayshift
@@ -333,7 +333,9 @@ items:
;
item:
- time
+ datetime
+ { pc->times_seen++; pc->dates_seen++; }
+ | time
{ pc->times_seen++; }
| local_zone
{ pc->local_zones_seen++; }
@@ -348,35 +350,61 @@ item:
| hybrid
;
+datetime:
+ iso_8601_datetime
+ ;
+
+iso_8601_datetime:
+ iso_8601_date 'T' iso_8601_time
+ ;
+
time:
tUNUMBER tMERIDIAN
{
set_hhmmss (pc, $1.value, 0, 0, 0);
pc->meridian = $2;
}
- | tUNUMBER ':' tUNUMBER o_merid
+ | tUNUMBER ':' tUNUMBER tMERIDIAN
{
set_hhmmss (pc, $1.value, $3.value, 0, 0);
pc->meridian = $4;
}
- | tUNUMBER ':' tUNUMBER tSNUMBER o_colon_minutes
+ | tUNUMBER ':' tUNUMBER ':' unsigned_seconds tMERIDIAN
+ {
+ set_hhmmss (pc, $1.value, $3.value, $5.tv_sec, $5.tv_nsec);
+ pc->meridian = $6;
+ }
+ | iso_8601_time
+ ;
+
+iso_8601_time:
+ tUNUMBER zone_offset
+ {
+ set_hhmmss (pc, $1.value, 0, 0, 0);
+ pc->meridian = MER24;
+ }
+ | tUNUMBER ':' tUNUMBER o_zone_offset
{
set_hhmmss (pc, $1.value, $3.value, 0, 0);
pc->meridian = MER24;
- pc->zones_seen++;
- pc->time_zone = time_zone_hhmm (pc, $4, $5);
}
- | tUNUMBER ':' tUNUMBER ':' unsigned_seconds o_merid
+ | tUNUMBER ':' tUNUMBER ':' unsigned_seconds o_zone_offset
{
set_hhmmss (pc, $1.value, $3.value, $5.tv_sec, $5.tv_nsec);
- pc->meridian = $6;
+ pc->meridian = MER24;
}
- | tUNUMBER ':' tUNUMBER ':' unsigned_seconds tSNUMBER o_colon_minutes
+ ;
+
+o_zone_offset:
+ /* empty */
+ | zone_offset
+ ;
+
+zone_offset:
+ tSNUMBER o_colon_minutes
{
- set_hhmmss (pc, $1.value, $3.value, $5.tv_sec, $5.tv_nsec);
- pc->meridian = MER24;
pc->zones_seen++;
- pc->time_zone = time_zone_hhmm (pc, $6, $7);
+ pc->time_zone = time_zone_hhmm (pc, $1, $2);
}
;
@@ -393,12 +421,19 @@ local_zone:
}
;
+/* Note 'T' is a special case, as it is used as the separator in ISO
+ 8601 date and time of day representation. */
zone:
tZONE
{ pc->time_zone = $1; }
+ | 'T'
+ { pc->time_zone = HOUR(7); }
| tZONE relunit_snumber
{ pc->time_zone = $1;
apply_relative_time (pc, $2, 1); }
+ | 'T' relunit_snumber
+ { pc->time_zone = HOUR(7);
+ apply_relative_time (pc, $2, 1); }
| tZONE tSNUMBER o_colon_minutes
{ pc->time_zone = $1 + time_zone_hhmm (pc, $2, $3); }
| tDAYZONE
@@ -456,13 +491,6 @@ date:
pc->year = $5;
}
}
- | tUNUMBER tSNUMBER tSNUMBER
- {
- /* ISO 8601 format. YYYY-MM-DD. */
- pc->year = $1;
- pc->month = -$2.value;
- pc->day = -$3.value;
- }
| tUNUMBER tMONTH tSNUMBER
{
/* e.g. 17-JUN-1992. */
@@ -501,6 +529,17 @@ date:
pc->month = $2;
pc->year = $3;
}
+ | iso_8601_date
+ ;
+
+iso_8601_date:
+ tUNUMBER tSNUMBER tSNUMBER
+ {
+ /* ISO 8601 format. YYYY-MM-DD. */
+ pc->year = $1;
+ pc->month = -$2.value;
+ pc->day = -$3.value;
+ }
;
rel:
@@ -612,13 +651,6 @@ o_colon_minutes:
{ $$ = $2.value; }
;
-o_merid:
- /* empty */
- { $$ = MER24; }
- | tMERIDIAN
- { $$ = $1; }
- ;
-
%%
static table const meridian_table[] =
@@ -773,7 +805,10 @@ static table const time_zone_table[] =
{ NULL, 0, 0 }
};
-/* Military time zone table. */
+/* Military time zone table.
+
+ Note 'T' is a special case, as it is used as the separator in ISO
+ 8601 date and time of day representation. */
static table const military_table[] =
{
{ "A", tZONE, -HOUR ( 1) },
@@ -794,7 +829,7 @@ static table const military_table[] =
{ "Q", tZONE, HOUR ( 4) },
{ "R", tZONE, HOUR ( 5) },
{ "S", tZONE, HOUR ( 6) },
- { "T", tZONE, HOUR ( 7) },
+ { "T", 'T', 0 },
{ "U", tZONE, HOUR ( 8) },
{ "V", tZONE, HOUR ( 9) },
{ "W", tZONE, HOUR (10) },
diff --git a/tests/test-parse-datetime.c b/tests/test-parse-datetime.c
index 45dbae6..4cb85d5 100644
--- a/tests/test-parse-datetime.c
+++ b/tests/test-parse-datetime.c
@@ -48,17 +48,171 @@ static const char* const day_table[] =
NULL
};
+
+#if ! HAVE_TM_GMTOFF
+/* Shift A right by B bits portably, by dividing A by 2**B and
+ truncating towards minus infinity. A and B should be free of side
+ effects, and B should be in the range 0 <= B <= INT_BITS - 2, where
+ INT_BITS is the number of useful bits in an int. GNU code can
+ assume that INT_BITS is at least 32.
+
+ ISO C99 says that A >> B is implementation-defined if A < 0. Some
+ implementations (e.g., UNICOS 9.0 on a Cray Y-MP EL) don't shift
+ right in the usual way when A < 0, so SHR falls back on division if
+ ordinary A >> B doesn't seem to be the usual signed shift. */
+#define SHR(a, b) \
+ (-1 >> 1 == -1 \
+ ? (a) >> (b) \
+ : (a) / (1 << (b)) - ((a) % (1 << (b)) < 0))
+
+#define TM_YEAR_BASE 1900 /* struct tm's tm_year is an offset from this year. */
+
+/* Yield the difference between *A and *B,
+ measured in seconds, ignoring leap seconds.
+ The body of this function is taken directly from the GNU C Library;
+ see src/strftime.c.
+
+ Only tm_year, tm_yday, tm_hour, tm_min and tm_sec are read, so both
+ structures must have tm_yday filled in; tm_mon and tm_mday are not
+ consulted. Every term is computed as A minus B, so the result is
+ positive when *A denotes the later time. */
+static long int
+tm_diff (struct tm const *a, struct tm const *b)
+{
+ /* Compute intervening leap days correctly even if year is negative.
+ Take care to avoid int overflow in leap day calculations. */
+ int a4 = SHR (a->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (a->tm_year & 3);
+ int b4 = SHR (b->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (b->tm_year & 3);
+ int a100 = a4 / 25 - (a4 % 25 < 0); /* a4 / 25, rounded toward minus infinity. */
+ int b100 = b4 / 25 - (b4 % 25 < 0); /* Likewise for b4. */
+ int a400 = SHR (a100, 2); /* a100 / 4, rounded toward minus infinity. */
+ int b400 = SHR (b100, 2); /* Likewise for b100. */
+ int intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400); /* Add the 4-year counts, subtract the 100-year counts, add back the 400-year counts. */
+ long int ayear = a->tm_year; /* Widen to long int so the subtraction below is not done in int. */
+ long int years = ayear - b->tm_year;
+ long int days = (365 * years + intervening_leap_days
+ + (a->tm_yday - b->tm_yday));
+ return (60 * (60 * (24 * days + (a->tm_hour - b->tm_hour))
+ + (a->tm_min - b->tm_min))
+ + (a->tm_sec - b->tm_sec));
+}
+#endif /* ! HAVE_TM_GMTOFF */
+
+/* Return the local time zone's offset from UTC in seconds, positive
+   east of Greenwich, as in effect at the moment of the call.
+
+   When struct tm has a tm_gmtoff member (HAVE_TM_GMTOFF), that value is
+   returned directly; otherwise the offset is derived by breaking the
+   current time down with both localtime and gmtime and taking the
+   difference with tm_diff.
+
+   NOTE(review): the offset is sampled at the current time, but the
+   callers in main apply it to a fixed 2011-05-01 timestamp.  If the
+   local zone's offset today differs from its offset on that date
+   (e.g. because of daylight saving time), the expected values will be
+   off; fixing that needs the reference time passed in -- confirm.  */
+long
+gmt_offset (void)
+{
+  time_t now;
+  long gmtoff;
+#if !HAVE_TM_GMTOFF
+  /* Declared ahead of the first statement so that C89 compilers
+     accept this function.  */
+  struct tm tm_local;
+  struct tm tm_gmt;
+#endif
+
+  time (&now);
+
+#if !HAVE_TM_GMTOFF
+  /* localtime and gmtime may return pointers to the same static
+     object, so copy each result before making the next call.  */
+  tm_local = *localtime (&now);
+  tm_gmt = *gmtime (&now);
+
+  gmtoff = tm_diff (&tm_local, &tm_gmt);
+#else
+  gmtoff = localtime (&now)->tm_gmtoff;
+#endif
+
+  return gmtoff;
+}
+
int
main (int argc _GL_UNUSED, char **argv)
{
struct timespec result;
struct timespec result2;
+ struct timespec expected;
struct timespec now;
const char *p;
int i;
+ long gmtoff;
set_program_name (argv[0]);
+ gmtoff = gmt_offset();
+
+
+ /* ISO 8601 extended date and time of day representation,
+ 'T' separator, local time zone */
+ p = "2011-05-01T11:55:18";
+ expected.tv_sec = 1304250918 - gmtoff;
+ expected.tv_nsec = 0;
+ ASSERT (parse_datetime (&result, p, 0));
+ LOG (p, expected, result);
+ ASSERT (expected.tv_sec == result.tv_sec
+ && expected.tv_nsec == result.tv_nsec);
+
+ /* ISO 8601 extended date and time of day representation,
+ ' ' separator, local time zone */
+ p = "2011-05-01 11:55:18";
+ expected.tv_sec = 1304250918 - gmtoff;
+ expected.tv_nsec = 0;
+ ASSERT (parse_datetime (&result, p, 0));
+ LOG (p, expected, result);
+ ASSERT (expected.tv_sec == result.tv_sec
+ && expected.tv_nsec == result.tv_nsec);
+
+
+ /* ISO 8601, extended date and time of day representation,
+ 'T' separator, UTC */
+ p = "2011-05-01T11:55:18Z";
+ expected.tv_sec = 1304250918;
+ expected.tv_nsec = 0;
+ ASSERT (parse_datetime (&result, p, 0));
+ LOG (p, expected, result);
+ ASSERT (expected.tv_sec == result.tv_sec
+ && expected.tv_nsec == result.tv_nsec);
+
+ /* ISO 8601, extended date and time of day representation,
+ ' ' separator, UTC */
+ p = "2011-05-01 11:55:18Z";
+ expected.tv_sec = 1304250918;
+ expected.tv_nsec = 0;
+ ASSERT (parse_datetime (&result, p, 0));
+ LOG (p, expected, result);
+ ASSERT (expected.tv_sec == result.tv_sec
+ && expected.tv_nsec == result.tv_nsec);
+
+
+ /* ISO 8601 extended date and time of day representation,
+ 'T' separator, w/UTC offset */
+ p = "2011-05-01T11:55:18-07:00";
+ expected.tv_sec = 1304276118;
+ expected.tv_nsec = 0;
+ ASSERT (parse_datetime (&result, p, 0));
+ LOG (p, expected, result);
+ ASSERT (expected.tv_sec == result.tv_sec
+ && expected.tv_nsec == result.tv_nsec);
+
+ /* ISO 8601 extended date and time of day representation,
+ ' ' separator, w/UTC offset */
+ p = "2011-05-01 11:55:18-07:00";
+ expected.tv_sec = 1304276118;
+ expected.tv_nsec = 0;
+ ASSERT (parse_datetime (&result, p, 0));
+ LOG (p, expected, result);
+ ASSERT (expected.tv_sec == result.tv_sec
+ && expected.tv_nsec == result.tv_nsec);
+
+
+ /* ISO 8601 extended date and time of day representation,
+ 'T' separator, w/hour only UTC offset */
+ p = "2011-05-01T11:55:18-07";
+ expected.tv_sec = 1304276118;
+ expected.tv_nsec = 0;
+ ASSERT (parse_datetime (&result, p, 0));
+ LOG (p, expected, result);
+ ASSERT (expected.tv_sec == result.tv_sec
+ && expected.tv_nsec == result.tv_nsec);
+
+ /* ISO 8601 extended date and time of day representation,
+ ' ' separator, w/hour only UTC offset */
+ p = "2011-05-01 11:55:18-07";
+ expected.tv_sec = 1304276118;
+ expected.tv_nsec = 0;
+ ASSERT (parse_datetime (&result, p, 0));
+ LOG (p, expected, result);
+ ASSERT (expected.tv_sec == result.tv_sec
+ && expected.tv_nsec == result.tv_nsec);
+
+
now.tv_sec = 4711;
now.tv_nsec = 1267;
p = "now";
--
1.7.7.rc0.362.g5a14
- [PATCH] parse-datetime: accept ISO 8601 date and time rep with "T" separator,
Jim Meyering <=