bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] parse-datetime: accept ISO 8601 date and time rep with "T" separator


From: Jim Meyering
Subject: [PATCH] parse-datetime: accept ISO 8601 date and time rep with "T" separator
Date: Wed, 31 Aug 2011 16:25:31 +0200

I've just pushed this.
Documentation coming in the next few days, one way or another.

From c2ecbc9a8262595b27f741e41375d06213a30fb6 Mon Sep 17 00:00:00 2001
From: "J.T. Conklin" <address@hidden>
Date: Wed, 17 Aug 2011 16:40:49 -0700
Subject: [PATCH] parse-datetime: accept ISO 8601 date and time rep with "T"
 separator

The parser now accepts ISO 8601 date-time strings with "T" as the
separator.  It has long parsed dates like "2004-02-29 16:21:42"
with a space between the date and time strings.  Now it also parses
"2004-02-29T16:21:42", as well as fractional-second and
time-zone-annotated variants like "2004-02-29T16:21:42.333-07:00".

* lib/parse-datetime.y: Parse ISO 8601 extended date and time
of day representation using the 'T' separator character.
* doc/parse-datetime.texi (General date syntax): Replace use of the
deprecated --iso-8601 option with --rfc-3339 in the example of date
command output formats that can be parsed.
* tests/test-parse-datetime.c (tm_diff): New function, taken from
lib/parse-datetime.y.
(gmt_offset): New function.
(main): Add test cases to validate parsing of the ISO 8601 extended
date and time of day format.
---
 ChangeLog                   |   19 +++++
 doc/parse-datetime.texi     |    4 +-
 lib/parse-datetime.y        |   95 ++++++++++++++++++--------
 tests/test-parse-datetime.c |  154 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 240 insertions(+), 32 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6821057..b112f74 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2011-08-17  J.T. Conklin  <address@hidden>
+
+       parse-datetime: accept ISO 8601 date and time rep with "T" separator
+       The parser now accepts ISO 8601 date-time strings with "T" as the
+       separator.  It has long parsed dates like "2004-02-29 16:21:42"
+       with a space between the date and time strings.  Now it also parses
+       "2004-02-29T16:21:42", as well as fractional-second and
+       time-zone-annotated variants like "2004-02-29T16:21:42.333-07:00".
+       * lib/parse-datetime.y: Parse ISO 8601 extended date and time
+       of day representation using the 'T' separator character.
+       * doc/parse-datetime.texi (General date syntax): Replace use of the
+       deprecated --iso-8601 option with --rfc-3339 in the example of date
+       command output formats that can be parsed.
+       * tests/test-parse-datetime.c (tm_diff): New function, taken from
+       lib/parse-datetime.y.
+       (gmt_offset): New function.
+       (main): Add test cases to validate parsing of the ISO 8601 extended
+       date and time of day format.
+
 2011-08-31  Bruno Haible  <address@hidden>

        freopen: Documentation.
diff --git a/doc/parse-datetime.texi b/doc/parse-datetime.texi
index 2f1ab34..b159f96 100644
--- a/doc/parse-datetime.texi
+++ b/doc/parse-datetime.texi
@@ -126,8 +126,8 @@ General date syntax
 Mon Mar  1 00:21:42 UTC 2004
 $ TZ=UTC0 date +'%Y-%m-%d %H:%M:%SZ'
 2004-03-01 00:21:42Z
-$ date --iso-8601=ns | tr T ' '  # --iso-8601 is a GNU extension.
-2004-02-29 16:21:42,692722128-0800
+$ date --rfc-3339=ns  # --rfc-3339 is a GNU extension.
+2004-02-29 16:21:42.692722128-08:00
 $ date --rfc-2822  # a GNU extension
 Sun, 29 Feb 2004 16:21:42 -0800
 $ date +'%Y-%m-%d %H:%M:%S %z'  # %z is a GNU extension.
diff --git a/lib/parse-datetime.y b/lib/parse-datetime.y
index 23a9a41..027f797 100644
--- a/lib/parse-datetime.y
+++ b/lib/parse-datetime.y
@@ -285,8 +285,8 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes,
 %parse-param { parser_control *pc }
 %lex-param { parser_control *pc }

-/* This grammar has 20 shift/reduce conflicts. */
-%expect 20
+/* This grammar has 31 shift/reduce conflicts. */
+%expect 31

 %union
 {
@@ -307,7 +307,7 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes,
 %token <textintval> tSNUMBER tUNUMBER
 %token <timespec> tSDECIMAL_NUMBER tUDECIMAL_NUMBER

-%type <intval> o_colon_minutes o_merid
+%type <intval> o_colon_minutes
 %type <timespec> seconds signed_seconds unsigned_seconds

 %type <rel> relunit relunit_snumber dayshift
@@ -333,7 +333,9 @@ items:
   ;

 item:
-    time
+    datetime
+      { pc->times_seen++; pc->dates_seen++; }
+  | time
       { pc->times_seen++; }
   | local_zone
       { pc->local_zones_seen++; }
@@ -348,35 +350,61 @@ item:
   | hybrid
   ;

+datetime:
+    iso_8601_datetime
+  ;
+
+iso_8601_datetime:
+    iso_8601_date 'T' iso_8601_time
+  ;
+
 time:
     tUNUMBER tMERIDIAN
       {
         set_hhmmss (pc, $1.value, 0, 0, 0);
         pc->meridian = $2;
       }
-  | tUNUMBER ':' tUNUMBER o_merid
+  | tUNUMBER ':' tUNUMBER tMERIDIAN
       {
         set_hhmmss (pc, $1.value, $3.value, 0, 0);
         pc->meridian = $4;
       }
-  | tUNUMBER ':' tUNUMBER tSNUMBER o_colon_minutes
+  | tUNUMBER ':' tUNUMBER ':' unsigned_seconds tMERIDIAN
+      {
+        set_hhmmss (pc, $1.value, $3.value, $5.tv_sec, $5.tv_nsec);
+        pc->meridian = $6;
+      }
+  | iso_8601_time
+  ;
+
+iso_8601_time:
+    tUNUMBER zone_offset
+      {
+        set_hhmmss (pc, $1.value, 0, 0, 0);
+       pc->meridian = MER24;
+      }
+  | tUNUMBER ':' tUNUMBER o_zone_offset
       {
         set_hhmmss (pc, $1.value, $3.value, 0, 0);
         pc->meridian = MER24;
-        pc->zones_seen++;
-        pc->time_zone = time_zone_hhmm (pc, $4, $5);
       }
-  | tUNUMBER ':' tUNUMBER ':' unsigned_seconds o_merid
+  | tUNUMBER ':' tUNUMBER ':' unsigned_seconds o_zone_offset
       {
         set_hhmmss (pc, $1.value, $3.value, $5.tv_sec, $5.tv_nsec);
-        pc->meridian = $6;
+        pc->meridian = MER24;
       }
-  | tUNUMBER ':' tUNUMBER ':' unsigned_seconds tSNUMBER o_colon_minutes
+  ;
+
+o_zone_offset:
+  /* empty */
+  | zone_offset
+  ;
+
+zone_offset:
+    tSNUMBER o_colon_minutes
       {
-        set_hhmmss (pc, $1.value, $3.value, $5.tv_sec, $5.tv_nsec);
-        pc->meridian = MER24;
         pc->zones_seen++;
-        pc->time_zone = time_zone_hhmm (pc, $6, $7);
+        pc->time_zone = time_zone_hhmm (pc, $1, $2);
       }
   ;

@@ -393,12 +421,19 @@ local_zone:
       }
   ;

+/* Note 'T' is a special case, as it is used as the separator in ISO
+   8601 date and time of day representation. */
 zone:
     tZONE
       { pc->time_zone = $1; }
+  | 'T'
+      { pc->time_zone = HOUR(7); }
   | tZONE relunit_snumber
       { pc->time_zone = $1;
         apply_relative_time (pc, $2, 1); }
+  | 'T' relunit_snumber
+      { pc->time_zone = HOUR(7);
+        apply_relative_time (pc, $2, 1); }
   | tZONE tSNUMBER o_colon_minutes
       { pc->time_zone = $1 + time_zone_hhmm (pc, $2, $3); }
   | tDAYZONE
@@ -456,13 +491,6 @@ date:
             pc->year = $5;
           }
       }
-  | tUNUMBER tSNUMBER tSNUMBER
-      {
-        /* ISO 8601 format.  YYYY-MM-DD.  */
-        pc->year = $1;
-        pc->month = -$2.value;
-        pc->day = -$3.value;
-      }
   | tUNUMBER tMONTH tSNUMBER
       {
         /* e.g. 17-JUN-1992.  */
@@ -501,6 +529,17 @@ date:
         pc->month = $2;
         pc->year = $3;
       }
+  | iso_8601_date
+  ;
+
+iso_8601_date:
+    tUNUMBER tSNUMBER tSNUMBER
+      {
+        /* ISO 8601 format.  YYYY-MM-DD.  */
+        pc->year = $1;
+        pc->month = -$2.value;
+        pc->day = -$3.value;
+      }
   ;

 rel:
@@ -612,13 +651,6 @@ o_colon_minutes:
       { $$ = $2.value; }
   ;

-o_merid:
-    /* empty */
-      { $$ = MER24; }
-  | tMERIDIAN
-      { $$ = $1; }
-  ;
-
 %%

 static table const meridian_table[] =
@@ -773,7 +805,10 @@ static table const time_zone_table[] =
   { NULL, 0, 0 }
 };

-/* Military time zone table. */
+/* Military time zone table.
+
+   Note 'T' is a special case, as it is used as the separator in ISO
+   8601 date and time of day representation. */
 static table const military_table[] =
 {
   { "A", tZONE, -HOUR ( 1) },
@@ -794,7 +829,7 @@ static table const military_table[] =
   { "Q", tZONE,  HOUR ( 4) },
   { "R", tZONE,  HOUR ( 5) },
   { "S", tZONE,  HOUR ( 6) },
-  { "T", tZONE,  HOUR ( 7) },
+  { "T", 'T',    0 },
   { "U", tZONE,  HOUR ( 8) },
   { "V", tZONE,  HOUR ( 9) },
   { "W", tZONE,  HOUR (10) },
diff --git a/tests/test-parse-datetime.c b/tests/test-parse-datetime.c
index 45dbae6..4cb85d5 100644
--- a/tests/test-parse-datetime.c
+++ b/tests/test-parse-datetime.c
@@ -48,17 +48,171 @@ static const char* const day_table[] =
   NULL
 };

+
+#if ! HAVE_TM_GMTOFF
+/* Shift A right by B bits portably, by dividing A by 2**B and
+   truncating towards minus infinity.  A and B should be free of side
+   effects, and B should be in the range 0 <= B <= INT_BITS - 2, where
+   INT_BITS is the number of useful bits in an int.  GNU code can
+   assume that INT_BITS is at least 32.
+
+   ISO C99 says that A >> B is implementation-defined if A < 0.  Some
+   implementations (e.g., UNICOS 9.0 on a Cray Y-MP EL) don't shift
+   right in the usual way when A < 0, so SHR falls back on division if
+   ordinary A >> B doesn't seem to be the usual signed shift.  */
+#define SHR(a, b)       \
+  (-1 >> 1 == -1        \
+   ? (a) >> (b)         \
+   : (a) / (1 << (b)) - ((a) % (1 << (b)) < 0))
+
+#define TM_YEAR_BASE 1900
+
+/* Yield the difference between *A and *B,
+   measured in seconds, ignoring leap seconds.
+   The body of this function is taken directly from the GNU C Library;
+   see src/strftime.c.  */
+static long int
+tm_diff (struct tm const *a, struct tm const *b)
+{
+  /* Compute intervening leap days correctly even if year is negative.
+     Take care to avoid int overflow in leap day calculations.  */
+  int a4 = SHR (a->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (a->tm_year & 3);
+  int b4 = SHR (b->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (b->tm_year & 3);
+  int a100 = a4 / 25 - (a4 % 25 < 0);
+  int b100 = b4 / 25 - (b4 % 25 < 0);
+  int a400 = SHR (a100, 2);
+  int b400 = SHR (b100, 2);
+  int intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400);
+  long int ayear = a->tm_year;
+  long int years = ayear - b->tm_year;
+  long int days = (365 * years + intervening_leap_days
+                   + (a->tm_yday - b->tm_yday));
+  return (60 * (60 * (24 * days + (a->tm_hour - b->tm_hour))
+                + (a->tm_min - b->tm_min))
+          + (a->tm_sec - b->tm_sec));
+}
+#endif /* ! HAVE_TM_GMTOFF */
+
+long
+gmt_offset()
+{
+  time_t now;
+  long gmtoff;
+
+  time(&now);
+
+#if !HAVE_TM_GMTOFF
+  struct tm tm_local = *localtime(&now);
+  struct tm tm_gmt   = *gmtime(&now);
+
+  gmtoff = tm_diff(&tm_local, &tm_gmt);
+#else
+  gmtoff = localtime(&now)->tm_gmtoff;
+#endif
+
+  return gmtoff;
+}
+
 int
 main (int argc _GL_UNUSED, char **argv)
 {
   struct timespec result;
   struct timespec result2;
+  struct timespec expected;
   struct timespec now;
   const char *p;
   int i;
+  long gmtoff;

   set_program_name (argv[0]);

+  gmtoff = gmt_offset();
+
+
+  /* ISO 8601 extended date and time of day representation,
+     'T' separator, local time zone */
+  p = "2011-05-01T11:55:18";
+  expected.tv_sec = 1304250918 - gmtoff;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+  /* ISO 8601 extended date and time of day representation,
+     ' ' separator, local time zone */
+  p = "2011-05-01 11:55:18";
+  expected.tv_sec = 1304250918 - gmtoff;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 extended date and time of day representation,
+     'T' separator, UTC */
+  p = "2011-05-01T11:55:18Z";
+  expected.tv_sec = 1304250918;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+  /* ISO 8601 extended date and time of day representation,
+     ' ' separator, UTC */
+  p = "2011-05-01 11:55:18Z";
+  expected.tv_sec = 1304250918;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 extended date and time of day representation,
+     'T' separator, w/UTC offset */
+  p = "2011-05-01T11:55:18-07:00";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+  /* ISO 8601 extended date and time of day representation,
+     ' ' separator, w/UTC offset */
+  p = "2011-05-01 11:55:18-07:00";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 extended date and time of day representation,
+     'T' separator, w/hour only UTC offset */
+  p = "2011-05-01T11:55:18-07";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+  /* ISO 8601 extended date and time of day representation,
+     ' ' separator, w/hour only UTC offset */
+  p = "2011-05-01 11:55:18-07";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
   now.tv_sec = 4711;
   now.tv_nsec = 1267;
   p = "now";
--
1.7.7.rc0.362.g5a14



reply via email to

[Prev in Thread] Current Thread [Next in Thread]