bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2] Add support for ISO 8601 basic format


From: Mihai Capotă
Subject: [PATCH v2] Add support for ISO 8601 basic format
Date: Mon, 5 Aug 2013 18:51:13 +0200

The parser now accepts the basic format for combined date and time
representations, which omits the date and time separators, "-" and ":".

See bug 23767 for GNU coreutils, <https://savannah.gnu.org/bugs/?23767>.

* lib/parse-datetime.y: Parse combined date and time representations in
ISO 8601 basic format.
(set_hhmmss_iso_8601_basic_time): New function.
(digits_iso_8601_basic_to_date): New function.
* tests/test-parse-datetime.c: Add tests for combined date and time
representations in ISO 8601 basic format.
* doc/parse-datetime.texi: Document support for combined date and time
representations in ISO 8601 basic format.

Signed-off-by: Mihai Capotă <address@hidden>
---
On Thu, Apr 25, 2013 at 12:02 AM, Paul Eggert <address@hidden> wrote:
> The most important thing is that the patch also needs
> to update doc/parse-datetime.texi.

Done.

> On 03/30/13 12:18, Mihai Capotă wrote:
>> +      /* not ISO 8601 time, forcing mktime error */
>> +      pc->hour = 90;
>
> How does this force a mktime error?  mktime allows tm_hour == 90.

I meant to say mktime_ok. I changed the code to reject input by incrementing 
times_seen, like time_zone_hhmm.

>>  datetime:
>>      iso_8601_datetime
>> +  | iso_8601_basic_datetime
>>    ;
>>
>>  iso_8601_datetime:
>>      iso_8601_date 'T' iso_8601_time
>>    ;
>>
>> +iso_8601_basic_datetime:
>> +    number 'T' iso_8601_basic_time
>> +      { pc->dates_seen--; } /* already incremented in digits_to_date_time */
>
> This doesn't look right.  'number' accepts all sort of things that we
> would rather not accept here.

I was trying to make use of the existing digits_to_date_time function. I 
replaced it with tUNUMBER and a new function.

> Conversely, why require ":" in times to
> correlate with "-" in dates?  Shouldn't we accept a "-"less date along
> with a ":"ful time, and vice versa?

No, that is not allowed by the standard.

> And that "dates_seen--" business
> is a hack; can't we arrange things so that dates_seen is incremented
> just once?

The hack is gone.

>> +iso_8601_basic_time:
>> +    tUNUMBER o_zone_offset
>> +      {
>> +        set_hhmmss_iso_8601_basic_time (pc, $1.value, 0);
>> +        pc->meridian = MER24;
>> +      }
>> +  | tUDECIMAL_NUMBER o_zone_offset
>> +      {
>> +        /* FIXME avoid time_t to long int cast */
>
> Why is the cast needed?  Also, can't the grammar be simplified
> here, by using unsigned_seconds instead of using both
> tUDECIMAL_NUMBER and tUNUMBER?

I switched to using unsigned_seconds.

 doc/parse-datetime.texi     |    9 +++++-
 lib/parse-datetime.y        |   68 +++++++++++++++++++++++++++++++++++++++++--
 tests/test-parse-datetime.c |   61 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 135 insertions(+), 3 deletions(-)

diff --git a/doc/parse-datetime.texi b/doc/parse-datetime.texi
index 6b3e973..9aa87ed 100644
--- a/doc/parse-datetime.texi
+++ b/doc/parse-datetime.texi
@@ -327,7 +327,12 @@ The ISO 8601 date and time of day extended format consists of an ISO
 day.  This format is also recognized if the @samp{T} is replaced by a
 space.
 
-In this format, the time of day should use 24-hour notation.
+The ISO 8601 basic format is also recognized. It is identical to the ISO 8601
+extended format, except for omitting the @samp{-} separator in the date and the
+@samp{:} separator in the time. Only the HHMMSS format is supported for the
+time of day; the reduced accuracy HHMM and HH formats are not supported.
+
+In these formats, the time of day should use 24-hour notation.
 Fractional seconds are allowed, with either comma or period preceding
 the fraction.  ISO 8601 fractional minutes and hours are not
 supported.  Typically, hosts support nanosecond timestamp resolution;
@@ -339,6 +344,8 @@ Here are some examples:
 2012-09-24T20:02:00.052-0500
 2012-12-31T23:59:59,999999999+1100
 1970-01-01 00:00Z
+20120924T200200.052-0500
+20121231T235959,999999999+1100
 @end example
 
 @node Day of week items
diff --git a/lib/parse-datetime.y b/lib/parse-datetime.y
index 4dce7fa..fa9719d 100644
--- a/lib/parse-datetime.y
+++ b/lib/parse-datetime.y
@@ -257,6 +257,38 @@ digits_to_date_time (parser_control *pc, textint text_int)
     }
 }
 
+/* Extract into *PC the date info from a string of digits in ISO 8601 basic
+   format, i.e., YYYYMMDD, YYYY, or YY meaning century.  YYYYMM is not allowed,
+   to avoid confusion with YYMMDD; other lengths bump PC->dates_seen.  */
+static void
+digits_iso_8601_basic_to_date (parser_control *pc, textint text_int)
+{
+  switch (text_int.digits)
+    {
+    case 8:  /* YYYYMMDD */
+      pc->day = text_int.value % 100;
+      pc->month = (text_int.value / 100) % 100;
+      pc->year.value = text_int.value / 10000;
+      pc->year.digits = 4;
+      return;
+    case 4:  /* YYYY: January 1 of that year */
+      pc->day = 1;
+      pc->month = 1;
+      pc->year.value = text_int.value;
+      pc->year.digits = 4;
+      return;
+    case 2:  /* YY (century): January 1 of year YY00 */
+      pc->day = 1;
+      pc->month = 1;
+      pc->year.value = text_int.value * 100;
+      pc->year.digits = 4;
+      return;
+    default:  /* unsupported length; presumably rejected via dates_seen */
+      pc->dates_seen++;
+      return;
+    }
+}
+
 /* Increment PC->rel by FACTOR * REL (FACTOR is 1 or -1).  */
 static void
 apply_relative_time (parser_control *pc, relative_time rel, int factor)
@@ -282,6 +314,28 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes,
   pc->seconds.tv_nsec = nsec;
 }
 
+/* Set PC->hour, minutes, seconds and nanoseconds members from an ISO 8601
+   basic time: INTEGER_PART is HHMMSS as a number, FRACTIONAL_PART is nsec.  */
+static void
+set_hhmmss_iso_8601_basic_time (parser_control *pc, time_t integer_part,
+                                long int fractional_part)
+{
+  if (integer_part / 1000000 > 0)  /* value >= 1000000: more than 6 digits */
+    {
+      /* Not ISO 8601 time, arrange to reject it by incrementing
+         pc->times_seen.  */
+      pc->times_seen++;
+    }
+  else
+    {
+      /* FIXME support reduced accuracy times, i.e. HHMM and HH */
+      pc->hour = integer_part / 10000;
+      pc->minutes = (integer_part % 10000) / 100;
+      pc->seconds.tv_sec = integer_part % 100;
+      pc->seconds.tv_nsec = fractional_part;
+    }
+}
+
 %}
 
 /* We want a reentrant parser, even if the TZ manipulation and the calls to
@@ -290,8 +344,8 @@ set_hhmmss (parser_control *pc, long int hour, long int minutes,
 %parse-param { parser_control *pc }
 %lex-param { parser_control *pc }
 
-/* This grammar has 31 shift/reduce conflicts. */
-%expect 31
+/* This grammar has 33 shift/reduce conflicts. */
+%expect 33
 
 %union
 {
@@ -358,12 +412,22 @@ item:
 
 datetime:
     iso_8601_datetime
+  | iso_8601_basic_datetime
   ;
 
 iso_8601_datetime:
     iso_8601_date 'T' iso_8601_time
   ;
 
+iso_8601_basic_datetime:
+    tUNUMBER 'T' unsigned_seconds o_zone_offset
+      {
+        digits_iso_8601_basic_to_date (pc, $1);  /* $1: date digits */
+        set_hhmmss_iso_8601_basic_time (pc, $3.tv_sec, $3.tv_nsec);
+        pc->meridian = MER24;  /* presumably 24-hour notation; confirm */
+      }
+  ;
+
 time:
     tUNUMBER tMERIDIAN
       {
diff --git a/tests/test-parse-datetime.c b/tests/test-parse-datetime.c
index 7eba9ad..c620009 100644
--- a/tests/test-parse-datetime.c
+++ b/tests/test-parse-datetime.c
@@ -216,6 +216,67 @@ main (int argc _GL_UNUSED, char **argv)
           && expected.tv_nsec == result.tv_nsec);
 
 
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, local time zone */
+  p = "20110501T115518";
+  expected.tv_sec = ref_time - gmtoff;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, UTC */
+  p = "20110501T115518Z";
+  expected.tv_sec = ref_time;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, w/UTC offset */
+  p = "20110501T115518-0700";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, w/hour only UTC offset */
+  p = "20110501T115518-07";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 0;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* ISO 8601 basic date and time of day representation,
+     'T' separator, w/hour only UTC offset, with ns */
+  p = "20110501T115518,123456789-07";
+  expected.tv_sec = 1304276118;
+  expected.tv_nsec = 123456789;
+  ASSERT (parse_datetime (&result, p, 0));
+  LOG (p, expected, result);
+  ASSERT (expected.tv_sec == result.tv_sec
+          && expected.tv_nsec == result.tv_nsec);
+
+
+  /* Invalid ISO 8601 basic date and time of day representation,
+     too many digits for time */
+  p = "20110501T11551800";
+  ASSERT (!parse_datetime (&result, p, 0));
+
+
   now.tv_sec = 4711;
   now.tv_nsec = 1267;
   p = "now";
-- 
1.7.9.5




reply via email to

[Prev in Thread] Current Thread [Next in Thread]