From 155cc945db54ab541594f3a59cfe808bc9aea3fd Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Tue, 22 Feb 2022 18:27:09 -0800
Subject: [PATCH] dd: counts ending in "B" now count bytes

This implements my suggestion in Bug#54112.
* src/dd.c (usage): Document the change.
(parse_integer, scanargs): Implement the change.
Omit some now-obsolete checks for invalid flags.
* tests/dd/bytes.sh: Test the new behavior, while retaining
checks for the now-obsolete usage.
* tests/dd/nocache_eof.sh: Avoid now-obsolete usage.
---
 NEWS                    |   6 +++
 doc/coreutils.texi      |  53 ++++++-------------
 src/dd.c                | 114 ++++++++++++++++++++--------------------
 tests/dd/bytes.sh       |  67 ++++++++++++-----------
 tests/dd/nocache_eof.sh |   2 +-
 5 files changed, 116 insertions(+), 126 deletions(-)

diff --git a/NEWS b/NEWS
index de03f0d47..b6713bfc5 100644
--- a/NEWS
+++ b/NEWS
@@ -60,6 +60,12 @@ GNU coreutils NEWS                                    -*- outline -*-
   dd now supports the aliases iseek=N for skip=N, and oseek=N for seek=N,
   like FreeBSD and other operating systems.
 
+  dd now counts bytes instead of blocks if a block count ends in "B".
+  For example, 'dd count=100KiB' now copies 100 KiB of data, not
+  102,400 blocks of data.  The flags count_bytes, skip_bytes and
+  seek_bytes are therefore obsolescent and are no longer documented,
+  though they still work.
+
   timeout --foreground --kill-after=... will now exit with status 137
   if the kill signal was sent, which is consistent with the behavior
   when the --foreground option is not specified.  This allows users to
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 5419c61ef..641680e11 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -9268,9 +9268,9 @@ use @var{bytes} as the fixed record length.
 @opindex skip
 @opindex iseek
 Skip @var{n} @samp{ibs}-byte blocks in the input file before copying.
-With @samp{iflag=skip_bytes}, interpret @var{n}
+If @var{n} ends in the letter @samp{B}, interpret @var{n}
 as a byte count rather than a block count.
-(The @samp{iseek=} spelling is an extension to POSIX.)
+(@samp{B} and the @samp{iseek=} spelling are GNU extensions to POSIX.)
 
 @item seek=@var{n}
 @itemx oseek=@var{n}
@@ -9278,16 +9278,17 @@ as a byte count rather than a block count.
 @opindex oseek
 Skip @var{n} @samp{obs}-byte blocks in the output file before
 truncating or copying.
-With @samp{oflag=seek_bytes}, interpret @var{n}
+If @var{n} ends in the letter @samp{B}, interpret @var{n}
 as a byte count rather than a block count.
-(The @samp{oseek=} spelling is an extension to POSIX.)
+(@samp{B} and the @samp{oseek=} spelling are GNU extensions to POSIX.)
 
 @item count=@var{n}
 @opindex count
 Copy @var{n} @samp{ibs}-byte blocks from the input file, instead
 of everything until the end of the file.
-With @samp{iflag=count_bytes}, interpret @var{n}
-as a byte count rather than a block count.
+If @var{n} ends in the letter @samp{B},
+interpret @var{n} as a byte count rather than a block count;
+this is a GNU extension to POSIX.
 If short reads occur, as could be the case
 when reading from a pipe for example, @samp{iflag=fullblock}
 ensures that @samp{count=} counts complete input blocks
@@ -9627,27 +9628,6 @@ as they may return short reads. In that case,
 this flag is needed to ensure that a @samp{count=} argument is
 interpreted as a block count rather than a count of read operations.
 
-@item count_bytes
-@opindex count_bytes
-Interpret the @samp{count=} operand as a byte count,
-rather than a block count, which allows specifying
-a length that is not a multiple of the I/O block size.
-This flag can be used only with @code{iflag}.
-
-@item skip_bytes
-@opindex skip_bytes
-Interpret the @samp{skip=} or @samp{iseek=} operand as a byte count,
-rather than a block count, which allows specifying
-an offset that is not a multiple of the I/O block size.
-This flag can be used only with @code{iflag}.
-
-@item seek_bytes
-@opindex seek_bytes
-Interpret the @samp{seek=} or @samp{oseek=} operand as a byte count,
-rather than a block count, which allows specifying
-an offset that is not a multiple of the I/O block size.
-This flag can be used only with @code{oflag}.
-
 @end table
 
 These flags are all GNU extensions to POSIX.
@@ -9680,23 +9660,22 @@ should not be too large---values larger than a few megabytes
 are generally wasteful or (as in the gigabyte..exabyte case) downright
 counterproductive or error-inducing.
 
-To process data that is at an offset or size that is not a
-multiple of the I/O@ block size, you can use the @samp{skip_bytes},
-@samp{seek_bytes} and @samp{count_bytes} flags.  Alternatively
-the traditional method of separate @command{dd} invocations can be used.
+To process data whose offset or size is not a multiple of the
+I/O@ block size, you can use a numeric string @var{n} that ends
+in the letter @samp{B}.
 For example, the following shell commands copy data
-in 512 KiB blocks between a flash drive and a tape, but do not save
-or restore a 1 MiB area at the start of the flash drive:
+in 1 MiB blocks between a flash drive and a tape, but do not save
+or restore a 512-byte area at the start of the flash drive:
 
 @example
 flash=/dev/sda
 tape=/dev/st0
 
-# Copy all but the initial 1 MiB from flash to tape.
-(dd bs=1M skip=1 count=0 && dd bs=512k) <$flash >$tape
+# Copy all but the initial 512 bytes from flash to tape.
+dd if=$flash iseek=512B bs=1MiB of=$tape
 
-# Copy from tape back to flash, leaving initial 1 MiB alone.
-(dd bs=1M seek=1 count=0 && dd bs=512k) <$tape >$flash
+# Copy from tape back to flash, leaving initial 512 bytes alone.
+dd if=$tape bs=1MiB of=$flash oseek=512B
 @end example
 
 @cindex ddrescue
diff --git a/src/dd.c b/src/dd.c
index 1c30e414d..cfafb25a8 100644
--- a/src/dd.c
+++ b/src/dd.c
@@ -575,6 +575,7 @@ N and BYTES may be followed by the following multiplicative suffixes:\n\
 c=1, w=2, b=512, kB=1000, K=1024, MB=1000*1000, M=1024*1024, xM=M,\n\
 GB=1000*1000*1000, G=1024*1024*1024, and so on for T, P, E, Z, Y.\n\
 Binary prefixes can be used, too: KiB=K, MiB=M, and so on.\n\
+If N ends in 'B', it counts bytes not blocks.\n\
 \n\
 Each CONV symbol may be:\n\
 \n\
@@ -638,15 +639,6 @@ Each FLAG symbol may be:\n\
         fputs (_("  binary    use binary I/O for data\n"), stdout);
       if (O_TEXT)
         fputs (_("  text      use text I/O for data\n"), stdout);
-      if (O_COUNT_BYTES)
-        fputs (_("  count_bytes  treat 'count=N' as a byte count (iflag only)\n\
-"), stdout);
-      if (O_SKIP_BYTES)
-        fputs (_("  skip_bytes  treat 'skip=N' as a byte count (iflag only)\n\
-"), stdout);
-      if (O_SEEK_BYTES)
-        fputs (_("  seek_bytes  treat 'seek=N' as a byte count (oflag only)\n\
-"), stdout);
 
       {
         printf (_("\
@@ -1419,9 +1411,8 @@ parse_symbols (char const *str, struct symbol_value const *table,
 
 /* Return the value of STR, interpreted as a non-negative decimal integer,
    optionally multiplied by various values.
-   If STR does not represent a number in this format,
-   set *INVALID to a nonzero error value and return
-   INTMAX_MAX if it is an overflow, an indeterminate value otherwise.  */
+   Set *INVALID to LONGINT_OK on success.  On error, set *INVALID to the
+   error and return INTMAX_MAX on overflow, else an indeterminate value.  */
 
 static intmax_t
 parse_integer (char const *str, strtol_error *invalid)
@@ -1430,53 +1421,57 @@ parse_integer (char const *str, strtol_error *invalid)
      allow strings like " -0".  Initialize N to an interminate value;
      calling code should not rely on this function returning 0
      when *INVALID represents a non-overflow error.  */
-  uintmax_t n = 0;
+  int indeterminate = 0;
+  uintmax_t n = indeterminate;
   char *suffix;
-  strtol_error e = xstrtoumax (str, &suffix, 10, &n, "bcEGkKMPTwYZ0");
+  static char const suffixes[] = "bcEGkKMPTwYZ0";
+  strtol_error e = xstrtoumax (str, &suffix, 10, &n, suffixes);
+  intmax_t result;
+
+  if ((e & ~LONGINT_OVERFLOW) == LONGINT_INVALID_SUFFIX_CHAR
+      && suffix[-1] != 'B' && *suffix == 'B')
+    {
+      suffix++;
+      if (!*suffix)
+        e &= ~LONGINT_INVALID_SUFFIX_CHAR;
+    }
 
   if ((e & ~LONGINT_OVERFLOW) == LONGINT_INVALID_SUFFIX_CHAR
-      && *suffix == 'x')
+      && *suffix == 'x' && ! (suffix[-1] == 'B' && strchr (suffix + 1, 'B')))
     {
-      strtol_error invalid2 = LONGINT_OK;
-      intmax_t result = parse_integer (suffix + 1, &invalid2);
-      if ((invalid2 & ~LONGINT_OVERFLOW) != LONGINT_OK)
+      uintmax_t o;
+      strtol_error f = xstrtoumax (suffix + 1, &suffix, 10, &o, suffixes);
+      if ((f & ~LONGINT_OVERFLOW) != LONGINT_OK)
         {
-          *invalid = invalid2;
-          return result;
+          e = f;
+          result = indeterminate;
         }
-
-      if (INT_MULTIPLY_WRAPV (n, result, &result))
+      else if (INT_MULTIPLY_WRAPV (n, o, &result)
+               || (result != 0 && ((e | f) & LONGINT_OVERFLOW)))
         {
-          *invalid = LONGINT_OVERFLOW;
-          return INTMAX_MAX;
+          e = LONGINT_OVERFLOW;
+          result = INTMAX_MAX;
         }
-
-      if (result == 0)
+      else
         {
-          if (STRPREFIX (str, "0x"))
+          if (result == 0 && STRPREFIX (str, "0x"))
             error (0, 0,
                    _("warning: %s is a zero multiplier; "
                      "use %s if that is intended"),
                    quote_n (0, "0x"), quote_n (1, "00x"));
+          e = LONGINT_OK;
         }
-      else if ((e | invalid2) & LONGINT_OVERFLOW)
-        {
-          *invalid = LONGINT_OVERFLOW;
-          return INTMAX_MAX;
-        }
-
-      return result;
     }
-
-  if (INTMAX_MAX < n)
+  else if (n <= INTMAX_MAX)
+    result = n;
+  else
     {
-      *invalid = e | LONGINT_OVERFLOW;
-      return INTMAX_MAX;
+      e = LONGINT_OVERFLOW;
+      result = INTMAX_MAX;
     }
 
-  if (e != LONGINT_OK)
-    *invalid = e;
-  return n;
+  *invalid = e;
+  return result;
 }
 
 /* OPERAND is of the form "X=...".  Return true if X is NAME.  */
@@ -1495,6 +1490,7 @@ scanargs (int argc, char *const *argv)
   intmax_t count = INTMAX_MAX;
   intmax_t skip = 0;
   intmax_t seek = 0;
+  bool count_B = false, skip_B = false, seek_B = false;
 
   for (int i = optind; i < argc; i++)
     {
@@ -1529,6 +1525,7 @@ scanargs (int argc, char *const *argv)
         {
           strtol_error invalid = LONGINT_OK;
           intmax_t n = parse_integer (val, &invalid);
+          bool has_B = !!strchr (val, 'B');
           intmax_t n_min = 0;
           intmax_t n_max = INTMAX_MAX;
           idx_t *converted_idx = NULL;
@@ -1565,11 +1562,20 @@ scanargs (int argc, char *const *argv)
               converted_idx = &conversion_blocksize;
             }
           else if (operand_is (name, "skip") || operand_is (name, "iseek"))
-            skip = n;
+            {
+              skip = n;
+              skip_B = has_B;
+            }
           else if (operand_is (name + (*name == 'o'), "seek"))
-            seek = n;
+            {
+              seek = n;
+              seek_B = has_B;
+            }
           else if (operand_is (name, "count"))
-            count = n;
+            {
+              count = n;
+              count_B = has_B;
+            }
           else
             {
               error (0, 0, _("unrecognized operand %s"),
@@ -1615,20 +1621,8 @@ scanargs (int argc, char *const *argv)
       usage (EXIT_FAILURE);
     }
 
-  if (input_flags & O_SEEK_BYTES)
-    {
-      error (0, 0, "%s: %s", _("invalid input flag"), quote ("seek_bytes"));
-      usage (EXIT_FAILURE);
-    }
-
-  if (output_flags & (O_COUNT_BYTES | O_SKIP_BYTES))
-    {
-      error (0, 0, "%s: %s", _("invalid output flag"),
-             quote (output_flags & O_COUNT_BYTES
-                    ? "count_bytes" : "skip_bytes"));
-      usage (EXIT_FAILURE);
-    }
-
+  if (skip_B)
+    input_flags |= O_SKIP_BYTES;
   if (input_flags & O_SKIP_BYTES && skip != 0)
     {
       skip_records = skip / input_blocksize;
@@ -1637,6 +1631,8 @@ scanargs (int argc, char *const *argv)
   else if (skip != 0)
     skip_records = skip;
 
+  if (count_B)
+    input_flags |= O_COUNT_BYTES;
   if (input_flags & O_COUNT_BYTES && count != INTMAX_MAX)
     {
       max_records = count / input_blocksize;
@@ -1645,6 +1641,8 @@ scanargs (int argc, char *const *argv)
   else if (count != INTMAX_MAX)
     max_records = count;
 
+  if (seek_B)
+    output_flags |= O_SEEK_BYTES;
   if (output_flags & O_SEEK_BYTES && seek != 0)
     {
       seek_records = seek / output_blocksize;
diff --git a/tests/dd/bytes.sh b/tests/dd/bytes.sh
index 6bc6fb7ef..539f04172 100755
--- a/tests/dd/bytes.sh
+++ b/tests/dd/bytes.sh
@@ -18,39 +18,46 @@
 . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
 print_ver_ dd
 
-# count_bytes
 echo 0123456789abcdefghijklm > in || framework_failure_
-dd count=14 conv=swab iflag=count_bytes < in > out 2> /dev/null || fail=1
-case $(cat out) in
- 1032547698badc) ;;
- *) fail=1 ;;
-esac
 
-# skip_bytes
-echo 0123456789abcdefghijklm > in || framework_failure_
-dd skip=10 iflag=skip_bytes < in > out 2> /dev/null || fail=1
-case $(cat out) in
- abcdefghijklm) ;;
- *) fail=1 ;;
-esac
-
-# skip records and bytes from pipe
-echo 0123456789abcdefghijklm |
- dd skip=10 bs=2 iflag=skip_bytes > out 2> /dev/null || fail=1
-case $(cat out) in
- abcdefghijklm) ;;
- *) fail=1 ;;
-esac
-
-# seek bytes
-echo abcdefghijklm |
- dd bs=5 seek=8 oflag=seek_bytes > out 2> /dev/null || fail=1
-printf '\0\0\0\0\0\0\0\0abcdefghijklm\n' > expected
-compare expected out || fail=1
+# count bytes
+for operands in "count=14B" "count=14 iflag=count_bytes"; do
+  dd $operands conv=swab < in > out 2> /dev/null || fail=1
+  case $(cat out) in
+   1032547698badc) ;;
+   *) fail=1 ;;
+  esac
+done
+
+for operands in "iseek=10B" "skip=10 iflag=skip_bytes"; do
+  # skip bytes
+  dd $operands < in > out 2> /dev/null || fail=1
+  case $(cat out) in
+   abcdefghijklm) ;;
+   *) fail=1 ;;
+  esac
+
+  # skip records and bytes from pipe
+  echo 0123456789abcdefghijklm |
+    dd $operands bs=2 > out 2> /dev/null || fail=1
+  case $(cat out) in
+   abcdefghijklm) ;;
+   *) fail=1 ;;
+  esac
+done
 
-# Just truncation, no I/O
-dd bs=5 seek=8 oflag=seek_bytes of=out2 count=0 2> /dev/null || fail=1
 truncate -s8 expected2
-compare expected2 out2 || fail=1
+printf '\0\0\0\0\0\0\0\0abcdefghijklm\n' > expected
+
+for operands in "oseek=8B" "seek=8 oflag=seek_bytes"; do
+  # seek bytes
+  echo abcdefghijklm |
+    dd $operands bs=5 > out 2> /dev/null || fail=1
+  compare expected out || fail=1
+
+  # Just truncation, no I/O
+  dd $operands bs=5 of=out2 count=0 2> /dev/null || fail=1
+  compare expected2 out2 || fail=1
+done
 
 Exit $fail
diff --git a/tests/dd/nocache_eof.sh b/tests/dd/nocache_eof.sh
index 4215d6ce9..7de765c09 100755
--- a/tests/dd/nocache_eof.sh
+++ b/tests/dd/nocache_eof.sh
@@ -78,7 +78,7 @@ advised_to_eof || fail=1
 # Ensure sub page size offsets are handled.
 # I.e., only page aligned offsets are sent to fadvise.
 if ! strace -o dd.strace -e fadvise64,fadvise64_64 dd status=none \
- if=in.f of=out.f bs=1M oflag=direct seek=512 oflag=seek_bytes; then
+ if=in.f of=out.f bs=1M oflag=direct oseek=512B; then
   warn_ '512 byte aligned O_DIRECT is not supported on this (file) system'
   # The current file system may not support O_DIRECT,
   # or older XFS had a page size alignment requirement
-- 
2.35.1