pspp-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Make text data file use system native line ends by default.


From: Ben Pfaff
Subject: [PATCH] Make text data file use system native line ends by default.
Date: Sun, 17 Mar 2013 23:44:30 -0700

Requested by Ronald Crichton.
---
This seems like a reasonable thing to do, but I'm not certain.
Please take a look and give me some feedback if you have any
before I apply it.

 NEWS                               |    6 +++++-
 doc/data-io.texi                   |   12 +++++++++---
 src/data/file-handle-def.c         |   21 +++++++++++++++++++--
 src/data/file-handle-def.h         |   15 ++++++++++++++-
 src/language/data-io/data-writer.c |    8 +++++++-
 src/language/data-io/file-handle.q |    7 ++++++-
 tests/language/data-io/print.at    |   33 +++++++++++++++++++++++++++++++++
 7 files changed, 93 insertions(+), 9 deletions(-)

diff --git a/NEWS b/NEWS
index e0910fe..635ef49 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,5 @@
 PSPP NEWS -- history of user-visible changes.
-Copyright (C) 1996-2000, 2008-2012 Free Software Foundation, Inc.
+Copyright (C) 1996-2000, 2008-2013 Free Software Foundation, Inc.
 See the end for copying conditions.
 
 Please send PSPP bug reports to address@hidden
@@ -134,6 +134,10 @@ Changes from 0.6.2 to 0.7.9:
    See the documentation for the INSERT command in the PSPP manual for
    more information.
 
+ * Text data files that PRINT and WRITE create now use the system
+   native line ends by default (CRLF on Windows, LF only elsewhere).
+   Use the new ENDS subcommand on FILE HANDLE to override the default.
+
  * A new Perl module allows Perl programs to read and write PSPP
    system files.
 
diff --git a/doc/data-io.texi b/doc/data-io.texi
index 88577a4..313e543 100644
--- a/doc/data-io.texi
+++ b/doc/data-io.texi
@@ -573,6 +573,7 @@ For text files:
         FILE HANDLE @var{handle_name}
                 /NAME='@var{file_name}
                 [/MODE=CHARACTER]
+                [/ENDS=@{LF,CRLF@}]
                 /TABWIDTH=@var{tab_width}
                 [ENCODING='@var{encoding}']
 
@@ -619,9 +620,8 @@ The effect and syntax of @cmd{FILE HANDLE} depends on the selected MODE:
 
 @itemize
 @item
-In CHARACTER mode, the default, the data file is read as a text file,
-according to the local system's conventions, and each text line is
-read as one record.
+In CHARACTER mode, the default, the data file is read as a text file.
+Each text line is read as one record.
 
 In CHARACTER mode only, tabs are expanded to spaces by input programs,
 except by @cmd{DATA LIST FREE} with explicitly specified delimiters.
@@ -629,6 +629,12 @@ Each tab is 4 characters wide by default, but TABWIDTH (a @pspp{}
 extension) may be used to specify an alternate width.  Use a TABWIDTH
 of 0 to suppress tab expansion.
 
+A file written in CHARACTER mode by default uses the line ends of the
+system on which PSPP is running, that is, on Windows, the default is
+CR LF line ends, and on other systems the default is LF only.  Specify
+ENDS as LF or CRLF to override the default.  PSPP reads files using
+either convention on any kind of system, regardless of ENDS.
+
 @item
 In IMAGE mode, the data file is treated as a series of fixed-length
 binary records.  LRECL should be used to specify the record length in
diff --git a/src/data/file-handle-def.c b/src/data/file-handle-def.c
index 121a490..9c853e5 100644
--- a/src/data/file-handle-def.c
+++ b/src/data/file-handle-def.c
@@ -1,5 +1,5 @@
 /* PSPP - a program for statistical analysis.
-   Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+   Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -51,6 +51,7 @@ struct file_handle
     /* FH_REF_FILE only. */
     char *file_name;           /* File name as provided by user. */
     enum fh_mode mode;         /* File mode. */
+    enum fh_line_ends line_ends; /* Line ends for text files. */
 
     /* FH_REF_FILE and FH_REF_INLINE only. */
     size_t record_width;        /* Length of fixed-format records. */
@@ -236,6 +237,7 @@ fh_create_file (const char *id, const char *file_name,
   handle = create_handle (id, handle_name, FH_REF_FILE, properties->encoding);
   handle->file_name = xstrdup (file_name);
   handle->mode = properties->mode;
+  handle->line_ends = properties->line_ends;
   handle->record_width = properties->record_width;
   handle->tab_width = properties->tab_width;
   return handle;
@@ -263,8 +265,14 @@ fh_create_dataset (struct dataset *ds)
 const struct fh_properties *
 fh_default_properties (void)
 {
+#if defined _WIN32 || defined __WIN32__
+#define DEFAULT_LINE_ENDS FH_END_CRLF
+#else
+#define DEFAULT_LINE_ENDS FH_END_LF
+#endif
+
   static const struct fh_properties default_properties
-    = {FH_MODE_TEXT, 1024, 4, (char *) "Auto"};
+    = {FH_MODE_TEXT, DEFAULT_LINE_ENDS, 1024, 4, (char *) "Auto"};
   return &default_properties;
 }
 
@@ -314,6 +322,15 @@ fh_get_mode (const struct file_handle *handle)
   return handle->mode;
 }
 
+/* Returns the line ends of HANDLE, which must be a handle associated with a
+   file. */
+enum fh_line_ends
+fh_get_line_ends (const struct file_handle *handle)
+{
+  assert (handle->referent == FH_REF_FILE);
+  return handle->line_ends;
+}
+
 /* Returns the width of a logical record on HANDLE. */
 size_t
 fh_get_record_width (const struct file_handle *handle)
diff --git a/src/data/file-handle-def.h b/src/data/file-handle-def.h
index 53e1bbf..bd1fed7 100644
--- a/src/data/file-handle-def.h
+++ b/src/data/file-handle-def.h
@@ -1,5 +1,5 @@
 /* PSPP - a program for statistical analysis.
-   Copyright (C) 1997-9, 2000, 2005, 2006, 2010, 2011 Free Software Foundation, Inc.
+   Copyright (C) 1997-9, 2000, 2005, 2006, 2010, 2011, 2013 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -49,10 +49,22 @@ enum fh_access
     FH_ACC_WRITE                /* Write to it. */
   };
 
+/* How a line ends.
+
+   This affects only writing FH_MODE_TEXT files.  Writing in other modes does
+   not use line ends, and reading in FH_MODE_TEXT mode accepts all forms of
+   line ends. */
+enum fh_line_ends
+  {
+    FH_END_LF,                  /* Unix line ends (\n). */
+    FH_END_CRLF                 /* MS-DOS line ends (\r\n). */
+  };
+
 /* Properties of a file handle. */
 struct fh_properties
   {
     enum fh_mode mode;          /* File mode. */
+    enum fh_line_ends line_ends; /* Line ends for text files. */
     size_t record_width;        /* Length of fixed-format records. */
     size_t tab_width;           /* Tab width, 0=do not expand tabs. */
     const char *encoding;       /* Charset for contents. */
@@ -87,6 +99,7 @@ const char *fh_get_encoding (const struct file_handle *);
 /* Properties of FH_REF_FILE file handles. */
 const char *fh_get_file_name (const struct file_handle *);
 enum fh_mode fh_get_mode (const struct file_handle *) ;
+enum fh_line_ends fh_get_line_ends (const struct file_handle *);
 
 /* Properties of FH_REF_FILE and FH_REF_INLINE file handles. */
 size_t fh_get_record_width (const struct file_handle *);
diff --git a/src/language/data-io/data-writer.c b/src/language/data-io/data-writer.c
index 5270db0..5f87d00 100644
--- a/src/language/data-io/data-writer.c
+++ b/src/language/data-io/data-writer.c
@@ -1,5 +1,5 @@
 /* PSPP - a program for statistical analysis.
-   Copyright (C) 1997-2004, 2006, 2010, 2011, 2012 Free Software Foundation, Inc.
+   Copyright (C) 1997-2004, 2006, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -49,8 +49,10 @@ struct dfm_writer
     FILE *file;                 /* Associated file. */
     struct replace_file *rf;    /* Atomic file replacement support. */
     char *encoding;             /* Encoding. */
+    enum fh_line_ends line_ends; /* Line ends for text files. */
 
     int unit;                   /* Unit width, in bytes. */
+    char cr[MAX_UNIT];          /* \r in encoding, 'unit' bytes long. */
     char lf[MAX_UNIT];          /* \n in encoding, 'unit' bytes long. */
     char spaces[32];            /* 32 bytes worth of ' ' in encoding. */
   };
@@ -93,7 +95,9 @@ dfm_open_writer (struct file_handle *fh, const char *encoding)
   w->rf = replace_file_start (fh_get_file_name (w->fh), "wb", 0666,
                               &w->file, NULL);
   w->encoding = xstrdup (encoding);
+  w->line_ends = fh_get_line_ends (fh);
   w->unit = ei.unit;
+  memcpy (w->cr, ei.cr, sizeof w->cr);
   memcpy (w->lf, ei.lf, sizeof w->lf);
   for (ofs = 0; ofs + ei.unit <= sizeof w->spaces; ofs += ei.unit)
     memcpy (&w->spaces[ofs], ei.space, ei.unit);
@@ -134,6 +138,8 @@ dfm_put_record (struct dfm_writer *w, const char *rec, size_t len)
     {
     case FH_MODE_TEXT:
       fwrite (rec, len, 1, w->file);
+      if (w->line_ends == FH_END_CRLF)
+        fwrite (w->cr, w->unit, 1, w->file);
       fwrite (w->lf, w->unit, 1, w->file);
       break;
 
diff --git a/src/language/data-io/file-handle.q b/src/language/data-io/file-handle.q
index 26dfc97..313adc9 100644
--- a/src/language/data-io/file-handle.q
+++ b/src/language/data-io/file-handle.q
@@ -1,5 +1,5 @@
 /* PSPP - a program for statistical analysis.
-   Copyright (C) 1997-9, 2000, 2006, 2010, 2011, 2012 Free Software Foundation, Inc.
+   Copyright (C) 1997-9, 2000, 2006, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -47,6 +47,7 @@
      lrecl=integer;
      tabwidth=integer;
      mode=mode:!character/binary/image/360;
+     ends=ends:lf/crlf;
      recform=recform:fixed/f/variable/v/spanned/vs;
      encoding=string.
 */
@@ -104,6 +105,10 @@ cmd_file_handle (struct lexer *lexer, struct dataset *ds)
           else
             msg (SE, _("%s must not be negative."), "TABWIDTH");
         }
+      if (cmd.ends == FH_LF)
+        properties.line_ends = FH_END_LF;
+      else if (cmd.ends == FH_CRLF)
+        properties.line_ends = FH_END_CRLF;
       break;
     case FH_IMAGE:
       properties.mode = FH_MODE_FIXED;
diff --git a/tests/language/data-io/print.at b/tests/language/data-io/print.at
index 71259e0..04701f6 100644
--- a/tests/language/data-io/print.at
+++ b/tests/language/data-io/print.at
@@ -302,3 +302,36 @@ AT_CHECK([cat foo2.out], [0], [dnl
 ls -l foo.out foo2.out
 AT_CHECK([test -c foo.out])
 AT_CLEANUP
+
+AT_SETUP([PRINT with special line ends])
+AT_DATA([print.sps], [dnl
+FILE HANDLE lf   /NAME='lf.txt'   /ENDS=LF.
+FILE HANDLE crlf /NAME='crlf.txt' /ENDS=CRLF.
+DATA LIST NOTABLE /x 1.
+BEGIN DATA.
+1
+2
+3
+4
+5
+END DATA.
+PRINT OUTFILE=lf/x.
+PRINT OUTFILE=crlf/x.
+EXECUTE.
+])
+AT_CHECK([pspp -O format=csv print.sps])
+AT_CHECK([cat lf.txt], [0], [dnl
+ 1 @&t@
+ 2 @&t@
+ 3 @&t@
+ 4 @&t@
+ 5 @&t@
+])
+AT_CHECK([tr '\r' R < crlf.txt], [0], [dnl
+ 1 R
+ 2 R
+ 3 R
+ 4 R
+ 5 R
+])
+AT_CLEANUP
-- 
1.7.10.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]