[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH] Make text data file use system native line ends by default.
From: |
John Darrington |
Subject: |
Re: [PATCH] Make text data file use system native line ends by default. |
Date: |
Mon, 18 Mar 2013 18:39:03 +0100 |
User-agent: |
Mutt/1.5.20 (2009-06-14) |
The patch looks fine. So long as it doesn't break any tests I'm happy
with it. Except that I have heard that Mac OS, to assert its penchant
for non-conformance, uses neither LF nor CRLF, but LFCR. However I
don't use Mac so I don't know if this rumour is correct.
I don't have any opinion about what the default on Windows should be.
J'
On Sun, Mar 17, 2013 at 11:44:30PM -0700, Ben Pfaff wrote:
Requested by Ronald Crichton.
---
This seems like a reasonable thing to do, but I'm not certain.
Please take a look and give me some feedback if you have any
before I apply it.
NEWS | 6 +++++-
doc/data-io.texi | 12 +++++++++---
src/data/file-handle-def.c | 21 +++++++++++++++++++--
src/data/file-handle-def.h | 15 ++++++++++++++-
src/language/data-io/data-writer.c | 8 +++++++-
src/language/data-io/file-handle.q | 7 ++++++-
tests/language/data-io/print.at | 33 +++++++++++++++++++++++++++++++++
7 files changed, 93 insertions(+), 9 deletions(-)
diff --git a/NEWS b/NEWS
index e0910fe..635ef49 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,5 @@
PSPP NEWS -- history of user-visible changes.
-Copyright (C) 1996-2000, 2008-2012 Free Software Foundation, Inc.
+Copyright (C) 1996-2000, 2008-2013 Free Software Foundation, Inc.
See the end for copying conditions.
Please send PSPP bug reports to address@hidden
@@ -134,6 +134,10 @@ Changes from 0.6.2 to 0.7.9:
See the documentation for the INSERT command in the PSPP manual for
more information.
+ * Text data files that PRINT and WRITE create now use the system
+ native line ends by default (CRLF on Windows, LF only elsewhere).
+ Use the new ENDS subcommand on FILE HANDLE to override the default.
+
* A new Perl module allows Perl programs to read and write PSPP
system files.
diff --git a/doc/data-io.texi b/doc/data-io.texi
index 88577a4..313e543 100644
--- a/doc/data-io.texi
+++ b/doc/data-io.texi
@@ -573,6 +573,7 @@ For text files:
FILE HANDLE @var{handle_name}
/NAME='@var{file_name}
[/MODE=CHARACTER]
+ [/ENDS=@{LF,CRLF@}]
/TABWIDTH=@var{tab_width}
[ENCODING='@var{encoding}']
@@ -619,9 +620,8 @@ The effect and syntax of @cmd{FILE HANDLE} depends on
the selected MODE:
@itemize
@item
-In CHARACTER mode, the default, the data file is read as a text file,
-according to the local system's conventions, and each text line is
-read as one record.
+In CHARACTER mode, the default, the data file is read as a text file.
+Each text line is read as one record.
In CHARACTER mode only, tabs are expanded to spaces by input programs,
except by @cmd{DATA LIST FREE} with explicitly specified delimiters.
@@ -629,6 +629,12 @@ Each tab is 4 characters wide by default, but
TABWIDTH (a @pspp{}
extension) may be used to specify an alternate width. Use a TABWIDTH
of 0 to suppress tab expansion.
+A file written in CHARACTER mode by default uses the line ends of the
+system on which PSPP is running, that is, on Windows, the default is
+CR LF line ends, and on other systems the default is LF only. Specify
+ENDS as LF or CRLF to override the default. PSPP reads files using
+either convention on any kind of system, regardless of ENDS.
+
@item
In IMAGE mode, the data file is treated as a series of fixed-length
binary records. LRECL should be used to specify the record length in
diff --git a/src/data/file-handle-def.c b/src/data/file-handle-def.c
index 121a490..9c853e5 100644
--- a/src/data/file-handle-def.c
+++ b/src/data/file-handle-def.c
@@ -1,5 +1,5 @@
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software
Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013 Free
Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -51,6 +51,7 @@ struct file_handle
/* FH_REF_FILE only. */
char *file_name; /* File name as provided by user. */
enum fh_mode mode; /* File mode. */
+ enum fh_line_ends line_ends; /* Line ends for text files. */
/* FH_REF_FILE and FH_REF_INLINE only. */
size_t record_width; /* Length of fixed-format records. */
@@ -236,6 +237,7 @@ fh_create_file (const char *id, const char *file_name,
handle = create_handle (id, handle_name, FH_REF_FILE,
properties->encoding);
handle->file_name = xstrdup (file_name);
handle->mode = properties->mode;
+ handle->line_ends = properties->line_ends;
handle->record_width = properties->record_width;
handle->tab_width = properties->tab_width;
return handle;
@@ -263,8 +265,14 @@ fh_create_dataset (struct dataset *ds)
const struct fh_properties *
fh_default_properties (void)
{
+#if defined _WIN32 || defined __WIN32__
+#define DEFAULT_LINE_ENDS FH_END_CRLF
+#else
+#define DEFAULT_LINE_ENDS FH_END_LF
+#endif
+
static const struct fh_properties default_properties
- = {FH_MODE_TEXT, 1024, 4, (char *) "Auto"};
+ = {FH_MODE_TEXT, DEFAULT_LINE_ENDS, 1024, 4, (char *) "Auto"};
return &default_properties;
}
@@ -314,6 +322,15 @@ fh_get_mode (const struct file_handle *handle)
return handle->mode;
}
+/* Returns the line ends of HANDLE, which must be a handle associated
with a
+ file. */
+enum fh_line_ends
+fh_get_line_ends (const struct file_handle *handle)
+{
+ assert (handle->referent == FH_REF_FILE);
+ return handle->line_ends;
+}
+
/* Returns the width of a logical record on HANDLE. */
size_t
fh_get_record_width (const struct file_handle *handle)
diff --git a/src/data/file-handle-def.h b/src/data/file-handle-def.h
index 53e1bbf..bd1fed7 100644
--- a/src/data/file-handle-def.h
+++ b/src/data/file-handle-def.h
@@ -1,5 +1,5 @@
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2005, 2006, 2010, 2011 Free Software
Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2005, 2006, 2010, 2011, 2013 Free Software
Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -49,10 +49,22 @@ enum fh_access
FH_ACC_WRITE /* Write to it. */
};
+/* How a line ends.
+
+ This affects only writing FH_MODE_TEXT files. Writing in other modes
does
+ not use line ends, and reading in FH_MODE_TEXT mode accepts all forms
of
+ line ends. */
+enum fh_line_ends
+ {
+ FH_END_LF, /* Unix line ends (\n). */
+ FH_END_CRLF /* MS-DOS line ends (\r\n). */
+ };
+
/* Properties of a file handle. */
struct fh_properties
{
enum fh_mode mode; /* File mode. */
+ enum fh_line_ends line_ends; /* Line ends for text files. */
size_t record_width; /* Length of fixed-format records. */
size_t tab_width; /* Tab width, 0=do not expand tabs. */
const char *encoding; /* Charset for contents. */
@@ -87,6 +99,7 @@ const char *fh_get_encoding (const struct file_handle *);
/* Properties of FH_REF_FILE file handles. */
const char *fh_get_file_name (const struct file_handle *);
enum fh_mode fh_get_mode (const struct file_handle *) ;
+enum fh_line_ends fh_get_line_ends (const struct file_handle *);
/* Properties of FH_REF_FILE and FH_REF_INLINE file handles. */
size_t fh_get_record_width (const struct file_handle *);
diff --git a/src/language/data-io/data-writer.c
b/src/language/data-io/data-writer.c
index 5270db0..5f87d00 100644
--- a/src/language/data-io/data-writer.c
+++ b/src/language/data-io/data-writer.c
@@ -1,5 +1,5 @@
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-2004, 2006, 2010, 2011, 2012 Free Software
Foundation, Inc.
+ Copyright (C) 1997-2004, 2006, 2010, 2011, 2012, 2013 Free Software
Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -49,8 +49,10 @@ struct dfm_writer
FILE *file; /* Associated file. */
struct replace_file *rf; /* Atomic file replacement support. */
char *encoding; /* Encoding. */
+ enum fh_line_ends line_ends; /* Line ends for text files. */
int unit; /* Unit width, in bytes. */
+ char cr[MAX_UNIT]; /* \r in encoding, 'unit' bytes long. */
char lf[MAX_UNIT]; /* \n in encoding, 'unit' bytes long. */
char spaces[32]; /* 32 bytes worth of ' ' in encoding. */
};
@@ -93,7 +95,9 @@ dfm_open_writer (struct file_handle *fh, const char
*encoding)
w->rf = replace_file_start (fh_get_file_name (w->fh), "wb", 0666,
&w->file, NULL);
w->encoding = xstrdup (encoding);
+ w->line_ends = fh_get_line_ends (fh);
w->unit = ei.unit;
+ memcpy (w->cr, ei.cr, sizeof w->cr);
memcpy (w->lf, ei.lf, sizeof w->lf);
for (ofs = 0; ofs + ei.unit <= sizeof w->spaces; ofs += ei.unit)
memcpy (&w->spaces[ofs], ei.space, ei.unit);
@@ -134,6 +138,8 @@ dfm_put_record (struct dfm_writer *w, const char *rec,
size_t len)
{
case FH_MODE_TEXT:
fwrite (rec, len, 1, w->file);
+ if (w->line_ends == FH_END_CRLF)
+ fwrite (w->cr, w->unit, 1, w->file);
fwrite (w->lf, w->unit, 1, w->file);
break;
diff --git a/src/language/data-io/file-handle.q
b/src/language/data-io/file-handle.q
index 26dfc97..313adc9 100644
--- a/src/language/data-io/file-handle.q
+++ b/src/language/data-io/file-handle.q
@@ -1,5 +1,5 @@
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2010, 2011, 2012 Free Software
Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2010, 2011, 2012, 2013 Free Software
Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -47,6 +47,7 @@
lrecl=integer;
tabwidth=integer;
mode=mode:!character/binary/image/360;
+ ends=ends:lf/crlf;
recform=recform:fixed/f/variable/v/spanned/vs;
encoding=string.
*/
@@ -104,6 +105,10 @@ cmd_file_handle (struct lexer *lexer, struct dataset
*ds)
else
msg (SE, _("%s must not be negative."), "TABWIDTH");
}
+ if (cmd.ends == FH_LF)
+ properties.line_ends = FH_END_LF;
+ else if (cmd.ends == FH_CRLF)
+ properties.line_ends = FH_END_CRLF;
break;
case FH_IMAGE:
properties.mode = FH_MODE_FIXED;
diff --git a/tests/language/data-io/print.at
b/tests/language/data-io/print.at
index 71259e0..04701f6 100644
--- a/tests/language/data-io/print.at
+++ b/tests/language/data-io/print.at
@@ -302,3 +302,36 @@ AT_CHECK([cat foo2.out], [0], [dnl
ls -l foo.out foo2.out
AT_CHECK([test -c foo.out])
AT_CLEANUP
+
+AT_SETUP([PRINT with special line ends])
+AT_DATA([print.sps], [dnl
+FILE HANDLE lf /NAME='lf.txt' /ENDS=LF.
+FILE HANDLE crlf /NAME='crlf.txt' /ENDS=CRLF.
+DATA LIST NOTABLE /x 1.
+BEGIN DATA.
+1
+2
+3
+4
+5
+END DATA.
+PRINT OUTFILE=lf/x.
+PRINT OUTFILE=crlf/x.
+EXECUTE.
+])
+AT_CHECK([pspp -O format=csv print.sps])
+AT_CHECK([cat lf.txt], [0], [dnl
+ 1 @&t@
+ 2 @&t@
+ 3 @&t@
+ 4 @&t@
+ 5 @&t@
+])
+AT_CHECK([tr '\r' R < crlf.txt], [0], [dnl
+ 1 R
+ 2 R
+ 3 R
+ 4 R
+ 5 R
+])
+AT_CLEANUP
--
1.7.10.4
_______________________________________________
pspp-dev mailing list
address@hidden
https://lists.gnu.org/mailman/listinfo/pspp-dev
--
PGP Public key ID: 1024D/2DE827B3
fingerprint = 8797 A26D 0854 2EAB 0285 A290 8A67 719C 2DE8 27B3
See http://keys.gnupg.net or any PGP keyserver for public key.
signature.asc
Description: Digital signature