[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] Make text data file use system native line ends by default.
From: |
Ben Pfaff |
Subject: |
[PATCH] Make text data file use system native line ends by default. |
Date: |
Sun, 17 Mar 2013 23:44:30 -0700 |
Requested by Ronald Crichton.
---
This seems like a reasonable thing to do, but I'm not certain.
Please take a look and give me some feedback if you have any
before I apply it.
NEWS | 6 +++++-
doc/data-io.texi | 12 +++++++++---
src/data/file-handle-def.c | 21 +++++++++++++++++++--
src/data/file-handle-def.h | 15 ++++++++++++++-
src/language/data-io/data-writer.c | 8 +++++++-
src/language/data-io/file-handle.q | 7 ++++++-
tests/language/data-io/print.at | 33 +++++++++++++++++++++++++++++++++
7 files changed, 93 insertions(+), 9 deletions(-)
diff --git a/NEWS b/NEWS
index e0910fe..635ef49 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,5 @@
PSPP NEWS -- history of user-visible changes.
-Copyright (C) 1996-2000, 2008-2012 Free Software Foundation, Inc.
+Copyright (C) 1996-2000, 2008-2013 Free Software Foundation, Inc.
See the end for copying conditions.
Please send PSPP bug reports to address@hidden
@@ -134,6 +134,10 @@ Changes from 0.6.2 to 0.7.9:
See the documentation for the INSERT command in the PSPP manual for
more information.
+ * Text data files that PRINT and WRITE create now use the system
+ native line ends by default (CRLF on Windows, LF only elsewhere).
+ Use the new ENDS subcommand on FILE HANDLE to override the default.
+
* A new Perl module allows Perl programs to read and write PSPP
system files.
diff --git a/doc/data-io.texi b/doc/data-io.texi
index 88577a4..313e543 100644
--- a/doc/data-io.texi
+++ b/doc/data-io.texi
@@ -573,6 +573,7 @@ For text files:
FILE HANDLE @var{handle_name}
/NAME='@var{file_name}
[/MODE=CHARACTER]
+ [/ENDS=@{LF,CRLF@}]
/TABWIDTH=@var{tab_width}
[ENCODING='@var{encoding}']
@@ -619,9 +620,8 @@ The effect and syntax of @cmd{FILE HANDLE} depends on the
selected MODE:
@itemize
@item
-In CHARACTER mode, the default, the data file is read as a text file,
-according to the local system's conventions, and each text line is
-read as one record.
+In CHARACTER mode, the default, the data file is read as a text file.
+Each text line is read as one record.
In CHARACTER mode only, tabs are expanded to spaces by input programs,
except by @cmd{DATA LIST FREE} with explicitly specified delimiters.
@@ -629,6 +629,12 @@ Each tab is 4 characters wide by default, but TABWIDTH (a
@pspp{}
extension) may be used to specify an alternate width. Use a TABWIDTH
of 0 to suppress tab expansion.
+A file written in CHARACTER mode by default uses the line ends of the
+system on which PSPP is running, that is, on Windows, the default is
+CR LF line ends, and on other systems the default is LF only. Specify
+ENDS as LF or CRLF to override the default. PSPP reads files using
+either convention on any kind of system, regardless of ENDS.
+
@item
In IMAGE mode, the data file is treated as a series of fixed-length
binary records. LRECL should be used to specify the record length in
diff --git a/src/data/file-handle-def.c b/src/data/file-handle-def.c
index 121a490..9c853e5 100644
--- a/src/data/file-handle-def.c
+++ b/src/data/file-handle-def.c
@@ -1,5 +1,5 @@
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software
Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013 Free
Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -51,6 +51,7 @@ struct file_handle
/* FH_REF_FILE only. */
char *file_name; /* File name as provided by user. */
enum fh_mode mode; /* File mode. */
+ enum fh_line_ends line_ends; /* Line ends for text files. */
/* FH_REF_FILE and FH_REF_INLINE only. */
size_t record_width; /* Length of fixed-format records. */
@@ -236,6 +237,7 @@ fh_create_file (const char *id, const char *file_name,
handle = create_handle (id, handle_name, FH_REF_FILE, properties->encoding);
handle->file_name = xstrdup (file_name);
handle->mode = properties->mode;
+ handle->line_ends = properties->line_ends;
handle->record_width = properties->record_width;
handle->tab_width = properties->tab_width;
return handle;
@@ -263,8 +265,14 @@ fh_create_dataset (struct dataset *ds)
const struct fh_properties *
fh_default_properties (void)
{
+#if defined _WIN32 || defined __WIN32__
+#define DEFAULT_LINE_ENDS FH_END_CRLF
+#else
+#define DEFAULT_LINE_ENDS FH_END_LF
+#endif
+
static const struct fh_properties default_properties
- = {FH_MODE_TEXT, 1024, 4, (char *) "Auto"};
+ = {FH_MODE_TEXT, DEFAULT_LINE_ENDS, 1024, 4, (char *) "Auto"};
return &default_properties;
}
@@ -314,6 +322,15 @@ fh_get_mode (const struct file_handle *handle)
return handle->mode;
}
+/* Returns the line ends of HANDLE, which must be a handle associated with a
+ file. */
+enum fh_line_ends
+fh_get_line_ends (const struct file_handle *handle)
+{
+ assert (handle->referent == FH_REF_FILE);
+ return handle->line_ends;
+}
+
/* Returns the width of a logical record on HANDLE. */
size_t
fh_get_record_width (const struct file_handle *handle)
diff --git a/src/data/file-handle-def.h b/src/data/file-handle-def.h
index 53e1bbf..bd1fed7 100644
--- a/src/data/file-handle-def.h
+++ b/src/data/file-handle-def.h
@@ -1,5 +1,5 @@
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2005, 2006, 2010, 2011 Free Software
Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2005, 2006, 2010, 2011, 2013 Free Software
Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -49,10 +49,22 @@ enum fh_access
FH_ACC_WRITE /* Write to it. */
};
+/* How a line ends.
+
+ This affects only writing FH_MODE_TEXT files. Writing in other modes does
+ not use line ends, and reading in FH_MODE_TEXT mode accepts all forms of
+ line ends. */
+enum fh_line_ends
+ {
+ FH_END_LF, /* Unix line ends (\n). */
+ FH_END_CRLF /* MS-DOS line ends (\r\n). */
+ };
+
/* Properties of a file handle. */
struct fh_properties
{
enum fh_mode mode; /* File mode. */
+ enum fh_line_ends line_ends; /* Line ends for text files. */
size_t record_width; /* Length of fixed-format records. */
size_t tab_width; /* Tab width, 0=do not expand tabs. */
const char *encoding; /* Charset for contents. */
@@ -87,6 +99,7 @@ const char *fh_get_encoding (const struct file_handle *);
/* Properties of FH_REF_FILE file handles. */
const char *fh_get_file_name (const struct file_handle *);
enum fh_mode fh_get_mode (const struct file_handle *) ;
+enum fh_line_ends fh_get_line_ends (const struct file_handle *);
/* Properties of FH_REF_FILE and FH_REF_INLINE file handles. */
size_t fh_get_record_width (const struct file_handle *);
diff --git a/src/language/data-io/data-writer.c
b/src/language/data-io/data-writer.c
index 5270db0..5f87d00 100644
--- a/src/language/data-io/data-writer.c
+++ b/src/language/data-io/data-writer.c
@@ -1,5 +1,5 @@
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-2004, 2006, 2010, 2011, 2012 Free Software Foundation,
Inc.
+ Copyright (C) 1997-2004, 2006, 2010, 2011, 2012, 2013 Free Software
Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -49,8 +49,10 @@ struct dfm_writer
FILE *file; /* Associated file. */
struct replace_file *rf; /* Atomic file replacement support. */
char *encoding; /* Encoding. */
+ enum fh_line_ends line_ends; /* Line ends for text files. */
int unit; /* Unit width, in bytes. */
+ char cr[MAX_UNIT]; /* \r in encoding, 'unit' bytes long. */
char lf[MAX_UNIT]; /* \n in encoding, 'unit' bytes long. */
char spaces[32]; /* 32 bytes worth of ' ' in encoding. */
};
@@ -93,7 +95,9 @@ dfm_open_writer (struct file_handle *fh, const char *encoding)
w->rf = replace_file_start (fh_get_file_name (w->fh), "wb", 0666,
&w->file, NULL);
w->encoding = xstrdup (encoding);
+ w->line_ends = fh_get_line_ends (fh);
w->unit = ei.unit;
+ memcpy (w->cr, ei.cr, sizeof w->cr);
memcpy (w->lf, ei.lf, sizeof w->lf);
for (ofs = 0; ofs + ei.unit <= sizeof w->spaces; ofs += ei.unit)
memcpy (&w->spaces[ofs], ei.space, ei.unit);
@@ -134,6 +138,8 @@ dfm_put_record (struct dfm_writer *w, const char *rec,
size_t len)
{
case FH_MODE_TEXT:
fwrite (rec, len, 1, w->file);
+ if (w->line_ends == FH_END_CRLF)
+ fwrite (w->cr, w->unit, 1, w->file);
fwrite (w->lf, w->unit, 1, w->file);
break;
diff --git a/src/language/data-io/file-handle.q
b/src/language/data-io/file-handle.q
index 26dfc97..313adc9 100644
--- a/src/language/data-io/file-handle.q
+++ b/src/language/data-io/file-handle.q
@@ -1,5 +1,5 @@
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2010, 2011, 2012 Free Software
Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2010, 2011, 2012, 2013 Free Software
Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -47,6 +47,7 @@
lrecl=integer;
tabwidth=integer;
mode=mode:!character/binary/image/360;
+ ends=ends:lf/crlf;
recform=recform:fixed/f/variable/v/spanned/vs;
encoding=string.
*/
@@ -104,6 +105,10 @@ cmd_file_handle (struct lexer *lexer, struct dataset *ds)
else
msg (SE, _("%s must not be negative."), "TABWIDTH");
}
+ if (cmd.ends == FH_LF)
+ properties.line_ends = FH_END_LF;
+ else if (cmd.ends == FH_CRLF)
+ properties.line_ends = FH_END_CRLF;
break;
case FH_IMAGE:
properties.mode = FH_MODE_FIXED;
diff --git a/tests/language/data-io/print.at b/tests/language/data-io/print.at
index 71259e0..04701f6 100644
--- a/tests/language/data-io/print.at
+++ b/tests/language/data-io/print.at
@@ -302,3 +302,36 @@ AT_CHECK([cat foo2.out], [0], [dnl
ls -l foo.out foo2.out
AT_CHECK([test -c foo.out])
AT_CLEANUP
+
+AT_SETUP([PRINT with special line ends])
+AT_DATA([print.sps], [dnl
+FILE HANDLE lf /NAME='lf.txt' /ENDS=LF.
+FILE HANDLE crlf /NAME='crlf.txt' /ENDS=CRLF.
+DATA LIST NOTABLE /x 1.
+BEGIN DATA.
+1
+2
+3
+4
+5
+END DATA.
+PRINT OUTFILE=lf/x.
+PRINT OUTFILE=crlf/x.
+EXECUTE.
+])
+AT_CHECK([pspp -O format=csv print.sps])
+AT_CHECK([cat lf.txt], [0], [dnl
+ 1 @&t@
+ 2 @&t@
+ 3 @&t@
+ 4 @&t@
+ 5 @&t@
+])
+AT_CHECK([tr '\r' R < crlf.txt], [0], [dnl
+ 1 R
+ 2 R
+ 3 R
+ 4 R
+ 5 R
+])
+AT_CLEANUP
--
1.7.10.4
- [PATCH] Make text data file use system native line ends by default.,
Ben Pfaff <=