[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[8059] move XS modules
From: |
gavinsmith0123 |
Subject: |
[8059] move XS modules |
Date: |
Tue, 14 Aug 2018 06:27:12 -0400 (EDT) |
Revision: 8059
http://svn.sv.gnu.org/viewvc/?view=rev&root=texinfo&revision=8059
Author: gavin
Date: 2018-08-14 06:27:11 -0400 (Tue, 14 Aug 2018)
Log Message:
-----------
move XS modules
Modified Paths:
--------------
trunk/ChangeLog
trunk/tp/Makefile.am
trunk/tp/Texinfo/Convert/Paragraph.pm
trunk/tp/Texinfo/ModulePath.pm.in
trunk/tp/Texinfo/XS/TestXS.pm
trunk/tp/Texinfo/XS/XSParagraph.xs
Added Paths:
-----------
trunk/tp/Texinfo/XS/
trunk/tp/Texinfo/XS/xspara.c
Removed Paths:
-------------
trunk/tp/Texinfo/Convert/XSParagraph/
trunk/tp/Texinfo/XS/xspara.c
Modified: trunk/ChangeLog
===================================================================
--- trunk/ChangeLog 2018-08-14 10:05:33 UTC (rev 8058)
+++ trunk/ChangeLog 2018-08-14 10:27:11 UTC (rev 8059)
@@ -1,5 +1,22 @@
2018-08-14 Gavin Smith <address@hidden>
+ * tp/Texinfo/Convert/XSParagraph, tp/Texinfo/XS:
+ Move subdirectory. This is intended to be a subdirectory for all
+ XS modules and to remove the duplication that exists between
+ tp/Texinfo/Convert/XSParagraph and tp/Texinfo/MiscXS.
+
+ * configure.ac,
+ * tp/Makefile.am,
+ * tp/Texinfo/Convert/Paragraph.pm,
+ * tp/Texinfo/ModulePath.pm.in,
+ * tp/Texinfo/XS/XSParagraph.xs,
+ * tp/Texinfo/XS/TestXS.pm:
+ Refer to Texinfo/XS subdirectory instead of
+ Texinfo/Convert/XSParagraph and namespaces beginning
+ "Texinfo::XS" instead of "Texinfo::Convert::XSParagraph".
+
+2018-08-14 Gavin Smith <address@hidden>
+
* info/t/Init-test.inc: Do not check whether the terminal needs
to be reset with stty, because the command that caused this
problem ("read -t") is not being used any more.
Modified: trunk/tp/Makefile.am
===================================================================
--- trunk/tp/Makefile.am 2018-08-14 10:05:33 UTC (rev 8058)
+++ trunk/tp/Makefile.am 2018-08-14 10:27:11 UTC (rev 8059)
@@ -1,8 +1,8 @@
# $Id$
# Makefile.am for texinfo/tp.
#
-# Copyright 2011, 2012, 2013, 2014, 2015, 2016 Free Software Foundation,
-# Inc.
+# Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 Free Software
+# Foundation, Inc.
#
# This file is free software; as a special exception the author gives
# unlimited permission to copy and/or distribute it, with or without
@@ -29,12 +29,12 @@
SUBDIRS =
if ! DISABLE_XS
-SUBDIRS += Texinfo/Convert/XSParagraph Texinfo/MiscXS
+SUBDIRS += Texinfo/XS Texinfo/MiscXS
endif
SUBDIRS += . tests
-DIST_SUBDIRS = Texinfo/Convert/XSParagraph Texinfo/MiscXS tests
+DIST_SUBDIRS = Texinfo/XS Texinfo/MiscXS tests
bin_SCRIPTS = texi2any
@@ -197,8 +197,8 @@
T_LOG_COMPILER = $(PERL)
AM_T_LOG_FLAGS = -w
AM_T_LOG_FLAGS += -I .
-AM_T_LOG_FLAGS += -ITexinfo/Convert/XSParagraph
-AM_T_LOG_FLAGS += -I$(srcdir)/Texinfo/Convert/XSParagraph
+AM_T_LOG_FLAGS += -ITexinfo/XS
+AM_T_LOG_FLAGS += -I$(srcdir)/Texinfo/XS
AM_TESTS_ENVIRONMENT = srcdir="$(srcdir)"; export srcdir;
top_srcdir="$(top_srcdir)"; export top_srcdir; builddir="$(builddir)"; export
buildir; top_builddir="$(top_builddir)"; export top_builddir;
Modified: trunk/tp/Texinfo/Convert/Paragraph.pm
===================================================================
--- trunk/tp/Texinfo/Convert/Paragraph.pm 2018-08-14 10:05:33 UTC (rev
8058)
+++ trunk/tp/Texinfo/Convert/Paragraph.pm 2018-08-14 10:27:11 UTC (rev
8059)
@@ -1,4 +1,4 @@
-# Copyright 2014, 2015, 2016 Free Software Foundation, Inc.
+# Copyright 2014, 2015, 2016, 2017, 2018 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -57,7 +57,7 @@
my $package = Texinfo::XSLoader::init (
"Texinfo::Convert::Paragraph",
- "Texinfo::Convert::XSParagraph::XSParagraph",
+ "Texinfo::XS::XSParagraph",
"Texinfo::Convert::ParagraphNonXS",
"XSParagraph",
1,
Modified: trunk/tp/Texinfo/ModulePath.pm.in
===================================================================
--- trunk/tp/Texinfo/ModulePath.pm.in 2018-08-14 10:05:33 UTC (rev 8058)
+++ trunk/tp/Texinfo/ModulePath.pm.in 2018-08-14 10:27:11 UTC (rev 8059)
@@ -79,7 +79,7 @@
# XSParagraph.la is generated in the build directory.
if (defined($ENV{'top_builddir'})) {
$libexec_dir = File::Spec->catdir($ENV{'top_builddir'}, 'tp',
- 'Texinfo', 'Convert', 'XSParagraph');
+ 'Texinfo', 'XS');
}
}
Modified: trunk/tp/Texinfo/XS/TestXS.pm
===================================================================
--- trunk/tp/Texinfo/Convert/XSParagraph/TestXS.pm 2018-06-04 09:46:18 UTC
(rev 8003)
+++ trunk/tp/Texinfo/XS/TestXS.pm 2018-08-14 10:27:11 UTC (rev 8059)
@@ -1,4 +1,4 @@
-# Copyright 2014, 2015, 2016 Free Software Foundation, Inc.
+# Copyright 2014, 2015, 2016, 2018 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -13,7 +13,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-package Texinfo::Convert::XSParagraph::TestXS;
+package Texinfo::XS::TestXS;
use 5.00405;
use strict;
Modified: trunk/tp/Texinfo/XS/XSParagraph.xs
===================================================================
--- trunk/tp/Texinfo/Convert/XSParagraph/XSParagraph.xs 2018-06-04 09:46:18 UTC
(rev 8003)
+++ trunk/tp/Texinfo/XS/XSParagraph.xs 2018-08-14 10:27:11 UTC (rev 8059)
@@ -11,7 +11,7 @@
#include "xspara.h"
-MODULE = Texinfo::Convert::XSParagraph::XSParagraph PACKAGE =
Texinfo::Convert::XSParagraph::XSParagraph PREFIX = xspara_
+MODULE = Texinfo::XS::XSParagraph PACKAGE = Texinfo::XS::XSParagraph PREFIX =
xspara_
# Copyright 2010, 2011, 2012, 2013, 2014, 2015 Free Software Foundation, Inc.
#
Deleted: trunk/tp/Texinfo/XS/xspara.c
===================================================================
--- trunk/tp/Texinfo/Convert/XSParagraph/xspara.c 2018-06-04 09:46:18 UTC
(rev 8003)
+++ trunk/tp/Texinfo/XS/xspara.c 2018-08-14 10:27:11 UTC (rev 8059)
@@ -1,1188 +0,0 @@
-/* Copyright 2010, 2011, 2012, 2013, 2014, 2015, 2016 Free Software
- Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-#ifdef HAVE_CONFIG_H
- #include <config.h>
-#endif
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <locale.h>
-#ifndef _WIN32
-#include <langinfo.h>
-#else /* _WIN32 */
-/* Workaround for problems caused in mingw.org's MinGW build by
- Gnulib's wchar.h overriding the wint_t type definition, which
- causes compilation errors when perl.h is included below, because
- perl.h includes ctype.h. */
-#include <ctype.h>
-#endif
-#include <wchar.h>
-#include <wctype.h>
-
-/* See "How do I use all this in extensions" in 'man perlguts'. */
-#define PERL_NO_GET_CONTEXT
-
-#include "EXTERN.h"
-#include "perl.h"
-#include "XSUB.h"
-
-#include "ppport.h"
-
-#include "xspara.h"
-
-#include "text.h"
-
-typedef struct {
- TEXT space; /* Pending space, to be output before the pending word. */
- TEXT word; /* Pending word. If outputting this would have led to
- the line to be too long, the line should have been cut before
- saving it. */
-
- /* When word.end == 0, this indicates a word of length 0. */
- int invisible_pending_word;
-
- /* Length of space in multibyte characters. */
- int space_counter;
-
- /* Characters added so far in current word. */
- int word_counter;
-
- /* -2 means we are not at the end of a sentence (undefined in Perl),
- 1 means we are at the end of a sentence and French spacing is off,
- -1 means we are at the end of a sentence and French spacing is on.
- 0 means it is "inhibited". */
- int end_sentence;
-
- int max; /* Maximum length of line. */
- int indent_length; /* Columns to indent this line. */
- int indent_length_next; /* Columns to indent the rest of the lines. */
- int counter; /* Columns so far on this line. */
-
- int lines_counter; /* Lines so far added in paragraph. */
- int end_line_count; /* Number of newlines so far in an output unit, i.e.
- with add_text or add_next. */
-
- wint_t last_letter; /* Last letter in word, used to decide if we're
- at the end of a sentence. */
-
- /* Options set with set_space_protection. */
- int protect_spaces; /* Line break forbidden, as in @w. */
- int ignore_columns; /* Don't cut line at right margin. Used by
- @flushleft and @flushright. */
- int keep_end_lines; /* A newline in the input ends a line in the output.
- Used by @flushleft and @flushright. */
- int french_spacing; /* Only one space, not two, after a full stop. */
- int double_width_no_break; /* No line break between double width chars. */
-
- /* No wrapping of lines and spaces are kept as-is. */
- int unfilled;
-
- /* Do not terminate with a final newline. */
- int no_final_newline;
-
- /* Terminate with any trailing space. */
- int add_final_space;
-
- int in_use;
-} PARAGRAPH;
-
-static PARAGRAPH state;
-
-#ifdef _WIN32
-
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#include <errno.h>
-
-/* If Gnulib overrides wint_t with a wider type, we cannot use
- iswspace etc. names, whose prototypes were seen with the original
- wint_t in effect. */
-#ifdef GNULIB_defined_wint_t
-# undef iswspace
-# define iswspace(w) w32_iswspace(w)
-# undef iswupper
-# define iswupper(w) w32_iswupper(w)
-#endif
-
-char *
-w32_setlocale (int category, const char *value)
-{
- if (_stricmp (value, "en_us.utf-8") != 0)
- return NULL;
-
- /* Switch to the Windows U.S. English locale with its default
- codeset. We will handle the non-ASCII text ourselves, so the
- codeset is unimportant, and Windows doesn't support UTF-8 as the
- codeset anyway. */
- return setlocale (category, "ENU");
-}
-#define setlocale(c,v) w32_setlocale(c,v)
-
-size_t
-mbrlen (const char * __restrict__ mbs, size_t n, mbstate_t * __restrict__ ps)
-{
- unsigned char byte1 = *mbs;
-
- if (ps != NULL)
- {
- errno = ENOSYS;
- return -1;
- }
-
- return
- ((byte1 & 0x80) == 0) ? 1 : ((byte1 & 0x20) == 0) ? 2 :
- ((byte1 & 0x10) == 0) ? 3 : 4;
-}
-
-/* Convert a UTF-8 encoded multibyte string to a wide character. */
-size_t
-mbrtowc (wchar_t * __restrict__ pwc, const char * __restrict__ mbs, size_t n,
- mbstate_t * __restrict__ ps)
-{
- int len = mbrlen (mbs, n, ps);
-
- if (mbs == NULL)
- return 0;
- else
- {
- wchar_t wc[2];
- size_t n_utf16 = MultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS,
- mbs, len, wc, 2);
- if (n_utf16 == 0)
- {
- errno = EILSEQ;
- return (size_t)-1;
- }
- if (ps != NULL)
- {
- errno = ENOSYS;
- return (size_t)-1;
- }
- /* We don't support UTF-16 surrogates, because the calling code
- doesn't, and because character classification functions on
- Windows don't support anything beyond the BMP anyway. So we
- return the first character of the surrogate pair and set
- errno. */
- if (n_utf16 > 1)
- errno = ENOSYS;
- if (pwc != NULL)
- *pwc = wc[0];
-
- return len;
- }
-}
-
-int
-iswspace (wint_t wc)
-{
- /* See Unicode's Proplist.txt. */
- if ((wc >= 0x09 && wc <= 0x0D)
- || wc == 0x20
- || wc == 0x85
- || wc == 0xA0
- || wc == 0x1680
- || (wc >= 0x2000 && wc <= 0x200A)
- || wc == 0x2028
- || wc == 0x2029
- || wc == 0x202F
- || wc == 0x205F
- || wc == 0x3000)
- return 1;
-
- return 0;
-}
-
-/* FIXME: Provide a real implementation. */
-int
-wcwidth (const wchar_t wc)
-{
- return wc == 0 ? 0 : 1;
-}
-
-int
-iswupper (wint_t wi)
-{
- WORD char_type;
- wchar_t wc = wi;
- BOOL status = GetStringTypeW (CT_CTYPE1, &wc, 1, &char_type);
-
- if (!status || (char_type & C1_UPPER) == 0)
- return 0;
-
- return 1;
-}
-
-/* Avoid warnings due to redefinition of popen/pclose in Perl headers. */
-#ifdef popen
-# undef popen
-# define popen(c,m) _popen(c,m)
-#endif
-#ifdef pclose
-# undef pclose
-# define pclose(f) _pclose(f)
-#endif
-
-#endif
-
-int
-xspara_init (void)
-{
- char *utf8_locale = 0;
- int len;
- char *cur;
- char *dot;
-
- dTHX;
-
- if (setlocale (LC_CTYPE, "en_US.UTF-8")
- || setlocale (LC_CTYPE, "en_US.utf8"))
- goto success;
-
- cur = setlocale (LC_CTYPE, 0); /* Name of current locale. */
- if (!cur)
- goto failure;
- len = strlen (cur);
- if (len >= 6 && !memcmp (".UTF-8", cur + len - 6, 6)
- || len >= 5 && !memcmp (".utf8", cur + len - 5, 5)
- || len >= 6 && !memcmp (".utf-8", cur + len - 6, 6)
- || len >= 5 && !memcmp (".UTF8", cur + len - 5, 5))
- {
- setlocale (LC_CTYPE, ""); /* Use the locale from the environment. */
- goto success;
- }
-
- /* Otherwise try altering the current locale name. */
- dot = strchr (cur, '.');
- if (!dot)
- dot = cur + len;
- utf8_locale = malloc (len + 6 + 1); /* enough to add ".UTF-8" to end */
- memcpy (utf8_locale, cur, dot - cur);
- dot = utf8_locale + (dot - cur);
- memcpy (dot, ".UTF-8", 7);
- if (setlocale (LC_CTYPE, utf8_locale))
- goto success;
-
- memcpy (dot, ".utf8", 6);
- if (setlocale (LC_CTYPE, utf8_locale))
- goto success;
-
- /* Otherwise, look for any UTF-8 locale in the output of "locale -a". */
- {
- FILE *p;
- char *line = 0;
- size_t n = 0;
- ssize_t ret;
- p = popen ("locale -a", "r");
- if (!p)
- goto failure;
- while (1)
- {
- ret = getline (&line, &n, p);
- if (ret == (ssize_t) -1)
- {
- free (line);
- pclose (p);
- goto failure;
- }
- if (strstr (line, "UTF-8") || strstr (line, "utf8"))
- {
- line[ret - 1] = '\0'; /* Remove trailing newline. */
- if (setlocale (LC_CTYPE, line))
- {
- free (line);
- pclose (p);
- goto success;
- }
- }
- }
- }
-
- if (1)
- {
-failure:
- return 0; /* failure */
- }
- else
- {
-success: ;
- free (utf8_locale);
- /*
- fprintf (stderr, "tried to set LC_CTYPE to UTF-8.\n");
- fprintf (stderr, "character encoding is: %s\n",
- nl_langinfo (CODESET));
- */
- return 1; /* success */
- }
-}
-
-/* Array for storing paragraph states which aren't in use. */
-static PARAGRAPH *state_array;
-static int state_array_size;
-
-/* The slot in state_array for saving the current state. */
-static int current_state;
-
-static void
-xspara__switch_state (int id)
-{
- if (current_state == id)
- return;
- if (current_state != -1)
- memcpy (&state_array[current_state], &state, sizeof (PARAGRAPH));
-
- memcpy (&state, &state_array[id], sizeof (PARAGRAPH));
- current_state = id;
-}
-
-int
-xspara_new (HV *conf)
-{
- int i;
-
- dTHX; /* Perl boiler plate */
-
- TEXT saved_space, saved_word;
-
- /* Find an unused slot in state_array */
- for (i = 0; i < state_array_size; i++)
- {
- if (!state_array[i].in_use)
- break;
- }
- if (i == state_array_size)
- {
- state_array = realloc (state_array,
- (state_array_size += 10) * sizeof (PARAGRAPH));
- memset (state_array + i, 0, 10 * sizeof (PARAGRAPH));
- }
-
- state_array[i].in_use = 1;
- xspara__switch_state (i);
-
- /* Zero formatter, reusing storage. */
- saved_space = state.space;
- saved_word = state.word;
- memset (&state, 0, sizeof (state));
- state.space = saved_space;
- state.word = saved_word;
- state.space.end = state.word.end = 0;
- state.in_use = 1;
-
- /* Default values. */
- state.max = 72;
- state.indent_length_next = -1; /* Special value meaning undefined. */
- state.end_sentence = -2; /* Special value meaning undefined. */
- state.last_letter = L'\0';
-
- if (conf)
- xspara_init_state (conf);
-
- /* The paragraph ID. */
- return i;
-}
-
-
-/* SV is a blessed reference to an integer containing the paragraph ID. */
-void
-xspara_set_state (SV *sv)
-{
- dTHX;
-
- xspara__switch_state (SvIV (sv));
-}
-
-/* Set the state internal to this C module from the Perl hash. */
-void
-xspara_init_state (HV *hash)
-{
-#define FETCH(key) hv_fetch (hash, key, strlen (key), 0)
-#define FETCH_INT(key,where) { val = FETCH(key); \
- if (val) { where = SvIV (*val); } }
-
- SV **val;
-
- dTHX; /* This is boilerplate for interacting with Perl. */
-
- /* Fetch all these so they are set, and reset for each paragraph. */
- FETCH_INT("end_sentence", state.end_sentence);
- FETCH_INT("max", state.max);
-
- FETCH_INT("indent_length", state.indent_length);
- FETCH_INT("indent_length_next", state.indent_length_next);
- FETCH_INT("counter", state.counter);
-
- FETCH_INT("word_counter", state.word_counter);
-
- FETCH_INT("lines_counter", state.lines_counter);
- FETCH_INT("end_line_count", state.end_line_count);
-
- FETCH_INT("protect_spaces", state.protect_spaces);
- FETCH_INT("ignore_columns", state.ignore_columns);
- FETCH_INT("keep_end_lines", state.keep_end_lines);
- FETCH_INT("frenchspacing", state.french_spacing);
-
- FETCH_INT("unfilled", state.unfilled);
- FETCH_INT("no_final_newline", state.no_final_newline);
- FETCH_INT("add_final_space", state.add_final_space);
-
- val = FETCH("word");
- if (val)
- {
- fprintf (stderr, "Bug: setting 'word' is not supported.\n");
- abort ();
- }
- val = FETCH("space");
- if (val)
- {
- fprintf (stderr, "Bug: setting 'space' is not supported.\n");
- abort ();
- }
- return;
-
-#undef FETCH
-#undef FETCH_INT
-}
-
-/* Move the state back into the Perl hash. */
-void
-xspara_get_state (HV *hash)
-{
- /* TODO: The last argument of hv_store would be a precomputed hash, which
- would save the time of calculating it. */
-#define STORE(key) hv_store (hash, key, strlen (key), val, 0)
-
- SV *val;
-
- /* Don't do anything. */
- return;
-
- dTHX; /* Perl boilerplate. */
-
- val = newSViv (state.end_sentence);
- STORE("end_sentence");
-
- val = newSViv (state.counter);
- STORE("counter");
-
- val = newSViv (state.word_counter);
- STORE("word_counter");
-
- val = newSViv (state.lines_counter);
- STORE("lines_counter");
-
- return;
-
-
-#undef STORE
-}
-
-
-/************************************************************************/
-
-
-/* Append a newline character to RESULT. */
-void
-xspara__cut_line (TEXT *result)
-{
- if (!state.ignore_columns)
- {
- xspara__end_line ();
-
- text_append (result, "\n");
- }
-}
-
-int
-xspara_end_line_count (void)
-{
- return state.end_line_count;
-}
-
-/* End a line (throwing away a pending space, which we don't need)
- Note _end_line in Paragraph.pm returned "\n". */
-void
-xspara__end_line (void)
-{
- state.counter = 0;
- state.space.end = 0;
- state.space_counter = 0;
-
- /* This will only be true for the first line of output. */
- if (state.indent_length_next != -1)
- {
- state.indent_length = state.indent_length_next;
- state.indent_length_next = -1;
- }
-
- state.lines_counter++;
- state.end_line_count++;
-}
-
-char *
-xspara_end_line (void)
-{
- state.end_line_count = 0;
- xspara__end_line ();
- return "\n";
-}
-
-/* Return concatenation of SPACE and WORD. */
-char *
-xspara_get_pending (void)
-{
- TEXT t;
- text_init (&t);
- text_append_n (&t, state.space.text, state.space.end);
- text_append_n (&t, state.word.text, state.word.end);
- return t.text;
-}
-
-/* Append to RESULT pending space followed by pending word, clearing them
- afterwards. Assume we don't need to wrap a line. Only add spaces without
a
- word if ADD_SPACES. */
-void
-xspara__add_pending_word (TEXT *result, int add_spaces)
-{
- if (state.word.end == 0 && !state.invisible_pending_word && !add_spaces)
- return;
-
- if (state.indent_length > state.counter)
- {
- int i;
- /* If we are not up to the left margin yet, output spaces to get there,
- and ignore 'state.space', the pending space string. In this case
- state.counter is probably 0. */
-
- for (i = 0; i < state.indent_length - state.counter; i++)
- text_append (result, " ");
- state.counter = state.indent_length;
-
- /* Do not output leading spaces after the indent, unless 'unfilled'
- is on. */
- if (!state.unfilled)
- state.space.end = 0;
- }
-
- if (state.space.end > 0)
- {
- text_append_n (result, state.space.text, state.space.end);
-
- state.counter += state.space_counter;
- state.space.end = 0;
- state.space_counter = 0;
- }
-
- if (state.word.end > 0 || state.invisible_pending_word)
- {
- text_append_n (result, state.word.text, state.word.end);
- state.counter += state.word_counter;
-
- state.word.end = 0;
- state.word_counter = 0;
- state.invisible_pending_word = 0;
- }
-}
-
-/* Function for users of this module. */
-char *
-xspara_add_pending_word (int add_spaces)
-{
- TEXT ret;
-
- text_init (&ret);
- state.end_line_count = 0;
- xspara__add_pending_word (&ret, add_spaces);
- if (ret.text)
- return ret.text;
- else
- return "";
-}
-
-/* End a paragraph. */
-char *
-xspara_end (void)
-{
- TEXT ret;
- text_init (&ret);
- state.end_line_count = 0;
- xspara__add_pending_word (&ret, state.add_final_space);
- if (!state.no_final_newline && state.counter != 0)
- {
- text_append (&ret, "\n");
- state.lines_counter++;
- state.end_line_count++;
- }
-
- /* Now it's time to forget about the state. */
- state_array[current_state].in_use = 0;
- state.in_use = 0;
-
- /* Don't do this so we can get the closing line counts. */
- /* current_state = -1; */
-
- if (ret.text)
- return ret.text;
- else
- return "";
-}
-
-/* Add WORD to paragraph in RESULT, not refilling WORD. If we go past the end
- of the line start a new one. TRANSPARENT means that the letters in WORD
- are ignored for the purpose of deciding whether a full stop ends a sentence
- or not. */
-void
-xspara__add_next (TEXT *result, char *word, int word_len, int transparent)
-{
- int disinhibit = 0;
- if (!word)
- return;
-
- if (word_len >= 1 && word[word_len - 1] == '\b')
- {
- word[--word_len] = '\0';
- disinhibit = 1;
- }
-
- if (state.word.end == 0 && !state.invisible_pending_word)
- {
- /* Check if we are at the end of a sentence and if we need to
- output two spaces after the full stop. If so, check if the
- word we are given begins with whitespace. If it doesn't,
- double the pending space.
-
- We checked above if there was a pending word because if there
- was, it is due to be output after the end-sentence whitespace,
- not the string that was passed as an argument to this function.
- */
- state.last_letter = L'\0';
-
- if (state.counter != 0 && state.space.end > 0
- && state.end_sentence == 1 && !state.french_spacing)
- {
- wchar_t wc;
- size_t char_len;
-
- char_len = mbrtowc (&wc, word, word_len, NULL);
- if ((long) char_len > 0 && !iswspace (wc))
- {
- /* Make the pending space up to two spaces. */
- while (state.space_counter < 2)
- {
- text_append_n (&state.space, " ", 1);
- state.space_counter++;
- }
- }
-
- state.end_sentence = -2;
- }
- }
-
- text_append_n (&state.word, word, word_len);
- if (word_len == 0 && word)
- state.invisible_pending_word = 1;
-
- if (!transparent)
- {
- if (disinhibit)
- state.last_letter = L'a'; /* a lower-case letter */
- else
- {
- /* Save last character in WORD */
- char *p = word + word_len;
- int len = 0;
- while (p > word)
- {
- p--; len++;
- if ((long) mbrlen(p, len, NULL) > 0)
- {
- wchar_t wc = L'\0';
- mbrtowc (&wc, p, len, NULL);
- if (!wcschr (L".?!\"')]", wc))
- {
- state.last_letter = wc;
- break;
- }
- }
- }
-
- }
- }
-
- if (strchr (word, '\n'))
- {
- /* If there was a newline in the word we just added, put the entire
- pending ouput in the results string, and start a new line. */
- xspara__add_pending_word (result, 0);
- xspara__end_line ();
- }
- else
- {
- /* The possibility of two-column characters is ignored here. */
-
- /* Calculate length of multibyte string in characters. */
- int len = 0;
- int left = word_len;
- wchar_t w;
- char *p = word;
-
- while (left > 0)
- {
- int char_len = mbrtowc (&w, p, left, NULL);
- left -= char_len;
- p += char_len;
- len++;
- }
-
- state.word_counter += len;
- }
-
- /* TODO: Shift this into the "else" clause above, because
- xspara__end_line would have set state.counter to 0. */
- if (state.counter != 0
- && state.counter + state.word_counter + state.space_counter
- > state.max)
- {
- xspara__cut_line (result);
- }
-}
-
-/* Like _add_next but zero end_line_count at beginning. */
-char *
-xspara_add_next (char *text, int text_len, int transparent)
-{
- TEXT t;
-
- text_init (&t);
- state.end_line_count = 0;
- xspara__add_next (&t, text, text_len, transparent);
-
- if (t.space > 0)
- return t.text;
- else
- return "";
-}
-
-void
-xspara_remove_end_sentence (void)
-{
- state.end_sentence = 0;
-}
-
-void
-xspara_add_end_sentence (int value)
-{
- state.end_sentence = value;
-}
-
-void
-xspara_allow_end_sentence (void)
-{
- state.last_letter = L'a'; /* A lower-case letter. */
-}
-
-/* -1 in a parameter means leave that value as it is. */
-char *
-xspara_set_space_protection (int protect_spaces,
- int ignore_columns,
- int keep_end_lines,
- int french_spacing,
- int double_width_no_break)
-{
- if (protect_spaces != -1)
- state.protect_spaces = protect_spaces;
- if (ignore_columns != -1)
- state.ignore_columns = ignore_columns;
- if (keep_end_lines != -1)
- state.keep_end_lines = keep_end_lines;
- if (double_width_no_break != -1)
- state.double_width_no_break = double_width_no_break;
-
- /*fprintf (stderr, "SETTING SPACE (%d, %d, %d, %d)\n",
- protect_spaces,
- ignore_columns,
- keep_end_lines,
- french_spacing);*/
-
- /* If at the end of a sentence, and due to output the end of sentence
- space, and we switch to French spacing, then make the space up to
- two spaces.
-
- FIXME: This seems back-to-front: We want two spaces if we switch FROM
- French spacing. */
-
- if (state.french_spacing == 0
- && french_spacing != -1 && french_spacing != 0
- && state.end_sentence != -2 && state.end_sentence != 0
- && state.counter != 0
- && state.space.end > 0
- && state.word.end == 0 && !state.invisible_pending_word)
- {
- while (state.space_counter < 2)
- {
- text_append_n (&state.space, " ", 1);
- state.space_counter++;
- }
-
- /* End of sentence done. */
- state.end_sentence = -2;
- }
-
- if (french_spacing != -1)
- {
- state.french_spacing = french_spacing;
- }
-
- if (protect_spaces != -1 && state.protect_spaces)
- {
- if (state.word.end == 0)
- {
- /* In _add_pending_word this meant that an "empty word" would
- be output. This makes "a @w{} b" -> "a b", not "a b", and
- "a @w{}" at end of paragraph -> "a ", not "a". */
-
- state.invisible_pending_word = 1;
- }
- }
-
- return ""; /* TODO: Check if we can remove this. */
-}
-
-/*****************************************************************/
-
-
-/* Return string to be added to paragraph contents, wrapping text. This
- function relies on there being a UTF-8 locale in LC_CTYPE for mbrtowc to
- work correctly. */
-char *
-xspara_add_text (char *text)
-{
- char *p = text;
- int len;
- wchar_t wc;
- size_t char_len;
- TEXT result;
- dTHX;
-
- text_init (&result);
-
- len = strlen (text); /* FIXME: Get this as an argument */
- state.end_line_count = 0;
-
- while (len > 0)
- {
- char_len = mbrtowc (&wc, p, len, NULL);
- if ((long) char_len == 0)
- break; /* Null character. Shouldn't happen. */
- else if ((long) char_len < 0)
- {
- p++; len--; /* Invalid. Just try to keep going. */
- continue;
- }
-
- /* 00A0 and 202F are non-breaking spaces in Unicode. */
- if (iswspace (wc) && wc != L'\x00a0' && wc != L'\x202f')
- {
- state.last_letter = L'\0';
-
- /* If protect_spaces is on, ... */
- if (state.protect_spaces)
- {
- /* Append the spaces to the pending word. */
- text_append_n (&state.word, p, char_len);
- state.word_counter++;
-
- if (strchr (state.word.text, '\n'))
- {
- /* Replace any '\n' with a ' '. Note that state.word_counter
- will still be correct after this. */
- char *ptr = state.word.text;
- while (*ptr)
- {
- if (*ptr == '\n')
- *ptr = ' ';
- ptr++;
- }
- }
-
- if (state.counter != 0
- && state.counter + state.word_counter + state.space_counter
- > state.max)
- {
- xspara__cut_line (&result);
- }
- }
- else /* protect_spaces off */
- {
- int pending = state.invisible_pending_word;
- xspara__add_pending_word (&result, 0);
-
- if (state.counter != 0 || state.unfilled || pending)
- {
- /* If we are at the end of a sentence where two spaces
- are required. */
- if (state.end_sentence == 1
- && !state.french_spacing
- && !state.unfilled)
- {
- wchar_t q_char;
- size_t q_len;
- int at_least_two = 0;
-
- /* Check if the next character is whitespace as well. */
- q_len = mbrtowc (&q_char,
- p + char_len, len - char_len,
- NULL);
- if ((long) q_len > 0)
- {
- if (iswspace (q_char))
- at_least_two = 1;
- }
-
- /* If we have an existing pending space, or if we have
- at least two whitespace characters in a row, set the
- pending space to be two whitespace characters.
-
- I don't know why we do it this way. */
- if (state.space_counter >= 1 || at_least_two)
- {
- if (state.space_counter > 0)
- {
- /* Truncate to at most 2 spaces, and replace any
- '\n' or '\r' characters with ' '. */
-
- TEXT new_space;
- char *pspace;
- int pspace_left;
- int len;
- int i;
-
- text_init (&new_space);
- pspace = state.space.text;
- pspace_left = state.space.end;
- state.space_counter = 0;
-
- for (i = 0; i < 2; i++)
- {
- if (!*pspace)
- break;
- len = mbrlen (pspace, pspace_left, NULL);
-
- /* Substitute newlines in the pending space
- with spaces. */
- if (*pspace == '\n' || *pspace == '\r')
- text_append_n (&new_space, " ", 1);
- else if (len > 0)
- text_append_n (&new_space, pspace, len);
- else
- /* Skip one character and try again. */
- len = 1;
-
- state.space_counter++;
-
- pspace += len;
- pspace_left -= len;
- }
-
- state.space.end = 0;
- text_append_n (&state.space,
- new_space.text, new_space.end);
- text_destroy (&new_space);
- }
-
- /* Now get characters from the input. */
- while (state.space_counter < 2)
- {
- if (*p == '\n' || *p == '\r')
- text_append_n (&state.space, " ", 1);
- else
- text_append_n (&state.space, p, char_len);
- state.space_counter++;
-
- p += char_len; len -= char_len;
- char_len = mbrtowc (&wc, p, len, NULL);
- if ((long) char_len <= 0 || !iswspace (wc))
- break;
- }
-
- /* Skip any more following whitespace. */
- while ((long) char_len > 0 && iswspace (wc))
- {
- p += char_len; len -= char_len;
- char_len = mbrtowc (&wc, p, len, NULL);
- }
-
- /* Make it up to two characters. */
- while (state.space_counter < 2)
- {
- text_append_n (&state.space, " ", 1);
- state.space_counter++;
- }
-
- /* Reset the end_sentence flag. */
- state.end_sentence = -2;
- continue;
- }
- else
- {
- /* Otherwise, an extra space is added
- in _add_next. */
- state.space.end = 0;
- state.space_counter = 0;
- if (*p == '\n' || *p == '\r')
- text_append_n (&state.space, " ", 1);
- else
- text_append_n (&state.space, p, char_len);
- state.space_counter++;
- }
-
- }
- else /* Not at end of sentence. */
- {
- /* Only save the first space. */
- if (state.unfilled || state.space_counter < 1)
- {
- if (*p == '\n' || *p == '\r')
- {
- if (!state.unfilled)
- {
- text_append_n (&state.space, " ", 1);
- state.space_counter++;
- }
- else if (*p == '\n')
- {
- xspara__add_pending_word (&result, 0);
- xspara__end_line ();
- text_append (&result, "\n");
- }
- }
- else
- {
- text_append_n (&state.space, p, char_len);
- state.space_counter++;
- }
- }
- }
- }
- }
-
- /* If not enough space in the line for the pending space, start
- a new line. */
- if (state.counter + state.space_counter > state.max)
- {
- xspara__cut_line (&result);
- }
-
- if (!state.unfilled && *p == '\n' && state.keep_end_lines)
- {
- xspara__end_line ();
- text_append (&result, "\n");
- }
- }
- else /************** Not a white space character. *****************/
- {
- int width = wcwidth (wc);
- /*************** Double width character. *********************/
- if (width == 2)
- {
- state.last_letter = L'\0';
-
- /* We allow a line break in between Chinese characters even if
- there was no space between them, unlike single-width
- characters. */
-
- /* Append wc to state.word. */
- text_append_n (&state.word, p, char_len);
-
- state.word_counter += 2;
-
- if (state.counter != 0
- && state.counter + state.word_counter > state.max)
- {
- xspara__cut_line (&result);
- }
- /* If protect_spaces is on, accumulate the characters so that
- they can be pushed onto the next line if necessary. */
- if (!state.protect_spaces && !state.double_width_no_break)
- {
- xspara__add_pending_word (&result, 0);
- state.end_sentence = -2;
- }
- }
- /*************** Word character ******************************/
- else if (width == 1)
- {
- char *added_word;
- added_word = malloc (char_len + 1);
- memcpy (added_word, p, char_len);
- added_word[char_len] = '\0';
-
- xspara__add_next (&result, added_word, char_len, 0);
- free (added_word);
-
- /* Now check if it is considered as an end of sentence, and
- set state.end_sentence if it is. */
-
- if (strchr (".?!", *p) && !state.unfilled)
- {
- /* Doesn't count if preceded by an upper-case letter. */
- if (!iswupper (state.last_letter))
- {
- if (state.french_spacing)
- state.end_sentence = -1;
- else
- state.end_sentence = 1;
- }
- }
- else if (strchr ("\"')]", *p))
- {
- /* '"', '\'', ']' and ')' are ignored for the purpose
- of deciding whether a full stop ends a sentence. */
- }
- else
- {
- /* Otherwise reset the end of sentence marker: a full stop
in
- a string like "aaaa.bbbb" doesn't mark an end of
- sentence. */
- state.end_sentence = -2;
- state.last_letter = wc;
- }
- }
- else if (wc == L'\b')
- {
- /* Code to say that a following full stop (or question or
- exclamation mark) may be an end of sentence. */
- xspara_allow_end_sentence ();
- }
- else
- {
- /* Not printable, possibly a tab, or a combining character.
- Add it to the pending word without increasing the column
- count. */
- text_append_n (&state.word, p, char_len);
- }
- }
-
- p += char_len; len -= char_len;
- }
-
- if (result.space > 0)
- return result.text;
- else
- return "";
-}
-
-
Copied: trunk/tp/Texinfo/XS/xspara.c (from rev 8008,
trunk/tp/Texinfo/Convert/XSParagraph/xspara.c)
===================================================================
--- trunk/tp/Texinfo/XS/xspara.c (rev 0)
+++ trunk/tp/Texinfo/XS/xspara.c 2018-08-14 10:27:11 UTC (rev 8059)
@@ -0,0 +1,1197 @@
+/* Copyright 2010, 2011, 2012, 2013, 2014, 2015, 2016 Free Software
+ Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+ #include <config.h>
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <locale.h>
+#ifndef _WIN32
+#include <langinfo.h>
+#else /* _WIN32 */
+/* Workaround for problems caused in mingw.org's MinGW build by
+ Gnulib's wchar.h overriding the wint_t type definition, which
+ causes compilation errors when perl.h is included below, because
+ perl.h includes ctype.h. */
+#include <ctype.h>
+#endif
+#include <wchar.h>
+#include <wctype.h>
+
+/* See "How do I use all this in extensions" in 'man perlguts'. */
+#define PERL_NO_GET_CONTEXT
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#include "ppport.h"
+
+#include "xspara.h"
+
+#include "text.h"
+
+typedef struct {
+ TEXT space; /* Pending space, to be output before the pending word. */
+ TEXT word; /* Pending word. If outputting this would have led to
+ the line to be too long, the line should have been cut before
+ saving it. */
+
+ /* When word.end == 0, this indicates a word of length 0. */
+ int invisible_pending_word;
+
+ /* Length of space in multibyte characters. */
+ int space_counter;
+
+ /* Characters added so far in current word. */
+ int word_counter;
+
+ /* -2 means we are not at the end of a sentence (undefined in Perl),
+ 1 means we are at the end of a sentence and French spacing is off,
+ -1 means we are at the end of a sentence and French spacing is on.
+ 0 means it is "inhibited". */
+ int end_sentence;
+
+ int max; /* Maximum length of line. */
+ int indent_length; /* Columns to indent this line. */
+ int indent_length_next; /* Columns to indent the rest of the lines. */
+ int counter; /* Columns so far on this line. */
+
+ int lines_counter; /* Lines so far added in paragraph. */
+ int end_line_count; /* Number of newlines so far in an output unit, i.e.
+ with add_text or add_next. */
+
+ wint_t last_letter; /* Last letter in word, used to decide if we're
+ at the end of a sentence. */
+
+ /* Options set with set_space_protection. */
+ int protect_spaces; /* Line break forbidden, as in @w. */
+ int ignore_columns; /* Don't cut line at right margin. Used by
+ @flushleft and @flushright. */
+ int keep_end_lines; /* A newline in the input ends a line in the output.
+ Used by @flushleft and @flushright. */
+ int french_spacing; /* Only one space, not two, after a full stop. */
+ int double_width_no_break; /* No line break between double width chars. */
+
+ /* No wrapping of lines and spaces are kept as-is. */
+ int unfilled;
+
+ /* Do not terminate with a final newline. */
+ int no_final_newline;
+
+ /* Terminate with any trailing space. */
+ int add_final_space;
+
+ int in_use;
+} PARAGRAPH;
+
+static PARAGRAPH state;
+
+#ifdef _WIN32
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <errno.h>
+
+/* If Gnulib overrides wint_t with a wider type, we cannot use
+ iswspace etc. names, whose prototypes were seen with the original
+ wint_t in effect. */
+#ifdef GNULIB_defined_wint_t
+# undef iswspace
+# define iswspace(w) w32_iswspace(w)
+# undef iswupper
+# define iswupper(w) w32_iswupper(w)
+#endif
+
+char *
+w32_setlocale (int category, const char *value)
+{
+ if (_stricmp (value, "en_us.utf-8") != 0)
+ return NULL;
+
+ /* Switch to the Windows U.S. English locale with its default
+ codeset. We will handle the non-ASCII text ourselves, so the
+ codeset is unimportant, and Windows doesn't support UTF-8 as the
+ codeset anyway. */
+ return setlocale (category, "ENU");
+}
+#define setlocale(c,v) w32_setlocale(c,v)
+
+size_t
+mbrlen (const char * __restrict__ mbs, size_t n, mbstate_t * __restrict__ ps)
+{
+ unsigned char byte1 = *mbs;
+
+ if (ps != NULL)
+ {
+ errno = ENOSYS;
+ return -1;
+ }
+
+ return
+ ((byte1 & 0x80) == 0) ? 1 : ((byte1 & 0x20) == 0) ? 2 :
+ ((byte1 & 0x10) == 0) ? 3 : 4;
+}
+
+/* Convert a UTF-8 encoded multibyte string to a wide character. */
+size_t
+mbrtowc (wchar_t * __restrict__ pwc, const char * __restrict__ mbs, size_t n,
+ mbstate_t * __restrict__ ps)
+{
+ int len = mbrlen (mbs, n, ps);
+
+ if (mbs == NULL)
+ return 0;
+ else
+ {
+ wchar_t wc[2];
+ size_t n_utf16 = MultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS,
+ mbs, len, wc, 2);
+ if (n_utf16 == 0)
+ {
+ errno = EILSEQ;
+ return (size_t)-1;
+ }
+ if (ps != NULL)
+ {
+ errno = ENOSYS;
+ return (size_t)-1;
+ }
+ /* We don't support UTF-16 surrogates, because the calling code
+ doesn't, and because character classification functions on
+ Windows don't support anything beyond the BMP anyway. So we
+ return the first character of the surrogate pair and set
+ errno. */
+ if (n_utf16 > 1)
+ errno = ENOSYS;
+ if (pwc != NULL)
+ *pwc = wc[0];
+
+ return len;
+ }
+}
+
+int
+iswspace (wint_t wc)
+{
+ /* See Unicode's Proplist.txt. */
+ if ((wc >= 0x09 && wc <= 0x0D)
+ || wc == 0x20
+ || wc == 0x85
+ || wc == 0xA0
+ || wc == 0x1680
+ || (wc >= 0x2000 && wc <= 0x200A)
+ || wc == 0x2028
+ || wc == 0x2029
+ || wc == 0x202F
+ || wc == 0x205F
+ || wc == 0x3000)
+ return 1;
+
+ return 0;
+}
+
+/* FIXME: Provide a real implementation. */
+int
+wcwidth (const wchar_t wc)
+{
+ return wc == 0 ? 0 : 1;
+}
+
+int
+iswupper (wint_t wi)
+{
+ WORD char_type;
+ wchar_t wc = wi;
+ BOOL status = GetStringTypeW (CT_CTYPE1, &wc, 1, &char_type);
+
+ if (!status || (char_type & C1_UPPER) == 0)
+ return 0;
+
+ return 1;
+}
+
+/* Avoid warnings due to redefinition of popen/pclose in Perl headers. */
+#ifdef popen
+# undef popen
+# define popen(c,m) _popen(c,m)
+#endif
+#ifdef pclose
+# undef pclose
+# define pclose(f) _pclose(f)
+#endif
+
+#endif
+
+int
+xspara_init (void)
+{
+ char *utf8_locale = 0;
+ int len;
+ char *cur;
+ char *dot;
+
+ dTHX;
+
+#if PERL_VERSION > 27 || (PERL_VERSION == 27 && PERL_SUBVERSION > 8)
+ /* needed due to thread-safe locale handling in newer perls */
+ switch_to_global_locale();
+#endif
+
+ if (setlocale (LC_CTYPE, "en_US.UTF-8")
+ || setlocale (LC_CTYPE, "en_US.utf8"))
+ goto success;
+
+ cur = setlocale (LC_CTYPE, 0); /* Name of current locale. */
+ if (!cur)
+ goto failure;
+ len = strlen (cur);
+ if (len >= 6 && !memcmp (".UTF-8", cur + len - 6, 6)
+ || len >= 5 && !memcmp (".utf8", cur + len - 5, 5)
+ || len >= 6 && !memcmp (".utf-8", cur + len - 6, 6)
+ || len >= 5 && !memcmp (".UTF8", cur + len - 5, 5))
+ {
+ setlocale (LC_CTYPE, ""); /* Use the locale from the environment. */
+ goto success;
+ }
+
+ /* Otherwise try altering the current locale name. */
+ dot = strchr (cur, '.');
+ if (!dot)
+ dot = cur + len;
+ utf8_locale = malloc (len + 6 + 1); /* enough to add ".UTF-8" to end */
+ memcpy (utf8_locale, cur, dot - cur);
+ dot = utf8_locale + (dot - cur);
+ memcpy (dot, ".UTF-8", 7);
+ if (setlocale (LC_CTYPE, utf8_locale))
+ goto success;
+
+ memcpy (dot, ".utf8", 6);
+ if (setlocale (LC_CTYPE, utf8_locale))
+ goto success;
+
+ /* Otherwise, look for any UTF-8 locale in the output of "locale -a". */
+ {
+ FILE *p;
+ char *line = 0;
+ size_t n = 0;
+ ssize_t ret;
+ p = popen ("locale -a", "r");
+ if (!p)
+ goto failure;
+ while (1)
+ {
+ ret = getline (&line, &n, p);
+ if (ret == (ssize_t) -1)
+ {
+ free (line);
+ pclose (p);
+ goto failure;
+ }
+ if (strstr (line, "UTF-8") || strstr (line, "utf8"))
+ {
+ line[ret - 1] = '\0'; /* Remove trailing newline. */
+ if (setlocale (LC_CTYPE, line))
+ {
+ free (line);
+ pclose (p);
+ goto success;
+ }
+ }
+ }
+ }
+
+ if (1)
+ {
+failure:
+ return 0; /* failure */
+ }
+ else
+ {
+success: ;
+ free (utf8_locale);
+#if PERL_VERSION > 27 || (PERL_VERSION == 27 && PERL_SUBVERSION > 8)
+ /* needed due to thread-safe locale handling in newer perls */
+ sync_locale();
+#endif
+ /*
+ fprintf (stderr, "tried to set LC_CTYPE to UTF-8.\n");
+ fprintf (stderr, "character encoding is: %s\n",
+ nl_langinfo (CODESET));
+ */
+ return 1; /* success */
+ }
+}
+
+/* Array for storing paragraph states which aren't in use. */
+static PARAGRAPH *state_array;
+static int state_array_size;
+
+/* The slot in state_array for saving the current state. */
+static int current_state;
+
+static void
+xspara__switch_state (int id)
+{
+ if (current_state == id)
+ return;
+ if (current_state != -1)
+ memcpy (&state_array[current_state], &state, sizeof (PARAGRAPH));
+
+ memcpy (&state, &state_array[id], sizeof (PARAGRAPH));
+ current_state = id;
+}
+
+int
+xspara_new (HV *conf)
+{
+ int i;
+
+ dTHX; /* Perl boiler plate */
+
+ TEXT saved_space, saved_word;
+
+ /* Find an unused slot in state_array */
+ for (i = 0; i < state_array_size; i++)
+ {
+ if (!state_array[i].in_use)
+ break;
+ }
+ if (i == state_array_size)
+ {
+ state_array = realloc (state_array,
+ (state_array_size += 10) * sizeof (PARAGRAPH));
+ memset (state_array + i, 0, 10 * sizeof (PARAGRAPH));
+ }
+
+ state_array[i].in_use = 1;
+ xspara__switch_state (i);
+
+ /* Zero formatter, reusing storage. */
+ saved_space = state.space;
+ saved_word = state.word;
+ memset (&state, 0, sizeof (state));
+ state.space = saved_space;
+ state.word = saved_word;
+ state.space.end = state.word.end = 0;
+ state.in_use = 1;
+
+ /* Default values. */
+ state.max = 72;
+ state.indent_length_next = -1; /* Special value meaning undefined. */
+ state.end_sentence = -2; /* Special value meaning undefined. */
+ state.last_letter = L'\0';
+
+ if (conf)
+ xspara_init_state (conf);
+
+ /* The paragraph ID. */
+ return i;
+}
+
+
+/* SV is a blessed reference to an integer containing the paragraph ID. */
+void
+xspara_set_state (SV *sv)
+{
+ dTHX;
+
+ xspara__switch_state (SvIV (sv));
+}
+
+/* Set the state internal to this C module from the Perl hash. */
+void
+xspara_init_state (HV *hash)
+{
+#define FETCH(key) hv_fetch (hash, key, strlen (key), 0)
+#define FETCH_INT(key,where) { val = FETCH(key); \
+ if (val) { where = SvIV (*val); } }
+
+ SV **val;
+
+ dTHX; /* This is boilerplate for interacting with Perl. */
+
+ /* Fetch all these so they are set, and reset for each paragraph. */
+ FETCH_INT("end_sentence", state.end_sentence);
+ FETCH_INT("max", state.max);
+
+ FETCH_INT("indent_length", state.indent_length);
+ FETCH_INT("indent_length_next", state.indent_length_next);
+ FETCH_INT("counter", state.counter);
+
+ FETCH_INT("word_counter", state.word_counter);
+
+ FETCH_INT("lines_counter", state.lines_counter);
+ FETCH_INT("end_line_count", state.end_line_count);
+
+ FETCH_INT("protect_spaces", state.protect_spaces);
+ FETCH_INT("ignore_columns", state.ignore_columns);
+ FETCH_INT("keep_end_lines", state.keep_end_lines);
+ FETCH_INT("frenchspacing", state.french_spacing);
+
+ FETCH_INT("unfilled", state.unfilled);
+ FETCH_INT("no_final_newline", state.no_final_newline);
+ FETCH_INT("add_final_space", state.add_final_space);
+
+ val = FETCH("word");
+ if (val)
+ {
+ fprintf (stderr, "Bug: setting 'word' is not supported.\n");
+ abort ();
+ }
+ val = FETCH("space");
+ if (val)
+ {
+ fprintf (stderr, "Bug: setting 'space' is not supported.\n");
+ abort ();
+ }
+ return;
+
+#undef FETCH
+#undef FETCH_INT
+}
+
+/* Move the state back into the Perl hash. */
+void
+xspara_get_state (HV *hash)
+{
+ /* TODO: The last argument of hv_store would be a precomputed hash, which
+ would save the time of calculating it. */
+#define STORE(key) hv_store (hash, key, strlen (key), val, 0)
+
+ SV *val;
+
+ /* Don't do anything. */
+ return;
+
+ dTHX; /* Perl boilerplate. */
+
+ val = newSViv (state.end_sentence);
+ STORE("end_sentence");
+
+ val = newSViv (state.counter);
+ STORE("counter");
+
+ val = newSViv (state.word_counter);
+ STORE("word_counter");
+
+ val = newSViv (state.lines_counter);
+ STORE("lines_counter");
+
+ return;
+
+
+#undef STORE
+}
+
+
+/************************************************************************/
+
+
+/* Append a newline character to RESULT. */
+void
+xspara__cut_line (TEXT *result)
+{
+ if (!state.ignore_columns)
+ {
+ xspara__end_line ();
+
+ text_append (result, "\n");
+ }
+}
+
+int
+xspara_end_line_count (void)
+{
+ return state.end_line_count;
+}
+
+/* End a line (throwing away a pending space, which we don't need)
+ Note _end_line in Paragraph.pm returned "\n". */
+void
+xspara__end_line (void)
+{
+ state.counter = 0;
+ state.space.end = 0;
+ state.space_counter = 0;
+
+ /* This will only be true for the first line of output. */
+ if (state.indent_length_next != -1)
+ {
+ state.indent_length = state.indent_length_next;
+ state.indent_length_next = -1;
+ }
+
+ state.lines_counter++;
+ state.end_line_count++;
+}
+
+char *
+xspara_end_line (void)
+{
+ state.end_line_count = 0;
+ xspara__end_line ();
+ return "\n";
+}
+
+/* Return concatenation of SPACE and WORD. */
+char *
+xspara_get_pending (void)
+{
+ TEXT t;
+ text_init (&t);
+ text_append_n (&t, state.space.text, state.space.end);
+ text_append_n (&t, state.word.text, state.word.end);
+ return t.text;
+}
+
+/* Append to RESULT pending space followed by pending word, clearing them
+ afterwards. Assume we don't need to wrap a line. Only add spaces without
a
+ word if ADD_SPACES. */
+void
+xspara__add_pending_word (TEXT *result, int add_spaces)
+{
+ if (state.word.end == 0 && !state.invisible_pending_word && !add_spaces)
+ return;
+
+ if (state.indent_length > state.counter)
+ {
+ int i;
+ /* If we are not up to the left margin yet, output spaces to get there,
+ and ignore 'state.space', the pending space string. In this case
+ state.counter is probably 0. */
+
+ for (i = 0; i < state.indent_length - state.counter; i++)
+ text_append (result, " ");
+ state.counter = state.indent_length;
+
+ /* Do not output leading spaces after the indent, unless 'unfilled'
+ is on. */
+ if (!state.unfilled)
+ state.space.end = 0;
+ }
+
+ if (state.space.end > 0)
+ {
+ text_append_n (result, state.space.text, state.space.end);
+
+ state.counter += state.space_counter;
+ state.space.end = 0;
+ state.space_counter = 0;
+ }
+
+ if (state.word.end > 0 || state.invisible_pending_word)
+ {
+ text_append_n (result, state.word.text, state.word.end);
+ state.counter += state.word_counter;
+
+ state.word.end = 0;
+ state.word_counter = 0;
+ state.invisible_pending_word = 0;
+ }
+}
+
+/* Function for users of this module. */
+char *
+xspara_add_pending_word (int add_spaces)
+{
+ TEXT ret;
+
+ text_init (&ret);
+ state.end_line_count = 0;
+ xspara__add_pending_word (&ret, add_spaces);
+ if (ret.text)
+ return ret.text;
+ else
+ return "";
+}
+
+/* End a paragraph. */
+char *
+xspara_end (void)
+{
+ TEXT ret;
+ text_init (&ret);
+ state.end_line_count = 0;
+ xspara__add_pending_word (&ret, state.add_final_space);
+ if (!state.no_final_newline && state.counter != 0)
+ {
+ text_append (&ret, "\n");
+ state.lines_counter++;
+ state.end_line_count++;
+ }
+
+ /* Now it's time to forget about the state. */
+ state_array[current_state].in_use = 0;
+ state.in_use = 0;
+
+ /* Don't do this so we can get the closing line counts. */
+ /* current_state = -1; */
+
+ if (ret.text)
+ return ret.text;
+ else
+ return "";
+}
+
+/* Add WORD to paragraph in RESULT, not refilling WORD. If we go past the end
+ of the line start a new one. TRANSPARENT means that the letters in WORD
+ are ignored for the purpose of deciding whether a full stop ends a sentence
+ or not. */
+void
+xspara__add_next (TEXT *result, char *word, int word_len, int transparent)
+{
+ int disinhibit = 0;
+ if (!word)
+ return;
+
+ if (word_len >= 1 && word[word_len - 1] == '\b')
+ {
+ word[--word_len] = '\0';
+ disinhibit = 1;
+ }
+
+ if (state.word.end == 0 && !state.invisible_pending_word)
+ {
+ /* Check if we are at the end of a sentence and if we need to
+ output two spaces after the full stop. If so, check if the
+ word we are given begins with whitespace. If it doesn't,
+ double the pending space.
+
+ We checked above if there was a pending word because if there
+ was, it is due to be output after the end-sentence whitespace,
+ not the string that was passed as an argument to this function.
+ */
+ state.last_letter = L'\0';
+
+ if (state.counter != 0 && state.space.end > 0
+ && state.end_sentence == 1 && !state.french_spacing)
+ {
+ wchar_t wc;
+ size_t char_len;
+
+ char_len = mbrtowc (&wc, word, word_len, NULL);
+ if ((long) char_len > 0 && !iswspace (wc))
+ {
+ /* Make the pending space up to two spaces. */
+ while (state.space_counter < 2)
+ {
+ text_append_n (&state.space, " ", 1);
+ state.space_counter++;
+ }
+ }
+
+ state.end_sentence = -2;
+ }
+ }
+
+ text_append_n (&state.word, word, word_len);
+ if (word_len == 0 && word)
+ state.invisible_pending_word = 1;
+
+ if (!transparent)
+ {
+ if (disinhibit)
+ state.last_letter = L'a'; /* a lower-case letter */
+ else
+ {
+ /* Save last character in WORD */
+ char *p = word + word_len;
+ int len = 0;
+ while (p > word)
+ {
+ p--; len++;
+ if ((long) mbrlen(p, len, NULL) > 0)
+ {
+ wchar_t wc = L'\0';
+ mbrtowc (&wc, p, len, NULL);
+ if (!wcschr (L".?!\"')]", wc))
+ {
+ state.last_letter = wc;
+ break;
+ }
+ }
+ }
+
+ }
+ }
+
+ if (strchr (word, '\n'))
+ {
+ /* If there was a newline in the word we just added, put the entire
+ pending ouput in the results string, and start a new line. */
+ xspara__add_pending_word (result, 0);
+ xspara__end_line ();
+ }
+ else
+ {
+ /* The possibility of two-column characters is ignored here. */
+
+ /* Calculate length of multibyte string in characters. */
+ int len = 0;
+ int left = word_len;
+ wchar_t w;
+ char *p = word;
+
+ while (left > 0)
+ {
+ int char_len = mbrtowc (&w, p, left, NULL);
+ left -= char_len;
+ p += char_len;
+ len++;
+ }
+
+ state.word_counter += len;
+ }
+
+ /* TODO: Shift this into the "else" clause above, because
+ xspara__end_line would have set state.counter to 0. */
+ if (state.counter != 0
+ && state.counter + state.word_counter + state.space_counter
+ > state.max)
+ {
+ xspara__cut_line (result);
+ }
+}
+
+/* Like _add_next but zero end_line_count at beginning. */
+char *
+xspara_add_next (char *text, int text_len, int transparent)
+{
+ TEXT t;
+
+ text_init (&t);
+ state.end_line_count = 0;
+ xspara__add_next (&t, text, text_len, transparent);
+
+ if (t.space > 0)
+ return t.text;
+ else
+ return "";
+}
+
+void
+xspara_remove_end_sentence (void)
+{
+ state.end_sentence = 0;
+}
+
+void
+xspara_add_end_sentence (int value)
+{
+ state.end_sentence = value;
+}
+
+void
+xspara_allow_end_sentence (void)
+{
+ state.last_letter = L'a'; /* A lower-case letter. */
+}
+
+/* -1 in a parameter means leave that value as it is. */
+char *
+xspara_set_space_protection (int protect_spaces,
+ int ignore_columns,
+ int keep_end_lines,
+ int french_spacing,
+ int double_width_no_break)
+{
+ if (protect_spaces != -1)
+ state.protect_spaces = protect_spaces;
+ if (ignore_columns != -1)
+ state.ignore_columns = ignore_columns;
+ if (keep_end_lines != -1)
+ state.keep_end_lines = keep_end_lines;
+ if (double_width_no_break != -1)
+ state.double_width_no_break = double_width_no_break;
+
+ /*fprintf (stderr, "SETTING SPACE (%d, %d, %d, %d)\n",
+ protect_spaces,
+ ignore_columns,
+ keep_end_lines,
+ french_spacing);*/
+
+ /* If at the end of a sentence, and due to output the end of sentence
+ space, and we switch to French spacing, then make the space up to
+ two spaces.
+
+ FIXME: This seems back-to-front: We want two spaces if we switch FROM
+ French spacing. */
+
+ if (state.french_spacing == 0
+ && french_spacing != -1 && french_spacing != 0
+ && state.end_sentence != -2 && state.end_sentence != 0
+ && state.counter != 0
+ && state.space.end > 0
+ && state.word.end == 0 && !state.invisible_pending_word)
+ {
+ while (state.space_counter < 2)
+ {
+ text_append_n (&state.space, " ", 1);
+ state.space_counter++;
+ }
+
+ /* End of sentence done. */
+ state.end_sentence = -2;
+ }
+
+ if (french_spacing != -1)
+ {
+ state.french_spacing = french_spacing;
+ }
+
+ if (protect_spaces != -1 && state.protect_spaces)
+ {
+ if (state.word.end == 0)
+ {
+ /* In _add_pending_word this meant that an "empty word" would
+ be output. This makes "a @w{} b" -> "a b", not "a b", and
+ "a @w{}" at end of paragraph -> "a ", not "a". */
+
+ state.invisible_pending_word = 1;
+ }
+ }
+
+ return ""; /* TODO: Check if we can remove this. */
+}
+
+/*****************************************************************/
+
+
+/* Return string to be added to paragraph contents, wrapping text. This
+ function relies on there being a UTF-8 locale in LC_CTYPE for mbrtowc to
+ work correctly. */
+char *
+xspara_add_text (char *text)
+{
+ char *p = text;
+ int len;
+ wchar_t wc;
+ size_t char_len;
+ TEXT result;
+ dTHX;
+
+ text_init (&result);
+
+ len = strlen (text); /* FIXME: Get this as an argument */
+ state.end_line_count = 0;
+
+ while (len > 0)
+ {
+ char_len = mbrtowc (&wc, p, len, NULL);
+ if ((long) char_len == 0)
+ break; /* Null character. Shouldn't happen. */
+ else if ((long) char_len < 0)
+ {
+ p++; len--; /* Invalid. Just try to keep going. */
+ continue;
+ }
+
+ /* 00A0 and 202F are non-breaking spaces in Unicode. */
+ if (iswspace (wc) && wc != L'\x00a0' && wc != L'\x202f')
+ {
+ state.last_letter = L'\0';
+
+ /* If protect_spaces is on, ... */
+ if (state.protect_spaces)
+ {
+ /* Append the spaces to the pending word. */
+ text_append_n (&state.word, p, char_len);
+ state.word_counter++;
+
+ if (strchr (state.word.text, '\n'))
+ {
+ /* Replace any '\n' with a ' '. Note that state.word_counter
+ will still be correct after this. */
+ char *ptr = state.word.text;
+ while (*ptr)
+ {
+ if (*ptr == '\n')
+ *ptr = ' ';
+ ptr++;
+ }
+ }
+
+ if (state.counter != 0
+ && state.counter + state.word_counter + state.space_counter
+ > state.max)
+ {
+ xspara__cut_line (&result);
+ }
+ }
+ else /* protect_spaces off */
+ {
+ int pending = state.invisible_pending_word;
+ xspara__add_pending_word (&result, 0);
+
+ if (state.counter != 0 || state.unfilled || pending)
+ {
+ /* If we are at the end of a sentence where two spaces
+ are required. */
+ if (state.end_sentence == 1
+ && !state.french_spacing
+ && !state.unfilled)
+ {
+ wchar_t q_char;
+ size_t q_len;
+ int at_least_two = 0;
+
+ /* Check if the next character is whitespace as well. */
+ q_len = mbrtowc (&q_char,
+ p + char_len, len - char_len,
+ NULL);
+ if ((long) q_len > 0)
+ {
+ if (iswspace (q_char))
+ at_least_two = 1;
+ }
+
+ /* If we have an existing pending space, or if we have
+ at least two whitespace characters in a row, set the
+ pending space to be two whitespace characters.
+
+ I don't know why we do it this way. */
+ if (state.space_counter >= 1 || at_least_two)
+ {
+ if (state.space_counter > 0)
+ {
+ /* Truncate to at most 2 spaces, and replace any
+ '\n' or '\r' characters with ' '. */
+
+ TEXT new_space;
+ char *pspace;
+ int pspace_left;
+ int len;
+ int i;
+
+ text_init (&new_space);
+ pspace = state.space.text;
+ pspace_left = state.space.end;
+ state.space_counter = 0;
+
+ for (i = 0; i < 2; i++)
+ {
+ if (!*pspace)
+ break;
+ len = mbrlen (pspace, pspace_left, NULL);
+
+ /* Substitute newlines in the pending space
+ with spaces. */
+ if (*pspace == '\n' || *pspace == '\r')
+ text_append_n (&new_space, " ", 1);
+ else if (len > 0)
+ text_append_n (&new_space, pspace, len);
+ else
+ /* Skip one character and try again. */
+ len = 1;
+
+ state.space_counter++;
+
+ pspace += len;
+ pspace_left -= len;
+ }
+
+ state.space.end = 0;
+ text_append_n (&state.space,
+ new_space.text, new_space.end);
+ text_destroy (&new_space);
+ }
+
+ /* Now get characters from the input. */
+ while (state.space_counter < 2)
+ {
+ if (*p == '\n' || *p == '\r')
+ text_append_n (&state.space, " ", 1);
+ else
+ text_append_n (&state.space, p, char_len);
+ state.space_counter++;
+
+ p += char_len; len -= char_len;
+ char_len = mbrtowc (&wc, p, len, NULL);
+ if ((long) char_len <= 0 || !iswspace (wc))
+ break;
+ }
+
+ /* Skip any more following whitespace. */
+ while ((long) char_len > 0 && iswspace (wc))
+ {
+ p += char_len; len -= char_len;
+ char_len = mbrtowc (&wc, p, len, NULL);
+ }
+
+ /* Make it up to two characters. */
+ while (state.space_counter < 2)
+ {
+ text_append_n (&state.space, " ", 1);
+ state.space_counter++;
+ }
+
+ /* Reset the end_sentence flag. */
+ state.end_sentence = -2;
+ continue;
+ }
+ else
+ {
+ /* Otherwise, an extra space is added
+ in _add_next. */
+ state.space.end = 0;
+ state.space_counter = 0;
+ if (*p == '\n' || *p == '\r')
+ text_append_n (&state.space, " ", 1);
+ else
+ text_append_n (&state.space, p, char_len);
+ state.space_counter++;
+ }
+
+ }
+ else /* Not at end of sentence. */
+ {
+ /* Only save the first space. */
+ if (state.unfilled || state.space_counter < 1)
+ {
+ if (*p == '\n' || *p == '\r')
+ {
+ if (!state.unfilled)
+ {
+ text_append_n (&state.space, " ", 1);
+ state.space_counter++;
+ }
+ else if (*p == '\n')
+ {
+ xspara__add_pending_word (&result, 0);
+ xspara__end_line ();
+ text_append (&result, "\n");
+ }
+ }
+ else
+ {
+ text_append_n (&state.space, p, char_len);
+ state.space_counter++;
+ }
+ }
+ }
+ }
+ }
+
+ /* If not enough space in the line for the pending space, start
+ a new line. */
+ if (state.counter + state.space_counter > state.max)
+ {
+ xspara__cut_line (&result);
+ }
+
+ if (!state.unfilled && *p == '\n' && state.keep_end_lines)
+ {
+ xspara__end_line ();
+ text_append (&result, "\n");
+ }
+ }
+ else /************** Not a white space character. *****************/
+ {
+ int width = wcwidth (wc);
+ /*************** Double width character. *********************/
+ if (width == 2)
+ {
+ state.last_letter = L'\0';
+
+ /* We allow a line break in between Chinese characters even if
+ there was no space between them, unlike single-width
+ characters. */
+
+ /* Append wc to state.word. */
+ text_append_n (&state.word, p, char_len);
+
+ state.word_counter += 2;
+
+ if (state.counter != 0
+ && state.counter + state.word_counter > state.max)
+ {
+ xspara__cut_line (&result);
+ }
+ /* If protect_spaces is on, accumulate the characters so that
+ they can be pushed onto the next line if necessary. */
+ if (!state.protect_spaces && !state.double_width_no_break)
+ {
+ xspara__add_pending_word (&result, 0);
+ state.end_sentence = -2;
+ }
+ }
+ /*************** Word character ******************************/
+ else if (width == 1)
+ {
+ char *added_word;
+ added_word = malloc (char_len + 1);
+ memcpy (added_word, p, char_len);
+ added_word[char_len] = '\0';
+
+ xspara__add_next (&result, added_word, char_len, 0);
+ free (added_word);
+
+ /* Now check if it is considered as an end of sentence, and
+ set state.end_sentence if it is. */
+
+ if (strchr (".?!", *p) && !state.unfilled)
+ {
+ /* Doesn't count if preceded by an upper-case letter. */
+ if (!iswupper (state.last_letter))
+ {
+ if (state.french_spacing)
+ state.end_sentence = -1;
+ else
+ state.end_sentence = 1;
+ }
+ }
+ else if (strchr ("\"')]", *p))
+ {
+ /* '"', '\'', ']' and ')' are ignored for the purpose
+ of deciding whether a full stop ends a sentence. */
+ }
+ else
+ {
+ /* Otherwise reset the end of sentence marker: a full stop
in
+ a string like "aaaa.bbbb" doesn't mark an end of
+ sentence. */
+ state.end_sentence = -2;
+ state.last_letter = wc;
+ }
+ }
+ else if (wc == L'\b')
+ {
+ /* Code to say that a following full stop (or question or
+ exclamation mark) may be an end of sentence. */
+ xspara_allow_end_sentence ();
+ }
+ else
+ {
+ /* Not printable, possibly a tab, or a combining character.
+ Add it to the pending word without increasing the column
+ count. */
+ text_append_n (&state.word, p, char_len);
+ }
+ }
+
+ p += char_len; len -= char_len;
+ }
+
+ if (result.space > 0)
+ return result.text;
+ else
+ return "";
+}
+
+
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [8059] move XS modules,
gavinsmith0123 <=