=== modified file 'src/alloc.c' --- src/alloc.c 2011-11-20 03:07:02 +0000 +++ src/alloc.c 2011-11-26 02:01:23 +0000 @@ -136,9 +136,9 @@ /* Mark, unmark, query mark bit of a Lisp string. S must be a pointer to a struct Lisp_String. */ -#define MARK_STRING(S) ((S)->size |= ARRAY_MARK_FLAG) -#define UNMARK_STRING(S) ((S)->size &= ~ARRAY_MARK_FLAG) -#define STRING_MARKED_P(S) (((S)->size & ARRAY_MARK_FLAG) != 0) +#define MARK_STRING(S) ((S)->gcmarkbit = 1) +#define UNMARK_STRING(S) ((S)->gcmarkbit = 0) +#define STRING_MARKED_P(S) ((S)->gcmarkbit) #define VECTOR_MARK(V) ((V)->header.size |= ARRAY_MARK_FLAG) #define VECTOR_UNMARK(V) ((V)->header.size &= ~ARRAY_MARK_FLAG) @@ -1733,7 +1733,8 @@ a pointer to the `u.data' member of its sdata structure; the structure starts at a constant offset in front of that. */ -#define SDATA_OF_STRING(S) ((struct sdata *) ((S)->data - SDATA_DATA_OFFSET)) +#define SDATA_OF_STRING(S) ((S)->immbit ? (abort (), (struct sdata *) NULL) \ + : ((struct sdata *) ((S)->u.ptrdata - SDATA_DATA_OFFSET))) #ifdef GC_CHECK_STRING_OVERRUN @@ -1824,12 +1825,25 @@ string_bytes (struct Lisp_String *s) { EMACS_INT nbytes = - (s->size_byte < 0 ? s->size & ~ARRAY_MARK_FLAG : s->size_byte); + (s->size_byte < 0 ? s->size : s->size_byte); - if (!PURE_POINTER_P (s) - && s->data - && nbytes != SDATA_NBYTES (SDATA_OF_STRING (s))) - abort (); + if (s->immbit) + { + if (nbytes >= STRING_IMMEDIATE_MAX) + /* Impossible immediate string. */ + abort (); + } + else + { + if (nbytes < STRING_IMMEDIATE_MAX) + /* Impossible normal string. */ + abort (); + if (!PURE_POINTER_P (s) && + s->u.ptrdata && + nbytes != SDATA_NBYTES (SDATA_OF_STRING (s))) + /* Normal non-pure string with size mismatch. */ + abort (); + } return nbytes; } @@ -2000,7 +2014,7 @@ /* Determine the number of bytes needed to store NBYTES bytes of string data. */ needed = SDATA_SIZE (nbytes); - old_data = s->data ? SDATA_OF_STRING (s) : NULL; + old_data = s->u.ptrdata ? SDATA_OF_STRING (s) : NULL; old_nbytes = GC_STRING_BYTES (s); MALLOC_BLOCK_INPUT; @@ -2060,13 +2074,11 @@ MALLOC_UNBLOCK_INPUT; data->string = s; - s->data = SDATA_DATA (data); + s->u.ptrdata = SDATA_DATA (data); #ifdef GC_CHECK_STRING_BYTES SDATA_NBYTES (data) = nbytes; #endif - s->size = nchars; - s->size_byte = nbytes; - s->data[nbytes] = '\0'; + s->u.ptrdata[nbytes] = '\0'; #ifdef GC_CHECK_STRING_OVERRUN memcpy ((char *) data + needed, string_overrun_cookie, GC_STRING_OVERRUN_COOKIE_SIZE); @@ -2109,21 +2121,20 @@ { struct Lisp_String *s = b->strings + i; - if (s->data) + if (STRING_MARKED_P (s)) + { + /* String is live; unmark it and its intervals. */ + UNMARK_STRING (s); + + if (!NULL_INTERVAL_P (s->intervals)) + UNMARK_BALANCE_INTERVALS (s->intervals); + + ++total_strings; + total_string_size += STRING_BYTES (s); + } + else { - /* String was not on free-list before. */ - if (STRING_MARKED_P (s)) - { - /* String is live; unmark it and its intervals. */ - UNMARK_STRING (s); - - if (!NULL_INTERVAL_P (s->intervals)) - UNMARK_BALANCE_INTERVALS (s->intervals); - - ++total_strings; - total_string_size += STRING_BYTES (s); - } - else + if (!s->immbit && s->u.ptrdata) { /* String is dead. Put it on the free-list. */ struct sdata *data = SDATA_OF_STRING (s); @@ -2138,20 +2149,13 @@ data->u.nbytes = GC_STRING_BYTES (s); #endif data->string = NULL; - - /* Reset the strings's `data' member so that we - know it's free. */ - s->data = NULL; - - /* Put the string on the free-list. */ - NEXT_FREE_LISP_STRING (s) = string_free_list; - string_free_list = s; - ++nfree; } - } - else - { - /* S was on the free-list before. Put it there again. */ + + /* Reset the strings's `data' member so that we know it's + free. This also zeroes an immediate string storage. */ + s->u.ptrdata = NULL; + + /* Put the string on the free-list. */ NEXT_FREE_LISP_STRING (s) = string_free_list; string_free_list = s; ++nfree; @@ -2284,7 +2288,7 @@ { xassert (tb != b || to < from); memmove (to, from, nbytes + GC_STRING_EXTRA); - to->string->data = SDATA_DATA (to); + to->string->u.ptrdata = SDATA_DATA (to); } /* Advance past the sdata we copied to. */ @@ -2533,7 +2537,15 @@ return empty_multibyte_string; s = allocate_string (); - allocate_string_data (s, nchars, nbytes); + s->size = nchars; + s->size_byte = nbytes; + if (nbytes < STRING_IMMEDIATE_MAX) + s->immbit = 1; + else + { + s->immbit = 0; + allocate_string_data (s, nchars, nbytes); + } XSETSTRING (string, s); string_chars_consed += nbytes; return string; @@ -3901,7 +3913,9 @@ return (offset >= 0 && offset % sizeof b->strings[0] == 0 && offset < (STRING_BLOCK_SIZE * sizeof b->strings[0]) - && ((struct Lisp_String *) p)->data != NULL); + /* Live immediate string always has at least one + non-zero byte, so this check will be passed. */ + && ((struct Lisp_String *) p)->u.ptrdata != NULL); } else return 0; @@ -4801,13 +4815,25 @@ struct Lisp_String *s; s = (struct Lisp_String *) pure_alloc (sizeof *s, Lisp_String); - s->data = (unsigned char *) find_string_data_in_pure (data, nbytes); - if (s->data == NULL) - { - s->data = (unsigned char *) pure_alloc (nbytes + 1, -1); - memcpy (s->data, data, nbytes); - s->data[nbytes] = '\0'; - } + + if (nbytes < STRING_IMMEDIATE_MAX) + { + memcpy (s->u.immdata, data, nbytes); + s->u.immdata[nbytes] = '\0'; + s->immbit = 1; + } + else + { + s->u.ptrdata = (unsigned char *) find_string_data_in_pure (data, nbytes); + if (s->u.ptrdata == NULL) + { + s->u.ptrdata = (unsigned char *) pure_alloc (nbytes + 1, -1); + memcpy (s->u.ptrdata, data, nbytes); + s->u.ptrdata[nbytes] = '\0'; + } + s->immbit = 0; + } + s->size = nchars; s->size_byte = multibyte ? nbytes : -1; s->intervals = NULL_INTERVAL; @@ -4828,7 +4854,19 @@ s = (struct Lisp_String *) pure_alloc (sizeof *s, Lisp_String); s->size = nchars; s->size_byte = -1; - s->data = (unsigned char *) data; + + if (nchars < STRING_IMMEDIATE_MAX) + { + memcpy (s->u.immdata, data, nchars); + s->u.immdata[nchars] = '\0'; + s->immbit = 1; + } + else + { + s->u.ptrdata = (unsigned char *) data; + s->immbit = 0; + } + s->intervals = NULL_INTERVAL; XSETSTRING (string, s); return string; === modified file 'src/fns.c' --- src/fns.c 2011-11-19 09:18:31 +0000 +++ src/fns.c 2011-11-25 10:42:04 +0000 @@ -2176,8 +2176,8 @@ int len = CHAR_STRING (charval, str); EMACS_INT size_byte = SBYTES (array); - if (INT_MULTIPLY_OVERFLOW (SCHARS (array), len) - || SCHARS (array) * len != size_byte) + if (INT_MULTIPLY_OVERFLOW (size, len) + || size * len != size_byte) error ("Attempt to change byte length of a string"); for (idx = 0; idx < size_byte; idx++) *p++ = str[idx % len]; === modified file 'src/lisp.h' --- src/lisp.h 2011-11-20 03:07:02 +0000 +++ src/lisp.h 2011-11-25 15:15:57 +0000 @@ -691,7 +691,9 @@ /* Convenience macros for dealing with Lisp strings. */ -#define SDATA(string) (XSTRING (string)->data + 0) +#define SDATA(string) (XSTRING (string)->immbit ? \ + (XSTRING (string)->u.immdata + 0) : \ + (XSTRING (string)->u.ptrdata + 0)) #define SREF(string, index) (SDATA (string)[index] + 0) #define SSET(string, index, new) (SDATA (string)[index] = (new)) #define SCHARS(string) (XSTRING (string)->size + 0) @@ -824,6 +826,9 @@ #define STRING_BYTES_BOUND \ min (MOST_POSITIVE_FIXNUM, (ptrdiff_t) min (SIZE_MAX, PTRDIFF_MAX) - 1) +/* Maximum amount of bytes, including '\0', in an immediate string. */ +#define STRING_IMMEDIATE_MAX (sizeof (unsigned char *)) + /* Mark STR as a unibyte string. */ #define STRING_SET_UNIBYTE(STR) \ do { if (EQ (STR, empty_multibyte_string)) \ @@ -843,14 +848,24 @@ /* Set text properties. */ #define STRING_SET_INTERVALS(STR, INT) (XSTRING (STR)->intervals = (INT)) -/* In a string or vector, the sign bit of the `size' is the gc mark bit */ - struct Lisp_String { - EMACS_INT size; - EMACS_INT size_byte; - INTERVAL intervals; /* text properties in this string */ - unsigned char *data; + EMACS_INT size : BITS_PER_EMACS_INT - 1; + EMACS_INT size_byte : BITS_PER_EMACS_INT - 1; + unsigned gcmarkbit : 1; + + /* 1 if this is a small immediate string with data stored within IMMDATA. + Otherwise this is a normal string with PTRDATA points to actual data. */ + unsigned immbit : 1; + + /* Text properties in this string. */ + INTERVAL intervals; + + /* Actual string data, depending on the value of IMMBIT. */ + union { + unsigned char *ptrdata; + unsigned char immdata[STRING_IMMEDIATE_MAX]; + } u; }; /* Header of vector-like objects. This documents the layout constraints on