--- ChangeLog.orig Mon Dec 27 16:31:25 2010 +++ ChangeLog Mon Dec 27 15:40:53 2010 @@ -10,7 +10,7 @@ * lib/unigbrk/gbrkprop.h: New file. * lib/unigbrk/uc-gbrk-prop.c: New file. - New module unigbrk/uc-is-grapheme-break'. + New module 'unigbrk/uc-is-grapheme-break'. * modules/unigbrk/uc-is-grapheme-break: New file. * modules/unigbrk/uc-is-grapheme-break-tests: New file. * lib/unigbrk/uc-is-grapheme-break.c: New file. --- lib/gen-uni-tables.c.orig Mon Dec 27 16:31:26 2010 +++ lib/gen-uni-tables.c Mon Dec 27 16:04:28 2010 @@ -25,9 +25,9 @@ /usr/local/share/Unidata/Blocks.txt \ /usr/local/share/Unidata/PropList-3.0.1.txt \ /usr/local/share/Unidata/EastAsianWidth.txt \ - /usr/local/share/Unidata/GraphemeBreakProperty.txt \ /usr/local/share/Unidata/LineBreak.txt \ /usr/local/share/Unidata/WordBreakProperty.txt \ + /usr/local/share/Unidata/GraphemeBreakProperty.txt \ /usr/local/share/Unidata/CompositionExclusions.txt \ /usr/local/share/Unidata/SpecialCasing.txt \ /usr/local/share/Unidata/CaseFolding.txt \ @@ -5144,232 +5144,6 @@ /* ========================================================================= */ -/* Grapheme break property. */ - -/* Possible values of the Grapheme_Cluster_Break property. */ -enum -{ - GBP_OTHER = 0, - GBP_CR = 1, - GBP_LF = 2, - GBP_CONTROL = 3, - GBP_EXTEND = 4, - GBP_PREPEND = 5, - GBP_SPACINGMARK = 6, - GBP_L = 7, - GBP_V = 8, - GBP_T = 9, - GBP_LV = 10, - GBP_LVT = 11 -}; - -/* Construction of sparse 3-level tables. */ -#define TABLE gbp_table -#define ELEMENT unsigned char -#define DEFAULT GBP_OTHER -#define xmalloc malloc -#define xrealloc realloc -#include "3level.h" - -/* The grapheme break property from the GraphemeBreakProperty.txt file. */ -int unicode_org_gbp[0x110000]; - -/* Output the per-character grapheme break property table. 
*/ -static void -output_gbp (const char *filename, const char *version) -{ - FILE *stream; - unsigned int ch, i; - struct gbp_table t; - unsigned int level1_offset, level2_offset, level3_offset; - - stream = fopen (filename, "w"); - if (stream == NULL) - { - fprintf (stderr, "cannot open '%s' for writing\n", filename); - exit (1); - } - - fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n"); - fprintf (stream, "/* Grapheme break property of Unicode characters. */\n"); - fprintf (stream, "/* Generated automatically by gen-uni-tables.c for Unicode %s. */\n", - version); - - t.p = 7; - t.q = 9; - gbp_table_init (&t); - - for (ch = 0; ch < 0x110000; ch++) - gbp_table_add (&t, ch, unicode_org_gbp[ch]); - - gbp_table_finalize (&t); - - /* Offsets in t.result, in memory of this process. */ - level1_offset = - 5 * sizeof (uint32_t); - level2_offset = - 5 * sizeof (uint32_t) - + t.level1_size * sizeof (uint32_t); - level3_offset = - 5 * sizeof (uint32_t) - + t.level1_size * sizeof (uint32_t) - + (t.level2_size << t.q) * sizeof (uint32_t); - - for (i = 0; i < 5; i++) - fprintf (stream, "#define gbrkprop_header_%d %d\n", i, - ((uint32_t *) t.result)[i]); - fprintf (stream, "static const\n"); - fprintf (stream, "struct\n"); - fprintf (stream, " {\n"); - fprintf (stream, " int level1[%zu];\n", t.level1_size); - fprintf (stream, " short level2[%zu << %d];\n", t.level2_size, t.q); - fprintf (stream, " unsigned char level3[(%zu << %d) / 2];\n", - t.level3_size, t.p); - fprintf (stream, " }\n"); - fprintf (stream, "unigbrkprop =\n"); - fprintf (stream, "{\n"); - fprintf (stream, " {"); - if (t.level1_size > 8) - fprintf (stream, "\n "); - for (i = 0; i < t.level1_size; i++) - { - uint32_t offset; - if (i > 0 && (i % 8) == 0) - fprintf (stream, "\n "); - offset = ((uint32_t *) (t.result + level1_offset))[i]; - if (offset == 0) - fprintf (stream, " %5d", -1); - else - fprintf (stream, " %5zu", - (offset - level2_offset) / sizeof (uint32_t)); - if (i+1 < t.level1_size) - 
fprintf (stream, ","); - } - if (t.level1_size > 8) - fprintf (stream, "\n "); - fprintf (stream, " },\n"); - fprintf (stream, " {"); - if (t.level2_size << t.q > 8) - fprintf (stream, "\n "); - for (i = 0; i < t.level2_size << t.q; i++) - { - uint32_t offset; - if (i > 0 && (i % 8) == 0) - fprintf (stream, "\n "); - offset = ((uint32_t *) (t.result + level2_offset))[i]; - if (offset == 0) - fprintf (stream, " %5d", -1); - else - fprintf (stream, " %5zu", - (offset - level3_offset) / sizeof (uint8_t) / 2); - if (i+1 < t.level2_size << t.q) - fprintf (stream, ","); - } - if (t.level2_size << t.q > 8) - fprintf (stream, "\n "); - fprintf (stream, " },\n"); - fprintf (stream, " {"); - if (t.level3_size << t.p > 8) - fprintf (stream, "\n "); - for (i = 0; i < (t.level3_size << t.p) / 2; i++) - { - unsigned char *p = (unsigned char *) (t.result + level3_offset); - unsigned char value0 = p[i * 2]; - unsigned char value1 = p[i * 2 + 1]; - if (i > 0 && (i % 8) == 0) - fprintf (stream, "\n "); - fprintf (stream, " 0x%02x%s", (value1 << 4) + value0, - (i+1 < (t.level3_size << t.p) / 2 ? "," : "")); - } - if (t.level3_size << t.p > 8) - fprintf (stream, "\n "); - fprintf (stream, " }\n"); - fprintf (stream, "};\n"); - - if (ferror (stream) || fclose (stream)) - { - fprintf (stderr, "error writing to '%s'\n", filename); - exit (1); - } -} - -/* Stores in unicode_org_gbp[] the grapheme breaking property from the - GraphemeBreakProperty.txt file. 
*/ -static void -fill_org_gbp (const char *graphemebreakproperty_filename) -{ - unsigned int i; - FILE *stream; - int lineno = 0; - - for (i = 0; i < 0x110000; i++) - unicode_org_gbp[i] = GBP_OTHER; - - stream = fopen (graphemebreakproperty_filename, "r"); - if (stream == NULL) - { - fprintf (stderr, "error during fopen of '%s'\n", - graphemebreakproperty_filename); - exit (1); - } - - for (;;) - { - char buf[200+1]; - unsigned int i1, i2; - char padding[200+1]; - char propname[200+1]; - int propvalue; - - lineno++; - if (fscanf (stream, "%200[^\n]\n", buf) < 1) - break; - - if (buf[0] == '\0' || buf[0] == '#') - continue; - - if (sscanf (buf, "%X..%X%[ ;]%[^ ]", &i1, &i2, padding, propname) != 4) - { - if (sscanf (buf, "%X%[ ;]%[^ ]", &i1, padding, propname) != 3) - { - fprintf (stderr, "parse error in '%s'\n", - graphemebreakproperty_filename); - exit (1); - } - i2 = i1; - } -#define PROP(name,value) \ - if (strcmp (propname, name) == 0) propvalue = value; else - PROP ("CR", GBP_CR) - PROP ("LF", GBP_LF) - PROP ("Control", GBP_CONTROL) - PROP ("Extend", GBP_EXTEND) - PROP ("Prepend", GBP_PREPEND) - PROP ("SpacingMark", GBP_SPACINGMARK) - PROP ("L", GBP_L) - PROP ("V", GBP_V) - PROP ("T", GBP_T) - PROP ("LV", GBP_LV) - PROP ("LVT", GBP_LVT) -#undef PROP - { - fprintf (stderr, "unknown property value '%s' in %s:%d\n", propname, - graphemebreakproperty_filename, lineno); - exit (1); - } - if (!(i1 <= i2 && i2 < 0x110000)) - abort (); - - for (i = i1; i <= i2; i++) - unicode_org_gbp[i] = propvalue; - } - if (ferror (stream) || fclose (stream)) - { - fprintf (stderr, "error reading from '%s'\n", graphemebreakproperty_filename); - exit (1); - } -} - /* Line breaking classification. */ enum @@ -6972,6 +6746,234 @@ /* ========================================================================= */ +/* Grapheme break property. */ + +/* Possible values of the Grapheme_Cluster_Break property. 
*/ +enum +{ + GBP_OTHER = 0, + GBP_CR = 1, + GBP_LF = 2, + GBP_CONTROL = 3, + GBP_EXTEND = 4, + GBP_PREPEND = 5, + GBP_SPACINGMARK = 6, + GBP_L = 7, + GBP_V = 8, + GBP_T = 9, + GBP_LV = 10, + GBP_LVT = 11 +}; + +/* Construction of sparse 3-level tables. */ +#define TABLE gbp_table +#define ELEMENT unsigned char +#define DEFAULT GBP_OTHER +#define xmalloc malloc +#define xrealloc realloc +#include "3level.h" + +/* The grapheme break property from the GraphemeBreakProperty.txt file. */ +int unicode_org_gbp[0x110000]; + +/* Output the per-character grapheme break property table. */ +static void +output_gbp_table (const char *filename, const char *version) +{ + FILE *stream; + unsigned int ch, i; + struct gbp_table t; + unsigned int level1_offset, level2_offset, level3_offset; + + stream = fopen (filename, "w"); + if (stream == NULL) + { + fprintf (stderr, "cannot open '%s' for writing\n", filename); + exit (1); + } + + fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n"); + fprintf (stream, "/* Grapheme break property of Unicode characters. */\n"); + fprintf (stream, "/* Generated automatically by gen-uni-tables.c for Unicode %s. */\n", + version); + + t.p = 7; + t.q = 9; + gbp_table_init (&t); + + for (ch = 0; ch < 0x110000; ch++) + gbp_table_add (&t, ch, unicode_org_gbp[ch]); + + gbp_table_finalize (&t); + + /* Offsets in t.result, in memory of this process. 
*/ + level1_offset = + 5 * sizeof (uint32_t); + level2_offset = + 5 * sizeof (uint32_t) + + t.level1_size * sizeof (uint32_t); + level3_offset = + 5 * sizeof (uint32_t) + + t.level1_size * sizeof (uint32_t) + + (t.level2_size << t.q) * sizeof (uint32_t); + + for (i = 0; i < 5; i++) + fprintf (stream, "#define gbrkprop_header_%d %d\n", i, + ((uint32_t *) t.result)[i]); + fprintf (stream, "static const\n"); + fprintf (stream, "struct\n"); + fprintf (stream, " {\n"); + fprintf (stream, " int level1[%zu];\n", t.level1_size); + fprintf (stream, " short level2[%zu << %d];\n", t.level2_size, t.q); + fprintf (stream, " unsigned char level3[(%zu << %d) / 2];\n", + t.level3_size, t.p); + fprintf (stream, " }\n"); + fprintf (stream, "unigbrkprop =\n"); + fprintf (stream, "{\n"); + fprintf (stream, " {"); + if (t.level1_size > 8) + fprintf (stream, "\n "); + for (i = 0; i < t.level1_size; i++) + { + uint32_t offset; + if (i > 0 && (i % 8) == 0) + fprintf (stream, "\n "); + offset = ((uint32_t *) (t.result + level1_offset))[i]; + if (offset == 0) + fprintf (stream, " %5d", -1); + else + fprintf (stream, " %5zu", + (offset - level2_offset) / sizeof (uint32_t)); + if (i+1 < t.level1_size) + fprintf (stream, ","); + } + if (t.level1_size > 8) + fprintf (stream, "\n "); + fprintf (stream, " },\n"); + fprintf (stream, " {"); + if (t.level2_size << t.q > 8) + fprintf (stream, "\n "); + for (i = 0; i < t.level2_size << t.q; i++) + { + uint32_t offset; + if (i > 0 && (i % 8) == 0) + fprintf (stream, "\n "); + offset = ((uint32_t *) (t.result + level2_offset))[i]; + if (offset == 0) + fprintf (stream, " %5d", -1); + else + fprintf (stream, " %5zu", + (offset - level3_offset) / sizeof (uint8_t) / 2); + if (i+1 < t.level2_size << t.q) + fprintf (stream, ","); + } + if (t.level2_size << t.q > 8) + fprintf (stream, "\n "); + fprintf (stream, " },\n"); + fprintf (stream, " {"); + if (t.level3_size << t.p > 8) + fprintf (stream, "\n "); + for (i = 0; i < (t.level3_size << t.p) / 2; i++) + { + 
unsigned char *p = (unsigned char *) (t.result + level3_offset); + unsigned char value0 = p[i * 2]; + unsigned char value1 = p[i * 2 + 1]; + if (i > 0 && (i % 8) == 0) + fprintf (stream, "\n "); + fprintf (stream, " 0x%02x%s", (value1 << 4) + value0, + (i+1 < (t.level3_size << t.p) / 2 ? "," : "")); + } + if (t.level3_size << t.p > 8) + fprintf (stream, "\n "); + fprintf (stream, " }\n"); + fprintf (stream, "};\n"); + + if (ferror (stream) || fclose (stream)) + { + fprintf (stderr, "error writing to '%s'\n", filename); + exit (1); + } +} + +/* Stores in unicode_org_gbp[] the grapheme breaking property from the + GraphemeBreakProperty.txt file. */ +static void +fill_org_gbp (const char *graphemebreakproperty_filename) +{ + unsigned int i; + FILE *stream; + int lineno = 0; + + for (i = 0; i < 0x110000; i++) + unicode_org_gbp[i] = GBP_OTHER; + + stream = fopen (graphemebreakproperty_filename, "r"); + if (stream == NULL) + { + fprintf (stderr, "error during fopen of '%s'\n", + graphemebreakproperty_filename); + exit (1); + } + + for (;;) + { + char buf[200+1]; + unsigned int i1, i2; + char padding[200+1]; + char propname[200+1]; + int propvalue; + + lineno++; + if (fscanf (stream, "%200[^\n]\n", buf) < 1) + break; + + if (buf[0] == '\0' || buf[0] == '#') + continue; + + if (sscanf (buf, "%X..%X%[ ;]%[^ ]", &i1, &i2, padding, propname) != 4) + { + if (sscanf (buf, "%X%[ ;]%[^ ]", &i1, padding, propname) != 3) + { + fprintf (stderr, "parse error in '%s'\n", + graphemebreakproperty_filename); + exit (1); + } + i2 = i1; + } +#define PROP(name,value) \ + if (strcmp (propname, name) == 0) propvalue = value; else + PROP ("CR", GBP_CR) + PROP ("LF", GBP_LF) + PROP ("Control", GBP_CONTROL) + PROP ("Extend", GBP_EXTEND) + PROP ("Prepend", GBP_PREPEND) + PROP ("SpacingMark", GBP_SPACINGMARK) + PROP ("L", GBP_L) + PROP ("V", GBP_V) + PROP ("T", GBP_T) + PROP ("LV", GBP_LV) + PROP ("LVT", GBP_LVT) +#undef PROP + { + fprintf (stderr, "unknown property value '%s' in %s:%d\n", 
propname, + graphemebreakproperty_filename, lineno); + exit (1); + } + if (!(i1 <= i2 && i2 < 0x110000)) + abort (); + + for (i = i1; i <= i2; i++) + unicode_org_gbp[i] = propvalue; + } + if (ferror (stream) || fclose (stream)) + { + fprintf (stderr, "error reading from '%s'\n", graphemebreakproperty_filename); + exit (1); + } +} + +/* ========================================================================= */ + /* Maximum number of characters into which a single Unicode character can be decomposed. */ #define MAX_DECOMP_LENGTH 18 @@ -8506,9 +8508,9 @@ const char *blocks_filename; const char *proplist30_filename; const char *eastasianwidth_filename; - const char *graphemebreakproperty_filename; const char *linebreak_filename; const char *wordbreakproperty_filename; + const char *graphemebreakproperty_filename; const char *compositionexclusions_filename; const char *specialcasing_filename; const char *casefolding_filename; @@ -8516,7 +8518,7 @@ if (argc != 15) { - fprintf (stderr, "Usage: %s UnicodeData.txt PropList.txt DerivedCoreProperties.txt Scripts.txt Blocks.txt PropList-3.0.1.txt EastAsianWidth.txt GraphemeBreakProperty.txt LineBreak.txt WordBreakProperty.txt CompositionExclusions.txt SpecialCasing.txt CaseFolding.txt version\n", + fprintf (stderr, "Usage: %s UnicodeData.txt PropList.txt DerivedCoreProperties.txt Scripts.txt Blocks.txt PropList-3.0.1.txt EastAsianWidth.txt LineBreak.txt WordBreakProperty.txt GraphemeBreakProperty.txt CompositionExclusions.txt SpecialCasing.txt CaseFolding.txt version\n", argv[0]); exit (1); } @@ -8528,9 +8530,9 @@ blocks_filename = argv[5]; proplist30_filename = argv[6]; eastasianwidth_filename = argv[7]; - graphemebreakproperty_filename = argv[8]; - linebreak_filename = argv[9]; - wordbreakproperty_filename = argv[10]; + linebreak_filename = argv[8]; + wordbreakproperty_filename = argv[9]; + graphemebreakproperty_filename = argv[10]; compositionexclusions_filename = argv[11]; specialcasing_filename = argv[12]; 
casefolding_filename = argv[13]; @@ -8544,9 +8546,9 @@ fill_scripts (scripts_filename); fill_blocks (blocks_filename); fill_width (eastasianwidth_filename); - fill_org_gbp (graphemebreakproperty_filename); fill_org_lbp (linebreak_filename); fill_org_wbp (wordbreakproperty_filename); + fill_org_gbp (graphemebreakproperty_filename); fill_composition_exclusions (compositionexclusions_filename); fill_casing_rules (specialcasing_filename); fill_casefolding_rules (casefolding_filename); @@ -8571,8 +8573,6 @@ output_ident_properties (version); output_old_ctype (version); - output_gbp ("unigbrk/gbrkprop.h", version); - debug_output_lbrk_tables ("unilbrk/lbrkprop.txt"); debug_output_org_lbrk_tables ("unilbrk/lbrkprop_org.txt"); output_lbrk_tables ("unilbrk/lbrkprop1.h", "unilbrk/lbrkprop2.h", version); @@ -8581,6 +8581,8 @@ debug_output_org_wbrk_tables ("uniwbrk/wbrkprop_org.txt"); output_wbrk_tables ("uniwbrk/wbrkprop.h", version); + output_gbp_table ("unigbrk/gbrkprop.h", version); + output_decomposition_tables ("uninorm/decomposition-table1.h", "uninorm/decomposition-table2.h", version); debug_output_composition_tables ("uninorm/composition.txt"); output_composition_tables ("uninorm/composition-table.gperf", version); @@ -8611,9 +8613,9 @@ /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/Blocks.txt \ /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/3.0.1/PropList-3.0.1.txt \ /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/EastAsianWidth.txt \ - /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/auxiliary/GraphemeBreakProperty.txt \ /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/LineBreak.txt \ 
/gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/auxiliary/WordBreakProperty.txt \ + /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/auxiliary/GraphemeBreakProperty.txt \ /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/CompositionExclusions.txt \ /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/SpecialCasing.txt \ /gfs/petix/Volumes/ExtData/www-archive/software/i18n/unicode/ftp.unicode.org/ArchiveVersions/5.1.0/ucd/CaseFolding.txt \ --- lib/unigbrk.in.h.orig Mon Dec 27 16:31:26 2010 +++ lib/unigbrk.in.h Mon Dec 27 16:06:29 2010 @@ -1,6 +1,6 @@ /* Grapheme cluster breaks in Unicode strings. Copyright (C) 2010 Free Software Foundation, Inc. - Written by Ben Pfaff , 2010. + Written by Ben Pfaff , 2010. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published @@ -18,9 +18,6 @@ #ifndef _UNIGBRK_H #define _UNIGBRK_H -/* Get size_t. */ -#include <stddef.h> - /* Get bool. 
*/ #include <stdbool.h> --- modules/unigbrk/base.orig Mon Dec 27 16:31:26 2010 +++ modules/unigbrk/base Mon Dec 27 16:21:59 2010 @@ -10,7 +10,7 @@ stdbool configure.ac: -gl_LIBUNISTRING_LIBHEADER([0.9], [unigbrk.h]) +gl_LIBUNISTRING_LIBHEADER([0.9.4], [unigbrk.h]) Makefile.am: BUILT_SOURCES += $(LIBUNISTRING_UNIGBRK_H) --- modules/unigbrk/uc-gbrk-prop.orig Mon Dec 27 16:31:26 2010 +++ modules/unigbrk/uc-gbrk-prop Mon Dec 27 16:22:04 2010 @@ -9,7 +9,7 @@ unigbrk/base configure.ac: -gl_LIBUNISTRING_MODULE([0.9], [unigbrk/uc-gbrk-prop]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/uc-gbrk-prop]) Makefile.am: if LIBUNISTRING_COMPILE_UNIGBRK_UC_GBRK_PROP --- modules/unigbrk/uc-is-grapheme-break.orig Mon Dec 27 16:31:26 2010 +++ modules/unigbrk/uc-is-grapheme-break Mon Dec 27 16:22:09 2010 @@ -9,7 +9,7 @@ unigbrk/uc-gbrk-prop configure.ac: -gl_LIBUNISTRING_MODULE([0.9], [unigbrk/uc-is-grapheme-break]) +gl_LIBUNISTRING_MODULE([0.9.4], [unigbrk/uc-is-grapheme-break]) Makefile.am: if LIBUNISTRING_COMPILE_UNIGBRK_UC_IS_GRAPHEME_BREAK Changing permissions from 100644 to 100755