#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>

/* Print the first LEN bytes of BYTES in hexadecimal, as a single line of
   the form "rbuf = XX XX ...".  */
static void
print_rbuf (const char *bytes, size_t len)
{
  size_t i;

  printf ("rbuf =");
  for (i = 0; i < len; i++)
    printf (" %02X", (unsigned char) bytes[i]);
  printf ("\n");
}

/* Exercise mbrtowc and wcrtomb on a character outside the Basic
   Multilingual Plane in a zh_CN UTF-8 locale, and print what each call
   returns.  If mbrtowc consumes only part of the 4-byte sequence on its
   first call, a second call converts the remainder and both resulting wide
   characters are converted back; otherwise only wbuf[0] is converted back.
   Returns 1 if the locale cannot be set, 0 otherwise.  */
int
main (void)
{
  /* U+21234; in CJK Ideograph Extension B
     in UTF-16 form: 0xD844 0xDE34  */
  /* The explicit casts keep the initializers well-formed where plain char
     is signed and cannot represent values above 0x7F.  */
  const char buf[4] = { (char) 0xF0, (char) 0xA1, (char) 0x88, (char) 0xB4 };
  /* Sentinel values, so that an element mbrtowc did not store into is
     recognizable in the output.  */
  wchar_t wbuf[4] = { 0xFFFE, 0xFFFE, 0xFFFE, 0xFFFE };
  const char *inptr = buf;
  /* Room for two successive wcrtomb results; the second one is written
     directly after the first.  */
  char rbuf[16];
  size_t ret, ret1, ret2;

#ifdef _AIX
  if (setlocale (LC_ALL, "ZH_CN.UTF-8") == NULL)
#else
  if (setlocale (LC_ALL, "zh_CN.UTF-8") == NULL)
#endif
    {
      printf ("setlocale failed\n");
      return 1;
    }

  /* Test mbrtowc */
  ret = mbrtowc (&wbuf[0], inptr, buf + sizeof (buf) - inptr, NULL);
  printf ("ret = %d, wbuf[0] = %x\n", (int) ret, (unsigned int) wbuf[0]);

  /* ret is a size_t: the error returns (size_t) -1 and (size_t) -2 are huge
     values, so "ret < sizeof (buf)" excludes them along with the case where
     all 4 bytes were consumed at once.  */
  if (ret > 0 && ret < sizeof (buf))
    {
      /* Only part of the input was consumed; convert the remainder.  */
      inptr += ret;
      ret = mbrtowc (&wbuf[1], inptr, buf + sizeof (buf) - inptr, NULL);
      printf ("ret = %d, wbuf[1] = %x\n", (int) ret, (unsigned int) wbuf[1]);

      /* Continue only if the second conversion really produced a
         character.  A plain "ret > 0" would also accept the error
         returns, because they are large unsigned values.  */
      if (ret != 0 && ret != (size_t) -1 && ret != (size_t) -2)
        {
          /* Test wcrtomb */
          ret1 = wcrtomb (rbuf, wbuf[0], NULL);
          printf ("ret1 = %d\n", (int) ret1);
          if (ret1 != (size_t) -1)
            {
              print_rbuf (rbuf, ret1);

              ret2 = wcrtomb (rbuf + ret1, wbuf[1], NULL);
              printf ("ret2 = %d\n", (int) ret2);
              if (ret2 != (size_t) -1)
                print_rbuf (rbuf, ret1 + ret2);
            }
        }
    }
  else
    {
      /* Test wcrtomb */
      ret1 = wcrtomb (rbuf, wbuf[0], NULL);
      printf ("ret1 = %d\n", (int) ret1);
      if (ret1 != (size_t) -1)
        print_rbuf (rbuf, ret1);
    }

  return 0;
}

/* Result on glibc: wchar_t[] is UCS-4
ret = 4, wbuf[0] = 21234
ret1 = 4
rbuf = F0 A1 88 B4

Result on Cygwin 1.7.5: wchar_t[] is UTF-16
ret = 3, wbuf[0] = d844
ret = 1, wbuf[1] = de34
ret1 = 0
rbuf =
ret2 = 4
rbuf = F0 A1 88 B4

Result on AIX 5.1:
ret = -1, wbuf[0] = fffe

Result on AIX 5.2, 5.3, 6.1: wchar_t[] is UCS-2
ret = -1, wbuf[0] = fffd
*/