/*
 * Reads UTF-8 from standard input and writes it to standard output as
 * ASCII: code points up to 0x7F are copied through, anything larger is
 * written as "\[uXXXX]" (upper-case hex, at least four digits).
 *
 * Define FGETU8_NO_MAIN to compile only the decoder, e.g. to link
 * fgetu8() into another program or a test harness.
 */
#include <stdio.h>
#include <stdlib.h>

/*
 * Returned by fgetu8() for malformed input.  The longest accepted form
 * (six bytes) carries 31 payload bits, so no decoded value can equal it.
 */
#define INVALID 0x80000000u

/**
 * Read one UTF-8 encoded value from a stream.
 *
 * Accepts the one- to six-byte forms (values 0 .. 0x7FFFFFFF).  Surrogates
 * and values above U+10FFFF are not filtered here; callers that need
 * strict Unicode scalar values must check the result themselves.
 *
 * @param input  stream to read from; may be NULL.
 * @return the decoded value;
 *         (unsigned int)EOF if input is NULL or the stream is at end of
 *         file (or fails) before the first byte of a sequence;
 *         INVALID if the lead byte is not a valid lead byte, a
 *         continuation byte is missing or malformed, the sequence is cut
 *         off by end of file, or the value was encoded with more bytes
 *         than necessary (overlong form).
 *
 * When INVALID is returned because of a bad continuation byte, that byte
 * has already been consumed from the stream.
 */
unsigned int fgetu8(FILE *input)
{
    /* Smallest value that genuinely needs N continuation bytes (index N). */
    static const unsigned int min_value[] = {
        0x0u, 0x80u, 0x800u, 0x10000u, 0x200000u, 0x4000000u
    };
    unsigned int c;
    int first, ch, i, iterations;

    if (input == NULL) {
        return (unsigned int)EOF;
    }

    first = fgetc(input);
    if (first == EOF) {
        return (unsigned int)EOF;
    }
    c = (unsigned int)first;

    /* Classify the lead byte: keep its payload bits and note how many
     * continuation bytes must follow. */
    if ((c & 0xFE) == 0xFC) {
        c &= 0x01;
        iterations = 5;
    } else if ((c & 0xFC) == 0xF8) {
        c &= 0x03;
        iterations = 4;
    } else if ((c & 0xF8) == 0xF0) {
        c &= 0x07;
        iterations = 3;
    } else if ((c & 0xF0) == 0xE0) {
        c &= 0x0F;
        iterations = 2;
    } else if ((c & 0xE0) == 0xC0) {
        c &= 0x1F;
        iterations = 1;
    } else if ((c & 0x80) == 0x80) {
        /* Stray continuation byte (10xxxxxx) or 0xFE / 0xFF. */
        return INVALID;
    } else {
        return c; /* plain 7-bit value */
    }

    for (i = 0; i < iterations; i++) {
        ch = fgetc(input);
        if (ch == EOF) {
            /* The sequence was cut short; reporting EOF here would let
             * truncated input pass as a clean end of stream. */
            return INVALID;
        }
        if ((ch & 0xC0) != 0x80) {
            return INVALID;
        }
        c = (c << 6) | ((unsigned int)ch & 0x3Fu);
    }

    /* Reject overlong forms so every value has exactly one encoding
     * (otherwise e.g. C0 AF would decode to '/'). */
    if (c < min_value[iterations]) {
        return INVALID;
    }

    return c;
}

#ifndef FGETU8_NO_MAIN
/*
 * Filter stdin to stdout as described at the top of the file.  Exits with
 * failure status after printing a message to stderr on the first
 * malformed sequence.
 */
int main(void)
{
    unsigned int c;

    while ((c = fgetu8(stdin)) != (unsigned int)EOF) {
        if (c == INVALID) {
            fputs("Error decoding UTF-8\n", stderr);
            return EXIT_FAILURE;
        }
        if (c <= 0x7F) {
            putchar((int)c);
        } else {
            printf("\\[u%04X]", c);
        }
    }

    return EXIT_SUCCESS;
}
#endif /* FGETU8_NO_MAIN */