1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | #include <config.h> |
21 | |
22 | |
23 | #include "localcharset.h" |
24 | |
25 | #include <fcntlrpl_fcntl.h> |
26 | #include <stddef.h> |
27 | #include <stdio.h> |
28 | #include <string.h> |
29 | #include <stdlib.h> |
30 | |
31 | #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET1 |
32 | # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */ |
33 | #endif |
34 | |
35 | #if defined _WIN32 || defined __WIN32__ |
36 | # define WINDOWS_NATIVE |
37 | #endif |
38 | |
39 | #if defined __EMX__ |
40 | |
41 | # ifndef OS2 |
42 | # define OS2 |
43 | # endif |
44 | #endif |
45 | |
46 | #if !defined WINDOWS_NATIVE |
47 | # include <unistd.h> |
48 | # if HAVE_LANGINFO_CODESET1 |
49 | # include <langinfo.h> |
50 | # else |
51 | # if 0 /* see comment below */ |
52 | # include <locale.h> |
53 | # endif |
54 | # endif |
55 | # ifdef __CYGWIN__ |
56 | # define WIN32_LEAN_AND_MEAN |
57 | # include <windows.h> |
58 | # endif |
59 | #elif defined WINDOWS_NATIVE |
60 | # define WIN32_LEAN_AND_MEAN |
61 | # include <windows.h> |
62 | #endif |
63 | #if defined OS2 |
64 | # define INCL_DOS |
65 | # include <os2.h> |
66 | #endif |
67 | |
68 | |
69 | #if defined DARWIN7 |
70 | # include <xlocale.h> |
71 | #endif |
72 | |
73 | #if ENABLE_RELOCATABLE |
74 | # include "relocatable.h" |
75 | #else |
76 | # define relocate(pathname)(pathname) (pathname) |
77 | #endif |
78 | |
79 | |
80 | #ifndef LIBDIR"/usr/local/lib" |
81 | # include "configmake.h" |
82 | #endif |
83 | |
84 | |
85 | #ifndef O_NOFOLLOW0400000 |
86 | # define O_NOFOLLOW0400000 0 |
87 | #endif |
88 | |
89 | #if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ |
90 | |
91 | # define ISSLASH(C)((C) == '/') ((C) == '/' || (C) == '\\') |
92 | #endif |
93 | |
94 | #ifndef DIRECTORY_SEPARATOR'/' |
95 | # define DIRECTORY_SEPARATOR'/' '/' |
96 | #endif |
97 | |
98 | #ifndef ISSLASH |
99 | # define ISSLASH(C)((C) == '/') ((C) == DIRECTORY_SEPARATOR'/') |
100 | #endif |
101 | |
102 | #if HAVE_DECL_GETC_UNLOCKED1 |
103 | # undef getcgetc_unlocked |
104 | # define getcgetc_unlocked getc_unlocked |
105 | #endif |
106 | |
107 | |
108 | |
109 | |
110 | |
111 | |
112 | |
113 | #if __STDC__1 != 1 |
114 | # define volatile /* empty */ |
115 | #endif |
116 | |
117 | |
118 | |
119 | static const char * volatile charset_aliases; |
120 | |
121 | |
122 | static const char * |
123 | get_charset_aliases (void) |
124 | { |
125 | const char *cp; |
126 | |
127 | cp = charset_aliases; |
128 | if (cp == NULL((void*)0)) |
| 1 | Assuming 'cp' is equal to null | |
|
| |
129 | { |
130 | #if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__) |
131 | const char *dir; |
132 | const char *base = "charset.alias"; |
133 | char *file_name; |
134 | |
135 | |
136 | |
137 | dir = getenv ("CHARSETALIASDIR"); |
138 | if (dir == NULL((void*)0) || dir[0] == '\0') |
| 3 | | Assuming 'dir' is not equal to null | |
|
| |
139 | dir = relocate (LIBDIR)("/usr/local/lib"); |
140 | |
141 | |
142 | { |
143 | size_t dir_len = strlen (dir); |
144 | size_t base_len = strlen (base); |
145 | int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1])((dir[dir_len - 1]) == '/')); |
| 5 | | Assuming 'dir_len' is <= 0 | |
|
146 | file_name = (char *) malloc (dir_len + add_slash + base_len + 1); |
147 | if (file_name != NULL((void*)0)) |
| 6 | | Assuming 'file_name' is not equal to null | |
|
| |
148 | { |
149 | memcpy (file_name, dir, dir_len); |
150 | if (add_slash) |
| |
151 | file_name[dir_len] = DIRECTORY_SEPARATOR'/'; |
152 | memcpy (file_name + dir_len + add_slash, base, base_len + 1); |
153 | } |
154 | } |
155 | |
156 | if (file_name == NULL((void*)0)) |
| |
157 | |
158 | cp = ""; |
159 | else |
160 | { |
161 | int fd; |
162 | |
163 | |
164 | |
165 | |
166 | |
167 | |
168 | |
169 | |
170 | fd = open (file_name, |
171 | O_RDONLY00 | (HAVE_WORKING_O_NOFOLLOW1 ? O_NOFOLLOW0400000 : 0)); |
172 | if (fd < 0) |
| |
| |
173 | |
174 | cp = ""; |
175 | else |
176 | { |
177 | FILE *fp; |
178 | |
179 | fp = fdopen (fd, "r"); |
180 | if (fp == NULL((void*)0)) |
| 12 | | Assuming 'fp' is not equal to null | |
|
| |
181 | { |
182 | |
183 | close (fd); |
184 | cp = ""; |
185 | } |
186 | else |
187 | { |
188 | |
189 | char *res_ptr = NULL((void*)0); |
190 | size_t res_size = 0; |
191 | |
192 | for (;;) |
| 14 | | Loop condition is true. Entering loop body | |
|
| 23 | | Loop condition is true. Entering loop body | |
|
193 | { |
194 | int c; |
195 | char buf1[50+1]; |
196 | char buf2[50+1]; |
197 | size_t l1, l2; |
198 | char *old_res_ptr; |
199 | |
200 | c = getcgetc_unlocked (fp); |
201 | if (c == EOF(-1)) |
| |
| |
202 | break; |
203 | if (c == '\n' || c == ' ' || c == '\t') |
| |
| |
204 | continue; |
205 | if (c == '#') |
| |
| |
206 | { |
207 | |
208 | do |
209 | c = getcgetc_unlocked (fp); |
210 | while (!(c == EOF(-1) || c == '\n')); |
211 | if (c == EOF(-1)) |
212 | break; |
213 | continue; |
214 | } |
215 | ungetc (c, fp); |
216 | if (fscanf (fp, "%50s %50s", buf1, buf2) < 2) |
| |
| |
217 | break; |
218 | l1 = strlen (buf1); |
219 | l2 = strlen (buf2); |
220 | old_res_ptr = res_ptr; |
221 | if (res_size == 0) |
| |
| 28 | | Assuming 'res_size' is equal to 0 | |
|
| |
222 | { |
223 | res_size = l1 + 1 + l2 + 1; |
224 | res_ptr = (char *) malloc (res_size + 1); |
| |
225 | } |
226 | else |
227 | { |
228 | res_size += l1 + 1 + l2 + 1; |
229 | res_ptr = (char *) realloc (res_ptr, res_size + 1); |
230 | } |
231 | if (res_ptr == NULL((void*)0)) |
| 21 | | Assuming 'res_ptr' is not equal to null | |
|
| |
| 30 | | Assuming 'res_ptr' is not equal to null | |
|
| |
232 | { |
233 | |
234 | res_size = 0; |
235 | free (old_res_ptr); |
236 | break; |
237 | } |
238 | strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1); |
| 32 | | Potential leak of memory pointed to by 'old_res_ptr' |
|
239 | strcpy (res_ptr + res_size - (l2 + 1), buf2); |
240 | } |
241 | fcloserpl_fclose (fp); |
242 | if (res_size == 0) |
243 | cp = ""; |
244 | else |
245 | { |
246 | *(res_ptr + res_size) = '\0'; |
247 | cp = res_ptr; |
248 | } |
249 | } |
250 | } |
251 | |
252 | free (file_name); |
253 | } |
254 | |
255 | #else |
256 | |
257 | # if defined DARWIN7 |
258 | |
259 | |
260 | |
261 | cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" |
262 | "ISO8859-2" "\0" "ISO-8859-2" "\0" |
263 | "ISO8859-4" "\0" "ISO-8859-4" "\0" |
264 | "ISO8859-5" "\0" "ISO-8859-5" "\0" |
265 | "ISO8859-7" "\0" "ISO-8859-7" "\0" |
266 | "ISO8859-9" "\0" "ISO-8859-9" "\0" |
267 | "ISO8859-13" "\0" "ISO-8859-13" "\0" |
268 | "ISO8859-15" "\0" "ISO-8859-15" "\0" |
269 | "KOI8-R" "\0" "KOI8-R" "\0" |
270 | "KOI8-U" "\0" "KOI8-U" "\0" |
271 | "CP866" "\0" "CP866" "\0" |
272 | "CP949" "\0" "CP949" "\0" |
273 | "CP1131" "\0" "CP1131" "\0" |
274 | "CP1251" "\0" "CP1251" "\0" |
275 | "eucCN" "\0" "GB2312" "\0" |
276 | "GB2312" "\0" "GB2312" "\0" |
277 | "eucJP" "\0" "EUC-JP" "\0" |
278 | "eucKR" "\0" "EUC-KR" "\0" |
279 | "Big5" "\0" "BIG5" "\0" |
280 | "Big5HKSCS" "\0" "BIG5-HKSCS" "\0" |
281 | "GBK" "\0" "GBK" "\0" |
282 | "GB18030" "\0" "GB18030" "\0" |
283 | "SJIS" "\0" "SHIFT_JIS" "\0" |
284 | "ARMSCII-8" "\0" "ARMSCII-8" "\0" |
285 | "PT154" "\0" "PT154" "\0" |
286 | |
287 | "*" "\0" "UTF-8" "\0"; |
288 | # endif |
289 | |
290 | # if defined VMS |
291 | |
292 | |
293 | |
294 | |
295 | |
296 | cp = "ISO8859-1" "\0" "ISO-8859-1" "\0" |
297 | "ISO8859-2" "\0" "ISO-8859-2" "\0" |
298 | "ISO8859-5" "\0" "ISO-8859-5" "\0" |
299 | "ISO8859-7" "\0" "ISO-8859-7" "\0" |
300 | "ISO8859-8" "\0" "ISO-8859-8" "\0" |
301 | "ISO8859-9" "\0" "ISO-8859-9" "\0" |
302 | |
303 | "eucJP" "\0" "EUC-JP" "\0" |
304 | "SJIS" "\0" "SHIFT_JIS" "\0" |
305 | "DECKANJI" "\0" "DEC-KANJI" "\0" |
306 | "SDECKANJI" "\0" "EUC-JP" "\0" |
307 | |
308 | "eucTW" "\0" "EUC-TW" "\0" |
309 | "DECHANYU" "\0" "DEC-HANYU" "\0" |
310 | "DECHANZI" "\0" "GB2312" "\0" |
311 | |
312 | "DECKOREAN" "\0" "EUC-KR" "\0"; |
313 | # endif |
314 | |
315 | # if defined WINDOWS_NATIVE || defined __CYGWIN__ |
316 | |
317 | |
318 | |
319 | |
320 | cp = "CP936" "\0" "GBK" "\0" |
321 | "CP1361" "\0" "JOHAB" "\0" |
322 | "CP20127" "\0" "ASCII" "\0" |
323 | "CP20866" "\0" "KOI8-R" "\0" |
324 | "CP20936" "\0" "GB2312" "\0" |
325 | "CP21866" "\0" "KOI8-RU" "\0" |
326 | "CP28591" "\0" "ISO-8859-1" "\0" |
327 | "CP28592" "\0" "ISO-8859-2" "\0" |
328 | "CP28593" "\0" "ISO-8859-3" "\0" |
329 | "CP28594" "\0" "ISO-8859-4" "\0" |
330 | "CP28595" "\0" "ISO-8859-5" "\0" |
331 | "CP28596" "\0" "ISO-8859-6" "\0" |
332 | "CP28597" "\0" "ISO-8859-7" "\0" |
333 | "CP28598" "\0" "ISO-8859-8" "\0" |
334 | "CP28599" "\0" "ISO-8859-9" "\0" |
335 | "CP28605" "\0" "ISO-8859-15" "\0" |
336 | "CP38598" "\0" "ISO-8859-8" "\0" |
337 | "CP51932" "\0" "EUC-JP" "\0" |
338 | "CP51936" "\0" "GB2312" "\0" |
339 | "CP51949" "\0" "EUC-KR" "\0" |
340 | "CP51950" "\0" "EUC-TW" "\0" |
341 | "CP54936" "\0" "GB18030" "\0" |
342 | "CP65001" "\0" "UTF-8" "\0"; |
343 | # endif |
344 | #endif |
345 | |
346 | charset_aliases = cp; |
347 | } |
348 | |
349 | return cp; |
350 | } |
351 | |
352 | |
353 | |
354 | |
355 | |
356 | |
357 | |
/* Determine the character encoding name of the current locale.

   A raw codeset name is obtained in a platform-specific way:
     - with nl_langinfo (CODESET) where HAVE_LANGINFO_CODESET is set,
     - from the environment variables LC_ALL, LC_CTYPE, LANG (first one
       that is set and non-empty) otherwise on Unix-like builds,
     - as "CP<n>" from GetACP () on native Windows,
     - from the environment, or as "CP<n>" from DosQueryCp (), on OS/2.
   The raw name is then looked up in the table returned by
   get_charset_aliases (); an exact match, or a "*" wildcard entry,
   replaces it with the table's canonical name.  An empty result becomes
   "ASCII".

   The Cygwin and OS/2 branches return the ".codeset" part of the locale
   name directly when one is present, without consulting the alias table.

   The returned string is never NULL.  It may point into a static buffer,
   into the environment, or into the alias table, so the caller must not
   modify or free it.  */
#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;
  const char *aliases;

#if !(defined WINDOWS_NATIVE || defined OS2)

# if HAVE_LANGINFO_CODESET

  /* Ask the C library directly.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* When nl_langinfo only reports "US-ASCII", try to do better using the
     environment, and finally the Windows ANSI code page.  */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      static char buf[2 + 10 + 1];

      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot,
             return it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* The codeset ends at the '@' modifier, if there is one.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                return dot;
              /* modifier >= dot here, so the difference is non-negative;
                 compare as size_t to avoid a signed/unsigned mix.  */
              if ((size_t) (modifier - dot) < sizeof (buf))
                {
                  memcpy (buf, dot, modifier - dot);
                  buf [modifier - dot] = '\0';
                  return buf;
                }
            }
        }

      /* "CP" + at most 10 digits + NUL fits in buf.  */
      sprintf (buf, "CP%u", GetACP ());
      codeset = buf;
    }
#  endif

# else

  /* No nl_langinfo (CODESET): fall back to the locale name from the
     environment.  */
  const char *locale = NULL;

  /* The setlocale query is deliberately compiled out; only the
     environment variables are consulted.  */
#  if 0
  locale = setlocale (LC_CTYPE, NULL);
#  endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
    }

  /* The whole locale name is used as the lookup key; the alias table is
     expected to map it to a codeset.  */
  codeset = locale;

# endif

#elif defined WINDOWS_NATIVE

  static char buf[2 + 10 + 1];

  /* Name the ANSI code page as "CP<number>"; "CP" + at most 10 digits +
     NUL fits in buf.  */
  sprintf (buf, "CP%u", GetACP ());
  codeset = buf;

#elif defined OS2

  const char *locale;
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  /* First consult the environment, in the usual precedence order.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* The codeset ends at the '@' modifier, if there is one.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          /* modifier >= dot here, so the difference is non-negative;
             compare as size_t to avoid a signed/unsigned mix.  */
          if ((size_t) (modifier - dot) < sizeof (buf))
            {
              memcpy (buf, dot, modifier - dot);
              buf [modifier - dot] = '\0';
              return buf;
            }
        }

      /* Otherwise use the whole locale name as the lookup key.  */
      codeset = locale;
    }
  else
    {
      /* No locale in the environment: query the process code page.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          /* Convert explicitly so the argument type matches "%u" and the
             output stays within the 10 digits buf is sized for.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          codeset = buf;
        }
    }

#endif

  if (codeset == NULL)
    /* The platform gave no answer; look up the empty name.  */
    codeset = "";

  /* Resolve through the alias table.  Each iteration skips two strings:
     the alias and its canonical name.  */
  for (aliases = get_charset_aliases ();
       *aliases != '\0';
       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
    if (strcmp (codeset, aliases) == 0
        || (aliases[0] == '*' && aliases[1] == '\0'))
      {
        /* The canonical name follows the alias.  */
        codeset = aliases + strlen (aliases) + 1;
        break;
      }

  /* Never return an empty name; default to "ASCII".  */
  if (codeset[0] == '\0')
    codeset = "ASCII";

#ifdef DARWIN7
  /* If the result is "UTF-8" but the current locale's maximum multibyte
     character length is 1, report "ASCII" instead.  */
  if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
    codeset = "ASCII";
#endif

  return codeset;
}