OLD | NEW |
| (Empty) |
1 /* | |
2 * xsltlocale.c: locale handling | |
3 * | |
4 * Reference: | |
5 * RFC 3066: Tags for the Identification of Languages | |
6 * http://www.ietf.org/rfc/rfc3066.txt | |
7 * ISO 639-1, ISO 3166-1 | |
8 * | |
9 * Author: Nick Wellnhofer | |
10 * winapi port: Roumen Petrov | |
11 */ | |
12 | |
13 #define IN_LIBXSLT | |
14 #include "libxslt.h" | |
15 | |
16 #include <string.h> | |
17 #include <libxml/xmlmemory.h> | |
18 | |
19 #include "xsltlocale.h" | |
20 #include "xsltutils.h" | |
21 | |
/*
 * Compatibility shim for glibc 2.x with a minor version of 2 or lower:
 * the extended-locale functions are reached through their
 * double-underscore names and LC_COLLATE_MASK is derived from LC_COLLATE.
 * NOTE(review): presumably these glibc releases do not expose the
 * unprefixed names - confirm against the glibc headers.
 */
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 2
#define newlocale __newlocale
#define freelocale __freelocale
#define strxfrm_l __strxfrm_l
#define LC_COLLATE_MASK (1 << LC_COLLATE)
#endif
28 | |
/*
 * Locale-independent ASCII helpers. They work by clearing/setting bit 0x20,
 * so TOUPPER/TOLOWER are only meaningful for ASCII letters; ISALPHA is true
 * exactly for 'A'-'Z' and 'a'-'z'. Each macro evaluates its argument once.
 * The argument is parenthesised so that expressions such as (x | y) expand
 * with the intended precedence.
 */
#define TOUPPER(c) ((c) & ~0x20)
#define TOLOWER(c) ((c) | 0x20)
#define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26)
32 | |
/*
 * Maximum number of characters accepted for the language and the country
 * part of a tag, without the terminating null character.
 */
#define XSLTMAX_ISO639LANGLEN		8
#define XSLTMAX_ISO3166CNTRYLEN		8
/* Longest tag handled: <lang>-<cntry> (the +1 accounts for the separator) */
#define XSLTMAX_LANGTAGLEN		(XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN)
38 | |
39 static const xmlChar* xsltDefaultRegion(const xmlChar *localeName); | |
40 | |
41 #ifdef XSLT_LOCALE_WINAPI | |
/*
 * Recursive mutex taken around accesses to the locale table in
 * xsltFreeLocales and xsltEnumSupportedLocales. It starts out as NULL and
 * is not assigned in the visible part of this file.
 * NOTE(review): presumably it is created by library initialisation code
 * elsewhere - confirm.
 */
xmlRMutexPtr xsltLocaleMutex = NULL;

/* One entry of the locale table: a "<lang>-<CNTRY>" tag and its locale id. */
struct xsltRFC1766Info_s {
      /*note typedef unsigned char xmlChar !*/
    xmlChar    tag[XSLTMAX_LANGTAGLEN+1];
      /*note typedef LCID xsltLocale !*/
    xsltLocale lcid;
};
typedef struct xsltRFC1766Info_s xsltRFC1766Info;

/* Number of entries allocated in xsltLocaleList (set while enumerating). */
static int xsltLocaleListSize = 0;
/* Table of locales, allocated lazily by xsltEnumSupportedLocales. */
static xsltRFC1766Info *xsltLocaleList = NULL;
54 | |
55 | |
56 static xsltLocale | |
57 xslt_locale_WINAPI(const xmlChar *languageTag) { | |
58 int k; | |
59 xsltRFC1766Info *p = xsltLocaleList; | |
60 | |
61 for (k=0; k<xsltLocaleListSize; k++, p++) | |
62 if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid; | |
63 return((xsltLocale)0); | |
64 } | |
65 | |
66 static void xsltEnumSupportedLocales(void); | |
67 #endif | |
68 | |
/**
 * xsltFreeLocales:
 *
 * Shutdown hook of the locale support. On the WINAPI backend it releases
 * the locale table while holding xsltLocaleMutex and leaves the table
 * pointer set to NULL; on other backends it does nothing.
 */
void
xsltFreeLocales(void) {
#ifdef XSLT_LOCALE_WINAPI
    {
        xsltRFC1766Info *table;

        xmlRMutexLock(xsltLocaleMutex);
        /* Detach the table first, then release the detached memory. */
        table = xsltLocaleList;
        xsltLocaleList = NULL;
        xmlFree(table);
        xmlRMutexUnlock(xsltLocaleMutex);
    }
#endif
}
83 | |
84 /** | |
85 * xsltNewLocale: | |
86 * @languageTag: RFC 3066 language tag | |
87 * | |
88 * Creates a new locale of an opaque system dependent type based on the | |
89 * language tag. | |
90 * | |
91 * Returns the locale or NULL on error or if no matching locale was found | |
92 */ | |
93 xsltLocale | |
94 xsltNewLocale(const xmlChar *languageTag) { | |
95 #ifdef XSLT_LOCALE_XLOCALE | |
96 xsltLocale locale; | |
97 char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */ | |
98 const xmlChar *p = languageTag; | |
99 const char *region = NULL; | |
100 char *q = localeName; | |
101 int i, llen; | |
102 | |
103 /* Convert something like "pt-br" to "pt_BR.utf8" */ | |
104 | |
105 if (languageTag == NULL) | |
106 return(NULL); | |
107 | |
108 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i) | |
109 *q++ = TOLOWER(*p++); | |
110 | |
111 if (i == 0) | |
112 return(NULL); | |
113 | |
114 llen = i; | |
115 | |
116 if (*p) { | |
117 if (*p++ != '-') | |
118 return(NULL); | |
119 *q++ = '_'; | |
120 | |
121 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i) | |
122 *q++ = TOUPPER(*p++); | |
123 | |
124 if (i == 0 || *p) | |
125 return(NULL); | |
126 | |
127 memcpy(q, ".utf8", 6); | |
128 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); | |
129 if (locale != NULL) | |
130 return(locale); | |
131 | |
132 /* Continue without using country code */ | |
133 | |
134 q = localeName + llen; | |
135 } | |
136 | |
137 /* Try locale without territory, e.g. for Esperanto (eo) */ | |
138 | |
139 memcpy(q, ".utf8", 6); | |
140 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); | |
141 if (locale != NULL) | |
142 return(locale); | |
143 | |
144 /* Try to find most common country for language */ | |
145 | |
146 if (llen != 2) | |
147 return(NULL); | |
148 | |
149 region = (char *)xsltDefaultRegion((xmlChar *)localeName); | |
150 if (region == NULL) | |
151 return(NULL); | |
152 | |
153 q = localeName + llen; | |
154 *q++ = '_'; | |
155 *q++ = region[0]; | |
156 *q++ = region[1]; | |
157 memcpy(q, ".utf8", 6); | |
158 locale = newlocale(LC_COLLATE_MASK, localeName, NULL); | |
159 | |
160 return(locale); | |
161 #endif | |
162 | |
163 #ifdef XSLT_LOCALE_WINAPI | |
164 { | |
165 xsltLocale locale = (xsltLocale)0; | |
166 xmlChar localeName[XSLTMAX_LANGTAGLEN+1]; | |
167 xmlChar *q = localeName; | |
168 const xmlChar *p = languageTag; | |
169 int i, llen; | |
170 const xmlChar *region = NULL; | |
171 | |
172 if (languageTag == NULL) goto end; | |
173 | |
174 xsltEnumSupportedLocales(); | |
175 | |
176 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i) | |
177 *q++ = TOLOWER(*p++); | |
178 if (i == 0) goto end; | |
179 | |
180 llen = i; | |
181 *q++ = '-'; | |
182 if (*p) { /*if country tag is given*/ | |
183 if (*p++ != '-') goto end; | |
184 | |
185 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i) | |
186 *q++ = TOUPPER(*p++); | |
187 if (i == 0 || *p) goto end; | |
188 | |
189 *q = '\0'; | |
190 locale = xslt_locale_WINAPI(localeName); | |
191 if (locale != (xsltLocale)0) goto end; | |
192 } | |
193 /* Try to find most common country for language */ | |
194 region = xsltDefaultRegion(localeName); | |
195 if (region == NULL) goto end; | |
196 | |
197 strcpy(localeName + llen + 1, region); | |
198 locale = xslt_locale_WINAPI(localeName); | |
199 end: | |
200 return(locale); | |
201 } | |
202 #endif | |
203 | |
204 #ifdef XSLT_LOCALE_NONE | |
205 return(NULL); | |
206 #endif | |
207 } | |
208 | |
/*
 * xsltDefaultRegion:
 * @localeName:  string starting with a lower-case two-letter language code
 *
 * Maps a language to the country used when a tag names no (usable) country.
 * Only the first two characters of @localeName are examined.
 *
 * Returns a pointer to a static, null-terminated two-letter country code,
 * or NULL if @localeName is NULL, empty, or the language has no entry.
 */
static const xmlChar*
xsltDefaultRegion(const xmlChar *localeName) {
    /*
     * This is based on the locales from glibc 2.3.3 (which is why "sh"
     * still maps to "YU"). Croatian ("hr") maps to "HR"; the previous
     * value "HT" is the code of Haiti.
     * Plain char is used for the table because string literals are char.
     */
    static const struct {
        char lang[3];
        char region[3];
    } defaults[] = {
        {"aa", "ET"}, {"am", "ET"}, {"af", "ZA"}, {"an", "ES"},
        {"ar", "AE"}, {"az", "AZ"},
        {"be", "BY"}, {"bg", "BG"}, {"bn", "BD"}, {"br", "FR"},
        {"bs", "BA"},
        {"ca", "ES"}, {"cs", "CZ"}, {"cy", "GB"},
        {"da", "DK"}, {"de", "DE"},
        {"el", "GR"}, {"en", "US"}, {"eo", "US"}, {"es", "ES"},
        {"eu", "ES"}, {"et", "EE"},
        {"fa", "IR"}, {"fi", "FI"}, {"fo", "FO"}, {"fr", "FR"},
        {"ga", "IE"}, {"gl", "ES"}, {"gv", "GB"},
        {"he", "IL"}, {"hi", "IN"}, {"hr", "HR"}, {"hu", "HU"},
        {"id", "ID"}, {"is", "IS"}, {"it", "IT"}, {"iw", "IL"},
        {"ja", "JP"},
        {"kl", "GL"}, {"ko", "KR"}, {"kw", "GB"},
        {"lt", "LT"}, {"lv", "LV"},
        {"mk", "MK"}, {"ml", "IN"}, {"mr", "IN"}, {"mn", "MN"},
        {"ms", "MY"}, {"mt", "MT"},
        {"nb", "NO"}, {"nn", "NO"}, {"no", "NO"}, {"ne", "NP"},
        {"nl", "NL"},
        {"om", "ET"},
        {"pa", "IN"}, {"pl", "PL"}, {"pt", "PT"},
        {"ro", "RO"}, {"ru", "RU"},
        {"se", "NO"}, {"sh", "YU"}, {"sk", "SK"}, {"sl", "SI"},
        {"so", "ET"}, {"sq", "AL"}, {"st", "ZA"}, {"sv", "SE"},
        {"ta", "IN"}, {"te", "IN"}, {"th", "TH"}, {"ti", "ER"},
        {"tr", "TR"}, {"tt", "RU"},
        {"uk", "UA"}, {"ur", "PK"},
        {"vi", "VN"},
        {"wa", "BE"},
        {"xh", "ZA"},
        {"zh", "CN"}, {"zu", "ZA"}
    };
    size_t i;

    /* Never read the second character of a missing or empty string. */
    if ((localeName == NULL) || (localeName[0] == 0))
        return(NULL);

    for (i = 0; i < sizeof(defaults) / sizeof(defaults[0]); i++) {
        if ((defaults[i].lang[0] == localeName[0]) &&
            (defaults[i].lang[1] == localeName[1]))
            return((const xmlChar *)defaults[i].region);
    }
    return(NULL);
}
346 | |
347 /** | |
348 * xsltFreeLocale: | |
349 * @locale: the locale to free | |
350 * | |
351 * Frees a locale created with xsltNewLocale | |
352 */ | |
353 void | |
354 xsltFreeLocale(xsltLocale locale) { | |
355 #ifdef XSLT_LOCALE_XLOCALE | |
356 freelocale(locale); | |
357 #endif | |
358 } | |
359 | |
360 /** | |
361 * xsltStrxfrm: | |
362 * @locale: locale created with xsltNewLocale | |
363 * @string: UTF-8 string to transform | |
364 * | |
365 * Transforms a string according to locale. The transformed string must then be | |
366 * compared with xsltLocaleStrcmp and freed with xmlFree. | |
367 * | |
368 * Returns the transformed string or NULL on error | |
369 */ | |
370 xsltLocaleChar * | |
371 xsltStrxfrm(xsltLocale locale, const xmlChar *string) | |
372 { | |
373 #ifdef XSLT_LOCALE_NONE | |
374 return(NULL); | |
375 #else | |
376 size_t xstrlen, r; | |
377 xsltLocaleChar *xstr; | |
378 | |
379 #ifdef XSLT_LOCALE_XLOCALE | |
380 xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1; | |
381 xstr = (xsltLocaleChar *) xmlMalloc(xstrlen); | |
382 if (xstr == NULL) { | |
383 xsltTransformError(NULL, NULL, NULL, | |
384 "xsltStrxfrm : out of memory error\n"); | |
385 return(NULL); | |
386 } | |
387 | |
388 r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale); | |
389 #endif | |
390 | |
391 #ifdef XSLT_LOCALE_WINAPI | |
392 xstrlen = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0); | |
393 if (xstrlen == 0) { | |
394 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar
check failed\n"); | |
395 return(NULL); | |
396 } | |
397 xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar)); | |
398 if (xstr == NULL) { | |
399 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n"); | |
400 return(NULL); | |
401 } | |
402 r = MultiByteToWideChar(CP_UTF8, 0, string, -1, xstr, xstrlen); | |
403 if (r == 0) { | |
404 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar
failed\n"); | |
405 xmlFree(xstr); | |
406 return(NULL); | |
407 } | |
408 return(xstr); | |
409 #endif /* XSLT_LOCALE_WINAPI */ | |
410 | |
411 if (r >= xstrlen) { | |
412 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n"); | |
413 xmlFree(xstr); | |
414 return(NULL); | |
415 } | |
416 | |
417 return(xstr); | |
418 #endif /* XSLT_LOCALE_NONE */ | |
419 } | |
420 | |
421 /** | |
422 * xsltLocaleStrcmp: | |
423 * @locale: a locale identifier | |
424 * @str1: a string transformed with xsltStrxfrm | |
425 * @str2: a string transformed with xsltStrxfrm | |
426 * | |
427 * Compares two strings transformed with xsltStrxfrm | |
428 * | |
429 * Returns a value < 0 if str1 sorts before str2, | |
430 * a value > 0 if str1 sorts after str2, | |
431 * 0 if str1 and str2 are equal wrt sorting | |
432 */ | |
433 int | |
434 xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocale
Char *str2) { | |
435 (void)locale; | |
436 #ifdef XSLT_LOCALE_WINAPI | |
437 { | |
438 int ret; | |
439 if (str1 == str2) return(0); | |
440 if (str1 == NULL) return(-1); | |
441 if (str2 == NULL) return(1); | |
442 ret = CompareStringW(locale, 0, str1, -1, str2, -1); | |
443 if (ret == 0) { | |
444 xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW
fail\n"); | |
445 return(0); | |
446 } | |
447 return(ret - 2); | |
448 } | |
449 #else | |
450 return(xmlStrcmp(str1, str2)); | |
451 #endif | |
452 } | |
453 | |
454 #ifdef XSLT_LOCALE_WINAPI | |
455 /** | |
456 * xsltCountSupportedLocales: | |
457 * @lcid: not used | |
458 * | |
459 * callback used to count locales | |
460 * | |
461 * Returns TRUE | |
462 */ | |
463 BOOL CALLBACK | |
464 xsltCountSupportedLocales(LPSTR lcid) { | |
465 (void) lcid; | |
466 ++xsltLocaleListSize; | |
467 return(TRUE); | |
468 } | |
469 | |
470 /** | |
471 * xsltIterateSupportedLocales: | |
472 * @lcid: not used | |
473 * | |
474 * callback used to track locales | |
475 * | |
476 * Returns TRUE if not at the end of the array | |
477 */ | |
478 BOOL CALLBACK | |
479 xsltIterateSupportedLocales(LPSTR lcid) { | |
480 static int count = 0; | |
481 xmlChar iso639lang [XSLTMAX_ISO639LANGLEN +1]; | |
482 xmlChar iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1]; | |
483 int k, l; | |
484 xsltRFC1766Info *p = xsltLocaleList + count; | |
485 | |
486 k = sscanf(lcid, "%lx", (long*)&p->lcid); | |
487 if (k < 1) goto end; | |
488 /*don't count terminating null character*/ | |
489 k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME , iso639lang , sizeof(iso
639lang )); | |
490 if (--k < 1) goto end; | |
491 l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME, iso3136ctry, sizeof(iso
3136ctry)); | |
492 if (--l < 1) goto end; | |
493 | |
494 { /*fill results*/ | |
495 xmlChar *q = p->tag; | |
496 memcpy(q, iso639lang, k); | |
497 q += k; | |
498 *q++ = '-'; | |
499 memcpy(q, iso3136ctry, l); | |
500 q += l; | |
501 *q = '\0'; | |
502 } | |
503 ++count; | |
504 end: | |
505 return((count < xsltLocaleListSize) ? TRUE : FALSE); | |
506 } | |
507 | |
508 | |
509 static void | |
510 xsltEnumSupportedLocales(void) { | |
511 xmlRMutexLock(xsltLocaleMutex); | |
512 if (xsltLocaleListSize <= 0) { | |
513 size_t len; | |
514 | |
515 EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED); | |
516 | |
517 len = xsltLocaleListSize * sizeof(xsltRFC1766Info); | |
518 xsltLocaleList = xmlMalloc(len); | |
519 memset(xsltLocaleList, 0, len); | |
520 EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED); | |
521 } | |
522 xmlRMutexUnlock(xsltLocaleMutex); | |
523 } | |
524 | |
525 #endif /*def XSLT_LOCALE_WINAPI*/ | |
OLD | NEW |