Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(541)

Side by Side Diff: icu46/source/common/uloc_tag.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/
Patch Set: Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « icu46/source/common/uloc.c ('k') | icu46/source/common/ulocimp.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 /*
2 **********************************************************************
3 * Copyright (C) 2009-2010, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7
8 #include "unicode/utypes.h"
9 #include "unicode/ures.h"
10 #include "unicode/putil.h"
11 #include "unicode/uloc.h"
12 #include "ustr_imp.h"
13 #include "cmemory.h"
14 #include "cstring.h"
15 #include "putilimp.h"
16 #include "uinvchar.h"
17
/*
 * One node of a singly linked list of variant subtags.
 * `variant` points at the subtag text; `next` is the following node,
 * or NULL at the end of the list.
 */
typedef struct VariantListEntry {
    const char              *variant;
    struct VariantListEntry *next;
} VariantListEntry;
23
/*
 * One node of a singly linked list of extensions.
 * Each node carries a key/value pair of string pointers; `next` is the
 * following node, or NULL at the end of the list.
 */
typedef struct ExtensionListEntry {
    const char                *key;
    const char                *value;
    struct ExtensionListEntry *next;
} ExtensionListEntry;
30
31 #define MAXEXTLANG 3
32 typedef struct ULanguageTag {
33 char *buf; /* holding parsed subtags */
34 const char *language;
35 const char *extlang[MAXEXTLANG];
36 const char *script;
37 const char *region;
38 VariantListEntry *variants;
39 ExtensionListEntry *extensions;
40 const char *privateuse;
41 const char *grandfathered;
42 } ULanguageTag;
43
#define MINLEN 2

/* Characters used on the language tag side. */
#define SEP         '-'     /* subtag separator */
#define PRIVATEUSE  'x'     /* private use singleton */
#define LDMLEXT     'u'     /* singleton used when ordering LDML keys among extensions */

/* Characters used on the locale ID side. */
#define LOCALE_SEP            '_'
#define LOCALE_EXT_SEP        '@'
#define LOCALE_KEYWORD_SEP    ';'
#define LOCALE_KEY_TYPE_SEP   '='

/* ASCII-only character classification (no locale dependency). */
#define ISALPHA(c)   (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
#define ISNUMERIC(c) ((c) >= '0' && (c) <= '9')

/* Shared string constants. */
static const char* EMPTY = "";
static const char* LANG_UND = "und";
static const char* PRIVATEUSE_KEY = "x";
static const char* _POSIX = "_POSIX";
static const char* POSIX_KEY = "va";
static const char* POSIX_VALUE = "posix";

/* Length of LANG_UND, excluding the terminating NUL. */
#define LANG_UND_LEN 3
65
/*
 * Grandfathered tags, stored as consecutive pairs:
 *   [2n]   grandfathered tag
 *   [2n+1] preferred replacement ("" when none is listed)
 * The table ends with a NULL, NULL pair.
 */
static const char* GRANDFATHERED[] = {
/*  grandfathered       preferred */
    "art-lojban",       "jbo",
    "cel-gaulish",      "",
    "en-GB-oed",        "",
    "i-ami",            "ami",
    "i-bnn",            "bnn",
    "i-default",        "",
    "i-enochian",       "",
    "i-hak",            "hak",
    "i-klingon",        "tlh",
    "i-lux",            "lb",
    "i-mingo",          "",
    "i-navajo",         "nv",
    "i-pwn",            "pwn",
    "i-tao",            "tao",
    "i-tay",            "tay",
    "i-tsu",            "tsu",
    "no-bok",           "nb",
    "no-nyn",           "nn",
    "sgn-be-fr",        "sfb",
    "sgn-be-nl",        "vgt",
    "sgn-ch-de",        "sgg",
    "zh-guoyu",         "cmn",
    "zh-hakka",         "hak",
    "zh-min",           "",
    "zh-min-nan",       "nan",
    "zh-xiang",         "hsn",
    NULL,               NULL
};
96
/*
 * Deprecated language codes, stored as consecutive pairs:
 *   [2n]   deprecated code
 *   [2n+1] its replacement
 * The table ends with a NULL, NULL pair.
 */
static const char* DEPRECATEDLANGS[] = {
/*  deprecated  new */
    "iw",       "he",
    "ji",       "yi",
    "in",       "id",
    NULL,       NULL
};
104
105 /*
106 * -------------------------------------------------
107 *
108 * These ultag_ functions may be exposed as APIs later
109 *
110 * -------------------------------------------------
111 */
112
113 static ULanguageTag*
114 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta tus);
115
116 static void
117 ultag_close(ULanguageTag* langtag);
118
119 static const char*
120 ultag_getLanguage(const ULanguageTag* langtag);
121
122 #if 0
123 static const char*
124 ultag_getJDKLanguage(const ULanguageTag* langtag);
125 #endif
126
127 static const char*
128 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
129
130 static int32_t
131 ultag_getExtlangSize(const ULanguageTag* langtag);
132
133 static const char*
134 ultag_getScript(const ULanguageTag* langtag);
135
136 static const char*
137 ultag_getRegion(const ULanguageTag* langtag);
138
139 static const char*
140 ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
141
142 static int32_t
143 ultag_getVariantsSize(const ULanguageTag* langtag);
144
145 static const char*
146 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
147
148 static const char*
149 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
150
151 static int32_t
152 ultag_getExtensionsSize(const ULanguageTag* langtag);
153
154 static const char*
155 ultag_getPrivateUse(const ULanguageTag* langtag);
156
157 #if 0
158 static const char*
159 ultag_getGrandfathered(const ULanguageTag* langtag);
160 #endif
161
162 /*
163 * -------------------------------------------------
164 *
165 * Language subtag syntax validation functions
166 *
167 * -------------------------------------------------
168 */
169
170 static UBool
171 _isAlphaString(const char* s, int32_t len) {
172 int32_t i;
173 for (i = 0; i < len; i++) {
174 if (!ISALPHA(*(s + i))) {
175 return FALSE;
176 }
177 }
178 return TRUE;
179 }
180
181 static UBool
182 _isNumericString(const char* s, int32_t len) {
183 int32_t i;
184 for (i = 0; i < len; i++) {
185 if (!ISNUMERIC(*(s + i))) {
186 return FALSE;
187 }
188 }
189 return TRUE;
190 }
191
192 static UBool
193 _isAlphaNumericString(const char* s, int32_t len) {
194 int32_t i;
195 for (i = 0; i < len; i++) {
196 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) {
197 return FALSE;
198 }
199 }
200 return TRUE;
201 }
202
203 static UBool
204 _isLanguageSubtag(const char* s, int32_t len) {
205 /*
206 * language = 2*3ALPHA ; shortest ISO 639 code
207 * ["-" extlang] ; sometimes followed by
208 * ; extended language subtags
209 * / 4ALPHA ; or reserved for future use
210 * / 5*8ALPHA ; or registered language subtag
211 */
212 if (len < 0) {
213 len = (int32_t)uprv_strlen(s);
214 }
215 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) {
216 return TRUE;
217 }
218 return FALSE;
219 }
220
221 static UBool
222 _isExtlangSubtag(const char* s, int32_t len) {
223 /*
224 * extlang = 3ALPHA ; selected ISO 639 codes
225 * *2("-" 3ALPHA) ; permanently reserved
226 */
227 if (len < 0) {
228 len = (int32_t)uprv_strlen(s);
229 }
230 if (len == 3 && _isAlphaString(s, len)) {
231 return TRUE;
232 }
233 return FALSE;
234 }
235
236 static UBool
237 _isScriptSubtag(const char* s, int32_t len) {
238 /*
239 * script = 4ALPHA ; ISO 15924 code
240 */
241 if (len < 0) {
242 len = (int32_t)uprv_strlen(s);
243 }
244 if (len == 4 && _isAlphaString(s, len)) {
245 return TRUE;
246 }
247 return FALSE;
248 }
249
250 static UBool
251 _isRegionSubtag(const char* s, int32_t len) {
252 /*
253 * region = 2ALPHA ; ISO 3166-1 code
254 * / 3DIGIT ; UN M.49 code
255 */
256 if (len < 0) {
257 len = (int32_t)uprv_strlen(s);
258 }
259 if (len == 2 && _isAlphaString(s, len)) {
260 return TRUE;
261 }
262 if (len == 3 && _isNumericString(s, len)) {
263 return TRUE;
264 }
265 return FALSE;
266 }
267
268 static UBool
269 _isVariantSubtag(const char* s, int32_t len) {
270 /*
271 * variant = 5*8alphanum ; registered variants
272 * / (DIGIT 3alphanum)
273 */
274 if (len < 0) {
275 len = (int32_t)uprv_strlen(s);
276 }
277 if (len >= 5 && len <= 8 && _isAlphaString(s, len)) {
278 return TRUE;
279 }
280 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
281 return TRUE;
282 }
283 return FALSE;
284 }
285
286 static UBool
287 _isExtensionSingleton(const char* s, int32_t len) {
288 /*
289 * extension = singleton 1*("-" (2*8alphanum))
290 */
291 if (len < 0) {
292 len = (int32_t)uprv_strlen(s);
293 }
294 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) {
295 return TRUE;
296 }
297 return FALSE;
298 }
299
300 static UBool
301 _isExtensionSubtag(const char* s, int32_t len) {
302 /*
303 * extension = singleton 1*("-" (2*8alphanum))
304 */
305 if (len < 0) {
306 len = (int32_t)uprv_strlen(s);
307 }
308 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) {
309 return TRUE;
310 }
311 return FALSE;
312 }
313
314 static UBool
315 _isExtensionSubtags(const char* s, int32_t len) {
316 const char *p = s;
317 const char *pSubtag = NULL;
318
319 if (len < 0) {
320 len = (int32_t)uprv_strlen(s);
321 }
322
323 while ((p - s) < len) {
324 if (*p == SEP) {
325 if (pSubtag == NULL) {
326 return FALSE;
327 }
328 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) {
329 return FALSE;
330 }
331 pSubtag = NULL;
332 } else if (pSubtag == NULL) {
333 pSubtag = p;
334 }
335 p++;
336 }
337 if (pSubtag == NULL) {
338 return FALSE;
339 }
340 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag));
341 }
342
343 static UBool
344 _isPrivateuseValueSubtag(const char* s, int32_t len) {
345 /*
346 * privateuse = "x" 1*("-" (1*8alphanum))
347 */
348 if (len < 0) {
349 len = (int32_t)uprv_strlen(s);
350 }
351 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) {
352 return TRUE;
353 }
354 return FALSE;
355 }
356
357 static UBool
358 _isPrivateuseValueSubtags(const char* s, int32_t len) {
359 const char *p = s;
360 const char *pSubtag = NULL;
361
362 if (len < 0) {
363 len = (int32_t)uprv_strlen(s);
364 }
365
366 while ((p - s) < len) {
367 if (*p == SEP) {
368 if (pSubtag == NULL) {
369 return FALSE;
370 }
371 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) {
372 return FALSE;
373 }
374 pSubtag = NULL;
375 } else if (pSubtag == NULL) {
376 pSubtag = p;
377 }
378 p++;
379 }
380 if (pSubtag == NULL) {
381 return FALSE;
382 }
383 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
384 }
385
386 static UBool
387 _isLDMLKey(const char* s, int32_t len) {
388 if (len < 0) {
389 len = (int32_t)uprv_strlen(s);
390 }
391 if (len == 2 && _isAlphaNumericString(s, len)) {
392 return TRUE;
393 }
394 return FALSE;
395 }
396
397 static UBool
398 _isLDMLType(const char* s, int32_t len) {
399 if (len < 0) {
400 len = (int32_t)uprv_strlen(s);
401 }
402 if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
403 return TRUE;
404 }
405 return FALSE;
406 }
407
408 /*
409 * -------------------------------------------------
410 *
411 * Helper functions
412 *
413 * -------------------------------------------------
414 */
415
416 static UBool
417 _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
418 UBool bAdded = TRUE;
419
420 if (*first == NULL) {
421 var->next = NULL;
422 *first = var;
423 } else {
424 VariantListEntry *prev, *cur;
425 int32_t cmp;
426
427 /* reorder variants in alphabetical order */
428 prev = NULL;
429 cur = *first;
430 while (TRUE) {
431 if (cur == NULL) {
432 prev->next = var;
433 var->next = NULL;
434 break;
435 }
436 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
437 if (cmp < 0) {
438 if (prev == NULL) {
439 *first = var;
440 } else {
441 prev->next = var;
442 }
443 var->next = cur;
444 break;
445 }
446 if (cmp == 0) {
447 /* duplicated variant */
448 bAdded = FALSE;
449 break;
450 }
451 prev = cur;
452 cur = cur->next;
453 }
454 }
455
456 return bAdded;
457 }
458
459
460 static UBool
461 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool l ocaleToBCP) {
462 UBool bAdded = TRUE;
463
464 if (*first == NULL) {
465 ext->next = NULL;
466 *first = ext;
467 } else {
468 ExtensionListEntry *prev, *cur;
469 int32_t cmp;
470
471 /* reorder variants in alphabetical order */
472 prev = NULL;
473 cur = *first;
474 while (TRUE) {
475 if (cur == NULL) {
476 prev->next = ext;
477 ext->next = NULL;
478 break;
479 }
480 if (localeToBCP) {
481 /* special handling for locale to bcp conversion */
482 int32_t len, curlen;
483
484 len = (int32_t)uprv_strlen(ext->key);
485 curlen = (int32_t)uprv_strlen(cur->key);
486
487 if (len == 1 && curlen == 1) {
488 if (*(ext->key) == *(cur->key)) {
489 cmp = 0;
490 } else if (*(ext->key) == PRIVATEUSE) {
491 cmp = 1;
492 } else if (*(cur->key) == PRIVATEUSE) {
493 cmp = -1;
494 } else {
495 cmp = *(ext->key) - *(cur->key);
496 }
497 } else if (len == 1) {
498 cmp = *(ext->key) - LDMLEXT;
499 } else if (curlen == 1) {
500 cmp = LDMLEXT - *(cur->key);
501 } else {
502 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
503 }
504 } else {
505 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
506 }
507 if (cmp < 0) {
508 if (prev == NULL) {
509 *first = ext;
510 } else {
511 prev->next = ext;
512 }
513 ext->next = cur;
514 break;
515 }
516 if (cmp == 0) {
517 /* duplicated extension key */
518 bAdded = FALSE;
519 break;
520 }
521 prev = cur;
522 cur = cur->next;
523 }
524 }
525
526 return bAdded;
527 }
528
529 static void
530 _initializeULanguageTag(ULanguageTag* langtag) {
531 int32_t i;
532
533 langtag->buf = NULL;
534
535 langtag->language = EMPTY;
536 for (i = 0; i < MAXEXTLANG; i++) {
537 langtag->extlang[i] = NULL;
538 }
539
540 langtag->script = EMPTY;
541 langtag->region = EMPTY;
542
543 langtag->variants = NULL;
544 langtag->extensions = NULL;
545
546 langtag->grandfathered = EMPTY;
547 langtag->privateuse = EMPTY;
548 }
549
/* Resource bundle name and the table keys looked up inside it. */
#define KEYTYPEDATA             "keyTypeData"
#define KEYMAP                  "keyMap"
#define TYPEMAP                 "typeMap"
#define TYPEALIAS               "typeAlias"

/* Sizes of the stack buffers used by the key/type conversion functions. */
#define MAX_BCP47_SUBTAG_LEN    9   /* including null terminator */
#define MAX_LDML_KEY_LEN        22
#define MAX_LDML_TYPE_LEN       32
557
558 static int32_t
559 _ldmlKeyToBCP47(const char* key, int32_t keyLen,
560 char* bcpKey, int32_t bcpKeyCapacity,
561 UErrorCode *status) {
562 UResourceBundle *rb;
563 char keyBuf[MAX_LDML_KEY_LEN];
564 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
565 int32_t resultLen = 0;
566 int32_t i;
567 UErrorCode tmpStatus = U_ZERO_ERROR;
568 const UChar *uBcpKey;
569 int32_t bcpKeyLen;
570
571 if (keyLen < 0) {
572 keyLen = (int32_t)uprv_strlen(key);
573 }
574
575 if (keyLen >= sizeof(keyBuf)) {
576 /* no known valid LDML key exceeding 21 */
577 *status = U_ILLEGAL_ARGUMENT_ERROR;
578 return 0;
579 }
580
581 uprv_memcpy(keyBuf, key, keyLen);
582 keyBuf[keyLen] = 0;
583
584 /* to lower case */
585 for (i = 0; i < keyLen; i++) {
586 keyBuf[i] = uprv_tolower(keyBuf[i]);
587 }
588
589 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
590 ures_getByKey(rb, KEYMAP, rb, status);
591
592 if (U_FAILURE(*status)) {
593 ures_close(rb);
594 return 0;
595 }
596
597 uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
598 if (U_SUCCESS(tmpStatus)) {
599 u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
600 bcpKeyBuf[bcpKeyLen] = 0;
601 resultLen = bcpKeyLen;
602 } else {
603 if (_isLDMLKey(key, keyLen)) {
604 uprv_memcpy(bcpKeyBuf, key, keyLen);
605 bcpKeyBuf[keyLen] = 0;
606 resultLen = keyLen;
607 } else {
608 /* mapping not availabe */
609 *status = U_ILLEGAL_ARGUMENT_ERROR;
610 }
611 }
612 ures_close(rb);
613
614 if (U_FAILURE(*status)) {
615 return 0;
616 }
617
618 uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
619 return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
620 }
621
622 static int32_t
623 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
624 char* key, int32_t keyCapacity,
625 UErrorCode *status) {
626 UResourceBundle *rb;
627 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
628 int32_t resultLen = 0;
629 int32_t i;
630 const char *resKey = NULL;
631 UResourceBundle *mapData;
632
633 if (bcpKeyLen < 0) {
634 bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
635 }
636
637 if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
638 *status = U_ILLEGAL_ARGUMENT_ERROR;
639 return 0;
640 }
641
642 uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
643 bcpKeyBuf[bcpKeyLen] = 0;
644
645 /* to lower case */
646 for (i = 0; i < bcpKeyLen; i++) {
647 bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
648 }
649
650 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
651 ures_getByKey(rb, KEYMAP, rb, status);
652 if (U_FAILURE(*status)) {
653 ures_close(rb);
654 return 0;
655 }
656
657 mapData = ures_getNextResource(rb, NULL, status);
658 while (U_SUCCESS(*status)) {
659 const UChar *uBcpKey;
660 char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
661 int32_t tmpBcpKeyLen;
662
663 uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
664 if (U_FAILURE(*status)) {
665 break;
666 }
667 u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
668 tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
669 if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) {
670 /* found a matching BCP47 key */
671 resKey = ures_getKey(mapData);
672 resultLen = (int32_t)uprv_strlen(resKey);
673 break;
674 }
675 if (!ures_hasNext(rb)) {
676 break;
677 }
678 ures_getNextResource(rb, mapData, status);
679 }
680 ures_close(mapData);
681 ures_close(rb);
682
683 if (U_FAILURE(*status)) {
684 return 0;
685 }
686
687 if (resKey == NULL) {
688 resKey = bcpKeyBuf;
689 resultLen = bcpKeyLen;
690 }
691
692 uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
693 return u_terminateChars(key, keyCapacity, resultLen, status);
694 }
695
696 static int32_t
697 _ldmlTypeToBCP47(const char* key, int32_t keyLen,
698 const char* type, int32_t typeLen,
699 char* bcpType, int32_t bcpTypeCapacity,
700 UErrorCode *status) {
701 UResourceBundle *rb, *keyTypeData, *typeMapForKey;
702 char keyBuf[MAX_LDML_KEY_LEN];
703 char typeBuf[MAX_LDML_TYPE_LEN];
704 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
705 int32_t resultLen = 0;
706 int32_t i;
707 UErrorCode tmpStatus = U_ZERO_ERROR;
708 const UChar *uBcpType, *uCanonicalType;
709 int32_t bcpTypeLen, canonicalTypeLen;
710 UBool isTimezone = FALSE;
711
712 if (keyLen < 0) {
713 keyLen = (int32_t)uprv_strlen(key);
714 }
715 if (keyLen >= sizeof(keyBuf)) {
716 /* no known valid LDML key exceeding 21 */
717 *status = U_ILLEGAL_ARGUMENT_ERROR;
718 return 0;
719 }
720 uprv_memcpy(keyBuf, key, keyLen);
721 keyBuf[keyLen] = 0;
722
723 /* to lower case */
724 for (i = 0; i < keyLen; i++) {
725 keyBuf[i] = uprv_tolower(keyBuf[i]);
726 }
727 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
728 isTimezone = TRUE;
729 }
730
731 if (typeLen < 0) {
732 typeLen = (int32_t)uprv_strlen(type);
733 }
734 if (typeLen >= sizeof(typeBuf)) {
735 *status = U_ILLEGAL_ARGUMENT_ERROR;
736 return 0;
737 }
738
739 if (isTimezone) {
740 /* replace '/' with ':' */
741 for (i = 0; i < typeLen; i++) {
742 if (*(type + i) == '/') {
743 typeBuf[i] = ':';
744 } else {
745 typeBuf[i] = *(type + i);
746 }
747 }
748 typeBuf[typeLen] = 0;
749 type = &typeBuf[0];
750 }
751
752 keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
753 rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
754 if (U_FAILURE(*status)) {
755 ures_close(rb);
756 ures_close(keyTypeData);
757 return 0;
758 }
759
760 typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
761 uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus) ;
762 if (U_SUCCESS(tmpStatus)) {
763 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
764 resultLen = bcpTypeLen;
765 } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
766 /* is this type alias? */
767 tmpStatus = U_ZERO_ERROR;
768 ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
769 ures_getByKey(rb, keyBuf, rb, &tmpStatus);
770 uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpSt atus);
771 if (U_SUCCESS(tmpStatus)) {
772 u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
773 if (isTimezone) {
774 /* replace '/' with ':' */
775 for (i = 0; i < canonicalTypeLen; i++) {
776 if (typeBuf[i] == '/') {
777 typeBuf[i] = ':';
778 }
779 }
780 }
781 typeBuf[canonicalTypeLen] = 0;
782
783 /* look up the canonical type */
784 uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
785 if (U_SUCCESS(tmpStatus)) {
786 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
787 resultLen = bcpTypeLen;
788 }
789 }
790 if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
791 if (_isLDMLType(type, typeLen)) {
792 uprv_memcpy(bcpTypeBuf, type, typeLen);
793 resultLen = typeLen;
794 } else {
795 /* mapping not availabe */
796 *status = U_ILLEGAL_ARGUMENT_ERROR;
797 }
798 }
799 } else {
800 *status = tmpStatus;
801 }
802 ures_close(rb);
803 ures_close(typeMapForKey);
804 ures_close(keyTypeData);
805
806 if (U_FAILURE(*status)) {
807 return 0;
808 }
809
810 uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
811 return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
812 }
813
814 static int32_t
815 _bcp47ToLDMLType(const char* key, int32_t keyLen,
816 const char* bcpType, int32_t bcpTypeLen,
817 char* type, int32_t typeCapacity,
818 UErrorCode *status) {
819 UResourceBundle *rb;
820 char keyBuf[MAX_LDML_KEY_LEN];
821 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
822 int32_t resultLen = 0;
823 int32_t i;
824 const char *resType = NULL;
825 UResourceBundle *mapData;
826 UErrorCode tmpStatus = U_ZERO_ERROR;
827 int32_t copyLen;
828
829 if (keyLen < 0) {
830 keyLen = (int32_t)uprv_strlen(key);
831 }
832
833 if (keyLen >= sizeof(keyBuf)) {
834 /* no known valid LDML key exceeding 21 */
835 *status = U_ILLEGAL_ARGUMENT_ERROR;
836 return 0;
837 }
838 uprv_memcpy(keyBuf, key, keyLen);
839 keyBuf[keyLen] = 0;
840
841 /* to lower case */
842 for (i = 0; i < keyLen; i++) {
843 keyBuf[i] = uprv_tolower(keyBuf[i]);
844 }
845
846
847 if (bcpTypeLen < 0) {
848 bcpTypeLen = (int32_t)uprv_strlen(bcpType);
849 }
850
851 if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
852 *status = U_ILLEGAL_ARGUMENT_ERROR;
853 return 0;
854 }
855
856 uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
857 bcpTypeBuf[bcpTypeLen] = 0;
858
859 /* to lower case */
860 for (i = 0; i < bcpTypeLen; i++) {
861 bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
862 }
863
864 rb = ures_openDirect(NULL, KEYTYPEDATA, status);
865 ures_getByKey(rb, TYPEMAP, rb, status);
866 if (U_FAILURE(*status)) {
867 ures_close(rb);
868 return 0;
869 }
870
871 ures_getByKey(rb, keyBuf, rb, &tmpStatus);
872 mapData = ures_getNextResource(rb, NULL, &tmpStatus);
873 while (U_SUCCESS(tmpStatus)) {
874 const UChar *uBcpType;
875 char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
876 int32_t tmpBcpTypeLen;
877
878 uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
879 if (U_FAILURE(tmpStatus)) {
880 break;
881 }
882 u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
883 tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
884 if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) {
885 /* found a matching BCP47 type */
886 resType = ures_getKey(mapData);
887 resultLen = (int32_t)uprv_strlen(resType);
888 break;
889 }
890 if (!ures_hasNext(rb)) {
891 break;
892 }
893 ures_getNextResource(rb, mapData, &tmpStatus);
894 }
895 ures_close(mapData);
896 ures_close(rb);
897
898 if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
899 *status = tmpStatus;
900 return 0;
901 }
902
903 if (resType == NULL) {
904 resType = bcpTypeBuf;
905 resultLen = bcpTypeLen;
906 }
907
908 copyLen = uprv_min(resultLen, typeCapacity);
909 uprv_memcpy(type, resType, copyLen);
910
911 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
912 for (i = 0; i < copyLen; i++) {
913 if (*(type + i) == ':') {
914 *(type + i) = '/';
915 }
916 }
917 }
918
919 return u_terminateChars(type, typeCapacity, resultLen, status);
920 }
921
922 static int32_t
923 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capac ity, UBool strict, UErrorCode* status) {
924 char buf[ULOC_LANG_CAPACITY];
925 UErrorCode tmpStatus = U_ZERO_ERROR;
926 int32_t len, i;
927 int32_t reslen = 0;
928
929 if (U_FAILURE(*status)) {
930 return 0;
931 }
932
933 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
934 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
935 if (strict) {
936 *status = U_ILLEGAL_ARGUMENT_ERROR;
937 return 0;
938 }
939 len = 0;
940 }
941
942 /* Note: returned language code is in lower case letters */
943
944 if (len == 0) {
945 if (reslen < capacity) {
946 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capa city - reslen));
947 }
948 reslen += LANG_UND_LEN;
949 } else if (!_isLanguageSubtag(buf, len)) {
950 /* invalid language code */
951 if (strict) {
952 *status = U_ILLEGAL_ARGUMENT_ERROR;
953 return 0;
954 }
955 if (reslen < capacity) {
956 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capa city - reslen));
957 }
958 reslen += LANG_UND_LEN;
959 } else {
960 /* resolve deprecated */
961 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
962 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
963 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
964 len = (int32_t)uprv_strlen(buf);
965 break;
966 }
967 }
968 if (reslen < capacity) {
969 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen) );
970 }
971 reslen += len;
972 }
973 u_terminateChars(appendAt, capacity, reslen, status);
974 return reslen;
975 }
976
977 static int32_t
978 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacit y, UBool strict, UErrorCode* status) {
979 char buf[ULOC_SCRIPT_CAPACITY];
980 UErrorCode tmpStatus = U_ZERO_ERROR;
981 int32_t len;
982 int32_t reslen = 0;
983
984 if (U_FAILURE(*status)) {
985 return 0;
986 }
987
988 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
989 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
990 if (strict) {
991 *status = U_ILLEGAL_ARGUMENT_ERROR;
992 }
993 return 0;
994 }
995
996 if (len > 0) {
997 if (!_isScriptSubtag(buf, len)) {
998 /* invalid script code */
999 if (strict) {
1000 *status = U_ILLEGAL_ARGUMENT_ERROR;
1001 }
1002 return 0;
1003 } else {
1004 if (reslen < capacity) {
1005 *(appendAt + reslen) = SEP;
1006 }
1007 reslen++;
1008
1009 if (reslen < capacity) {
1010 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - res len));
1011 }
1012 reslen += len;
1013 }
1014 }
1015 u_terminateChars(appendAt, capacity, reslen, status);
1016 return reslen;
1017 }
1018
1019 static int32_t
1020 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacit y, UBool strict, UErrorCode* status) {
1021 char buf[ULOC_COUNTRY_CAPACITY];
1022 UErrorCode tmpStatus = U_ZERO_ERROR;
1023 int32_t len;
1024 int32_t reslen = 0;
1025
1026 if (U_FAILURE(*status)) {
1027 return 0;
1028 }
1029
1030 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
1031 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1032 if (strict) {
1033 *status = U_ILLEGAL_ARGUMENT_ERROR;
1034 }
1035 return 0;
1036 }
1037
1038 if (len > 0) {
1039 if (!_isRegionSubtag(buf, len)) {
1040 /* invalid region code */
1041 if (strict) {
1042 *status = U_ILLEGAL_ARGUMENT_ERROR;
1043 }
1044 return 0;
1045 } else {
1046 if (reslen < capacity) {
1047 *(appendAt + reslen) = SEP;
1048 }
1049 reslen++;
1050
1051 if (reslen < capacity) {
1052 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - res len));
1053 }
1054 reslen += len;
1055 }
1056 }
1057 u_terminateChars(appendAt, capacity, reslen, status);
1058 return reslen;
1059 }
1060
1061 static int32_t
1062 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac ity, UBool strict, UBool *hadPosix, UErrorCode* status) {
1063 char buf[ULOC_FULLNAME_CAPACITY];
1064 UErrorCode tmpStatus = U_ZERO_ERROR;
1065 int32_t len, i;
1066 int32_t reslen = 0;
1067
1068 if (U_FAILURE(*status)) {
1069 return 0;
1070 }
1071
1072 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
1073 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
1074 if (strict) {
1075 *status = U_ILLEGAL_ARGUMENT_ERROR;
1076 }
1077 return 0;
1078 }
1079
1080 if (len > 0) {
1081 char *p, *pVar;
1082 UBool bNext = TRUE;
1083 VariantListEntry *var;
1084 VariantListEntry *varFirst = NULL;
1085
1086 pVar = NULL;
1087 p = buf;
1088 while (bNext) {
1089 if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
1090 if (*p == 0) {
1091 bNext = FALSE;
1092 } else {
1093 *p = 0; /* terminate */
1094 }
1095 if (pVar == NULL) {
1096 if (strict) {
1097 *status = U_ILLEGAL_ARGUMENT_ERROR;
1098 break;
1099 }
1100 /* ignore empty variant */
1101 } else {
1102 /* ICU uses upper case letters for variants, but
1103 the canonical format is lowercase in BCP47 */
1104 for (i = 0; *(pVar + i) != 0; i++) {
1105 *(pVar + i) = uprv_tolower(*(pVar + i));
1106 }
1107
1108 /* validate */
1109 if (_isVariantSubtag(pVar, -1)) {
1110 if (uprv_strcmp(pVar,POSIX_VALUE)) {
1111 /* emit the variant to the list */
1112 var = uprv_malloc(sizeof(VariantListEntry));
1113 if (var == NULL) {
1114 *status = U_MEMORY_ALLOCATION_ERROR;
1115 break;
1116 }
1117 var->variant = pVar;
1118 if (!_addVariantToList(&varFirst, var)) {
1119 /* duplicated variant */
1120 uprv_free(var);
1121 if (strict) {
1122 *status = U_ILLEGAL_ARGUMENT_ERROR;
1123 break;
1124 }
1125 }
1126 } else {
1127 /* Special handling for POSIX variant, need to remem ber that we had it and then */
1128 /* treat it like an extension later. */
1129 *hadPosix = TRUE;
1130 }
1131 } else if (strict) {
1132 *status = U_ILLEGAL_ARGUMENT_ERROR;
1133 break;
1134 }
1135 }
1136 /* reset variant starting position */
1137 pVar = NULL;
1138 } else if (pVar == NULL) {
1139 pVar = p;
1140 }
1141 p++;
1142 }
1143
1144 if (U_SUCCESS(*status)) {
1145 if (varFirst != NULL) {
1146 int32_t varLen;
1147
1148 /* write out sorted/validated/normalized variants to the target */
1149 var = varFirst;
1150 while (var != NULL) {
1151 if (reslen < capacity) {
1152 *(appendAt + reslen) = SEP;
1153 }
1154 reslen++;
1155 varLen = (int32_t)uprv_strlen(var->variant);
1156 if (reslen < capacity) {
1157 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(va rLen, capacity - reslen));
1158 }
1159 reslen += varLen;
1160 var = var->next;
1161 }
1162 }
1163 }
1164
1165 /* clean up */
1166 var = varFirst;
1167 while (var != NULL) {
1168 VariantListEntry *tmpVar = var->next;
1169 uprv_free(var);
1170 var = tmpVar;
1171 }
1172
1173 if (U_FAILURE(*status)) {
1174 return 0;
1175 }
1176 }
1177
1178 u_terminateChars(appendAt, capacity, reslen, status);
1179 return reslen;
1180 }
1181
1182 static int32_t
1183 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac ity, UBool strict, UBool hadPosix, UErrorCode* status) {
1184 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1185 UEnumeration *keywordEnum = NULL;
1186 int32_t reslen = 0;
1187
1188 keywordEnum = uloc_openKeywords(localeID, status);
1189 if (U_FAILURE(*status) && !hadPosix) {
1190 uenum_close(keywordEnum);
1191 return 0;
1192 }
1193 if (keywordEnum != NULL || hadPosix) {
1194 /* reorder extensions */
1195 int32_t len;
1196 const char *key;
1197 ExtensionListEntry *firstExt = NULL;
1198 ExtensionListEntry *ext;
1199 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1200 char *pExtBuf = extBuf;
1201 int32_t extBufCapacity = sizeof(extBuf);
1202 const char *bcpKey, *bcpValue;
1203 UErrorCode tmpStatus = U_ZERO_ERROR;
1204 int32_t keylen;
1205 UBool isLDMLKeyword;
1206
1207 while (TRUE) {
1208 key = uenum_next(keywordEnum, NULL, status);
1209 if (key == NULL) {
1210 break;
1211 }
1212 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStat us);
1213 if (U_FAILURE(tmpStatus)) {
1214 if (strict) {
1215 *status = U_ILLEGAL_ARGUMENT_ERROR;
1216 break;
1217 }
1218 /* ignore this keyword */
1219 tmpStatus = U_ZERO_ERROR;
1220 continue;
1221 }
1222
1223 keylen = (int32_t)uprv_strlen(key);
1224 isLDMLKeyword = (keylen > 1);
1225
1226 if (isLDMLKeyword) {
1227 int32_t modKeyLen;
1228
1229 /* transform key and value to bcp47 style */
1230 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity , &tmpStatus);
1231 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED _WARNING) {
1232 if (strict) {
1233 *status = U_ILLEGAL_ARGUMENT_ERROR;
1234 break;
1235 }
1236 tmpStatus = U_ZERO_ERROR;
1237 continue;
1238 }
1239
1240 bcpKey = pExtBuf;
1241 pExtBuf += (modKeyLen + 1);
1242 extBufCapacity -= (modKeyLen + 1);
1243
1244 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCap acity, &tmpStatus);
1245 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED _WARNING) {
1246 if (strict) {
1247 *status = U_ILLEGAL_ARGUMENT_ERROR;
1248 break;
1249 }
1250 tmpStatus = U_ZERO_ERROR;
1251 continue;
1252 }
1253 bcpValue = pExtBuf;
1254 pExtBuf += (len + 1);
1255 extBufCapacity -= (len + 1);
1256 } else {
1257 if (*key == PRIVATEUSE) {
1258 if (!_isPrivateuseValueSubtags(buf, len)) {
1259 if (strict) {
1260 *status = U_ILLEGAL_ARGUMENT_ERROR;
1261 break;
1262 }
1263 continue;
1264 }
1265 } else {
1266 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubt ags(buf, len)) {
1267 if (strict) {
1268 *status = U_ILLEGAL_ARGUMENT_ERROR;
1269 break;
1270 }
1271 continue;
1272 }
1273 }
1274 bcpKey = key;
1275 if ((len + 1) < extBufCapacity) {
1276 uprv_memcpy(pExtBuf, buf, len);
1277 bcpValue = pExtBuf;
1278
1279 pExtBuf += len;
1280
1281 *pExtBuf = 0;
1282 pExtBuf++;
1283
1284 extBufCapacity -= (len + 1);
1285 } else {
1286 *status = U_ILLEGAL_ARGUMENT_ERROR;
1287 break;
1288 }
1289 }
1290
1291 /* create ExtensionListEntry */
1292 ext = uprv_malloc(sizeof(ExtensionListEntry));
1293 if (ext == NULL) {
1294 *status = U_MEMORY_ALLOCATION_ERROR;
1295 break;
1296 }
1297 ext->key = bcpKey;
1298 ext->value = bcpValue;
1299
1300 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1301 uprv_free(ext);
1302 if (strict) {
1303 *status = U_ILLEGAL_ARGUMENT_ERROR;
1304 break;
1305 }
1306 }
1307 }
1308
1309 /* Special handling for POSIX variant - add the keywords for POSIX */
1310 if (hadPosix) {
1311 /* create ExtensionListEntry for POSIX */
1312 ext = uprv_malloc(sizeof(ExtensionListEntry));
1313 if (ext == NULL) {
1314 *status = U_MEMORY_ALLOCATION_ERROR;
1315 }
1316 ext->key = POSIX_KEY;
1317 ext->value = POSIX_VALUE;
1318
1319 if (!_addExtensionToList(&firstExt, ext, TRUE)) {
1320 uprv_free(ext);
1321 }
1322 }
1323
1324 if (U_SUCCESS(*status) && (firstExt != NULL)) {
1325 UBool startLDMLExtension = FALSE;
1326
1327 /* write out the sorted BCP47 extensions and private use */
1328 ext = firstExt;
1329 while (ext != NULL) {
1330 if ((int32_t)uprv_strlen(ext->key) > 1 && !startLDMLExtension) {
1331 /* write LDML singleton extension */
1332 if (reslen < capacity) {
1333 *(appendAt + reslen) = SEP;
1334 }
1335 reslen++;
1336 if (reslen < capacity) {
1337 *(appendAt + reslen) = LDMLEXT;
1338 }
1339 reslen++;
1340 startLDMLExtension = TRUE;
1341 }
1342
1343 if (reslen < capacity) {
1344 *(appendAt + reslen) = SEP;
1345 }
1346 reslen++;
1347 len = (int32_t)uprv_strlen(ext->key);
1348 if (reslen < capacity) {
1349 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capac ity - reslen));
1350 }
1351 reslen += len;
1352 if (reslen < capacity) {
1353 *(appendAt + reslen) = SEP;
1354 }
1355 reslen++;
1356 len = (int32_t)uprv_strlen(ext->value);
1357 if (reslen < capacity) {
1358 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, cap acity - reslen));
1359 }
1360 reslen += len;
1361
1362 ext = ext->next;
1363 }
1364 }
1365 /* clean up */
1366 ext = firstExt;
1367 while (ext != NULL) {
1368 ExtensionListEntry *tmpExt = ext->next;
1369 uprv_free(ext);
1370 ext = tmpExt;
1371 }
1372
1373 uenum_close(keywordEnum);
1374
1375 if (U_FAILURE(*status)) {
1376 return 0;
1377 }
1378 }
1379
1380 return u_terminateChars(appendAt, capacity, reslen, status);
1381 }
1382
1383 /**
1384 * Append keywords parsed from LDML extension value
1385 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditiona l}
1386 * Note: char* buf is used for storing keywords
1387 */
1388 static void
1389 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT o, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
1390 const char *p, *pNext, *pSep;
1391 const char *pBcpKey, *pBcpType;
1392 const char *pKey, *pType;
1393 int32_t bcpKeyLen = 0, bcpTypeLen;
1394 ExtensionListEntry *kwd, *nextKwd;
1395 ExtensionListEntry *kwdFirst = NULL;
1396 int32_t bufIdx = 0;
1397 int32_t len;
1398
1399 pNext = ldmlext;
1400 pBcpKey = pBcpType = NULL;
1401 while (pNext) {
1402 p = pSep = pNext;
1403
1404 /* locate next separator char */
1405 while (*pSep) {
1406 if (*pSep == SEP) {
1407 break;
1408 }
1409 pSep++;
1410 }
1411 if (*pSep == 0) {
1412 /* last subtag */
1413 pNext = NULL;
1414 } else {
1415 pNext = pSep + 1;
1416 }
1417
1418 if (pBcpKey == NULL) {
1419 pBcpKey = p;
1420 bcpKeyLen = (int32_t)(pSep - p);
1421 } else {
1422 pBcpType = p;
1423 bcpTypeLen = (int32_t)(pSep - p);
1424
1425 /* BCP key to locale key */
1426 len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bu fIdx - 1, status);
1427 if (U_FAILURE(*status)) {
1428 goto cleanup;
1429 }
1430 pKey = buf + bufIdx;
1431 bufIdx += len;
1432 *(buf + bufIdx) = 0;
1433 bufIdx++;
1434
1435 /* BCP type to locale type */
1436 len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
1437 if (U_FAILURE(*status)) {
1438 goto cleanup;
1439 }
1440 pType = buf + bufIdx;
1441 bufIdx += len;
1442 *(buf + bufIdx) = 0;
1443 bufIdx++;
1444
1445 /* Special handling for u-va-posix, since we want to treat this as a variant, not */
1446 /* as a keyword. */
1447
1448 if ( !uprv_strcmp(pKey,POSIX_KEY) && !uprv_strcmp(pType,POSIX_VALUE) ) {
1449 *posixVariant = TRUE;
1450 } else {
1451 /* create an ExtensionListEntry for this keyword */
1452 kwd = uprv_malloc(sizeof(ExtensionListEntry));
1453 if (kwd == NULL) {
1454 *status = U_MEMORY_ALLOCATION_ERROR;
1455 goto cleanup;
1456 }
1457
1458 kwd->key = pKey;
1459 kwd->value = pType;
1460
1461 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1462 *status = U_ILLEGAL_ARGUMENT_ERROR;
1463 uprv_free(kwd);
1464 goto cleanup;
1465 }
1466 }
1467
1468 /* for next pair */
1469 pBcpKey = NULL;
1470 pBcpType = NULL;
1471 }
1472 }
1473
1474 if (pBcpKey != NULL) {
1475 *status = U_ILLEGAL_ARGUMENT_ERROR;
1476 goto cleanup;
1477 }
1478
1479 kwd = kwdFirst;
1480 while (kwd != NULL) {
1481 nextKwd = kwd->next;
1482 _addExtensionToList(appendTo, kwd, FALSE);
1483 kwd = nextKwd;
1484 }
1485
1486 return;
1487
1488 cleanup:
1489 kwd = kwdFirst;
1490 while (kwd != NULL) {
1491 nextKwd = kwd->next;
1492 uprv_free(kwd);
1493 kwd = nextKwd;
1494 }
1495 }
1496
1497
1498 static int32_t
1499 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC ode* status) {
1500 int32_t reslen = 0;
1501 int32_t i, n;
1502 int32_t len;
1503 ExtensionListEntry *kwdFirst = NULL;
1504 ExtensionListEntry *kwd;
1505 const char *key, *type;
1506 char kwdBuf[ULOC_KEYWORDS_CAPACITY];
1507 UBool posixVariant = FALSE;
1508
1509 if (U_FAILURE(*status)) {
1510 return 0;
1511 }
1512
1513 n = ultag_getExtensionsSize(langtag);
1514
1515 /* resolve locale keywords and reordering keys */
1516 for (i = 0; i < n; i++) {
1517 key = ultag_getExtensionKey(langtag, i);
1518 type = ultag_getExtensionValue(langtag, i);
1519 if (*key == LDMLEXT) {
1520 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, sizeof(kwdBu f), &posixVariant, status);
1521 if (U_FAILURE(*status)) {
1522 break;
1523 }
1524 } else {
1525 kwd = uprv_malloc(sizeof(ExtensionListEntry));
1526 if (kwd == NULL) {
1527 *status = U_MEMORY_ALLOCATION_ERROR;
1528 break;
1529 }
1530 kwd->key = key;
1531 kwd->value = type;
1532 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1533 uprv_free(kwd);
1534 *status = U_ILLEGAL_ARGUMENT_ERROR;
1535 break;
1536 }
1537 }
1538 }
1539
1540 if (U_SUCCESS(*status)) {
1541 type = ultag_getPrivateUse(langtag);
1542 if ((int32_t)uprv_strlen(type) > 0) {
1543 /* add private use as a keyword */
1544 kwd = uprv_malloc(sizeof(ExtensionListEntry));
1545 if (kwd == NULL) {
1546 *status = U_MEMORY_ALLOCATION_ERROR;
1547 } else {
1548 kwd->key = PRIVATEUSE_KEY;
1549 kwd->value = type;
1550 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
1551 uprv_free(kwd);
1552 *status = U_ILLEGAL_ARGUMENT_ERROR;
1553 }
1554 }
1555 }
1556 }
1557
1558 /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
1559
1560 if (U_SUCCESS(*status) && posixVariant) {
1561 len = (int32_t) uprv_strlen(_POSIX);
1562 if (reslen < capacity) {
1563 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - resl en));
1564 }
1565 reslen += len;
1566 }
1567
1568 if (U_SUCCESS(*status) && kwdFirst != NULL) {
1569 /* write out the sorted keywords */
1570 kwd = kwdFirst;
1571 while (kwd != NULL) {
1572 if (reslen < capacity) {
1573 if (kwd == kwdFirst) {
1574 /* '@' */
1575 *(appendAt + reslen) = LOCALE_EXT_SEP;
1576 } else {
1577 /* ';' */
1578 *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
1579 }
1580 }
1581 reslen++;
1582
1583 /* key */
1584 len = (int32_t)uprv_strlen(kwd->key);
1585 if (reslen < capacity) {
1586 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
1587 }
1588 reslen += len;
1589
1590 /* '=' */
1591 if (reslen < capacity) {
1592 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
1593 }
1594 reslen++;
1595
1596 /* type */
1597 len = (int32_t)uprv_strlen(kwd->value);
1598 if (reslen < capacity) {
1599 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacit y - reslen));
1600 }
1601 reslen += len;
1602
1603 kwd = kwd->next;
1604 }
1605 }
1606
1607 /* clean up */
1608 kwd = kwdFirst;
1609 while (kwd != NULL) {
1610 ExtensionListEntry *tmpKwd = kwd->next;
1611 uprv_free(kwd);
1612 kwd = tmpKwd;
1613 }
1614
1615 if (U_FAILURE(*status)) {
1616 return 0;
1617 }
1618
1619 return u_terminateChars(appendAt, capacity, reslen, status);
1620 }
1621
1622 /*
1623 * -------------------------------------------------
1624 *
1625 * ultag_ functions
1626 *
1627 * -------------------------------------------------
1628 */
1629
1630 /* Bit flags used by the parser */
1631 #define LANG 0x0001
1632 #define EXTL 0x0002
1633 #define SCRT 0x0004
1634 #define REGN 0x0008
1635 #define VART 0x0010
1636 #define EXTS 0x0020
1637 #define EXTV 0x0040
1638 #define PRIV 0x0080
1639
1640 static ULanguageTag*
1641 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta tus) {
1642 ULanguageTag *t;
1643 char *tagBuf;
1644 int16_t next;
1645 char *pSubtag, *pNext, *pLastGoodPosition;
1646 int32_t subtagLen;
1647 int32_t extlangIdx;
1648 ExtensionListEntry *pExtension;
1649 char *pExtValueSubtag, *pExtValueSubtagEnd;
1650 int32_t i;
1651 UBool isLDMLExtension, reqLDMLType;
1652
1653 if (parsedLen != NULL) {
1654 *parsedLen = 0;
1655 }
1656
1657 if (U_FAILURE(*status)) {
1658 return NULL;
1659 }
1660
1661 if (tagLen < 0) {
1662 tagLen = (int32_t)uprv_strlen(tag);
1663 }
1664
1665 /* copy the entire string */
1666 tagBuf = (char*)uprv_malloc(tagLen + 1);
1667 if (tagBuf == NULL) {
1668 *status = U_MEMORY_ALLOCATION_ERROR;
1669 return NULL;
1670 }
1671 uprv_memcpy(tagBuf, tag, tagLen);
1672 *(tagBuf + tagLen) = 0;
1673
1674 /* create a ULanguageTag */
1675 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
1676 _initializeULanguageTag(t);
1677 t->buf = tagBuf;
1678 if (t == NULL) {
1679 uprv_free(tagBuf);
1680 *status = U_MEMORY_ALLOCATION_ERROR;
1681 return NULL;
1682 }
1683
1684 if (tagLen < MINLEN) {
1685 /* the input tag is too short - return empty ULanguageTag */
1686 return t;
1687 }
1688
1689 /* check if the tag is grandfathered */
1690 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
1691 if (T_CString_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
1692 /* a grandfathered tag is always longer than its preferred mapping * /
1693 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
1694 t->language = t->buf;
1695 if (parsedLen != NULL) {
1696 *parsedLen = tagLen;
1697 }
1698 return t;
1699 }
1700 }
1701
1702 /*
1703 * langtag = language
1704 * ["-" script]
1705 * ["-" region]
1706 * *("-" variant)
1707 * *("-" extension)
1708 * ["-" privateuse]
1709 */
1710
1711 next = LANG | PRIV;
1712 pNext = pLastGoodPosition = tagBuf;
1713 extlangIdx = 0;
1714 pExtension = NULL;
1715 pExtValueSubtag = NULL;
1716 pExtValueSubtagEnd = NULL;
1717 isLDMLExtension = FALSE;
1718 reqLDMLType = FALSE;
1719
1720 while (pNext) {
1721 char *pSep;
1722
1723 pSubtag = pNext;
1724
1725 /* locate next separator char */
1726 pSep = pSubtag;
1727 while (*pSep) {
1728 if (*pSep == SEP) {
1729 break;
1730 }
1731 pSep++;
1732 }
1733 if (*pSep == 0) {
1734 /* last subtag */
1735 pNext = NULL;
1736 } else {
1737 pNext = pSep + 1;
1738 }
1739 subtagLen = (int32_t)(pSep - pSubtag);
1740
1741 if (next & LANG) {
1742 if (_isLanguageSubtag(pSubtag, subtagLen)) {
1743 *pSep = 0; /* terminate */
1744 t->language = T_CString_toLowerCase(pSubtag);
1745
1746 pLastGoodPosition = pSep;
1747 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1748 continue;
1749 }
1750 }
1751 if (next & EXTL) {
1752 if (_isExtlangSubtag(pSubtag, subtagLen)) {
1753 *pSep = 0;
1754 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
1755
1756 pLastGoodPosition = pSep;
1757 if (extlangIdx < 3) {
1758 next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
1759 } else {
1760 next = SCRT | REGN | VART | EXTS | PRIV;
1761 }
1762 continue;
1763 }
1764 }
1765 if (next & SCRT) {
1766 if (_isScriptSubtag(pSubtag, subtagLen)) {
1767 char *p = pSubtag;
1768
1769 *pSep = 0;
1770
1771 /* to title case */
1772 *p = uprv_toupper(*p);
1773 p++;
1774 for (; *p; p++) {
1775 *p = uprv_tolower(*p);
1776 }
1777
1778 t->script = pSubtag;
1779
1780 pLastGoodPosition = pSep;
1781 next = REGN | VART | EXTS | PRIV;
1782 continue;
1783 }
1784 }
1785 if (next & REGN) {
1786 if (_isRegionSubtag(pSubtag, subtagLen)) {
1787 *pSep = 0;
1788 t->region = T_CString_toUpperCase(pSubtag);
1789
1790 pLastGoodPosition = pSep;
1791 next = VART | EXTS | PRIV;
1792 continue;
1793 }
1794 }
1795 if (next & VART) {
1796 if (_isVariantSubtag(pSubtag, subtagLen)) {
1797 VariantListEntry *var;
1798 UBool isAdded;
1799
1800 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
1801 if (var == NULL) {
1802 *status = U_MEMORY_ALLOCATION_ERROR;
1803 goto error;
1804 }
1805 *pSep = 0;
1806 var->variant = T_CString_toUpperCase(pSubtag);
1807 isAdded = _addVariantToList(&(t->variants), var);
1808 if (!isAdded) {
1809 /* duplicated variant entry */
1810 uprv_free(var);
1811 break;
1812 }
1813 pLastGoodPosition = pSep;
1814 next = VART | EXTS | PRIV;
1815 continue;
1816 }
1817 }
1818 if (next & EXTS) {
1819 if (_isExtensionSingleton(pSubtag, subtagLen)) {
1820 if (pExtension != NULL) {
1821 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
1822 /* the previous extension is incomplete */
1823 uprv_free(pExtension);
1824 pExtension = NULL;
1825 break;
1826 }
1827
1828 /* terminate the previous extension value */
1829 *pExtValueSubtagEnd = 0;
1830 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
1831
1832 /* insert the extension to the list */
1833 if (_addExtensionToList(&(t->extensions), pExtension, FALSE) ) {
1834 pLastGoodPosition = pExtValueSubtagEnd;
1835 } else {
1836 /* stop parsing here */
1837 uprv_free(pExtension);
1838 pExtension = NULL;
1839 break;
1840 }
1841
1842 if (isLDMLExtension && reqLDMLType) {
1843 /* incomplete LDML extension key and type pair */
1844 pExtension = NULL;
1845 break;
1846 }
1847 }
1848
1849 isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT);
1850
1851 /* create a new extension */
1852 pExtension = uprv_malloc(sizeof(ExtensionListEntry));
1853 if (pExtension == NULL) {
1854 *status = U_MEMORY_ALLOCATION_ERROR;
1855 goto error;
1856 }
1857 *pSep = 0;
1858 pExtension->key = T_CString_toLowerCase(pSubtag);
1859 pExtension->value = NULL; /* will be set later */
1860
1861 /*
1862 * reset the start and the end location of extension value
1863 * subtags for this extension
1864 */
1865 pExtValueSubtag = NULL;
1866 pExtValueSubtagEnd = NULL;
1867
1868 next = EXTV;
1869 continue;
1870 }
1871 }
1872 if (next & EXTV) {
1873 if (_isExtensionSubtag(pSubtag, subtagLen)) {
1874 if (isLDMLExtension) {
1875 if (reqLDMLType) {
1876 /* already saw an LDML key */
1877 if (!_isLDMLType(pSubtag, subtagLen)) {
1878 /* stop parsing here and let the valid LDML extensio n key/type
1879 pairs processed by the code out of this while loo p */
1880 break;
1881 }
1882 pExtValueSubtagEnd = pSep;
1883 reqLDMLType = FALSE;
1884 next = EXTS | EXTV | PRIV;
1885 } else {
1886 /* LDML key */
1887 if (!_isLDMLKey(pSubtag, subtagLen)) {
1888 /* stop parsing here and let the valid LDML extensio n key/type
1889 pairs processed by the code out of this while loo p */
1890 break;
1891 }
1892 reqLDMLType = TRUE;
1893 next = EXTV;
1894 }
1895 } else {
1896 /* Mark the end of this subtag */
1897 pExtValueSubtagEnd = pSep;
1898 next = EXTS | EXTV | PRIV;
1899 }
1900
1901 if (pExtValueSubtag == NULL) {
1902 /* if the start postion of this extension's value is not yet ,
1903 this one is the first value subtag */
1904 pExtValueSubtag = pSubtag;
1905 }
1906 continue;
1907 }
1908 }
1909 if (next & PRIV) {
1910 if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
1911 char *pPrivuseVal;
1912
1913 if (pExtension != NULL) {
1914 /* Process the last extension */
1915 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
1916 /* the previous extension is incomplete */
1917 uprv_free(pExtension);
1918 pExtension = NULL;
1919 break;
1920 } else {
1921 /* terminate the previous extension value */
1922 *pExtValueSubtagEnd = 0;
1923 pExtension->value = T_CString_toLowerCase(pExtValueSubta g);
1924
1925 /* insert the extension to the list */
1926 if (_addExtensionToList(&(t->extensions), pExtension, FA LSE)) {
1927 pLastGoodPosition = pExtValueSubtagEnd;
1928 pExtension = NULL;
1929 } else {
1930 /* stop parsing here */
1931 uprv_free(pExtension);
1932 pExtension = NULL;
1933 break;
1934 }
1935 }
1936 }
1937
1938 /* The rest of part will be private use value subtags */
1939 if (pNext == NULL) {
1940 /* empty private use subtag */
1941 break;
1942 }
1943 /* back up the private use value start position */
1944 pPrivuseVal = pNext;
1945
1946 /* validate private use value subtags */
1947 while (pNext) {
1948 pSubtag = pNext;
1949 pSep = pSubtag;
1950 while (*pSep) {
1951 if (*pSep == SEP) {
1952 break;
1953 }
1954 pSep++;
1955 }
1956 if (*pSep == 0) {
1957 /* last subtag */
1958 pNext = NULL;
1959 } else {
1960 pNext = pSep + 1;
1961 }
1962 subtagLen = (int32_t)(pSep - pSubtag);
1963
1964 if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
1965 pLastGoodPosition = pSep;
1966 } else {
1967 break;
1968 }
1969 }
1970 if (pLastGoodPosition - pPrivuseVal > 0) {
1971 *pLastGoodPosition = 0;
1972 t->privateuse = T_CString_toLowerCase(pPrivuseVal);
1973 }
1974 /* No more subtags, exiting the parse loop */
1975 break;
1976 }
1977 break;
1978 }
1979 /* If we fell through here, it means this subtag is illegal - quit parsi ng */
1980 break;
1981 }
1982
1983 if (pExtension != NULL) {
1984 /* Process the last extension */
1985 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
1986 /* the previous extension is incomplete */
1987 uprv_free(pExtension);
1988 } else {
1989 /* terminate the previous extension value */
1990 *pExtValueSubtagEnd = 0;
1991 pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
1992 /* insert the extension to the list */
1993 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
1994 pLastGoodPosition = pExtValueSubtagEnd;
1995 } else {
1996 uprv_free(pExtension);
1997 }
1998 }
1999 }
2000
2001 if (parsedLen != NULL) {
2002 *parsedLen = (int32_t)(pLastGoodPosition - t->buf);
2003 }
2004
2005 return t;
2006
2007 error:
2008 uprv_free(t);
2009 return NULL;
2010 }
2011
2012 static void
2013 ultag_close(ULanguageTag* langtag) {
2014
2015 if (langtag == NULL) {
2016 return;
2017 }
2018
2019 uprv_free(langtag->buf);
2020
2021 if (langtag->variants) {
2022 VariantListEntry *curVar = langtag->variants;
2023 while (curVar) {
2024 VariantListEntry *nextVar = curVar->next;
2025 uprv_free(curVar);
2026 curVar = nextVar;
2027 }
2028 }
2029
2030 if (langtag->extensions) {
2031 ExtensionListEntry *curExt = langtag->extensions;
2032 while (curExt) {
2033 ExtensionListEntry *nextExt = curExt->next;
2034 uprv_free(curExt);
2035 curExt = nextExt;
2036 }
2037 }
2038
2039 uprv_free(langtag);
2040 }
2041
2042 static const char*
2043 ultag_getLanguage(const ULanguageTag* langtag) {
2044 return langtag->language;
2045 }
2046
#if 0
/**
 * Disabled code: return the replacement listed after a matching entry in
 * DEPRECATEDLANGS (stored as pairs), or the tag's own language if none matches.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t idx = 0;
    while (DEPRECATEDLANGS[idx] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[idx], langtag->language) == 0) {
            return DEPRECATEDLANGS[idx + 1];
        }
        idx += 2;
    }
    return langtag->language;
}
#endif
2059
2060 static const char*
2061 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
2062 if (idx >= 0 && idx < MAXEXTLANG) {
2063 return langtag->extlang[idx];
2064 }
2065 return NULL;
2066 }
2067
2068 static int32_t
2069 ultag_getExtlangSize(const ULanguageTag* langtag) {
2070 int32_t size = 0;
2071 int32_t i;
2072 for (i = 0; i < MAXEXTLANG; i++) {
2073 if (langtag->extlang[i]) {
2074 size++;
2075 }
2076 }
2077 return size;
2078 }
2079
2080 static const char*
2081 ultag_getScript(const ULanguageTag* langtag) {
2082 return langtag->script;
2083 }
2084
2085 static const char*
2086 ultag_getRegion(const ULanguageTag* langtag) {
2087 return langtag->region;
2088 }
2089
2090 static const char*
2091 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
2092 const char *var = NULL;
2093 VariantListEntry *cur = langtag->variants;
2094 int32_t i = 0;
2095 while (cur) {
2096 if (i == idx) {
2097 var = cur->variant;
2098 break;
2099 }
2100 cur = cur->next;
2101 i++;
2102 }
2103 return var;
2104 }
2105
2106 static int32_t
2107 ultag_getVariantsSize(const ULanguageTag* langtag) {
2108 int32_t size = 0;
2109 VariantListEntry *cur = langtag->variants;
2110 while (TRUE) {
2111 if (cur == NULL) {
2112 break;
2113 }
2114 size++;
2115 cur = cur->next;
2116 }
2117 return size;
2118 }
2119
2120 static const char*
2121 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
2122 const char *key = NULL;
2123 ExtensionListEntry *cur = langtag->extensions;
2124 int32_t i = 0;
2125 while (cur) {
2126 if (i == idx) {
2127 key = cur->key;
2128 break;
2129 }
2130 cur = cur->next;
2131 i++;
2132 }
2133 return key;
2134 }
2135
2136 static const char*
2137 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
2138 const char *val = NULL;
2139 ExtensionListEntry *cur = langtag->extensions;
2140 int32_t i = 0;
2141 while (cur) {
2142 if (i == idx) {
2143 val = cur->value;
2144 break;
2145 }
2146 cur = cur->next;
2147 i++;
2148 }
2149 return val;
2150 }
2151
2152 static int32_t
2153 ultag_getExtensionsSize(const ULanguageTag* langtag) {
2154 int32_t size = 0;
2155 ExtensionListEntry *cur = langtag->extensions;
2156 while (TRUE) {
2157 if (cur == NULL) {
2158 break;
2159 }
2160 size++;
2161 cur = cur->next;
2162 }
2163 return size;
2164 }
2165
2166 static const char*
2167 ultag_getPrivateUse(const ULanguageTag* langtag) {
2168 return langtag->privateuse;
2169 }
2170
#if 0
/** Disabled code: return the grandfathered field of the parsed tag. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    return langtag->grandfathered;
}
#endif
2177
2178
2179 /*
2180 * -------------------------------------------------
2181 *
2182 * Locale/BCP47 conversion APIs, exposed as uloc_*
2183 *
2184 * -------------------------------------------------
2185 */
2186 U_DRAFT int32_t U_EXPORT2
2187 uloc_toLanguageTag(const char* localeID,
2188 char* langtag,
2189 int32_t langtagCapacity,
2190 UBool strict,
2191 UErrorCode* status) {
2192 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
2193 char canonical[256];
2194 int32_t reslen = 0;
2195 UErrorCode tmpStatus = U_ZERO_ERROR;
2196 UBool hadPosix = FALSE;
2197
2198 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". Se e #6835 */
2199 canonical[0] = 0;
2200 if (uprv_strlen(localeID) > 0) {
2201 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
2202 if (tmpStatus != U_ZERO_ERROR) {
2203 *status = U_ILLEGAL_ARGUMENT_ERROR;
2204 return 0;
2205 }
2206 }
2207
2208 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
2209 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCap acity - reslen, strict, status);
2210 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCap acity - reslen, strict, status);
2211 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagC apacity - reslen, strict, &hadPosix, status);
2212 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagC apacity - reslen, strict, hadPosix, status);
2213
2214 return reslen;
2215 }
2216
2217
2218 U_DRAFT int32_t U_EXPORT2
2219 uloc_forLanguageTag(const char* langtag,
2220 char* localeID,
2221 int32_t localeIDCapacity,
2222 int32_t* parsedLength,
2223 UErrorCode* status) {
2224 ULanguageTag *lt;
2225 int32_t reslen = 0;
2226 const char *subtag, *p;
2227 int32_t len;
2228 int32_t i, n;
2229 UBool noRegion = TRUE;
2230
2231 lt = ultag_parse(langtag, -1, parsedLength, status);
2232 if (U_FAILURE(*status)) {
2233 return 0;
2234 }
2235
2236 /* language */
2237 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getL anguage(lt);
2238 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) {
2239 len = (int32_t)uprv_strlen(subtag);
2240 if (len > 0) {
2241 if (reslen < localeIDCapacity) {
2242 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - r eslen));
2243 }
2244 reslen += len;
2245 }
2246 }
2247
2248 /* script */
2249 subtag = ultag_getScript(lt);
2250 len = (int32_t)uprv_strlen(subtag);
2251 if (len > 0) {
2252 if (reslen < localeIDCapacity) {
2253 *(localeID + reslen) = LOCALE_SEP;
2254 }
2255 reslen++;
2256
2257 /* write out the script in title case */
2258 p = subtag;
2259 while (*p) {
2260 if (reslen < localeIDCapacity) {
2261 if (p == subtag) {
2262 *(localeID + reslen) = uprv_toupper(*p);
2263 } else {
2264 *(localeID + reslen) = *p;
2265 }
2266 }
2267 reslen++;
2268 p++;
2269 }
2270 }
2271
2272 /* region */
2273 subtag = ultag_getRegion(lt);
2274 len = (int32_t)uprv_strlen(subtag);
2275 if (len > 0) {
2276 if (reslen < localeIDCapacity) {
2277 *(localeID + reslen) = LOCALE_SEP;
2278 }
2279 reslen++;
2280 /* write out the retion in upper case */
2281 p = subtag;
2282 while (*p) {
2283 if (reslen < localeIDCapacity) {
2284 *(localeID + reslen) = uprv_toupper(*p);
2285 }
2286 reslen++;
2287 p++;
2288 }
2289 noRegion = FALSE;
2290 }
2291
2292 /* variants */
2293 n = ultag_getVariantsSize(lt);
2294 if (n > 0) {
2295 if (noRegion) {
2296 if (reslen < localeIDCapacity) {
2297 *(localeID + reslen) = LOCALE_SEP;
2298 }
2299 reslen++;
2300 }
2301
2302 for (i = 0; i < n; i++) {
2303 subtag = ultag_getVariant(lt, i);
2304 if (reslen < localeIDCapacity) {
2305 *(localeID + reslen) = LOCALE_SEP;
2306 }
2307 reslen++;
2308 /* write out the variant in upper case */
2309 p = subtag;
2310 while (*p) {
2311 if (reslen < localeIDCapacity) {
2312 *(localeID + reslen) = uprv_toupper(*p);
2313 }
2314 reslen++;
2315 p++;
2316 }
2317 }
2318 }
2319
2320 /* keywords */
2321 n = ultag_getExtensionsSize(lt);
2322 subtag = ultag_getPrivateUse(lt);
2323 if (n > 0 || uprv_strlen(subtag) > 0) {
2324 if (reslen == 0) {
2325 /* need a language */
2326 if (reslen < localeIDCapacity) {
2327 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));
2328 }
2329 reslen += LANG_UND_LEN;
2330 }
2331 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status);
2332 reslen += len;
2333 }
2334
2335 ultag_close(lt);
2336 return u_terminateChars(localeID, localeIDCapacity, reslen, status);
2337 }
2338
2339
OLDNEW
« no previous file with comments | « icu46/source/common/uloc.c ('k') | icu46/source/common/ulocimp.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698