OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ********************************************************************** |
| 3 * Copyright (C) 2009-2010, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** |
| 6 */ |
| 7 |
| 8 #include "unicode/utypes.h" |
| 9 #include "unicode/ures.h" |
| 10 #include "unicode/putil.h" |
| 11 #include "unicode/uloc.h" |
| 12 #include "ustr_imp.h" |
| 13 #include "cmemory.h" |
| 14 #include "cstring.h" |
| 15 #include "putilimp.h" |
| 16 #include "uinvchar.h" |
| 17 |
/* One variant subtag; entries are chained through `next` into a singly linked list. */
typedef struct VariantListEntry {
    const char              *variant;   /* text of the variant subtag */
    struct VariantListEntry *next;      /* following entry, NULL at the tail */
} VariantListEntry;
| 23 |
/* One extension (key/value pair); entries are chained through `next` into a singly linked list. */
typedef struct ExtensionListEntry {
    const char                *key;     /* extension key */
    const char                *value;   /* extension value */
    struct ExtensionListEntry *next;    /* following entry, NULL at the tail */
} ExtensionListEntry;
| 30 |
| 31 #define MAXEXTLANG 3 |
| 32 typedef struct ULanguageTag { |
| 33 char *buf; /* holding parsed subtags */ |
| 34 const char *language; |
| 35 const char *extlang[MAXEXTLANG]; |
| 36 const char *script; |
| 37 const char *region; |
| 38 VariantListEntry *variants; |
| 39 ExtensionListEntry *extensions; |
| 40 const char *privateuse; |
| 41 const char *grandfathered; |
| 42 } ULanguageTag; |
| 43 |
/* Language tag syntax characters */
#define MINLEN 2
#define SEP '-'
#define PRIVATEUSE 'x'
#define LDMLEXT 'u'

/* ICU locale ID syntax characters */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/*
 * ASCII-range character classification.
 * Both macros evaluate their argument more than once, so the argument
 * must not have side effects.
 */
#define ISALPHA(c) (((c)>='A' && (c)<='Z') || ((c)>='a' && (c)<='z'))
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')
| 56 |
/* Shared string constants */
static const char* EMPTY = "";
static const char* LANG_UND = "und";
static const char* PRIVATEUSE_KEY = "x";
static const char* _POSIX = "_POSIX";
static const char* POSIX_KEY = "va";
static const char* POSIX_VALUE = "posix";

/* Length of LANG_UND, excluding the terminating NUL */
#define LANG_UND_LEN 3
| 65 |
/*
 * Pairs of { grandfathered tag, preferred replacement }.
 * An empty replacement string marks a tag with no preferred value listed
 * here.  The table ends with a { NULL, NULL } pair.
 */
static const char* GRANDFATHERED[] = {
/*  grandfathered   preferred */
    "art-lojban",   "jbo",
    "cel-gaulish",  "",
    "en-GB-oed",    "",
    "i-ami",        "ami",
    "i-bnn",        "bnn",
    "i-default",    "",
    "i-enochian",   "",
    "i-hak",        "hak",
    "i-klingon",    "tlh",
    "i-lux",        "lb",
    "i-mingo",      "",
    "i-navajo",     "nv",
    "i-pwn",        "pwn",
    "i-tao",        "tao",
    "i-tay",        "tay",
    "i-tsu",        "tsu",
    "no-bok",       "nb",
    "no-nyn",       "nn",
    "sgn-be-fr",    "sfb",
    "sgn-be-nl",    "vgt",
    "sgn-ch-de",    "sgg",
    "zh-guoyu",     "cmn",
    "zh-hakka",     "hak",
    "zh-min",       "",
    "zh-min-nan",   "nan",
    "zh-xiang",     "hsn",
    NULL,           NULL
};
| 96 |
/*
 * Pairs of { deprecated language code, replacement code }.
 * The table ends with a { NULL, NULL } pair.
 */
static const char* DEPRECATEDLANGS[] = {
/*  deprecated  new */
    "iw",       "he",
    "ji",       "yi",
    "in",       "id",
    NULL,       NULL
};
| 104 |
| 105 /* |
| 106 * ------------------------------------------------- |
| 107 * |
| 108 * These ultag_ functions may be exposed as APIs later |
| 109 * |
| 110 * ------------------------------------------------- |
| 111 */ |
| 112 |
| 113 static ULanguageTag* |
| 114 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
tus); |
| 115 |
| 116 static void |
| 117 ultag_close(ULanguageTag* langtag); |
| 118 |
| 119 static const char* |
| 120 ultag_getLanguage(const ULanguageTag* langtag); |
| 121 |
| 122 #if 0 |
| 123 static const char* |
| 124 ultag_getJDKLanguage(const ULanguageTag* langtag); |
| 125 #endif |
| 126 |
| 127 static const char* |
| 128 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx); |
| 129 |
| 130 static int32_t |
| 131 ultag_getExtlangSize(const ULanguageTag* langtag); |
| 132 |
| 133 static const char* |
| 134 ultag_getScript(const ULanguageTag* langtag); |
| 135 |
| 136 static const char* |
| 137 ultag_getRegion(const ULanguageTag* langtag); |
| 138 |
| 139 static const char* |
| 140 ultag_getVariant(const ULanguageTag* langtag, int32_t idx); |
| 141 |
| 142 static int32_t |
| 143 ultag_getVariantsSize(const ULanguageTag* langtag); |
| 144 |
| 145 static const char* |
| 146 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); |
| 147 |
| 148 static const char* |
| 149 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); |
| 150 |
| 151 static int32_t |
| 152 ultag_getExtensionsSize(const ULanguageTag* langtag); |
| 153 |
| 154 static const char* |
| 155 ultag_getPrivateUse(const ULanguageTag* langtag); |
| 156 |
| 157 #if 0 |
| 158 static const char* |
| 159 ultag_getGrandfathered(const ULanguageTag* langtag); |
| 160 #endif |
| 161 |
| 162 /* |
| 163 * ------------------------------------------------- |
| 164 * |
| 165 * Language subtag syntax validation functions |
| 166 * |
| 167 * ------------------------------------------------- |
| 168 */ |
| 169 |
| 170 static UBool |
| 171 _isAlphaString(const char* s, int32_t len) { |
| 172 int32_t i; |
| 173 for (i = 0; i < len; i++) { |
| 174 if (!ISALPHA(*(s + i))) { |
| 175 return FALSE; |
| 176 } |
| 177 } |
| 178 return TRUE; |
| 179 } |
| 180 |
| 181 static UBool |
| 182 _isNumericString(const char* s, int32_t len) { |
| 183 int32_t i; |
| 184 for (i = 0; i < len; i++) { |
| 185 if (!ISNUMERIC(*(s + i))) { |
| 186 return FALSE; |
| 187 } |
| 188 } |
| 189 return TRUE; |
| 190 } |
| 191 |
| 192 static UBool |
| 193 _isAlphaNumericString(const char* s, int32_t len) { |
| 194 int32_t i; |
| 195 for (i = 0; i < len; i++) { |
| 196 if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { |
| 197 return FALSE; |
| 198 } |
| 199 } |
| 200 return TRUE; |
| 201 } |
| 202 |
| 203 static UBool |
| 204 _isLanguageSubtag(const char* s, int32_t len) { |
| 205 /* |
| 206 * language = 2*3ALPHA ; shortest ISO 639 code |
| 207 * ["-" extlang] ; sometimes followed by |
| 208 * ; extended language subtags |
| 209 * / 4ALPHA ; or reserved for future use |
| 210 * / 5*8ALPHA ; or registered language subtag |
| 211 */ |
| 212 if (len < 0) { |
| 213 len = (int32_t)uprv_strlen(s); |
| 214 } |
| 215 if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { |
| 216 return TRUE; |
| 217 } |
| 218 return FALSE; |
| 219 } |
| 220 |
| 221 static UBool |
| 222 _isExtlangSubtag(const char* s, int32_t len) { |
| 223 /* |
| 224 * extlang = 3ALPHA ; selected ISO 639 codes |
| 225 * *2("-" 3ALPHA) ; permanently reserved |
| 226 */ |
| 227 if (len < 0) { |
| 228 len = (int32_t)uprv_strlen(s); |
| 229 } |
| 230 if (len == 3 && _isAlphaString(s, len)) { |
| 231 return TRUE; |
| 232 } |
| 233 return FALSE; |
| 234 } |
| 235 |
| 236 static UBool |
| 237 _isScriptSubtag(const char* s, int32_t len) { |
| 238 /* |
| 239 * script = 4ALPHA ; ISO 15924 code |
| 240 */ |
| 241 if (len < 0) { |
| 242 len = (int32_t)uprv_strlen(s); |
| 243 } |
| 244 if (len == 4 && _isAlphaString(s, len)) { |
| 245 return TRUE; |
| 246 } |
| 247 return FALSE; |
| 248 } |
| 249 |
| 250 static UBool |
| 251 _isRegionSubtag(const char* s, int32_t len) { |
| 252 /* |
| 253 * region = 2ALPHA ; ISO 3166-1 code |
| 254 * / 3DIGIT ; UN M.49 code |
| 255 */ |
| 256 if (len < 0) { |
| 257 len = (int32_t)uprv_strlen(s); |
| 258 } |
| 259 if (len == 2 && _isAlphaString(s, len)) { |
| 260 return TRUE; |
| 261 } |
| 262 if (len == 3 && _isNumericString(s, len)) { |
| 263 return TRUE; |
| 264 } |
| 265 return FALSE; |
| 266 } |
| 267 |
| 268 static UBool |
| 269 _isVariantSubtag(const char* s, int32_t len) { |
| 270 /* |
| 271 * variant = 5*8alphanum ; registered variants |
| 272 * / (DIGIT 3alphanum) |
| 273 */ |
| 274 if (len < 0) { |
| 275 len = (int32_t)uprv_strlen(s); |
| 276 } |
| 277 if (len >= 5 && len <= 8 && _isAlphaString(s, len)) { |
| 278 return TRUE; |
| 279 } |
| 280 if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { |
| 281 return TRUE; |
| 282 } |
| 283 return FALSE; |
| 284 } |
| 285 |
| 286 static UBool |
| 287 _isExtensionSingleton(const char* s, int32_t len) { |
| 288 /* |
| 289 * extension = singleton 1*("-" (2*8alphanum)) |
| 290 */ |
| 291 if (len < 0) { |
| 292 len = (int32_t)uprv_strlen(s); |
| 293 } |
| 294 if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) { |
| 295 return TRUE; |
| 296 } |
| 297 return FALSE; |
| 298 } |
| 299 |
| 300 static UBool |
| 301 _isExtensionSubtag(const char* s, int32_t len) { |
| 302 /* |
| 303 * extension = singleton 1*("-" (2*8alphanum)) |
| 304 */ |
| 305 if (len < 0) { |
| 306 len = (int32_t)uprv_strlen(s); |
| 307 } |
| 308 if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) { |
| 309 return TRUE; |
| 310 } |
| 311 return FALSE; |
| 312 } |
| 313 |
| 314 static UBool |
| 315 _isExtensionSubtags(const char* s, int32_t len) { |
| 316 const char *p = s; |
| 317 const char *pSubtag = NULL; |
| 318 |
| 319 if (len < 0) { |
| 320 len = (int32_t)uprv_strlen(s); |
| 321 } |
| 322 |
| 323 while ((p - s) < len) { |
| 324 if (*p == SEP) { |
| 325 if (pSubtag == NULL) { |
| 326 return FALSE; |
| 327 } |
| 328 if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { |
| 329 return FALSE; |
| 330 } |
| 331 pSubtag = NULL; |
| 332 } else if (pSubtag == NULL) { |
| 333 pSubtag = p; |
| 334 } |
| 335 p++; |
| 336 } |
| 337 if (pSubtag == NULL) { |
| 338 return FALSE; |
| 339 } |
| 340 return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); |
| 341 } |
| 342 |
| 343 static UBool |
| 344 _isPrivateuseValueSubtag(const char* s, int32_t len) { |
| 345 /* |
| 346 * privateuse = "x" 1*("-" (1*8alphanum)) |
| 347 */ |
| 348 if (len < 0) { |
| 349 len = (int32_t)uprv_strlen(s); |
| 350 } |
| 351 if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { |
| 352 return TRUE; |
| 353 } |
| 354 return FALSE; |
| 355 } |
| 356 |
| 357 static UBool |
| 358 _isPrivateuseValueSubtags(const char* s, int32_t len) { |
| 359 const char *p = s; |
| 360 const char *pSubtag = NULL; |
| 361 |
| 362 if (len < 0) { |
| 363 len = (int32_t)uprv_strlen(s); |
| 364 } |
| 365 |
| 366 while ((p - s) < len) { |
| 367 if (*p == SEP) { |
| 368 if (pSubtag == NULL) { |
| 369 return FALSE; |
| 370 } |
| 371 if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { |
| 372 return FALSE; |
| 373 } |
| 374 pSubtag = NULL; |
| 375 } else if (pSubtag == NULL) { |
| 376 pSubtag = p; |
| 377 } |
| 378 p++; |
| 379 } |
| 380 if (pSubtag == NULL) { |
| 381 return FALSE; |
| 382 } |
| 383 return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); |
| 384 } |
| 385 |
| 386 static UBool |
| 387 _isLDMLKey(const char* s, int32_t len) { |
| 388 if (len < 0) { |
| 389 len = (int32_t)uprv_strlen(s); |
| 390 } |
| 391 if (len == 2 && _isAlphaNumericString(s, len)) { |
| 392 return TRUE; |
| 393 } |
| 394 return FALSE; |
| 395 } |
| 396 |
| 397 static UBool |
| 398 _isLDMLType(const char* s, int32_t len) { |
| 399 if (len < 0) { |
| 400 len = (int32_t)uprv_strlen(s); |
| 401 } |
| 402 if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) { |
| 403 return TRUE; |
| 404 } |
| 405 return FALSE; |
| 406 } |
| 407 |
| 408 /* |
| 409 * ------------------------------------------------- |
| 410 * |
| 411 * Helper functions |
| 412 * |
| 413 * ------------------------------------------------- |
| 414 */ |
| 415 |
| 416 static UBool |
| 417 _addVariantToList(VariantListEntry **first, VariantListEntry *var) { |
| 418 UBool bAdded = TRUE; |
| 419 |
| 420 if (*first == NULL) { |
| 421 var->next = NULL; |
| 422 *first = var; |
| 423 } else { |
| 424 VariantListEntry *prev, *cur; |
| 425 int32_t cmp; |
| 426 |
| 427 /* reorder variants in alphabetical order */ |
| 428 prev = NULL; |
| 429 cur = *first; |
| 430 while (TRUE) { |
| 431 if (cur == NULL) { |
| 432 prev->next = var; |
| 433 var->next = NULL; |
| 434 break; |
| 435 } |
| 436 cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); |
| 437 if (cmp < 0) { |
| 438 if (prev == NULL) { |
| 439 *first = var; |
| 440 } else { |
| 441 prev->next = var; |
| 442 } |
| 443 var->next = cur; |
| 444 break; |
| 445 } |
| 446 if (cmp == 0) { |
| 447 /* duplicated variant */ |
| 448 bAdded = FALSE; |
| 449 break; |
| 450 } |
| 451 prev = cur; |
| 452 cur = cur->next; |
| 453 } |
| 454 } |
| 455 |
| 456 return bAdded; |
| 457 } |
| 458 |
| 459 |
| 460 static UBool |
| 461 _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool l
ocaleToBCP) { |
| 462 UBool bAdded = TRUE; |
| 463 |
| 464 if (*first == NULL) { |
| 465 ext->next = NULL; |
| 466 *first = ext; |
| 467 } else { |
| 468 ExtensionListEntry *prev, *cur; |
| 469 int32_t cmp; |
| 470 |
| 471 /* reorder variants in alphabetical order */ |
| 472 prev = NULL; |
| 473 cur = *first; |
| 474 while (TRUE) { |
| 475 if (cur == NULL) { |
| 476 prev->next = ext; |
| 477 ext->next = NULL; |
| 478 break; |
| 479 } |
| 480 if (localeToBCP) { |
| 481 /* special handling for locale to bcp conversion */ |
| 482 int32_t len, curlen; |
| 483 |
| 484 len = (int32_t)uprv_strlen(ext->key); |
| 485 curlen = (int32_t)uprv_strlen(cur->key); |
| 486 |
| 487 if (len == 1 && curlen == 1) { |
| 488 if (*(ext->key) == *(cur->key)) { |
| 489 cmp = 0; |
| 490 } else if (*(ext->key) == PRIVATEUSE) { |
| 491 cmp = 1; |
| 492 } else if (*(cur->key) == PRIVATEUSE) { |
| 493 cmp = -1; |
| 494 } else { |
| 495 cmp = *(ext->key) - *(cur->key); |
| 496 } |
| 497 } else if (len == 1) { |
| 498 cmp = *(ext->key) - LDMLEXT; |
| 499 } else if (curlen == 1) { |
| 500 cmp = LDMLEXT - *(cur->key); |
| 501 } else { |
| 502 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); |
| 503 } |
| 504 } else { |
| 505 cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); |
| 506 } |
| 507 if (cmp < 0) { |
| 508 if (prev == NULL) { |
| 509 *first = ext; |
| 510 } else { |
| 511 prev->next = ext; |
| 512 } |
| 513 ext->next = cur; |
| 514 break; |
| 515 } |
| 516 if (cmp == 0) { |
| 517 /* duplicated extension key */ |
| 518 bAdded = FALSE; |
| 519 break; |
| 520 } |
| 521 prev = cur; |
| 522 cur = cur->next; |
| 523 } |
| 524 } |
| 525 |
| 526 return bAdded; |
| 527 } |
| 528 |
| 529 static void |
| 530 _initializeULanguageTag(ULanguageTag* langtag) { |
| 531 int32_t i; |
| 532 |
| 533 langtag->buf = NULL; |
| 534 |
| 535 langtag->language = EMPTY; |
| 536 for (i = 0; i < MAXEXTLANG; i++) { |
| 537 langtag->extlang[i] = NULL; |
| 538 } |
| 539 |
| 540 langtag->script = EMPTY; |
| 541 langtag->region = EMPTY; |
| 542 |
| 543 langtag->variants = NULL; |
| 544 langtag->extensions = NULL; |
| 545 |
| 546 langtag->grandfathered = EMPTY; |
| 547 langtag->privateuse = EMPTY; |
| 548 } |
| 549 |
/* Names of the resource bundle and of the tables read from it */
#define KEYTYPEDATA     "keyTypeData"
#define KEYMAP          "keyMap"
#define TYPEMAP         "typeMap"
#define TYPEALIAS       "typeAlias"

/* Sizes of the local scratch buffers used by the conversion helpers */
#define MAX_BCP47_SUBTAG_LEN    9   /* including null terminator */
#define MAX_LDML_KEY_LEN        22
#define MAX_LDML_TYPE_LEN       32
| 557 |
| 558 static int32_t |
| 559 _ldmlKeyToBCP47(const char* key, int32_t keyLen, |
| 560 char* bcpKey, int32_t bcpKeyCapacity, |
| 561 UErrorCode *status) { |
| 562 UResourceBundle *rb; |
| 563 char keyBuf[MAX_LDML_KEY_LEN]; |
| 564 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; |
| 565 int32_t resultLen = 0; |
| 566 int32_t i; |
| 567 UErrorCode tmpStatus = U_ZERO_ERROR; |
| 568 const UChar *uBcpKey; |
| 569 int32_t bcpKeyLen; |
| 570 |
| 571 if (keyLen < 0) { |
| 572 keyLen = (int32_t)uprv_strlen(key); |
| 573 } |
| 574 |
| 575 if (keyLen >= sizeof(keyBuf)) { |
| 576 /* no known valid LDML key exceeding 21 */ |
| 577 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 578 return 0; |
| 579 } |
| 580 |
| 581 uprv_memcpy(keyBuf, key, keyLen); |
| 582 keyBuf[keyLen] = 0; |
| 583 |
| 584 /* to lower case */ |
| 585 for (i = 0; i < keyLen; i++) { |
| 586 keyBuf[i] = uprv_tolower(keyBuf[i]); |
| 587 } |
| 588 |
| 589 rb = ures_openDirect(NULL, KEYTYPEDATA, status); |
| 590 ures_getByKey(rb, KEYMAP, rb, status); |
| 591 |
| 592 if (U_FAILURE(*status)) { |
| 593 ures_close(rb); |
| 594 return 0; |
| 595 } |
| 596 |
| 597 uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus); |
| 598 if (U_SUCCESS(tmpStatus)) { |
| 599 u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen); |
| 600 bcpKeyBuf[bcpKeyLen] = 0; |
| 601 resultLen = bcpKeyLen; |
| 602 } else { |
| 603 if (_isLDMLKey(key, keyLen)) { |
| 604 uprv_memcpy(bcpKeyBuf, key, keyLen); |
| 605 bcpKeyBuf[keyLen] = 0; |
| 606 resultLen = keyLen; |
| 607 } else { |
| 608 /* mapping not availabe */ |
| 609 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 610 } |
| 611 } |
| 612 ures_close(rb); |
| 613 |
| 614 if (U_FAILURE(*status)) { |
| 615 return 0; |
| 616 } |
| 617 |
| 618 uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity)); |
| 619 return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status); |
| 620 } |
| 621 |
| 622 static int32_t |
| 623 _bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen, |
| 624 char* key, int32_t keyCapacity, |
| 625 UErrorCode *status) { |
| 626 UResourceBundle *rb; |
| 627 char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; |
| 628 int32_t resultLen = 0; |
| 629 int32_t i; |
| 630 const char *resKey = NULL; |
| 631 UResourceBundle *mapData; |
| 632 |
| 633 if (bcpKeyLen < 0) { |
| 634 bcpKeyLen = (int32_t)uprv_strlen(bcpKey); |
| 635 } |
| 636 |
| 637 if (bcpKeyLen >= sizeof(bcpKeyBuf)) { |
| 638 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 639 return 0; |
| 640 } |
| 641 |
| 642 uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen); |
| 643 bcpKeyBuf[bcpKeyLen] = 0; |
| 644 |
| 645 /* to lower case */ |
| 646 for (i = 0; i < bcpKeyLen; i++) { |
| 647 bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]); |
| 648 } |
| 649 |
| 650 rb = ures_openDirect(NULL, KEYTYPEDATA, status); |
| 651 ures_getByKey(rb, KEYMAP, rb, status); |
| 652 if (U_FAILURE(*status)) { |
| 653 ures_close(rb); |
| 654 return 0; |
| 655 } |
| 656 |
| 657 mapData = ures_getNextResource(rb, NULL, status); |
| 658 while (U_SUCCESS(*status)) { |
| 659 const UChar *uBcpKey; |
| 660 char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN]; |
| 661 int32_t tmpBcpKeyLen; |
| 662 |
| 663 uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status); |
| 664 if (U_FAILURE(*status)) { |
| 665 break; |
| 666 } |
| 667 u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen); |
| 668 tmpBcpKeyBuf[tmpBcpKeyLen] = 0; |
| 669 if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKeyBuf) == 0) { |
| 670 /* found a matching BCP47 key */ |
| 671 resKey = ures_getKey(mapData); |
| 672 resultLen = (int32_t)uprv_strlen(resKey); |
| 673 break; |
| 674 } |
| 675 if (!ures_hasNext(rb)) { |
| 676 break; |
| 677 } |
| 678 ures_getNextResource(rb, mapData, status); |
| 679 } |
| 680 ures_close(mapData); |
| 681 ures_close(rb); |
| 682 |
| 683 if (U_FAILURE(*status)) { |
| 684 return 0; |
| 685 } |
| 686 |
| 687 if (resKey == NULL) { |
| 688 resKey = bcpKeyBuf; |
| 689 resultLen = bcpKeyLen; |
| 690 } |
| 691 |
| 692 uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity)); |
| 693 return u_terminateChars(key, keyCapacity, resultLen, status); |
| 694 } |
| 695 |
| 696 static int32_t |
| 697 _ldmlTypeToBCP47(const char* key, int32_t keyLen, |
| 698 const char* type, int32_t typeLen, |
| 699 char* bcpType, int32_t bcpTypeCapacity, |
| 700 UErrorCode *status) { |
| 701 UResourceBundle *rb, *keyTypeData, *typeMapForKey; |
| 702 char keyBuf[MAX_LDML_KEY_LEN]; |
| 703 char typeBuf[MAX_LDML_TYPE_LEN]; |
| 704 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN]; |
| 705 int32_t resultLen = 0; |
| 706 int32_t i; |
| 707 UErrorCode tmpStatus = U_ZERO_ERROR; |
| 708 const UChar *uBcpType, *uCanonicalType; |
| 709 int32_t bcpTypeLen, canonicalTypeLen; |
| 710 UBool isTimezone = FALSE; |
| 711 |
| 712 if (keyLen < 0) { |
| 713 keyLen = (int32_t)uprv_strlen(key); |
| 714 } |
| 715 if (keyLen >= sizeof(keyBuf)) { |
| 716 /* no known valid LDML key exceeding 21 */ |
| 717 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 718 return 0; |
| 719 } |
| 720 uprv_memcpy(keyBuf, key, keyLen); |
| 721 keyBuf[keyLen] = 0; |
| 722 |
| 723 /* to lower case */ |
| 724 for (i = 0; i < keyLen; i++) { |
| 725 keyBuf[i] = uprv_tolower(keyBuf[i]); |
| 726 } |
| 727 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) { |
| 728 isTimezone = TRUE; |
| 729 } |
| 730 |
| 731 if (typeLen < 0) { |
| 732 typeLen = (int32_t)uprv_strlen(type); |
| 733 } |
| 734 if (typeLen >= sizeof(typeBuf)) { |
| 735 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 736 return 0; |
| 737 } |
| 738 |
| 739 if (isTimezone) { |
| 740 /* replace '/' with ':' */ |
| 741 for (i = 0; i < typeLen; i++) { |
| 742 if (*(type + i) == '/') { |
| 743 typeBuf[i] = ':'; |
| 744 } else { |
| 745 typeBuf[i] = *(type + i); |
| 746 } |
| 747 } |
| 748 typeBuf[typeLen] = 0; |
| 749 type = &typeBuf[0]; |
| 750 } |
| 751 |
| 752 keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status); |
| 753 rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status); |
| 754 if (U_FAILURE(*status)) { |
| 755 ures_close(rb); |
| 756 ures_close(keyTypeData); |
| 757 return 0; |
| 758 } |
| 759 |
| 760 typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus); |
| 761 uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus)
; |
| 762 if (U_SUCCESS(tmpStatus)) { |
| 763 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen); |
| 764 resultLen = bcpTypeLen; |
| 765 } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) { |
| 766 /* is this type alias? */ |
| 767 tmpStatus = U_ZERO_ERROR; |
| 768 ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus); |
| 769 ures_getByKey(rb, keyBuf, rb, &tmpStatus); |
| 770 uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpSt
atus); |
| 771 if (U_SUCCESS(tmpStatus)) { |
| 772 u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen); |
| 773 if (isTimezone) { |
| 774 /* replace '/' with ':' */ |
| 775 for (i = 0; i < canonicalTypeLen; i++) { |
| 776 if (typeBuf[i] == '/') { |
| 777 typeBuf[i] = ':'; |
| 778 } |
| 779 } |
| 780 } |
| 781 typeBuf[canonicalTypeLen] = 0; |
| 782 |
| 783 /* look up the canonical type */ |
| 784 uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen,
&tmpStatus); |
| 785 if (U_SUCCESS(tmpStatus)) { |
| 786 u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen); |
| 787 resultLen = bcpTypeLen; |
| 788 } |
| 789 } |
| 790 if (tmpStatus == U_MISSING_RESOURCE_ERROR) { |
| 791 if (_isLDMLType(type, typeLen)) { |
| 792 uprv_memcpy(bcpTypeBuf, type, typeLen); |
| 793 resultLen = typeLen; |
| 794 } else { |
| 795 /* mapping not availabe */ |
| 796 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 797 } |
| 798 } |
| 799 } else { |
| 800 *status = tmpStatus; |
| 801 } |
| 802 ures_close(rb); |
| 803 ures_close(typeMapForKey); |
| 804 ures_close(keyTypeData); |
| 805 |
| 806 if (U_FAILURE(*status)) { |
| 807 return 0; |
| 808 } |
| 809 |
| 810 uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity)); |
| 811 return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status); |
| 812 } |
| 813 |
| 814 static int32_t |
| 815 _bcp47ToLDMLType(const char* key, int32_t keyLen, |
| 816 const char* bcpType, int32_t bcpTypeLen, |
| 817 char* type, int32_t typeCapacity, |
| 818 UErrorCode *status) { |
| 819 UResourceBundle *rb; |
| 820 char keyBuf[MAX_LDML_KEY_LEN]; |
| 821 char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN]; |
| 822 int32_t resultLen = 0; |
| 823 int32_t i; |
| 824 const char *resType = NULL; |
| 825 UResourceBundle *mapData; |
| 826 UErrorCode tmpStatus = U_ZERO_ERROR; |
| 827 int32_t copyLen; |
| 828 |
| 829 if (keyLen < 0) { |
| 830 keyLen = (int32_t)uprv_strlen(key); |
| 831 } |
| 832 |
| 833 if (keyLen >= sizeof(keyBuf)) { |
| 834 /* no known valid LDML key exceeding 21 */ |
| 835 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 836 return 0; |
| 837 } |
| 838 uprv_memcpy(keyBuf, key, keyLen); |
| 839 keyBuf[keyLen] = 0; |
| 840 |
| 841 /* to lower case */ |
| 842 for (i = 0; i < keyLen; i++) { |
| 843 keyBuf[i] = uprv_tolower(keyBuf[i]); |
| 844 } |
| 845 |
| 846 |
| 847 if (bcpTypeLen < 0) { |
| 848 bcpTypeLen = (int32_t)uprv_strlen(bcpType); |
| 849 } |
| 850 |
| 851 if (bcpTypeLen >= sizeof(bcpTypeBuf)) { |
| 852 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 853 return 0; |
| 854 } |
| 855 |
| 856 uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen); |
| 857 bcpTypeBuf[bcpTypeLen] = 0; |
| 858 |
| 859 /* to lower case */ |
| 860 for (i = 0; i < bcpTypeLen; i++) { |
| 861 bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]); |
| 862 } |
| 863 |
| 864 rb = ures_openDirect(NULL, KEYTYPEDATA, status); |
| 865 ures_getByKey(rb, TYPEMAP, rb, status); |
| 866 if (U_FAILURE(*status)) { |
| 867 ures_close(rb); |
| 868 return 0; |
| 869 } |
| 870 |
| 871 ures_getByKey(rb, keyBuf, rb, &tmpStatus); |
| 872 mapData = ures_getNextResource(rb, NULL, &tmpStatus); |
| 873 while (U_SUCCESS(tmpStatus)) { |
| 874 const UChar *uBcpType; |
| 875 char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN]; |
| 876 int32_t tmpBcpTypeLen; |
| 877 |
| 878 uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus); |
| 879 if (U_FAILURE(tmpStatus)) { |
| 880 break; |
| 881 } |
| 882 u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen); |
| 883 tmpBcpTypeBuf[tmpBcpTypeLen] = 0; |
| 884 if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpTypeBuf) == 0) { |
| 885 /* found a matching BCP47 type */ |
| 886 resType = ures_getKey(mapData); |
| 887 resultLen = (int32_t)uprv_strlen(resType); |
| 888 break; |
| 889 } |
| 890 if (!ures_hasNext(rb)) { |
| 891 break; |
| 892 } |
| 893 ures_getNextResource(rb, mapData, &tmpStatus); |
| 894 } |
| 895 ures_close(mapData); |
| 896 ures_close(rb); |
| 897 |
| 898 if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) { |
| 899 *status = tmpStatus; |
| 900 return 0; |
| 901 } |
| 902 |
| 903 if (resType == NULL) { |
| 904 resType = bcpTypeBuf; |
| 905 resultLen = bcpTypeLen; |
| 906 } |
| 907 |
| 908 copyLen = uprv_min(resultLen, typeCapacity); |
| 909 uprv_memcpy(type, resType, copyLen); |
| 910 |
| 911 if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) { |
| 912 for (i = 0; i < copyLen; i++) { |
| 913 if (*(type + i) == ':') { |
| 914 *(type + i) = '/'; |
| 915 } |
| 916 } |
| 917 } |
| 918 |
| 919 return u_terminateChars(type, typeCapacity, resultLen, status); |
| 920 } |
| 921 |
| 922 static int32_t |
| 923 _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capac
ity, UBool strict, UErrorCode* status) { |
| 924 char buf[ULOC_LANG_CAPACITY]; |
| 925 UErrorCode tmpStatus = U_ZERO_ERROR; |
| 926 int32_t len, i; |
| 927 int32_t reslen = 0; |
| 928 |
| 929 if (U_FAILURE(*status)) { |
| 930 return 0; |
| 931 } |
| 932 |
| 933 len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); |
| 934 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
| 935 if (strict) { |
| 936 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 937 return 0; |
| 938 } |
| 939 len = 0; |
| 940 } |
| 941 |
| 942 /* Note: returned language code is in lower case letters */ |
| 943 |
| 944 if (len == 0) { |
| 945 if (reslen < capacity) { |
| 946 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capa
city - reslen)); |
| 947 } |
| 948 reslen += LANG_UND_LEN; |
| 949 } else if (!_isLanguageSubtag(buf, len)) { |
| 950 /* invalid language code */ |
| 951 if (strict) { |
| 952 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 953 return 0; |
| 954 } |
| 955 if (reslen < capacity) { |
| 956 uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capa
city - reslen)); |
| 957 } |
| 958 reslen += LANG_UND_LEN; |
| 959 } else { |
| 960 /* resolve deprecated */ |
| 961 for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { |
| 962 if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { |
| 963 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); |
| 964 len = (int32_t)uprv_strlen(buf); |
| 965 break; |
| 966 } |
| 967 } |
| 968 if (reslen < capacity) { |
| 969 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)
); |
| 970 } |
| 971 reslen += len; |
| 972 } |
| 973 u_terminateChars(appendAt, capacity, reslen, status); |
| 974 return reslen; |
| 975 } |
| 976 |
| 977 static int32_t |
| 978 _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacit
y, UBool strict, UErrorCode* status) { |
| 979 char buf[ULOC_SCRIPT_CAPACITY]; |
| 980 UErrorCode tmpStatus = U_ZERO_ERROR; |
| 981 int32_t len; |
| 982 int32_t reslen = 0; |
| 983 |
| 984 if (U_FAILURE(*status)) { |
| 985 return 0; |
| 986 } |
| 987 |
| 988 len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); |
| 989 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
| 990 if (strict) { |
| 991 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 992 } |
| 993 return 0; |
| 994 } |
| 995 |
| 996 if (len > 0) { |
| 997 if (!_isScriptSubtag(buf, len)) { |
| 998 /* invalid script code */ |
| 999 if (strict) { |
| 1000 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1001 } |
| 1002 return 0; |
| 1003 } else { |
| 1004 if (reslen < capacity) { |
| 1005 *(appendAt + reslen) = SEP; |
| 1006 } |
| 1007 reslen++; |
| 1008 |
| 1009 if (reslen < capacity) { |
| 1010 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - res
len)); |
| 1011 } |
| 1012 reslen += len; |
| 1013 } |
| 1014 } |
| 1015 u_terminateChars(appendAt, capacity, reslen, status); |
| 1016 return reslen; |
| 1017 } |
| 1018 |
| 1019 static int32_t |
| 1020 _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacit
y, UBool strict, UErrorCode* status) { |
| 1021 char buf[ULOC_COUNTRY_CAPACITY]; |
| 1022 UErrorCode tmpStatus = U_ZERO_ERROR; |
| 1023 int32_t len; |
| 1024 int32_t reslen = 0; |
| 1025 |
| 1026 if (U_FAILURE(*status)) { |
| 1027 return 0; |
| 1028 } |
| 1029 |
| 1030 len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); |
| 1031 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
| 1032 if (strict) { |
| 1033 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1034 } |
| 1035 return 0; |
| 1036 } |
| 1037 |
| 1038 if (len > 0) { |
| 1039 if (!_isRegionSubtag(buf, len)) { |
| 1040 /* invalid region code */ |
| 1041 if (strict) { |
| 1042 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1043 } |
| 1044 return 0; |
| 1045 } else { |
| 1046 if (reslen < capacity) { |
| 1047 *(appendAt + reslen) = SEP; |
| 1048 } |
| 1049 reslen++; |
| 1050 |
| 1051 if (reslen < capacity) { |
| 1052 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - res
len)); |
| 1053 } |
| 1054 reslen += len; |
| 1055 } |
| 1056 } |
| 1057 u_terminateChars(appendAt, capacity, reslen, status); |
| 1058 return reslen; |
| 1059 } |
| 1060 |
| 1061 static int32_t |
| 1062 _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
ity, UBool strict, UBool *hadPosix, UErrorCode* status) { |
| 1063 char buf[ULOC_FULLNAME_CAPACITY]; |
| 1064 UErrorCode tmpStatus = U_ZERO_ERROR; |
| 1065 int32_t len, i; |
| 1066 int32_t reslen = 0; |
| 1067 |
| 1068 if (U_FAILURE(*status)) { |
| 1069 return 0; |
| 1070 } |
| 1071 |
| 1072 len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); |
| 1073 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
| 1074 if (strict) { |
| 1075 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1076 } |
| 1077 return 0; |
| 1078 } |
| 1079 |
| 1080 if (len > 0) { |
| 1081 char *p, *pVar; |
| 1082 UBool bNext = TRUE; |
| 1083 VariantListEntry *var; |
| 1084 VariantListEntry *varFirst = NULL; |
| 1085 |
| 1086 pVar = NULL; |
| 1087 p = buf; |
| 1088 while (bNext) { |
| 1089 if (*p == SEP || *p == LOCALE_SEP || *p == 0) { |
| 1090 if (*p == 0) { |
| 1091 bNext = FALSE; |
| 1092 } else { |
| 1093 *p = 0; /* terminate */ |
| 1094 } |
| 1095 if (pVar == NULL) { |
| 1096 if (strict) { |
| 1097 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1098 break; |
| 1099 } |
| 1100 /* ignore empty variant */ |
| 1101 } else { |
| 1102 /* ICU uses upper case letters for variants, but |
| 1103 the canonical format is lowercase in BCP47 */ |
| 1104 for (i = 0; *(pVar + i) != 0; i++) { |
| 1105 *(pVar + i) = uprv_tolower(*(pVar + i)); |
| 1106 } |
| 1107 |
| 1108 /* validate */ |
| 1109 if (_isVariantSubtag(pVar, -1)) { |
| 1110 if (uprv_strcmp(pVar,POSIX_VALUE)) { |
| 1111 /* emit the variant to the list */ |
| 1112 var = uprv_malloc(sizeof(VariantListEntry)); |
| 1113 if (var == NULL) { |
| 1114 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1115 break; |
| 1116 } |
| 1117 var->variant = pVar; |
| 1118 if (!_addVariantToList(&varFirst, var)) { |
| 1119 /* duplicated variant */ |
| 1120 uprv_free(var); |
| 1121 if (strict) { |
| 1122 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1123 break; |
| 1124 } |
| 1125 } |
| 1126 } else { |
| 1127 /* Special handling for POSIX variant, need to remem
ber that we had it and then */ |
| 1128 /* treat it like an extension later. */ |
| 1129 *hadPosix = TRUE; |
| 1130 } |
| 1131 } else if (strict) { |
| 1132 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1133 break; |
| 1134 } |
| 1135 } |
| 1136 /* reset variant starting position */ |
| 1137 pVar = NULL; |
| 1138 } else if (pVar == NULL) { |
| 1139 pVar = p; |
| 1140 } |
| 1141 p++; |
| 1142 } |
| 1143 |
| 1144 if (U_SUCCESS(*status)) { |
| 1145 if (varFirst != NULL) { |
| 1146 int32_t varLen; |
| 1147 |
| 1148 /* write out sorted/validated/normalized variants to the target
*/ |
| 1149 var = varFirst; |
| 1150 while (var != NULL) { |
| 1151 if (reslen < capacity) { |
| 1152 *(appendAt + reslen) = SEP; |
| 1153 } |
| 1154 reslen++; |
| 1155 varLen = (int32_t)uprv_strlen(var->variant); |
| 1156 if (reslen < capacity) { |
| 1157 uprv_memcpy(appendAt + reslen, var->variant, uprv_min(va
rLen, capacity - reslen)); |
| 1158 } |
| 1159 reslen += varLen; |
| 1160 var = var->next; |
| 1161 } |
| 1162 } |
| 1163 } |
| 1164 |
| 1165 /* clean up */ |
| 1166 var = varFirst; |
| 1167 while (var != NULL) { |
| 1168 VariantListEntry *tmpVar = var->next; |
| 1169 uprv_free(var); |
| 1170 var = tmpVar; |
| 1171 } |
| 1172 |
| 1173 if (U_FAILURE(*status)) { |
| 1174 return 0; |
| 1175 } |
| 1176 } |
| 1177 |
| 1178 u_terminateChars(appendAt, capacity, reslen, status); |
| 1179 return reslen; |
| 1180 } |
| 1181 |
| 1182 static int32_t |
| 1183 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
ity, UBool strict, UBool hadPosix, UErrorCode* status) { |
| 1184 char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
| 1185 UEnumeration *keywordEnum = NULL; |
| 1186 int32_t reslen = 0; |
| 1187 |
| 1188 keywordEnum = uloc_openKeywords(localeID, status); |
| 1189 if (U_FAILURE(*status) && !hadPosix) { |
| 1190 uenum_close(keywordEnum); |
| 1191 return 0; |
| 1192 } |
| 1193 if (keywordEnum != NULL || hadPosix) { |
| 1194 /* reorder extensions */ |
| 1195 int32_t len; |
| 1196 const char *key; |
| 1197 ExtensionListEntry *firstExt = NULL; |
| 1198 ExtensionListEntry *ext; |
| 1199 char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
| 1200 char *pExtBuf = extBuf; |
| 1201 int32_t extBufCapacity = sizeof(extBuf); |
| 1202 const char *bcpKey, *bcpValue; |
| 1203 UErrorCode tmpStatus = U_ZERO_ERROR; |
| 1204 int32_t keylen; |
| 1205 UBool isLDMLKeyword; |
| 1206 |
| 1207 while (TRUE) { |
| 1208 key = uenum_next(keywordEnum, NULL, status); |
| 1209 if (key == NULL) { |
| 1210 break; |
| 1211 } |
| 1212 len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStat
us); |
| 1213 if (U_FAILURE(tmpStatus)) { |
| 1214 if (strict) { |
| 1215 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1216 break; |
| 1217 } |
| 1218 /* ignore this keyword */ |
| 1219 tmpStatus = U_ZERO_ERROR; |
| 1220 continue; |
| 1221 } |
| 1222 |
| 1223 keylen = (int32_t)uprv_strlen(key); |
| 1224 isLDMLKeyword = (keylen > 1); |
| 1225 |
| 1226 if (isLDMLKeyword) { |
| 1227 int32_t modKeyLen; |
| 1228 |
| 1229 /* transform key and value to bcp47 style */ |
| 1230 modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity
, &tmpStatus); |
| 1231 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED
_WARNING) { |
| 1232 if (strict) { |
| 1233 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1234 break; |
| 1235 } |
| 1236 tmpStatus = U_ZERO_ERROR; |
| 1237 continue; |
| 1238 } |
| 1239 |
| 1240 bcpKey = pExtBuf; |
| 1241 pExtBuf += (modKeyLen + 1); |
| 1242 extBufCapacity -= (modKeyLen + 1); |
| 1243 |
| 1244 len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCap
acity, &tmpStatus); |
| 1245 if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED
_WARNING) { |
| 1246 if (strict) { |
| 1247 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1248 break; |
| 1249 } |
| 1250 tmpStatus = U_ZERO_ERROR; |
| 1251 continue; |
| 1252 } |
| 1253 bcpValue = pExtBuf; |
| 1254 pExtBuf += (len + 1); |
| 1255 extBufCapacity -= (len + 1); |
| 1256 } else { |
| 1257 if (*key == PRIVATEUSE) { |
| 1258 if (!_isPrivateuseValueSubtags(buf, len)) { |
| 1259 if (strict) { |
| 1260 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1261 break; |
| 1262 } |
| 1263 continue; |
| 1264 } |
| 1265 } else { |
| 1266 if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubt
ags(buf, len)) { |
| 1267 if (strict) { |
| 1268 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1269 break; |
| 1270 } |
| 1271 continue; |
| 1272 } |
| 1273 } |
| 1274 bcpKey = key; |
| 1275 if ((len + 1) < extBufCapacity) { |
| 1276 uprv_memcpy(pExtBuf, buf, len); |
| 1277 bcpValue = pExtBuf; |
| 1278 |
| 1279 pExtBuf += len; |
| 1280 |
| 1281 *pExtBuf = 0; |
| 1282 pExtBuf++; |
| 1283 |
| 1284 extBufCapacity -= (len + 1); |
| 1285 } else { |
| 1286 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1287 break; |
| 1288 } |
| 1289 } |
| 1290 |
| 1291 /* create ExtensionListEntry */ |
| 1292 ext = uprv_malloc(sizeof(ExtensionListEntry)); |
| 1293 if (ext == NULL) { |
| 1294 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1295 break; |
| 1296 } |
| 1297 ext->key = bcpKey; |
| 1298 ext->value = bcpValue; |
| 1299 |
| 1300 if (!_addExtensionToList(&firstExt, ext, TRUE)) { |
| 1301 uprv_free(ext); |
| 1302 if (strict) { |
| 1303 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1304 break; |
| 1305 } |
| 1306 } |
| 1307 } |
| 1308 |
| 1309 /* Special handling for POSIX variant - add the keywords for POSIX */ |
| 1310 if (hadPosix) { |
| 1311 /* create ExtensionListEntry for POSIX */ |
| 1312 ext = uprv_malloc(sizeof(ExtensionListEntry)); |
| 1313 if (ext == NULL) { |
| 1314 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1315 } |
| 1316 ext->key = POSIX_KEY; |
| 1317 ext->value = POSIX_VALUE; |
| 1318 |
| 1319 if (!_addExtensionToList(&firstExt, ext, TRUE)) { |
| 1320 uprv_free(ext); |
| 1321 } |
| 1322 } |
| 1323 |
| 1324 if (U_SUCCESS(*status) && (firstExt != NULL)) { |
| 1325 UBool startLDMLExtension = FALSE; |
| 1326 |
| 1327 /* write out the sorted BCP47 extensions and private use */ |
| 1328 ext = firstExt; |
| 1329 while (ext != NULL) { |
| 1330 if ((int32_t)uprv_strlen(ext->key) > 1 && !startLDMLExtension) { |
| 1331 /* write LDML singleton extension */ |
| 1332 if (reslen < capacity) { |
| 1333 *(appendAt + reslen) = SEP; |
| 1334 } |
| 1335 reslen++; |
| 1336 if (reslen < capacity) { |
| 1337 *(appendAt + reslen) = LDMLEXT; |
| 1338 } |
| 1339 reslen++; |
| 1340 startLDMLExtension = TRUE; |
| 1341 } |
| 1342 |
| 1343 if (reslen < capacity) { |
| 1344 *(appendAt + reslen) = SEP; |
| 1345 } |
| 1346 reslen++; |
| 1347 len = (int32_t)uprv_strlen(ext->key); |
| 1348 if (reslen < capacity) { |
| 1349 uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capac
ity - reslen)); |
| 1350 } |
| 1351 reslen += len; |
| 1352 if (reslen < capacity) { |
| 1353 *(appendAt + reslen) = SEP; |
| 1354 } |
| 1355 reslen++; |
| 1356 len = (int32_t)uprv_strlen(ext->value); |
| 1357 if (reslen < capacity) { |
| 1358 uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, cap
acity - reslen)); |
| 1359 } |
| 1360 reslen += len; |
| 1361 |
| 1362 ext = ext->next; |
| 1363 } |
| 1364 } |
| 1365 /* clean up */ |
| 1366 ext = firstExt; |
| 1367 while (ext != NULL) { |
| 1368 ExtensionListEntry *tmpExt = ext->next; |
| 1369 uprv_free(ext); |
| 1370 ext = tmpExt; |
| 1371 } |
| 1372 |
| 1373 uenum_close(keywordEnum); |
| 1374 |
| 1375 if (U_FAILURE(*status)) { |
| 1376 return 0; |
| 1377 } |
| 1378 } |
| 1379 |
| 1380 return u_terminateChars(appendAt, capacity, reslen, status); |
| 1381 } |
| 1382 |
| 1383 /** |
| 1384 * Append keywords parsed from LDML extension value |
| 1385 * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditiona
l} |
| 1386 * Note: char* buf is used for storing keywords |
| 1387 */ |
| 1388 static void |
| 1389 _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
o, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { |
| 1390 const char *p, *pNext, *pSep; |
| 1391 const char *pBcpKey, *pBcpType; |
| 1392 const char *pKey, *pType; |
| 1393 int32_t bcpKeyLen = 0, bcpTypeLen; |
| 1394 ExtensionListEntry *kwd, *nextKwd; |
| 1395 ExtensionListEntry *kwdFirst = NULL; |
| 1396 int32_t bufIdx = 0; |
| 1397 int32_t len; |
| 1398 |
| 1399 pNext = ldmlext; |
| 1400 pBcpKey = pBcpType = NULL; |
| 1401 while (pNext) { |
| 1402 p = pSep = pNext; |
| 1403 |
| 1404 /* locate next separator char */ |
| 1405 while (*pSep) { |
| 1406 if (*pSep == SEP) { |
| 1407 break; |
| 1408 } |
| 1409 pSep++; |
| 1410 } |
| 1411 if (*pSep == 0) { |
| 1412 /* last subtag */ |
| 1413 pNext = NULL; |
| 1414 } else { |
| 1415 pNext = pSep + 1; |
| 1416 } |
| 1417 |
| 1418 if (pBcpKey == NULL) { |
| 1419 pBcpKey = p; |
| 1420 bcpKeyLen = (int32_t)(pSep - p); |
| 1421 } else { |
| 1422 pBcpType = p; |
| 1423 bcpTypeLen = (int32_t)(pSep - p); |
| 1424 |
| 1425 /* BCP key to locale key */ |
| 1426 len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bu
fIdx - 1, status); |
| 1427 if (U_FAILURE(*status)) { |
| 1428 goto cleanup; |
| 1429 } |
| 1430 pKey = buf + bufIdx; |
| 1431 bufIdx += len; |
| 1432 *(buf + bufIdx) = 0; |
| 1433 bufIdx++; |
| 1434 |
| 1435 /* BCP type to locale type */ |
| 1436 len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx,
bufSize - bufIdx - 1, status); |
| 1437 if (U_FAILURE(*status)) { |
| 1438 goto cleanup; |
| 1439 } |
| 1440 pType = buf + bufIdx; |
| 1441 bufIdx += len; |
| 1442 *(buf + bufIdx) = 0; |
| 1443 bufIdx++; |
| 1444 |
| 1445 /* Special handling for u-va-posix, since we want to treat this as a
variant, not */ |
| 1446 /* as a keyword.
*/ |
| 1447 |
| 1448 if ( !uprv_strcmp(pKey,POSIX_KEY) && !uprv_strcmp(pType,POSIX_VALUE)
) { |
| 1449 *posixVariant = TRUE; |
| 1450 } else { |
| 1451 /* create an ExtensionListEntry for this keyword */ |
| 1452 kwd = uprv_malloc(sizeof(ExtensionListEntry)); |
| 1453 if (kwd == NULL) { |
| 1454 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1455 goto cleanup; |
| 1456 } |
| 1457 |
| 1458 kwd->key = pKey; |
| 1459 kwd->value = pType; |
| 1460 |
| 1461 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { |
| 1462 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1463 uprv_free(kwd); |
| 1464 goto cleanup; |
| 1465 } |
| 1466 } |
| 1467 |
| 1468 /* for next pair */ |
| 1469 pBcpKey = NULL; |
| 1470 pBcpType = NULL; |
| 1471 } |
| 1472 } |
| 1473 |
| 1474 if (pBcpKey != NULL) { |
| 1475 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1476 goto cleanup; |
| 1477 } |
| 1478 |
| 1479 kwd = kwdFirst; |
| 1480 while (kwd != NULL) { |
| 1481 nextKwd = kwd->next; |
| 1482 _addExtensionToList(appendTo, kwd, FALSE); |
| 1483 kwd = nextKwd; |
| 1484 } |
| 1485 |
| 1486 return; |
| 1487 |
| 1488 cleanup: |
| 1489 kwd = kwdFirst; |
| 1490 while (kwd != NULL) { |
| 1491 nextKwd = kwd->next; |
| 1492 uprv_free(kwd); |
| 1493 kwd = nextKwd; |
| 1494 } |
| 1495 } |
| 1496 |
| 1497 |
| 1498 static int32_t |
| 1499 _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
ode* status) { |
| 1500 int32_t reslen = 0; |
| 1501 int32_t i, n; |
| 1502 int32_t len; |
| 1503 ExtensionListEntry *kwdFirst = NULL; |
| 1504 ExtensionListEntry *kwd; |
| 1505 const char *key, *type; |
| 1506 char kwdBuf[ULOC_KEYWORDS_CAPACITY]; |
| 1507 UBool posixVariant = FALSE; |
| 1508 |
| 1509 if (U_FAILURE(*status)) { |
| 1510 return 0; |
| 1511 } |
| 1512 |
| 1513 n = ultag_getExtensionsSize(langtag); |
| 1514 |
| 1515 /* resolve locale keywords and reordering keys */ |
| 1516 for (i = 0; i < n; i++) { |
| 1517 key = ultag_getExtensionKey(langtag, i); |
| 1518 type = ultag_getExtensionValue(langtag, i); |
| 1519 if (*key == LDMLEXT) { |
| 1520 _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, sizeof(kwdBu
f), &posixVariant, status); |
| 1521 if (U_FAILURE(*status)) { |
| 1522 break; |
| 1523 } |
| 1524 } else { |
| 1525 kwd = uprv_malloc(sizeof(ExtensionListEntry)); |
| 1526 if (kwd == NULL) { |
| 1527 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1528 break; |
| 1529 } |
| 1530 kwd->key = key; |
| 1531 kwd->value = type; |
| 1532 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { |
| 1533 uprv_free(kwd); |
| 1534 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1535 break; |
| 1536 } |
| 1537 } |
| 1538 } |
| 1539 |
| 1540 if (U_SUCCESS(*status)) { |
| 1541 type = ultag_getPrivateUse(langtag); |
| 1542 if ((int32_t)uprv_strlen(type) > 0) { |
| 1543 /* add private use as a keyword */ |
| 1544 kwd = uprv_malloc(sizeof(ExtensionListEntry)); |
| 1545 if (kwd == NULL) { |
| 1546 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1547 } else { |
| 1548 kwd->key = PRIVATEUSE_KEY; |
| 1549 kwd->value = type; |
| 1550 if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { |
| 1551 uprv_free(kwd); |
| 1552 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 1553 } |
| 1554 } |
| 1555 } |
| 1556 } |
| 1557 |
| 1558 /* If a POSIX variant was in the extensions, write it out before writing the
keywords. */ |
| 1559 |
| 1560 if (U_SUCCESS(*status) && posixVariant) { |
| 1561 len = (int32_t) uprv_strlen(_POSIX); |
| 1562 if (reslen < capacity) { |
| 1563 uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - resl
en)); |
| 1564 } |
| 1565 reslen += len; |
| 1566 } |
| 1567 |
| 1568 if (U_SUCCESS(*status) && kwdFirst != NULL) { |
| 1569 /* write out the sorted keywords */ |
| 1570 kwd = kwdFirst; |
| 1571 while (kwd != NULL) { |
| 1572 if (reslen < capacity) { |
| 1573 if (kwd == kwdFirst) { |
| 1574 /* '@' */ |
| 1575 *(appendAt + reslen) = LOCALE_EXT_SEP; |
| 1576 } else { |
| 1577 /* ';' */ |
| 1578 *(appendAt + reslen) = LOCALE_KEYWORD_SEP; |
| 1579 } |
| 1580 } |
| 1581 reslen++; |
| 1582 |
| 1583 /* key */ |
| 1584 len = (int32_t)uprv_strlen(kwd->key); |
| 1585 if (reslen < capacity) { |
| 1586 uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity
- reslen)); |
| 1587 } |
| 1588 reslen += len; |
| 1589 |
| 1590 /* '=' */ |
| 1591 if (reslen < capacity) { |
| 1592 *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; |
| 1593 } |
| 1594 reslen++; |
| 1595 |
| 1596 /* type */ |
| 1597 len = (int32_t)uprv_strlen(kwd->value); |
| 1598 if (reslen < capacity) { |
| 1599 uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacit
y - reslen)); |
| 1600 } |
| 1601 reslen += len; |
| 1602 |
| 1603 kwd = kwd->next; |
| 1604 } |
| 1605 } |
| 1606 |
| 1607 /* clean up */ |
| 1608 kwd = kwdFirst; |
| 1609 while (kwd != NULL) { |
| 1610 ExtensionListEntry *tmpKwd = kwd->next; |
| 1611 uprv_free(kwd); |
| 1612 kwd = tmpKwd; |
| 1613 } |
| 1614 |
| 1615 if (U_FAILURE(*status)) { |
| 1616 return 0; |
| 1617 } |
| 1618 |
| 1619 return u_terminateChars(appendAt, capacity, reslen, status); |
| 1620 } |
| 1621 |
| 1622 /* |
| 1623 * ------------------------------------------------- |
| 1624 * |
| 1625 * ultag_ functions |
| 1626 * |
| 1627 * ------------------------------------------------- |
| 1628 */ |
| 1629 |
/*
 * Bit flags used by the parser. ultag_parse keeps a mask of these in "next"
 * to record which kinds of subtag may legally come at the current position.
 */
#define LANG 0x0001     /* language */
#define EXTL 0x0002     /* extlang */
#define SCRT 0x0004     /* script */
#define REGN 0x0008     /* region */
#define VART 0x0010     /* variant */
#define EXTS 0x0020     /* extension singleton */
#define EXTV 0x0040     /* extension value subtag */
#define PRIV 0x0080     /* private use */
| 1639 |
| 1640 static ULanguageTag* |
| 1641 ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
tus) { |
| 1642 ULanguageTag *t; |
| 1643 char *tagBuf; |
| 1644 int16_t next; |
| 1645 char *pSubtag, *pNext, *pLastGoodPosition; |
| 1646 int32_t subtagLen; |
| 1647 int32_t extlangIdx; |
| 1648 ExtensionListEntry *pExtension; |
| 1649 char *pExtValueSubtag, *pExtValueSubtagEnd; |
| 1650 int32_t i; |
| 1651 UBool isLDMLExtension, reqLDMLType; |
| 1652 |
| 1653 if (parsedLen != NULL) { |
| 1654 *parsedLen = 0; |
| 1655 } |
| 1656 |
| 1657 if (U_FAILURE(*status)) { |
| 1658 return NULL; |
| 1659 } |
| 1660 |
| 1661 if (tagLen < 0) { |
| 1662 tagLen = (int32_t)uprv_strlen(tag); |
| 1663 } |
| 1664 |
| 1665 /* copy the entire string */ |
| 1666 tagBuf = (char*)uprv_malloc(tagLen + 1); |
| 1667 if (tagBuf == NULL) { |
| 1668 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1669 return NULL; |
| 1670 } |
| 1671 uprv_memcpy(tagBuf, tag, tagLen); |
| 1672 *(tagBuf + tagLen) = 0; |
| 1673 |
| 1674 /* create a ULanguageTag */ |
| 1675 t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); |
| 1676 _initializeULanguageTag(t); |
| 1677 t->buf = tagBuf; |
| 1678 if (t == NULL) { |
| 1679 uprv_free(tagBuf); |
| 1680 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1681 return NULL; |
| 1682 } |
| 1683 |
| 1684 if (tagLen < MINLEN) { |
| 1685 /* the input tag is too short - return empty ULanguageTag */ |
| 1686 return t; |
| 1687 } |
| 1688 |
| 1689 /* check if the tag is grandfathered */ |
| 1690 for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { |
| 1691 if (T_CString_stricmp(GRANDFATHERED[i], tagBuf) == 0) { |
| 1692 /* a grandfathered tag is always longer than its preferred mapping *
/ |
| 1693 uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); |
| 1694 t->language = t->buf; |
| 1695 if (parsedLen != NULL) { |
| 1696 *parsedLen = tagLen; |
| 1697 } |
| 1698 return t; |
| 1699 } |
| 1700 } |
| 1701 |
| 1702 /* |
| 1703 * langtag = language |
| 1704 * ["-" script] |
| 1705 * ["-" region] |
| 1706 * *("-" variant) |
| 1707 * *("-" extension) |
| 1708 * ["-" privateuse] |
| 1709 */ |
| 1710 |
| 1711 next = LANG | PRIV; |
| 1712 pNext = pLastGoodPosition = tagBuf; |
| 1713 extlangIdx = 0; |
| 1714 pExtension = NULL; |
| 1715 pExtValueSubtag = NULL; |
| 1716 pExtValueSubtagEnd = NULL; |
| 1717 isLDMLExtension = FALSE; |
| 1718 reqLDMLType = FALSE; |
| 1719 |
| 1720 while (pNext) { |
| 1721 char *pSep; |
| 1722 |
| 1723 pSubtag = pNext; |
| 1724 |
| 1725 /* locate next separator char */ |
| 1726 pSep = pSubtag; |
| 1727 while (*pSep) { |
| 1728 if (*pSep == SEP) { |
| 1729 break; |
| 1730 } |
| 1731 pSep++; |
| 1732 } |
| 1733 if (*pSep == 0) { |
| 1734 /* last subtag */ |
| 1735 pNext = NULL; |
| 1736 } else { |
| 1737 pNext = pSep + 1; |
| 1738 } |
| 1739 subtagLen = (int32_t)(pSep - pSubtag); |
| 1740 |
| 1741 if (next & LANG) { |
| 1742 if (_isLanguageSubtag(pSubtag, subtagLen)) { |
| 1743 *pSep = 0; /* terminate */ |
| 1744 t->language = T_CString_toLowerCase(pSubtag); |
| 1745 |
| 1746 pLastGoodPosition = pSep; |
| 1747 next = EXTL | SCRT | REGN | VART | EXTS | PRIV; |
| 1748 continue; |
| 1749 } |
| 1750 } |
| 1751 if (next & EXTL) { |
| 1752 if (_isExtlangSubtag(pSubtag, subtagLen)) { |
| 1753 *pSep = 0; |
| 1754 t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); |
| 1755 |
| 1756 pLastGoodPosition = pSep; |
| 1757 if (extlangIdx < 3) { |
| 1758 next = EXTL | SCRT | REGN | VART | EXTS | PRIV; |
| 1759 } else { |
| 1760 next = SCRT | REGN | VART | EXTS | PRIV; |
| 1761 } |
| 1762 continue; |
| 1763 } |
| 1764 } |
| 1765 if (next & SCRT) { |
| 1766 if (_isScriptSubtag(pSubtag, subtagLen)) { |
| 1767 char *p = pSubtag; |
| 1768 |
| 1769 *pSep = 0; |
| 1770 |
| 1771 /* to title case */ |
| 1772 *p = uprv_toupper(*p); |
| 1773 p++; |
| 1774 for (; *p; p++) { |
| 1775 *p = uprv_tolower(*p); |
| 1776 } |
| 1777 |
| 1778 t->script = pSubtag; |
| 1779 |
| 1780 pLastGoodPosition = pSep; |
| 1781 next = REGN | VART | EXTS | PRIV; |
| 1782 continue; |
| 1783 } |
| 1784 } |
| 1785 if (next & REGN) { |
| 1786 if (_isRegionSubtag(pSubtag, subtagLen)) { |
| 1787 *pSep = 0; |
| 1788 t->region = T_CString_toUpperCase(pSubtag); |
| 1789 |
| 1790 pLastGoodPosition = pSep; |
| 1791 next = VART | EXTS | PRIV; |
| 1792 continue; |
| 1793 } |
| 1794 } |
| 1795 if (next & VART) { |
| 1796 if (_isVariantSubtag(pSubtag, subtagLen)) { |
| 1797 VariantListEntry *var; |
| 1798 UBool isAdded; |
| 1799 |
| 1800 var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); |
| 1801 if (var == NULL) { |
| 1802 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1803 goto error; |
| 1804 } |
| 1805 *pSep = 0; |
| 1806 var->variant = T_CString_toUpperCase(pSubtag); |
| 1807 isAdded = _addVariantToList(&(t->variants), var); |
| 1808 if (!isAdded) { |
| 1809 /* duplicated variant entry */ |
| 1810 uprv_free(var); |
| 1811 break; |
| 1812 } |
| 1813 pLastGoodPosition = pSep; |
| 1814 next = VART | EXTS | PRIV; |
| 1815 continue; |
| 1816 } |
| 1817 } |
| 1818 if (next & EXTS) { |
| 1819 if (_isExtensionSingleton(pSubtag, subtagLen)) { |
| 1820 if (pExtension != NULL) { |
| 1821 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { |
| 1822 /* the previous extension is incomplete */ |
| 1823 uprv_free(pExtension); |
| 1824 pExtension = NULL; |
| 1825 break; |
| 1826 } |
| 1827 |
| 1828 /* terminate the previous extension value */ |
| 1829 *pExtValueSubtagEnd = 0; |
| 1830 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); |
| 1831 |
| 1832 /* insert the extension to the list */ |
| 1833 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)
) { |
| 1834 pLastGoodPosition = pExtValueSubtagEnd; |
| 1835 } else { |
| 1836 /* stop parsing here */ |
| 1837 uprv_free(pExtension); |
| 1838 pExtension = NULL; |
| 1839 break; |
| 1840 } |
| 1841 |
| 1842 if (isLDMLExtension && reqLDMLType) { |
| 1843 /* incomplete LDML extension key and type pair */ |
| 1844 pExtension = NULL; |
| 1845 break; |
| 1846 } |
| 1847 } |
| 1848 |
| 1849 isLDMLExtension = (uprv_tolower(*pSubtag) == LDMLEXT); |
| 1850 |
| 1851 /* create a new extension */ |
| 1852 pExtension = uprv_malloc(sizeof(ExtensionListEntry)); |
| 1853 if (pExtension == NULL) { |
| 1854 *status = U_MEMORY_ALLOCATION_ERROR; |
| 1855 goto error; |
| 1856 } |
| 1857 *pSep = 0; |
| 1858 pExtension->key = T_CString_toLowerCase(pSubtag); |
| 1859 pExtension->value = NULL; /* will be set later */ |
| 1860 |
| 1861 /* |
| 1862 * reset the start and the end location of extension value |
| 1863 * subtags for this extension |
| 1864 */ |
| 1865 pExtValueSubtag = NULL; |
| 1866 pExtValueSubtagEnd = NULL; |
| 1867 |
| 1868 next = EXTV; |
| 1869 continue; |
| 1870 } |
| 1871 } |
| 1872 if (next & EXTV) { |
| 1873 if (_isExtensionSubtag(pSubtag, subtagLen)) { |
| 1874 if (isLDMLExtension) { |
| 1875 if (reqLDMLType) { |
| 1876 /* already saw an LDML key */ |
| 1877 if (!_isLDMLType(pSubtag, subtagLen)) { |
| 1878 /* stop parsing here and let the valid LDML extensio
n key/type |
| 1879 pairs processed by the code out of this while loo
p */ |
| 1880 break; |
| 1881 } |
| 1882 pExtValueSubtagEnd = pSep; |
| 1883 reqLDMLType = FALSE; |
| 1884 next = EXTS | EXTV | PRIV; |
| 1885 } else { |
| 1886 /* LDML key */ |
| 1887 if (!_isLDMLKey(pSubtag, subtagLen)) { |
| 1888 /* stop parsing here and let the valid LDML extensio
n key/type |
| 1889 pairs processed by the code out of this while loo
p */ |
| 1890 break; |
| 1891 } |
| 1892 reqLDMLType = TRUE; |
| 1893 next = EXTV; |
| 1894 } |
| 1895 } else { |
| 1896 /* Mark the end of this subtag */ |
| 1897 pExtValueSubtagEnd = pSep; |
| 1898 next = EXTS | EXTV | PRIV; |
| 1899 } |
| 1900 |
| 1901 if (pExtValueSubtag == NULL) { |
| 1902 /* if the start postion of this extension's value is not yet
, |
| 1903 this one is the first value subtag */ |
| 1904 pExtValueSubtag = pSubtag; |
| 1905 } |
| 1906 continue; |
| 1907 } |
| 1908 } |
| 1909 if (next & PRIV) { |
| 1910 if (uprv_tolower(*pSubtag) == PRIVATEUSE) { |
| 1911 char *pPrivuseVal; |
| 1912 |
| 1913 if (pExtension != NULL) { |
| 1914 /* Process the last extension */ |
| 1915 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { |
| 1916 /* the previous extension is incomplete */ |
| 1917 uprv_free(pExtension); |
| 1918 pExtension = NULL; |
| 1919 break; |
| 1920 } else { |
| 1921 /* terminate the previous extension value */ |
| 1922 *pExtValueSubtagEnd = 0; |
| 1923 pExtension->value = T_CString_toLowerCase(pExtValueSubta
g); |
| 1924 |
| 1925 /* insert the extension to the list */ |
| 1926 if (_addExtensionToList(&(t->extensions), pExtension, FA
LSE)) { |
| 1927 pLastGoodPosition = pExtValueSubtagEnd; |
| 1928 pExtension = NULL; |
| 1929 } else { |
| 1930 /* stop parsing here */ |
| 1931 uprv_free(pExtension); |
| 1932 pExtension = NULL; |
| 1933 break; |
| 1934 } |
| 1935 } |
| 1936 } |
| 1937 |
| 1938 /* The rest of part will be private use value subtags */ |
| 1939 if (pNext == NULL) { |
| 1940 /* empty private use subtag */ |
| 1941 break; |
| 1942 } |
| 1943 /* back up the private use value start position */ |
| 1944 pPrivuseVal = pNext; |
| 1945 |
| 1946 /* validate private use value subtags */ |
| 1947 while (pNext) { |
| 1948 pSubtag = pNext; |
| 1949 pSep = pSubtag; |
| 1950 while (*pSep) { |
| 1951 if (*pSep == SEP) { |
| 1952 break; |
| 1953 } |
| 1954 pSep++; |
| 1955 } |
| 1956 if (*pSep == 0) { |
| 1957 /* last subtag */ |
| 1958 pNext = NULL; |
| 1959 } else { |
| 1960 pNext = pSep + 1; |
| 1961 } |
| 1962 subtagLen = (int32_t)(pSep - pSubtag); |
| 1963 |
| 1964 if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { |
| 1965 pLastGoodPosition = pSep; |
| 1966 } else { |
| 1967 break; |
| 1968 } |
| 1969 } |
| 1970 if (pLastGoodPosition - pPrivuseVal > 0) { |
| 1971 *pLastGoodPosition = 0; |
| 1972 t->privateuse = T_CString_toLowerCase(pPrivuseVal); |
| 1973 } |
| 1974 /* No more subtags, exiting the parse loop */ |
| 1975 break; |
| 1976 } |
| 1977 break; |
| 1978 } |
| 1979 /* If we fell through here, it means this subtag is illegal - quit parsi
ng */ |
| 1980 break; |
| 1981 } |
| 1982 |
| 1983 if (pExtension != NULL) { |
| 1984 /* Process the last extension */ |
| 1985 if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { |
| 1986 /* the previous extension is incomplete */ |
| 1987 uprv_free(pExtension); |
| 1988 } else { |
| 1989 /* terminate the previous extension value */ |
| 1990 *pExtValueSubtagEnd = 0; |
| 1991 pExtension->value = T_CString_toLowerCase(pExtValueSubtag); |
| 1992 /* insert the extension to the list */ |
| 1993 if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { |
| 1994 pLastGoodPosition = pExtValueSubtagEnd; |
| 1995 } else { |
| 1996 uprv_free(pExtension); |
| 1997 } |
| 1998 } |
| 1999 } |
| 2000 |
| 2001 if (parsedLen != NULL) { |
| 2002 *parsedLen = (int32_t)(pLastGoodPosition - t->buf); |
| 2003 } |
| 2004 |
| 2005 return t; |
| 2006 |
| 2007 error: |
| 2008 uprv_free(t); |
| 2009 return NULL; |
| 2010 } |
| 2011 |
| 2012 static void |
| 2013 ultag_close(ULanguageTag* langtag) { |
| 2014 |
| 2015 if (langtag == NULL) { |
| 2016 return; |
| 2017 } |
| 2018 |
| 2019 uprv_free(langtag->buf); |
| 2020 |
| 2021 if (langtag->variants) { |
| 2022 VariantListEntry *curVar = langtag->variants; |
| 2023 while (curVar) { |
| 2024 VariantListEntry *nextVar = curVar->next; |
| 2025 uprv_free(curVar); |
| 2026 curVar = nextVar; |
| 2027 } |
| 2028 } |
| 2029 |
| 2030 if (langtag->extensions) { |
| 2031 ExtensionListEntry *curExt = langtag->extensions; |
| 2032 while (curExt) { |
| 2033 ExtensionListEntry *nextExt = curExt->next; |
| 2034 uprv_free(curExt); |
| 2035 curExt = nextExt; |
| 2036 } |
| 2037 } |
| 2038 |
| 2039 uprv_free(langtag); |
| 2040 } |
| 2041 |
| 2042 static const char* |
| 2043 ultag_getLanguage(const ULanguageTag* langtag) { |
| 2044 return langtag->language; |
| 2045 } |
| 2046 |
#if 0
/*
 * Disabled. Looks up the tag's language in DEPRECATEDLANGS, read here as
 * (key, replacement) pairs ending with a NULL key, and returns the
 * replacement on a match; otherwise returns the language unchanged.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t pairIdx = 0;
    while (DEPRECATEDLANGS[pairIdx] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pairIdx], langtag->language) == 0) {
            return DEPRECATEDLANGS[pairIdx + 1];
        }
        pairIdx += 2;
    }
    return langtag->language;
}
#endif
| 2059 |
| 2060 static const char* |
| 2061 ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { |
| 2062 if (idx >= 0 && idx < MAXEXTLANG) { |
| 2063 return langtag->extlang[idx]; |
| 2064 } |
| 2065 return NULL; |
| 2066 } |
| 2067 |
| 2068 static int32_t |
| 2069 ultag_getExtlangSize(const ULanguageTag* langtag) { |
| 2070 int32_t size = 0; |
| 2071 int32_t i; |
| 2072 for (i = 0; i < MAXEXTLANG; i++) { |
| 2073 if (langtag->extlang[i]) { |
| 2074 size++; |
| 2075 } |
| 2076 } |
| 2077 return size; |
| 2078 } |
| 2079 |
| 2080 static const char* |
| 2081 ultag_getScript(const ULanguageTag* langtag) { |
| 2082 return langtag->script; |
| 2083 } |
| 2084 |
| 2085 static const char* |
| 2086 ultag_getRegion(const ULanguageTag* langtag) { |
| 2087 return langtag->region; |
| 2088 } |
| 2089 |
| 2090 static const char* |
| 2091 ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { |
| 2092 const char *var = NULL; |
| 2093 VariantListEntry *cur = langtag->variants; |
| 2094 int32_t i = 0; |
| 2095 while (cur) { |
| 2096 if (i == idx) { |
| 2097 var = cur->variant; |
| 2098 break; |
| 2099 } |
| 2100 cur = cur->next; |
| 2101 i++; |
| 2102 } |
| 2103 return var; |
| 2104 } |
| 2105 |
| 2106 static int32_t |
| 2107 ultag_getVariantsSize(const ULanguageTag* langtag) { |
| 2108 int32_t size = 0; |
| 2109 VariantListEntry *cur = langtag->variants; |
| 2110 while (TRUE) { |
| 2111 if (cur == NULL) { |
| 2112 break; |
| 2113 } |
| 2114 size++; |
| 2115 cur = cur->next; |
| 2116 } |
| 2117 return size; |
| 2118 } |
| 2119 |
| 2120 static const char* |
| 2121 ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { |
| 2122 const char *key = NULL; |
| 2123 ExtensionListEntry *cur = langtag->extensions; |
| 2124 int32_t i = 0; |
| 2125 while (cur) { |
| 2126 if (i == idx) { |
| 2127 key = cur->key; |
| 2128 break; |
| 2129 } |
| 2130 cur = cur->next; |
| 2131 i++; |
| 2132 } |
| 2133 return key; |
| 2134 } |
| 2135 |
| 2136 static const char* |
| 2137 ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { |
| 2138 const char *val = NULL; |
| 2139 ExtensionListEntry *cur = langtag->extensions; |
| 2140 int32_t i = 0; |
| 2141 while (cur) { |
| 2142 if (i == idx) { |
| 2143 val = cur->value; |
| 2144 break; |
| 2145 } |
| 2146 cur = cur->next; |
| 2147 i++; |
| 2148 } |
| 2149 return val; |
| 2150 } |
| 2151 |
| 2152 static int32_t |
| 2153 ultag_getExtensionsSize(const ULanguageTag* langtag) { |
| 2154 int32_t size = 0; |
| 2155 ExtensionListEntry *cur = langtag->extensions; |
| 2156 while (TRUE) { |
| 2157 if (cur == NULL) { |
| 2158 break; |
| 2159 } |
| 2160 size++; |
| 2161 cur = cur->next; |
| 2162 } |
| 2163 return size; |
| 2164 } |
| 2165 |
| 2166 static const char* |
| 2167 ultag_getPrivateUse(const ULanguageTag* langtag) { |
| 2168 return langtag->privateuse; |
| 2169 } |
| 2170 |
#if 0
/* Disabled. Returns the grandfathered field stored in langtag. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    const char *grandfathered = langtag->grandfathered;
    return grandfathered;
}
#endif
| 2177 |
| 2178 |
| 2179 /* |
| 2180 * ------------------------------------------------- |
| 2181 * |
| 2182 * Locale/BCP47 conversion APIs, exposed as uloc_* |
| 2183 * |
| 2184 * ------------------------------------------------- |
| 2185 */ |
| 2186 U_DRAFT int32_t U_EXPORT2 |
| 2187 uloc_toLanguageTag(const char* localeID, |
| 2188 char* langtag, |
| 2189 int32_t langtagCapacity, |
| 2190 UBool strict, |
| 2191 UErrorCode* status) { |
| 2192 /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ |
| 2193 char canonical[256]; |
| 2194 int32_t reslen = 0; |
| 2195 UErrorCode tmpStatus = U_ZERO_ERROR; |
| 2196 UBool hadPosix = FALSE; |
| 2197 |
| 2198 /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". Se
e #6835 */ |
| 2199 canonical[0] = 0; |
| 2200 if (uprv_strlen(localeID) > 0) { |
| 2201 uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); |
| 2202 if (tmpStatus != U_ZERO_ERROR) { |
| 2203 *status = U_ILLEGAL_ARGUMENT_ERROR; |
| 2204 return 0; |
| 2205 } |
| 2206 } |
| 2207 |
| 2208 reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity,
strict, status); |
| 2209 reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCap
acity - reslen, strict, status); |
| 2210 reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCap
acity - reslen, strict, status); |
| 2211 reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagC
apacity - reslen, strict, &hadPosix, status); |
| 2212 reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagC
apacity - reslen, strict, hadPosix, status); |
| 2213 |
| 2214 return reslen; |
| 2215 } |
| 2216 |
| 2217 |
| 2218 U_DRAFT int32_t U_EXPORT2 |
| 2219 uloc_forLanguageTag(const char* langtag, |
| 2220 char* localeID, |
| 2221 int32_t localeIDCapacity, |
| 2222 int32_t* parsedLength, |
| 2223 UErrorCode* status) { |
| 2224 ULanguageTag *lt; |
| 2225 int32_t reslen = 0; |
| 2226 const char *subtag, *p; |
| 2227 int32_t len; |
| 2228 int32_t i, n; |
| 2229 UBool noRegion = TRUE; |
| 2230 |
| 2231 lt = ultag_parse(langtag, -1, parsedLength, status); |
| 2232 if (U_FAILURE(*status)) { |
| 2233 return 0; |
| 2234 } |
| 2235 |
| 2236 /* language */ |
| 2237 subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getL
anguage(lt); |
| 2238 if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { |
| 2239 len = (int32_t)uprv_strlen(subtag); |
| 2240 if (len > 0) { |
| 2241 if (reslen < localeIDCapacity) { |
| 2242 uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - r
eslen)); |
| 2243 } |
| 2244 reslen += len; |
| 2245 } |
| 2246 } |
| 2247 |
| 2248 /* script */ |
| 2249 subtag = ultag_getScript(lt); |
| 2250 len = (int32_t)uprv_strlen(subtag); |
| 2251 if (len > 0) { |
| 2252 if (reslen < localeIDCapacity) { |
| 2253 *(localeID + reslen) = LOCALE_SEP; |
| 2254 } |
| 2255 reslen++; |
| 2256 |
| 2257 /* write out the script in title case */ |
| 2258 p = subtag; |
| 2259 while (*p) { |
| 2260 if (reslen < localeIDCapacity) { |
| 2261 if (p == subtag) { |
| 2262 *(localeID + reslen) = uprv_toupper(*p); |
| 2263 } else { |
| 2264 *(localeID + reslen) = *p; |
| 2265 } |
| 2266 } |
| 2267 reslen++; |
| 2268 p++; |
| 2269 } |
| 2270 } |
| 2271 |
| 2272 /* region */ |
| 2273 subtag = ultag_getRegion(lt); |
| 2274 len = (int32_t)uprv_strlen(subtag); |
| 2275 if (len > 0) { |
| 2276 if (reslen < localeIDCapacity) { |
| 2277 *(localeID + reslen) = LOCALE_SEP; |
| 2278 } |
| 2279 reslen++; |
| 2280 /* write out the retion in upper case */ |
| 2281 p = subtag; |
| 2282 while (*p) { |
| 2283 if (reslen < localeIDCapacity) { |
| 2284 *(localeID + reslen) = uprv_toupper(*p); |
| 2285 } |
| 2286 reslen++; |
| 2287 p++; |
| 2288 } |
| 2289 noRegion = FALSE; |
| 2290 } |
| 2291 |
| 2292 /* variants */ |
| 2293 n = ultag_getVariantsSize(lt); |
| 2294 if (n > 0) { |
| 2295 if (noRegion) { |
| 2296 if (reslen < localeIDCapacity) { |
| 2297 *(localeID + reslen) = LOCALE_SEP; |
| 2298 } |
| 2299 reslen++; |
| 2300 } |
| 2301 |
| 2302 for (i = 0; i < n; i++) { |
| 2303 subtag = ultag_getVariant(lt, i); |
| 2304 if (reslen < localeIDCapacity) { |
| 2305 *(localeID + reslen) = LOCALE_SEP; |
| 2306 } |
| 2307 reslen++; |
| 2308 /* write out the variant in upper case */ |
| 2309 p = subtag; |
| 2310 while (*p) { |
| 2311 if (reslen < localeIDCapacity) { |
| 2312 *(localeID + reslen) = uprv_toupper(*p); |
| 2313 } |
| 2314 reslen++; |
| 2315 p++; |
| 2316 } |
| 2317 } |
| 2318 } |
| 2319 |
| 2320 /* keywords */ |
| 2321 n = ultag_getExtensionsSize(lt); |
| 2322 subtag = ultag_getPrivateUse(lt); |
| 2323 if (n > 0 || uprv_strlen(subtag) > 0) { |
| 2324 if (reslen == 0) { |
| 2325 /* need a language */ |
| 2326 if (reslen < localeIDCapacity) { |
| 2327 uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN,
localeIDCapacity - reslen)); |
| 2328 } |
| 2329 reslen += LANG_UND_LEN; |
| 2330 } |
| 2331 len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen,
status); |
| 2332 reslen += len; |
| 2333 } |
| 2334 |
| 2335 ultag_close(lt); |
| 2336 return u_terminateChars(localeID, localeIDCapacity, reslen, status); |
| 2337 } |
| 2338 |
| 2339 |
OLD | NEW |