OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 1997-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: loclikely.cpp |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created on: 2010feb25 |
| 14 * created by: Markus W. Scherer |
| 15 * |
| 16 * Code for likely and minimized locale subtags, separated out from other .cpp
files |
| 17 * that then do not depend on resource bundle code and likely-subtags data. |
| 18 */ |
| 19 |
| 20 #include "unicode/utypes.h" |
| 21 #include "unicode/putil.h" |
| 22 #include "unicode/uloc.h" |
| 23 #include "unicode/ures.h" |
| 24 #include "cmemory.h" |
| 25 #include "cstring.h" |
| 26 #include "ulocimp.h" |
| 27 #include "ustr_imp.h" |
| 28 |
| 29 /** |
| 30 * This function looks for the localeID in the likelySubtags resource. |
| 31 * |
| 32 * @param localeID The tag to find. |
| 33 * @param buffer A buffer to hold the matching entry |
| 34 * @param bufferLength The length of the output buffer |
| 35 * @return A pointer to "buffer" if found, or a null pointer if not. |
| 36 */ |
| 37 static const char* U_CALLCONV |
| 38 findLikelySubtags(const char* localeID, |
| 39 char* buffer, |
| 40 int32_t bufferLength, |
| 41 UErrorCode* err) { |
| 42 const char* result = NULL; |
| 43 |
| 44 if (!U_FAILURE(*err)) { |
| 45 int32_t resLen = 0; |
| 46 const UChar* s = NULL; |
| 47 UErrorCode tmpErr = U_ZERO_ERROR; |
| 48 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpEr
r); |
| 49 if (U_SUCCESS(tmpErr)) { |
| 50 s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr); |
| 51 |
| 52 if (U_FAILURE(tmpErr)) { |
| 53 /* |
| 54 * If a resource is missing, it's not really an error, it's |
| 55 * just that we don't have any data for that particular locale I
D. |
| 56 */ |
| 57 if (tmpErr != U_MISSING_RESOURCE_ERROR) { |
| 58 *err = tmpErr; |
| 59 } |
| 60 } |
| 61 else if (resLen >= bufferLength) { |
| 62 /* The buffer should never overflow. */ |
| 63 *err = U_INTERNAL_PROGRAM_ERROR; |
| 64 } |
| 65 else { |
| 66 u_UCharsToChars(s, buffer, resLen + 1); |
| 67 result = buffer; |
| 68 } |
| 69 |
| 70 ures_close(subtags); |
| 71 } else { |
| 72 *err = tmpErr; |
| 73 } |
| 74 } |
| 75 |
| 76 return result; |
| 77 } |
| 78 |
| 79 /** |
| 80 * Append a tag to a buffer, adding the separator if necessary. The buffer |
| 81 * must be large enough to contain the resulting tag plus any separator |
| 82 * necessary. The tag must not be a zero-length string. |
| 83 * |
| 84 * @param tag The tag to add. |
| 85 * @param tagLength The length of the tag. |
| 86 * @param buffer The output buffer. |
| 87 * @param bufferLength The length of the output buffer. This is an input/ouput
parameter. |
| 88 **/ |
| 89 static void U_CALLCONV |
| 90 appendTag( |
| 91 const char* tag, |
| 92 int32_t tagLength, |
| 93 char* buffer, |
| 94 int32_t* bufferLength) { |
| 95 |
| 96 if (*bufferLength > 0) { |
| 97 buffer[*bufferLength] = '_'; |
| 98 ++(*bufferLength); |
| 99 } |
| 100 |
| 101 uprv_memmove( |
| 102 &buffer[*bufferLength], |
| 103 tag, |
| 104 tagLength); |
| 105 |
| 106 *bufferLength += tagLength; |
| 107 } |
| 108 |
/**
 * Canonical placeholder subtags used in this file when a language, script
 * or region is unknown.
 **/
static const char *const unknownLanguage = "und";   /* unknown language */
static const char *const unknownScript = "Zzzz";    /* unknown script */
static const char *const unknownRegion = "ZZ";      /* unknown region */
| 115 |
| 116 /** |
| 117 * Create a tag string from the supplied parameters. The lang, script and regio
n |
| 118 * parameters may be NULL pointers. If they are, their corresponding length para
meters |
| 119 * must be less than or equal to 0. |
| 120 * |
| 121 * If any of the language, script or region parameters are empty, and the altern
ateTags |
| 122 * parameter is not NULL, it will be parsed for potential language, script and r
egion tags |
| 123 * to be used when constructing the new tag. If the alternateTags parameter is
NULL, or |
| 124 * it contains no language tag, the default tag for the unknown language is used
. |
| 125 * |
| 126 * If the length of the new string exceeds the capacity of the output buffer, |
| 127 * the function copies as many bytes to the output buffer as it can, and returns |
| 128 * the error U_BUFFER_OVERFLOW_ERROR. |
| 129 * |
| 130 * If an illegal argument is provided, the function returns the error |
| 131 * U_ILLEGAL_ARGUMENT_ERROR. |
| 132 * |
| 133 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNIN
G if |
| 134 * the tag string fits in the output buffer, but the null terminator doesn't. |
| 135 * |
| 136 * @param lang The language tag to use. |
| 137 * @param langLength The length of the language tag. |
| 138 * @param script The script tag to use. |
| 139 * @param scriptLength The length of the script tag. |
| 140 * @param region The region tag to use. |
| 141 * @param regionLength The length of the region tag. |
| 142 * @param trailing Any trailing data to append to the new tag. |
| 143 * @param trailingLength The length of the trailing data. |
| 144 * @param alternateTags A string containing any alternate tags. |
| 145 * @param tag The output buffer. |
| 146 * @param tagCapacity The capacity of the output buffer. |
| 147 * @param err A pointer to a UErrorCode for error reporting. |
| 148 * @return The length of the tag string, which may be greater than tagCapacity,
or -1 on error. |
| 149 **/ |
| 150 static int32_t U_CALLCONV |
| 151 createTagStringWithAlternates( |
| 152 const char* lang, |
| 153 int32_t langLength, |
| 154 const char* script, |
| 155 int32_t scriptLength, |
| 156 const char* region, |
| 157 int32_t regionLength, |
| 158 const char* trailing, |
| 159 int32_t trailingLength, |
| 160 const char* alternateTags, |
| 161 char* tag, |
| 162 int32_t tagCapacity, |
| 163 UErrorCode* err) { |
| 164 |
| 165 if (U_FAILURE(*err)) { |
| 166 goto error; |
| 167 } |
| 168 else if (tag == NULL || |
| 169 tagCapacity <= 0 || |
| 170 langLength >= ULOC_LANG_CAPACITY || |
| 171 scriptLength >= ULOC_SCRIPT_CAPACITY || |
| 172 regionLength >= ULOC_COUNTRY_CAPACITY) { |
| 173 goto error; |
| 174 } |
| 175 else { |
| 176 /** |
| 177 * ULOC_FULLNAME_CAPACITY will provide enough capacity |
| 178 * that we can build a string that contains the language, |
| 179 * script and region code without worrying about overrunning |
| 180 * the user-supplied buffer. |
| 181 **/ |
| 182 char tagBuffer[ULOC_FULLNAME_CAPACITY]; |
| 183 int32_t tagLength = 0; |
| 184 int32_t capacityRemaining = tagCapacity; |
| 185 UBool regionAppended = FALSE; |
| 186 |
| 187 if (langLength > 0) { |
| 188 appendTag( |
| 189 lang, |
| 190 langLength, |
| 191 tagBuffer, |
| 192 &tagLength); |
| 193 } |
| 194 else if (alternateTags == NULL) { |
| 195 /* |
| 196 * Append the value for an unknown language, if |
| 197 * we found no language. |
| 198 */ |
| 199 appendTag( |
| 200 unknownLanguage, |
| 201 (int32_t)uprv_strlen(unknownLanguage), |
| 202 tagBuffer, |
| 203 &tagLength); |
| 204 } |
| 205 else { |
| 206 /* |
| 207 * Parse the alternateTags string for the language. |
| 208 */ |
| 209 char alternateLang[ULOC_LANG_CAPACITY]; |
| 210 int32_t alternateLangLength = sizeof(alternateLang); |
| 211 |
| 212 alternateLangLength = |
| 213 uloc_getLanguage( |
| 214 alternateTags, |
| 215 alternateLang, |
| 216 alternateLangLength, |
| 217 err); |
| 218 if(U_FAILURE(*err) || |
| 219 alternateLangLength >= ULOC_LANG_CAPACITY) { |
| 220 goto error; |
| 221 } |
| 222 else if (alternateLangLength == 0) { |
| 223 /* |
| 224 * Append the value for an unknown language, if |
| 225 * we found no language. |
| 226 */ |
| 227 appendTag( |
| 228 unknownLanguage, |
| 229 (int32_t)uprv_strlen(unknownLanguage), |
| 230 tagBuffer, |
| 231 &tagLength); |
| 232 } |
| 233 else { |
| 234 appendTag( |
| 235 alternateLang, |
| 236 alternateLangLength, |
| 237 tagBuffer, |
| 238 &tagLength); |
| 239 } |
| 240 } |
| 241 |
| 242 if (scriptLength > 0) { |
| 243 appendTag( |
| 244 script, |
| 245 scriptLength, |
| 246 tagBuffer, |
| 247 &tagLength); |
| 248 } |
| 249 else if (alternateTags != NULL) { |
| 250 /* |
| 251 * Parse the alternateTags string for the script. |
| 252 */ |
| 253 char alternateScript[ULOC_SCRIPT_CAPACITY]; |
| 254 |
| 255 const int32_t alternateScriptLength = |
| 256 uloc_getScript( |
| 257 alternateTags, |
| 258 alternateScript, |
| 259 sizeof(alternateScript), |
| 260 err); |
| 261 |
| 262 if (U_FAILURE(*err) || |
| 263 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { |
| 264 goto error; |
| 265 } |
| 266 else if (alternateScriptLength > 0) { |
| 267 appendTag( |
| 268 alternateScript, |
| 269 alternateScriptLength, |
| 270 tagBuffer, |
| 271 &tagLength); |
| 272 } |
| 273 } |
| 274 |
| 275 if (regionLength > 0) { |
| 276 appendTag( |
| 277 region, |
| 278 regionLength, |
| 279 tagBuffer, |
| 280 &tagLength); |
| 281 |
| 282 regionAppended = TRUE; |
| 283 } |
| 284 else if (alternateTags != NULL) { |
| 285 /* |
| 286 * Parse the alternateTags string for the region. |
| 287 */ |
| 288 char alternateRegion[ULOC_COUNTRY_CAPACITY]; |
| 289 |
| 290 const int32_t alternateRegionLength = |
| 291 uloc_getCountry( |
| 292 alternateTags, |
| 293 alternateRegion, |
| 294 sizeof(alternateRegion), |
| 295 err); |
| 296 if (U_FAILURE(*err) || |
| 297 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { |
| 298 goto error; |
| 299 } |
| 300 else if (alternateRegionLength > 0) { |
| 301 appendTag( |
| 302 alternateRegion, |
| 303 alternateRegionLength, |
| 304 tagBuffer, |
| 305 &tagLength); |
| 306 |
| 307 regionAppended = TRUE; |
| 308 } |
| 309 } |
| 310 |
| 311 { |
| 312 const int32_t toCopy = |
| 313 tagLength >= tagCapacity ? tagCapacity : tagLength; |
| 314 |
| 315 /** |
| 316 * Copy the partial tag from our internal buffer to the supplied |
| 317 * target. |
| 318 **/ |
| 319 uprv_memcpy( |
| 320 tag, |
| 321 tagBuffer, |
| 322 toCopy); |
| 323 |
| 324 capacityRemaining -= toCopy; |
| 325 } |
| 326 |
| 327 if (trailingLength > 0) { |
| 328 if (capacityRemaining > 0 && !regionAppended) { |
| 329 tag[tagLength++] = '_'; |
| 330 --capacityRemaining; |
| 331 } |
| 332 |
| 333 if (capacityRemaining > 0) { |
| 334 /* |
| 335 * Copy the trailing data into the supplied buffer. Use uprv_me
mmove, since we |
| 336 * don't know if the user-supplied buffers overlap. |
| 337 */ |
| 338 const int32_t toCopy = |
| 339 trailingLength >= capacityRemaining ? capacityRemaining : tr
ailingLength; |
| 340 |
| 341 uprv_memmove( |
| 342 &tag[tagLength], |
| 343 trailing, |
| 344 toCopy); |
| 345 } |
| 346 } |
| 347 |
| 348 tagLength += trailingLength; |
| 349 |
| 350 return u_terminateChars( |
| 351 tag, |
| 352 tagCapacity, |
| 353 tagLength, |
| 354 err); |
| 355 } |
| 356 |
| 357 error: |
| 358 |
| 359 /** |
| 360 * An overflow indicates the locale ID passed in |
| 361 * is ill-formed. If we got here, and there was |
| 362 * no previous error, it's an implicit overflow. |
| 363 **/ |
| 364 if (*err == U_BUFFER_OVERFLOW_ERROR || |
| 365 U_SUCCESS(*err)) { |
| 366 *err = U_ILLEGAL_ARGUMENT_ERROR; |
| 367 } |
| 368 |
| 369 return -1; |
| 370 } |
| 371 |
| 372 /** |
| 373 * Create a tag string from the supplied parameters. The lang, script and regio
n |
| 374 * parameters may be NULL pointers. If they are, their corresponding length para
meters |
| 375 * must be less than or equal to 0. If the lang parameter is an empty string, t
he |
| 376 * default value for an unknown language is written to the output buffer. |
| 377 * |
| 378 * If the length of the new string exceeds the capacity of the output buffer, |
| 379 * the function copies as many bytes to the output buffer as it can, and returns |
| 380 * the error U_BUFFER_OVERFLOW_ERROR. |
| 381 * |
| 382 * If an illegal argument is provided, the function returns the error |
| 383 * U_ILLEGAL_ARGUMENT_ERROR. |
| 384 * |
| 385 * @param lang The language tag to use. |
| 386 * @param langLength The length of the language tag. |
| 387 * @param script The script tag to use. |
| 388 * @param scriptLength The length of the script tag. |
| 389 * @param region The region tag to use. |
| 390 * @param regionLength The length of the region tag. |
| 391 * @param trailing Any trailing data to append to the new tag. |
| 392 * @param trailingLength The length of the trailing data. |
| 393 * @param tag The output buffer. |
| 394 * @param tagCapacity The capacity of the output buffer. |
| 395 * @param err A pointer to a UErrorCode for error reporting. |
| 396 * @return The length of the tag string, which may be greater than tagCapacity. |
| 397 **/ |
| 398 static int32_t U_CALLCONV |
| 399 createTagString( |
| 400 const char* lang, |
| 401 int32_t langLength, |
| 402 const char* script, |
| 403 int32_t scriptLength, |
| 404 const char* region, |
| 405 int32_t regionLength, |
| 406 const char* trailing, |
| 407 int32_t trailingLength, |
| 408 char* tag, |
| 409 int32_t tagCapacity, |
| 410 UErrorCode* err) |
| 411 { |
| 412 return createTagStringWithAlternates( |
| 413 lang, |
| 414 langLength, |
| 415 script, |
| 416 scriptLength, |
| 417 region, |
| 418 regionLength, |
| 419 trailing, |
| 420 trailingLength, |
| 421 NULL, |
| 422 tag, |
| 423 tagCapacity, |
| 424 err); |
| 425 } |
| 426 |
| 427 /** |
| 428 * Parse the language, script, and region subtags from a tag string, and copy th
e |
| 429 * results into the corresponding output parameters. The buffers are null-termin
ated, |
| 430 * unless overflow occurs. |
| 431 * |
| 432 * The langLength, scriptLength, and regionLength parameters are input/output |
| 433 * parameters, and must contain the capacity of their corresponding buffers on |
| 434 * input. On output, they will contain the actual length of the buffers, not |
| 435 * including the null terminator. |
| 436 * |
| 437 * If the length of any of the output subtags exceeds the capacity of the corres
ponding |
| 438 * buffer, the function copies as many bytes to the output buffer as it can, and
returns |
| 439 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once o
verflow |
| 440 * occurs. |
| 441 * |
| 442 * If an illegal argument is provided, the function returns the error |
| 443 * U_ILLEGAL_ARGUMENT_ERROR. |
| 444 * |
| 445 * @param localeID The locale ID to parse. |
| 446 * @param lang The language tag buffer. |
| 447 * @param langLength The length of the language tag. |
| 448 * @param script The script tag buffer. |
| 449 * @param scriptLength The length of the script tag. |
| 450 * @param region The region tag buffer. |
| 451 * @param regionLength The length of the region tag. |
| 452 * @param err A pointer to a UErrorCode for error reporting. |
| 453 * @return The number of chars of the localeID parameter consumed. |
| 454 **/ |
| 455 static int32_t U_CALLCONV |
| 456 parseTagString( |
| 457 const char* localeID, |
| 458 char* lang, |
| 459 int32_t* langLength, |
| 460 char* script, |
| 461 int32_t* scriptLength, |
| 462 char* region, |
| 463 int32_t* regionLength, |
| 464 UErrorCode* err) |
| 465 { |
| 466 const char* position = localeID; |
| 467 int32_t subtagLength = 0; |
| 468 |
| 469 if(U_FAILURE(*err) || |
| 470 localeID == NULL || |
| 471 lang == NULL || |
| 472 langLength == NULL || |
| 473 script == NULL || |
| 474 scriptLength == NULL || |
| 475 region == NULL || |
| 476 regionLength == NULL) { |
| 477 goto error; |
| 478 } |
| 479 |
| 480 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position); |
| 481 u_terminateChars(lang, *langLength, subtagLength, err); |
| 482 |
| 483 /* |
| 484 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING |
| 485 * to be an error, because it indicates the user-supplied tag is |
| 486 * not well-formed. |
| 487 */ |
| 488 if(U_FAILURE(*err)) { |
| 489 goto error; |
| 490 } |
| 491 |
| 492 *langLength = subtagLength; |
| 493 |
| 494 /* |
| 495 * If no language was present, use the value of unknownLanguage |
| 496 * instead. Otherwise, move past any separator. |
| 497 */ |
| 498 if (*langLength == 0) { |
| 499 uprv_strcpy( |
| 500 lang, |
| 501 unknownLanguage); |
| 502 *langLength = (int32_t)uprv_strlen(lang); |
| 503 } |
| 504 else if (_isIDSeparator(*position)) { |
| 505 ++position; |
| 506 } |
| 507 |
| 508 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position)
; |
| 509 u_terminateChars(script, *scriptLength, subtagLength, err); |
| 510 |
| 511 if(U_FAILURE(*err)) { |
| 512 goto error; |
| 513 } |
| 514 |
| 515 *scriptLength = subtagLength; |
| 516 |
| 517 if (*scriptLength > 0) { |
| 518 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { |
| 519 /** |
| 520 * If the script part is the "unknown" script, then don't return it. |
| 521 **/ |
| 522 *scriptLength = 0; |
| 523 } |
| 524 |
| 525 /* |
| 526 * Move past any separator. |
| 527 */ |
| 528 if (_isIDSeparator(*position)) { |
| 529 ++position; |
| 530 } |
| 531 } |
| 532 |
| 533 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position
); |
| 534 u_terminateChars(region, *regionLength, subtagLength, err); |
| 535 |
| 536 if(U_FAILURE(*err)) { |
| 537 goto error; |
| 538 } |
| 539 |
| 540 *regionLength = subtagLength; |
| 541 |
| 542 if (*regionLength > 0) { |
| 543 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { |
| 544 /** |
| 545 * If the region part is the "unknown" region, then don't return it. |
| 546 **/ |
| 547 *regionLength = 0; |
| 548 } |
| 549 } |
| 550 |
| 551 exit: |
| 552 |
| 553 return (int32_t)(position - localeID); |
| 554 |
| 555 error: |
| 556 |
| 557 /** |
| 558 * If we get here, we have no explicit error, it's the result of an |
| 559 * illegal argument. |
| 560 **/ |
| 561 if (!U_FAILURE(*err)) { |
| 562 *err = U_ILLEGAL_ARGUMENT_ERROR; |
| 563 } |
| 564 |
| 565 goto exit; |
| 566 } |
| 567 |
| 568 static int32_t U_CALLCONV |
| 569 createLikelySubtagsString( |
| 570 const char* lang, |
| 571 int32_t langLength, |
| 572 const char* script, |
| 573 int32_t scriptLength, |
| 574 const char* region, |
| 575 int32_t regionLength, |
| 576 const char* variants, |
| 577 int32_t variantsLength, |
| 578 char* tag, |
| 579 int32_t tagCapacity, |
| 580 UErrorCode* err) |
| 581 { |
| 582 /** |
| 583 * ULOC_FULLNAME_CAPACITY will provide enough capacity |
| 584 * that we can build a string that contains the language, |
| 585 * script and region code without worrying about overrunning |
| 586 * the user-supplied buffer. |
| 587 **/ |
| 588 char tagBuffer[ULOC_FULLNAME_CAPACITY]; |
| 589 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; |
| 590 int32_t tagBufferLength = 0; |
| 591 |
| 592 if(U_FAILURE(*err)) { |
| 593 goto error; |
| 594 } |
| 595 |
| 596 /** |
| 597 * Try the language with the script and region first. |
| 598 **/ |
| 599 if (scriptLength > 0 && regionLength > 0) { |
| 600 |
| 601 const char* likelySubtags = NULL; |
| 602 |
| 603 tagBufferLength = createTagString( |
| 604 lang, |
| 605 langLength, |
| 606 script, |
| 607 scriptLength, |
| 608 region, |
| 609 regionLength, |
| 610 NULL, |
| 611 0, |
| 612 tagBuffer, |
| 613 sizeof(tagBuffer), |
| 614 err); |
| 615 if(U_FAILURE(*err)) { |
| 616 goto error; |
| 617 } |
| 618 |
| 619 likelySubtags = |
| 620 findLikelySubtags( |
| 621 tagBuffer, |
| 622 likelySubtagsBuffer, |
| 623 sizeof(likelySubtagsBuffer), |
| 624 err); |
| 625 if(U_FAILURE(*err)) { |
| 626 goto error; |
| 627 } |
| 628 |
| 629 if (likelySubtags != NULL) { |
| 630 /* Always use the language tag from the |
| 631 maximal string, since it may be more |
| 632 specific than the one provided. */ |
| 633 return createTagStringWithAlternates( |
| 634 NULL, |
| 635 0, |
| 636 NULL, |
| 637 0, |
| 638 NULL, |
| 639 0, |
| 640 variants, |
| 641 variantsLength, |
| 642 likelySubtags, |
| 643 tag, |
| 644 tagCapacity, |
| 645 err); |
| 646 } |
| 647 } |
| 648 |
| 649 /** |
| 650 * Try the language with just the script. |
| 651 **/ |
| 652 if (scriptLength > 0) { |
| 653 |
| 654 const char* likelySubtags = NULL; |
| 655 |
| 656 tagBufferLength = createTagString( |
| 657 lang, |
| 658 langLength, |
| 659 script, |
| 660 scriptLength, |
| 661 NULL, |
| 662 0, |
| 663 NULL, |
| 664 0, |
| 665 tagBuffer, |
| 666 sizeof(tagBuffer), |
| 667 err); |
| 668 if(U_FAILURE(*err)) { |
| 669 goto error; |
| 670 } |
| 671 |
| 672 likelySubtags = |
| 673 findLikelySubtags( |
| 674 tagBuffer, |
| 675 likelySubtagsBuffer, |
| 676 sizeof(likelySubtagsBuffer), |
| 677 err); |
| 678 if(U_FAILURE(*err)) { |
| 679 goto error; |
| 680 } |
| 681 |
| 682 if (likelySubtags != NULL) { |
| 683 /* Always use the language tag from the |
| 684 maximal string, since it may be more |
| 685 specific than the one provided. */ |
| 686 return createTagStringWithAlternates( |
| 687 NULL, |
| 688 0, |
| 689 NULL, |
| 690 0, |
| 691 region, |
| 692 regionLength, |
| 693 variants, |
| 694 variantsLength, |
| 695 likelySubtags, |
| 696 tag, |
| 697 tagCapacity, |
| 698 err); |
| 699 } |
| 700 } |
| 701 |
| 702 /** |
| 703 * Try the language with just the region. |
| 704 **/ |
| 705 if (regionLength > 0) { |
| 706 |
| 707 const char* likelySubtags = NULL; |
| 708 |
| 709 createTagString( |
| 710 lang, |
| 711 langLength, |
| 712 NULL, |
| 713 0, |
| 714 region, |
| 715 regionLength, |
| 716 NULL, |
| 717 0, |
| 718 tagBuffer, |
| 719 sizeof(tagBuffer), |
| 720 err); |
| 721 if(U_FAILURE(*err)) { |
| 722 goto error; |
| 723 } |
| 724 |
| 725 likelySubtags = |
| 726 findLikelySubtags( |
| 727 tagBuffer, |
| 728 likelySubtagsBuffer, |
| 729 sizeof(likelySubtagsBuffer), |
| 730 err); |
| 731 if(U_FAILURE(*err)) { |
| 732 goto error; |
| 733 } |
| 734 |
| 735 if (likelySubtags != NULL) { |
| 736 /* Always use the language tag from the |
| 737 maximal string, since it may be more |
| 738 specific than the one provided. */ |
| 739 return createTagStringWithAlternates( |
| 740 NULL, |
| 741 0, |
| 742 script, |
| 743 scriptLength, |
| 744 NULL, |
| 745 0, |
| 746 variants, |
| 747 variantsLength, |
| 748 likelySubtags, |
| 749 tag, |
| 750 tagCapacity, |
| 751 err); |
| 752 } |
| 753 } |
| 754 |
| 755 /** |
| 756 * Finally, try just the language. |
| 757 **/ |
| 758 { |
| 759 const char* likelySubtags = NULL; |
| 760 |
| 761 createTagString( |
| 762 lang, |
| 763 langLength, |
| 764 NULL, |
| 765 0, |
| 766 NULL, |
| 767 0, |
| 768 NULL, |
| 769 0, |
| 770 tagBuffer, |
| 771 sizeof(tagBuffer), |
| 772 err); |
| 773 if(U_FAILURE(*err)) { |
| 774 goto error; |
| 775 } |
| 776 |
| 777 likelySubtags = |
| 778 findLikelySubtags( |
| 779 tagBuffer, |
| 780 likelySubtagsBuffer, |
| 781 sizeof(likelySubtagsBuffer), |
| 782 err); |
| 783 if(U_FAILURE(*err)) { |
| 784 goto error; |
| 785 } |
| 786 |
| 787 if (likelySubtags != NULL) { |
| 788 /* Always use the language tag from the |
| 789 maximal string, since it may be more |
| 790 specific than the one provided. */ |
| 791 return createTagStringWithAlternates( |
| 792 NULL, |
| 793 0, |
| 794 script, |
| 795 scriptLength, |
| 796 region, |
| 797 regionLength, |
| 798 variants, |
| 799 variantsLength, |
| 800 likelySubtags, |
| 801 tag, |
| 802 tagCapacity, |
| 803 err); |
| 804 } |
| 805 } |
| 806 |
| 807 return u_terminateChars( |
| 808 tag, |
| 809 tagCapacity, |
| 810 0, |
| 811 err); |
| 812 |
| 813 error: |
| 814 |
| 815 if (!U_FAILURE(*err)) { |
| 816 *err = U_ILLEGAL_ARGUMENT_ERROR; |
| 817 } |
| 818 |
| 819 return -1; |
| 820 } |
| 821 |
/*
 * Scan the trailing part of a locale ID and jump to the enclosing function's
 * "error" label if any subtag in it is too long.
 *
 * Subtags are delimited by '-' or '_'; scanning stops at the first '@', so
 * anything after it is not checked. A character is rejected when more than
 * 8 characters of the current subtag have already been counted, so the jump
 * is taken on the tenth character of a subtag.
 *
 * The invoking function must define a label named "error".
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    do { \
        int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    } while (0)
| 840 |
| 841 static int32_t |
| 842 _uloc_addLikelySubtags(const char* localeID, |
| 843 char* maximizedLocaleID, |
| 844 int32_t maximizedLocaleIDCapacity, |
| 845 UErrorCode* err) |
| 846 { |
| 847 char lang[ULOC_LANG_CAPACITY]; |
| 848 int32_t langLength = sizeof(lang); |
| 849 char script[ULOC_SCRIPT_CAPACITY]; |
| 850 int32_t scriptLength = sizeof(script); |
| 851 char region[ULOC_COUNTRY_CAPACITY]; |
| 852 int32_t regionLength = sizeof(region); |
| 853 const char* trailing = ""; |
| 854 int32_t trailingLength = 0; |
| 855 int32_t trailingIndex = 0; |
| 856 int32_t resultLength = 0; |
| 857 |
| 858 if(U_FAILURE(*err)) { |
| 859 goto error; |
| 860 } |
| 861 else if (localeID == NULL || |
| 862 maximizedLocaleID == NULL || |
| 863 maximizedLocaleIDCapacity <= 0) { |
| 864 goto error; |
| 865 } |
| 866 |
| 867 trailingIndex = parseTagString( |
| 868 localeID, |
| 869 lang, |
| 870 &langLength, |
| 871 script, |
| 872 &scriptLength, |
| 873 region, |
| 874 ®ionLength, |
| 875 err); |
| 876 if(U_FAILURE(*err)) { |
| 877 /* Overflow indicates an illegal argument error */ |
| 878 if (*err == U_BUFFER_OVERFLOW_ERROR) { |
| 879 *err = U_ILLEGAL_ARGUMENT_ERROR; |
| 880 } |
| 881 |
| 882 goto error; |
| 883 } |
| 884 |
| 885 /* Find the length of the trailing portion. */ |
| 886 trailing = &localeID[trailingIndex]; |
| 887 trailingLength = (int32_t)uprv_strlen(trailing); |
| 888 |
| 889 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); |
| 890 |
| 891 resultLength = |
| 892 createLikelySubtagsString( |
| 893 lang, |
| 894 langLength, |
| 895 script, |
| 896 scriptLength, |
| 897 region, |
| 898 regionLength, |
| 899 trailing, |
| 900 trailingLength, |
| 901 maximizedLocaleID, |
| 902 maximizedLocaleIDCapacity, |
| 903 err); |
| 904 |
| 905 if (resultLength == 0) { |
| 906 const int32_t localIDLength = (int32_t)uprv_strlen(localeID); |
| 907 |
| 908 /* |
| 909 * If we get here, we need to return localeID. |
| 910 */ |
| 911 uprv_memcpy( |
| 912 maximizedLocaleID, |
| 913 localeID, |
| 914 localIDLength <= maximizedLocaleIDCapacity ? |
| 915 localIDLength : maximizedLocaleIDCapacity); |
| 916 |
| 917 resultLength = |
| 918 u_terminateChars( |
| 919 maximizedLocaleID, |
| 920 maximizedLocaleIDCapacity, |
| 921 localIDLength, |
| 922 err); |
| 923 } |
| 924 |
| 925 return resultLength; |
| 926 |
| 927 error: |
| 928 |
| 929 if (!U_FAILURE(*err)) { |
| 930 *err = U_ILLEGAL_ARGUMENT_ERROR; |
| 931 } |
| 932 |
| 933 return -1; |
| 934 } |
| 935 |
| 936 static int32_t |
| 937 _uloc_minimizeSubtags(const char* localeID, |
| 938 char* minimizedLocaleID, |
| 939 int32_t minimizedLocaleIDCapacity, |
| 940 UErrorCode* err) |
| 941 { |
| 942 /** |
| 943 * ULOC_FULLNAME_CAPACITY will provide enough capacity |
| 944 * that we can build a string that contains the language, |
| 945 * script and region code without worrying about overrunning |
| 946 * the user-supplied buffer. |
| 947 **/ |
| 948 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY]; |
| 949 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer); |
| 950 |
| 951 char lang[ULOC_LANG_CAPACITY]; |
| 952 int32_t langLength = sizeof(lang); |
| 953 char script[ULOC_SCRIPT_CAPACITY]; |
| 954 int32_t scriptLength = sizeof(script); |
| 955 char region[ULOC_COUNTRY_CAPACITY]; |
| 956 int32_t regionLength = sizeof(region); |
| 957 const char* trailing = ""; |
| 958 int32_t trailingLength = 0; |
| 959 int32_t trailingIndex = 0; |
| 960 |
| 961 if(U_FAILURE(*err)) { |
| 962 goto error; |
| 963 } |
| 964 else if (localeID == NULL || |
| 965 minimizedLocaleID == NULL || |
| 966 minimizedLocaleIDCapacity <= 0) { |
| 967 goto error; |
| 968 } |
| 969 |
| 970 trailingIndex = |
| 971 parseTagString( |
| 972 localeID, |
| 973 lang, |
| 974 &langLength, |
| 975 script, |
| 976 &scriptLength, |
| 977 region, |
| 978 ®ionLength, |
| 979 err); |
| 980 if(U_FAILURE(*err)) { |
| 981 |
| 982 /* Overflow indicates an illegal argument error */ |
| 983 if (*err == U_BUFFER_OVERFLOW_ERROR) { |
| 984 *err = U_ILLEGAL_ARGUMENT_ERROR; |
| 985 } |
| 986 |
| 987 goto error; |
| 988 } |
| 989 |
| 990 /* Find the spot where the variants begin, if any. */ |
| 991 trailing = &localeID[trailingIndex]; |
| 992 trailingLength = (int32_t)uprv_strlen(trailing); |
| 993 |
| 994 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); |
| 995 |
| 996 createTagString( |
| 997 lang, |
| 998 langLength, |
| 999 script, |
| 1000 scriptLength, |
| 1001 region, |
| 1002 regionLength, |
| 1003 NULL, |
| 1004 0, |
| 1005 maximizedTagBuffer, |
| 1006 maximizedTagBufferLength, |
| 1007 err); |
| 1008 if(U_FAILURE(*err)) { |
| 1009 goto error; |
| 1010 } |
| 1011 |
| 1012 /** |
| 1013 * First, we need to first get the maximization |
| 1014 * from AddLikelySubtags. |
| 1015 **/ |
| 1016 maximizedTagBufferLength = |
| 1017 uloc_addLikelySubtags( |
| 1018 maximizedTagBuffer, |
| 1019 maximizedTagBuffer, |
| 1020 maximizedTagBufferLength, |
| 1021 err); |
| 1022 |
| 1023 if(U_FAILURE(*err)) { |
| 1024 goto error; |
| 1025 } |
| 1026 |
| 1027 /** |
| 1028 * Start first with just the language. |
| 1029 **/ |
| 1030 { |
| 1031 char tagBuffer[ULOC_FULLNAME_CAPACITY]; |
| 1032 |
| 1033 const int32_t tagBufferLength = |
| 1034 createLikelySubtagsString( |
| 1035 lang, |
| 1036 langLength, |
| 1037 NULL, |
| 1038 0, |
| 1039 NULL, |
| 1040 0, |
| 1041 NULL, |
| 1042 0, |
| 1043 tagBuffer, |
| 1044 sizeof(tagBuffer), |
| 1045 err); |
| 1046 |
| 1047 if(U_FAILURE(*err)) { |
| 1048 goto error; |
| 1049 } |
| 1050 else if (uprv_strnicmp( |
| 1051 maximizedTagBuffer, |
| 1052 tagBuffer, |
| 1053 tagBufferLength) == 0) { |
| 1054 |
| 1055 return createTagString( |
| 1056 lang, |
| 1057 langLength, |
| 1058 NULL, |
| 1059 0, |
| 1060 NULL, |
| 1061 0, |
| 1062 trailing, |
| 1063 trailingLength, |
| 1064 minimizedLocaleID, |
| 1065 minimizedLocaleIDCapacity, |
| 1066 err); |
| 1067 } |
| 1068 } |
| 1069 |
| 1070 /** |
| 1071 * Next, try the language and region. |
| 1072 **/ |
| 1073 if (regionLength > 0) { |
| 1074 |
| 1075 char tagBuffer[ULOC_FULLNAME_CAPACITY]; |
| 1076 |
| 1077 const int32_t tagBufferLength = |
| 1078 createLikelySubtagsString( |
| 1079 lang, |
| 1080 langLength, |
| 1081 NULL, |
| 1082 0, |
| 1083 region, |
| 1084 regionLength, |
| 1085 NULL, |
| 1086 0, |
| 1087 tagBuffer, |
| 1088 sizeof(tagBuffer), |
| 1089 err); |
| 1090 |
| 1091 if(U_FAILURE(*err)) { |
| 1092 goto error; |
| 1093 } |
| 1094 else if (uprv_strnicmp( |
| 1095 maximizedTagBuffer, |
| 1096 tagBuffer, |
| 1097 tagBufferLength) == 0) { |
| 1098 |
| 1099 return createTagString( |
| 1100 lang, |
| 1101 langLength, |
| 1102 NULL, |
| 1103 0, |
| 1104 region, |
| 1105 regionLength, |
| 1106 trailing, |
| 1107 trailingLength, |
| 1108 minimizedLocaleID, |
| 1109 minimizedLocaleIDCapacity, |
| 1110 err); |
| 1111 } |
| 1112 } |
| 1113 |
| 1114 /** |
| 1115 * Finally, try the language and script. This is our last chance, |
| 1116 * since trying with all three subtags would only yield the |
| 1117 * maximal version that we already have. |
| 1118 **/ |
| 1119 if (scriptLength > 0 && regionLength > 0) { |
| 1120 char tagBuffer[ULOC_FULLNAME_CAPACITY]; |
| 1121 |
| 1122 const int32_t tagBufferLength = |
| 1123 createLikelySubtagsString( |
| 1124 lang, |
| 1125 langLength, |
| 1126 script, |
| 1127 scriptLength, |
| 1128 NULL, |
| 1129 0, |
| 1130 NULL, |
| 1131 0, |
| 1132 tagBuffer, |
| 1133 sizeof(tagBuffer), |
| 1134 err); |
| 1135 |
| 1136 if(U_FAILURE(*err)) { |
| 1137 goto error; |
| 1138 } |
| 1139 else if (uprv_strnicmp( |
| 1140 maximizedTagBuffer, |
| 1141 tagBuffer, |
| 1142 tagBufferLength) == 0) { |
| 1143 |
| 1144 return createTagString( |
| 1145 lang, |
| 1146 langLength, |
| 1147 script, |
| 1148 scriptLength, |
| 1149 NULL, |
| 1150 0, |
| 1151 trailing, |
| 1152 trailingLength, |
| 1153 minimizedLocaleID, |
| 1154 minimizedLocaleIDCapacity, |
| 1155 err); |
| 1156 } |
| 1157 } |
| 1158 |
| 1159 { |
| 1160 /** |
| 1161 * If we got here, return the locale ID parameter. |
| 1162 **/ |
| 1163 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); |
| 1164 |
| 1165 uprv_memcpy( |
| 1166 minimizedLocaleID, |
| 1167 localeID, |
| 1168 localeIDLength <= minimizedLocaleIDCapacity ? |
| 1169 localeIDLength : minimizedLocaleIDCapacity); |
| 1170 |
| 1171 return u_terminateChars( |
| 1172 minimizedLocaleID, |
| 1173 minimizedLocaleIDCapacity, |
| 1174 localeIDLength, |
| 1175 err); |
| 1176 } |
| 1177 |
| 1178 error: |
| 1179 |
| 1180 if (!U_FAILURE(*err)) { |
| 1181 *err = U_ILLEGAL_ARGUMENT_ERROR; |
| 1182 } |
| 1183 |
| 1184 return -1; |
| 1185 |
| 1186 |
| 1187 } |
| 1188 |
| 1189 static UBool |
| 1190 do_canonicalize(const char* localeID, |
| 1191 char* buffer, |
| 1192 int32_t bufferCapacity, |
| 1193 UErrorCode* err) |
| 1194 { |
| 1195 uloc_canonicalize( |
| 1196 localeID, |
| 1197 buffer, |
| 1198 bufferCapacity, |
| 1199 err); |
| 1200 |
| 1201 if (*err == U_STRING_NOT_TERMINATED_WARNING || |
| 1202 *err == U_BUFFER_OVERFLOW_ERROR) { |
| 1203 *err = U_ILLEGAL_ARGUMENT_ERROR; |
| 1204 |
| 1205 return FALSE; |
| 1206 } |
| 1207 else if (U_FAILURE(*err)) { |
| 1208 |
| 1209 return FALSE; |
| 1210 } |
| 1211 else { |
| 1212 return TRUE; |
| 1213 } |
| 1214 } |
| 1215 |
| 1216 U_DRAFT int32_t U_EXPORT2 |
| 1217 uloc_addLikelySubtags(const char* localeID, |
| 1218 char* maximizedLocaleID, |
| 1219 int32_t maximizedLocaleIDCapacity, |
| 1220 UErrorCode* err) |
| 1221 { |
| 1222 char localeBuffer[ULOC_FULLNAME_CAPACITY]; |
| 1223 |
| 1224 if (!do_canonicalize( |
| 1225 localeID, |
| 1226 localeBuffer, |
| 1227 sizeof(localeBuffer), |
| 1228 err)) { |
| 1229 return -1; |
| 1230 } |
| 1231 else { |
| 1232 return _uloc_addLikelySubtags( |
| 1233 localeBuffer, |
| 1234 maximizedLocaleID, |
| 1235 maximizedLocaleIDCapacity, |
| 1236 err); |
| 1237 } |
| 1238 } |
| 1239 |
| 1240 U_DRAFT int32_t U_EXPORT2 |
| 1241 uloc_minimizeSubtags(const char* localeID, |
| 1242 char* minimizedLocaleID, |
| 1243 int32_t minimizedLocaleIDCapacity, |
| 1244 UErrorCode* err) |
| 1245 { |
| 1246 char localeBuffer[ULOC_FULLNAME_CAPACITY]; |
| 1247 |
| 1248 if (!do_canonicalize( |
| 1249 localeID, |
| 1250 localeBuffer, |
| 1251 sizeof(localeBuffer), |
| 1252 err)) { |
| 1253 return -1; |
| 1254 } |
| 1255 else { |
| 1256 return _uloc_minimizeSubtags( |
| 1257 localeBuffer, |
| 1258 minimizedLocaleID, |
| 1259 minimizedLocaleIDCapacity, |
| 1260 err); |
| 1261 } |
| 1262 } |
OLD | NEW |