| OLD | NEW |
| 1 /* | 1 /* |
| 2 ******************************************************************************* | 2 ******************************************************************************* |
| 3 * Copyright (C) 2010-2012, International Business Machines | 3 * Copyright (C) 2010-2014, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* | 5 ******************************************************************************* |
| 6 * file name: uts46.cpp | 6 * file name: uts46.cpp |
| 7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
| 8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
| 9 * indentation:4 | 9 * indentation:4 |
| 10 * | 10 * |
| 11 * created on: 2010mar09 | 11 * created on: 2010mar09 |
| 12 * created by: Markus W. Scherer | 12 * created by: Markus W. Scherer |
| 13 */ | 13 */ |
| 14 | 14 |
| 15 #include "unicode/utypes.h" | 15 #include "unicode/utypes.h" |
| 16 | 16 |
| 17 #if !UCONFIG_NO_IDNA | 17 #if !UCONFIG_NO_IDNA |
| 18 | 18 |
| 19 #include "unicode/idna.h" | 19 #include "unicode/idna.h" |
| 20 #include "unicode/normalizer2.h" | 20 #include "unicode/normalizer2.h" |
| 21 #include "unicode/uscript.h" | 21 #include "unicode/uscript.h" |
| 22 #include "unicode/ustring.h" | 22 #include "unicode/ustring.h" |
| 23 #include "unicode/utf16.h" | 23 #include "unicode/utf16.h" |
| 24 #include "cmemory.h" | 24 #include "cmemory.h" |
| 25 #include "cstring.h" | 25 #include "cstring.h" |
| 26 #include "punycode.h" | 26 #include "punycode.h" |
| 27 #include "ubidi_props.h" | 27 #include "ubidi_props.h" |
| 28 #include "ustr_imp.h" | 28 #include "ustr_imp.h" |
| 29 | 29 |
| 30 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) | |
| 31 | |
| 32 // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: | 30 // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: |
| 33 // | 31 // |
| 34 // The domain name length limit is 255 octets in an internal DNS representation | 32 // The domain name length limit is 255 octets in an internal DNS representation |
| 35 // where the last ("root") label is the empty label | 33 // where the last ("root") label is the empty label |
| 36 // represented by length byte 0 alone. | 34 // represented by length byte 0 alone. |
| 37 // In a conventional string, this translates to 253 characters, or 254 | 35 // In a conventional string, this translates to 253 characters, or 254 |
| 38 // if there is a trailing dot for the root label. | 36 // if there is a trailing dot for the root label. |
| 39 | 37 |
| 40 U_NAMESPACE_BEGIN | 38 U_NAMESPACE_BEGIN |
| 41 | 39 |
| (...skipping 272 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 314 if(&dest==&src || srcArray==NULL) { | 312 if(&dest==&src || srcArray==NULL) { |
| 315 errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 313 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 316 dest.setToBogus(); | 314 dest.setToBogus(); |
| 317 return dest; | 315 return dest; |
| 318 } | 316 } |
| 319 // Arguments are fine, reset output values. | 317 // Arguments are fine, reset output values. |
| 320 dest.remove(); | 318 dest.remove(); |
| 321 info.reset(); | 319 info.reset(); |
| 322 int32_t srcLength=src.length(); | 320 int32_t srcLength=src.length(); |
| 323 if(srcLength==0) { | 321 if(srcLength==0) { |
| 324 if(toASCII) { | 322 info.errors|=UIDNA_ERROR_EMPTY_LABEL; |
| 325 info.errors|=UIDNA_ERROR_EMPTY_LABEL; | |
| 326 } | |
| 327 return dest; | 323 return dest; |
| 328 } | 324 } |
| 329 UChar *destArray=dest.getBuffer(srcLength); | 325 UChar *destArray=dest.getBuffer(srcLength); |
| 330 if(destArray==NULL) { | 326 if(destArray==NULL) { |
| 331 errorCode=U_MEMORY_ALLOCATION_ERROR; | 327 errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 332 return dest; | 328 return dest; |
| 333 } | 329 } |
| 334 // ASCII fastpath | 330 // ASCII fastpath |
| 335 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | 331 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; |
| 336 int32_t labelStart=0; | 332 int32_t labelStart=0; |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 374 if((i+1)==srcLength || srcArray[i+1]==0x2e) { | 370 if((i+1)==srcLength || srcArray[i+1]==0x2e) { |
| 375 // label ends with "-" | 371 // label ends with "-" |
| 376 info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; | 372 info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; |
| 377 } | 373 } |
| 378 } else if(c==0x2e) { // dot | 374 } else if(c==0x2e) { // dot |
| 379 if(isLabel) { | 375 if(isLabel) { |
| 380 // Replacing with U+FFFD can be complicated for toASCII. | 376 // Replacing with U+FFFD can be complicated for toASCII. |
| 381 ++i; // '.' was copied to dest already | 377 ++i; // '.' was copied to dest already |
| 382 break; | 378 break; |
| 383 } | 379 } |
| 384 if(toASCII) { | 380 if(i==labelStart) { |
| 385 // Permit an empty label at the end but not elsewhere. | 381 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; |
| 386 if(i==labelStart && i<(srcLength-1)) { | 382 } |
| 387 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | 383 if(toASCII && (i-labelStart)>63) { |
| 388 } else if((i-labelStart)>63) { | 384 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
| 389 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | |
| 390 } | |
| 391 } | 385 } |
| 392 info.errors|=info.labelErrors; | 386 info.errors|=info.labelErrors; |
| 393 info.labelErrors=0; | 387 info.labelErrors=0; |
| 394 labelStart=i+1; | 388 labelStart=i+1; |
| 395 } | 389 } |
| 396 } | 390 } |
| 397 } | 391 } |
| 398 info.errors|=info.labelErrors; | 392 info.errors|=info.labelErrors; |
| 399 dest.releaseBuffer(i); | 393 dest.releaseBuffer(i); |
| 400 processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode); | 394 processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode); |
| (...skipping 15 matching lines...) Expand all Loading... |
| 416 } | 410 } |
| 417 const char *srcArray=src.data(); | 411 const char *srcArray=src.data(); |
| 418 int32_t srcLength=src.length(); | 412 int32_t srcLength=src.length(); |
| 419 if(srcArray==NULL && srcLength!=0) { | 413 if(srcArray==NULL && srcLength!=0) { |
| 420 errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 414 errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 421 return; | 415 return; |
| 422 } | 416 } |
| 423 // Arguments are fine, reset output values. | 417 // Arguments are fine, reset output values. |
| 424 info.reset(); | 418 info.reset(); |
| 425 if(srcLength==0) { | 419 if(srcLength==0) { |
| 426 if(toASCII) { | 420 info.errors|=UIDNA_ERROR_EMPTY_LABEL; |
| 427 info.errors|=UIDNA_ERROR_EMPTY_LABEL; | |
| 428 } | |
| 429 dest.Flush(); | 421 dest.Flush(); |
| 430 return; | 422 return; |
| 431 } | 423 } |
| 432 UnicodeString destString; | 424 UnicodeString destString; |
| 433 int32_t labelStart=0; | 425 int32_t labelStart=0; |
| 434 if(srcLength<=256) { // length of stackArray[] | 426 if(srcLength<=256) { // length of stackArray[] |
| 435 // ASCII fastpath | 427 // ASCII fastpath |
| 436 char stackArray[256]; | 428 char stackArray[256]; |
| 437 int32_t destCapacity; | 429 int32_t destCapacity; |
| 438 char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, | 430 char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, |
| 439 stackArray, LENGTHOF(stackArray), &
destCapacity); | 431 stackArray, UPRV_LENGTHOF(stackArra
y), &destCapacity); |
| 440 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | 432 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; |
| 441 int32_t i; | 433 int32_t i; |
| 442 for(i=0;; ++i) { | 434 for(i=0;; ++i) { |
| 443 if(i==srcLength) { | 435 if(i==srcLength) { |
| 444 if(toASCII) { | 436 if(toASCII) { |
| 445 if((i-labelStart)>63) { | 437 if((i-labelStart)>63) { |
| 446 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 438 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
| 447 } | 439 } |
| 448 // There is a trailing dot if labelStart==i. | 440 // There is a trailing dot if labelStart==i. |
| 449 if(!isLabel && i>=254 && (i>254 || labelStart<i)) { | 441 if(!isLabel && i>=254 && (i>254 || labelStart<i)) { |
| (...skipping 26 matching lines...) Expand all Loading... |
| 476 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | 468 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; |
| 477 } | 469 } |
| 478 if((i+1)==srcLength || srcArray[i+1]==0x2e) { | 470 if((i+1)==srcLength || srcArray[i+1]==0x2e) { |
| 479 // label ends with "-" | 471 // label ends with "-" |
| 480 info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; | 472 info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; |
| 481 } | 473 } |
| 482 } else if(c==0x2e) { // dot | 474 } else if(c==0x2e) { // dot |
| 483 if(isLabel) { | 475 if(isLabel) { |
| 484 break; // Replacing with U+FFFD can be complicated for
toASCII. | 476 break; // Replacing with U+FFFD can be complicated for
toASCII. |
| 485 } | 477 } |
| 486 if(toASCII) { | 478 if(i==labelStart) { |
| 487 // Permit an empty label at the end but not elsewhere. | 479 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; |
| 488 if(i==labelStart && i<(srcLength-1)) { | 480 } |
| 489 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | 481 if(toASCII && (i-labelStart)>63) { |
| 490 } else if((i-labelStart)>63) { | 482 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
| 491 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | |
| 492 } | |
| 493 } | 483 } |
| 494 info.errors|=info.labelErrors; | 484 info.errors|=info.labelErrors; |
| 495 info.labelErrors=0; | 485 info.labelErrors=0; |
| 496 labelStart=i+1; | 486 labelStart=i+1; |
| 497 } | 487 } |
| 498 } | 488 } |
| 499 } | 489 } |
| 500 info.errors|=info.labelErrors; | 490 info.errors|=info.labelErrors; |
| 501 // Convert the processed ASCII prefix of the current label to UTF-16. | 491 // Convert the processed ASCII prefix of the current label to UTF-16. |
| 502 int32_t mappingStart=i-labelStart; | 492 int32_t mappingStart=i-labelStart; |
| (...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 741 labelString=&fromPunycode; | 731 labelString=&fromPunycode; |
| 742 label=fromPunycode.getBuffer(); | 732 label=fromPunycode.getBuffer(); |
| 743 labelStart=0; | 733 labelStart=0; |
| 744 labelLength=fromPunycode.length(); | 734 labelLength=fromPunycode.length(); |
| 745 } else { | 735 } else { |
| 746 wasPunycode=FALSE; | 736 wasPunycode=FALSE; |
| 747 labelString=&dest; | 737 labelString=&dest; |
| 748 } | 738 } |
| 749 // Validity check | 739 // Validity check |
| 750 if(labelLength==0) { | 740 if(labelLength==0) { |
| 751 if(toASCII) { | 741 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; |
| 752 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | |
| 753 } | |
| 754 return replaceLabel(dest, destLabelStart, destLabelLength, *labelString,
labelLength); | 742 return replaceLabel(dest, destLabelStart, destLabelLength, *labelString,
labelLength); |
| 755 } | 743 } |
| 756 // labelLength>0 | 744 // labelLength>0 |
| 757 if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { | 745 if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { |
| 758 // label starts with "??--" | 746 // label starts with "??--" |
| 759 info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; | 747 info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; |
| 760 } | 748 } |
| 761 if(label[0]==0x2d) { | 749 if(label[0]==0x2d) { |
| 762 // label starts with "-" | 750 // label starts with "-" |
| 763 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | 751 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; |
| (...skipping 686 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1450 } | 1438 } |
| 1451 StringPiece src(name, length<0 ? uprv_strlen(name) : length); | 1439 StringPiece src(name, length<0 ? uprv_strlen(name) : length); |
| 1452 CheckedArrayByteSink sink(dest, capacity); | 1440 CheckedArrayByteSink sink(dest, capacity); |
| 1453 IDNAInfo info; | 1441 IDNAInfo info; |
| 1454 reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pE
rrorCode); | 1442 reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pE
rrorCode); |
| 1455 idnaInfoToStruct(info, pInfo); | 1443 idnaInfoToStruct(info, pInfo); |
| 1456 return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pError
Code); | 1444 return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pError
Code); |
| 1457 } | 1445 } |
| 1458 | 1446 |
| 1459 #endif // UCONFIG_NO_IDNA | 1447 #endif // UCONFIG_NO_IDNA |
| OLD | NEW |