| OLD | NEW | 
|---|---|
| 1 /* | 1 /* | 
| 2 ******************************************************************************* | 2 ******************************************************************************* | 
| 3 *   Copyright (C) 2010-2012, International Business Machines | 3 *   Copyright (C) 2010-2014, International Business Machines | 
| 4 *   Corporation and others.  All Rights Reserved. | 4 *   Corporation and others.  All Rights Reserved. | 
| 5 ******************************************************************************* | 5 ******************************************************************************* | 
| 6 *   file name:  uts46.cpp | 6 *   file name:  uts46.cpp | 
| 7 *   encoding:   US-ASCII | 7 *   encoding:   US-ASCII | 
| 8 *   tab size:   8 (not used) | 8 *   tab size:   8 (not used) | 
| 9 *   indentation:4 | 9 *   indentation:4 | 
| 10 * | 10 * | 
| 11 *   created on: 2010mar09 | 11 *   created on: 2010mar09 | 
| 12 *   created by: Markus W. Scherer | 12 *   created by: Markus W. Scherer | 
| 13 */ | 13 */ | 
| 14 | 14 | 
| 15 #include "unicode/utypes.h" | 15 #include "unicode/utypes.h" | 
| 16 | 16 | 
| 17 #if !UCONFIG_NO_IDNA | 17 #if !UCONFIG_NO_IDNA | 
| 18 | 18 | 
| 19 #include "unicode/idna.h" | 19 #include "unicode/idna.h" | 
| 20 #include "unicode/normalizer2.h" | 20 #include "unicode/normalizer2.h" | 
| 21 #include "unicode/uscript.h" | 21 #include "unicode/uscript.h" | 
| 22 #include "unicode/ustring.h" | 22 #include "unicode/ustring.h" | 
| 23 #include "unicode/utf16.h" | 23 #include "unicode/utf16.h" | 
| 24 #include "cmemory.h" | 24 #include "cmemory.h" | 
| 25 #include "cstring.h" | 25 #include "cstring.h" | 
| 26 #include "punycode.h" | 26 #include "punycode.h" | 
| 27 #include "ubidi_props.h" | 27 #include "ubidi_props.h" | 
| 28 #include "ustr_imp.h" | 28 #include "ustr_imp.h" | 
| 29 | 29 | 
| 30 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) |  | 
| 31 |  | 
| 32 // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: | 30 // Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: | 
| 33 // | 31 // | 
| 34 // The domain name length limit is 255 octets in an internal DNS representation | 32 // The domain name length limit is 255 octets in an internal DNS representation | 
| 35 // where the last ("root") label is the empty label | 33 // where the last ("root") label is the empty label | 
| 36 // represented by length byte 0 alone. | 34 // represented by length byte 0 alone. | 
| 37 // In a conventional string, this translates to 253 characters, or 254 | 35 // In a conventional string, this translates to 253 characters, or 254 | 
| 38 // if there is a trailing dot for the root label. | 36 // if there is a trailing dot for the root label. | 
| 39 | 37 | 
| 40 U_NAMESPACE_BEGIN | 38 U_NAMESPACE_BEGIN | 
| 41 | 39 | 
| (...skipping 272 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 314     if(&dest==&src || srcArray==NULL) { | 312     if(&dest==&src || srcArray==NULL) { | 
| 315         errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 313         errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
| 316         dest.setToBogus(); | 314         dest.setToBogus(); | 
| 317         return dest; | 315         return dest; | 
| 318     } | 316     } | 
| 319     // Arguments are fine, reset output values. | 317     // Arguments are fine, reset output values. | 
| 320     dest.remove(); | 318     dest.remove(); | 
| 321     info.reset(); | 319     info.reset(); | 
| 322     int32_t srcLength=src.length(); | 320     int32_t srcLength=src.length(); | 
| 323     if(srcLength==0) { | 321     if(srcLength==0) { | 
| 324         if(toASCII) { | 322         info.errors|=UIDNA_ERROR_EMPTY_LABEL; | 
| 325             info.errors|=UIDNA_ERROR_EMPTY_LABEL; |  | 
| 326         } |  | 
| 327         return dest; | 323         return dest; | 
| 328     } | 324     } | 
| 329     UChar *destArray=dest.getBuffer(srcLength); | 325     UChar *destArray=dest.getBuffer(srcLength); | 
| 330     if(destArray==NULL) { | 326     if(destArray==NULL) { | 
| 331         errorCode=U_MEMORY_ALLOCATION_ERROR; | 327         errorCode=U_MEMORY_ALLOCATION_ERROR; | 
| 332         return dest; | 328         return dest; | 
| 333     } | 329     } | 
| 334     // ASCII fastpath | 330     // ASCII fastpath | 
| 335     UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | 331     UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | 
| 336     int32_t labelStart=0; | 332     int32_t labelStart=0; | 
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 374                 if((i+1)==srcLength || srcArray[i+1]==0x2e) { | 370                 if((i+1)==srcLength || srcArray[i+1]==0x2e) { | 
| 375                     // label ends with "-" | 371                     // label ends with "-" | 
| 376                     info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; | 372                     info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; | 
| 377                 } | 373                 } | 
| 378             } else if(c==0x2e) {  // dot | 374             } else if(c==0x2e) {  // dot | 
| 379                 if(isLabel) { | 375                 if(isLabel) { | 
| 380                     // Replacing with U+FFFD can be complicated for toASCII. | 376                     // Replacing with U+FFFD can be complicated for toASCII. | 
| 381                     ++i;  // '.' was copied to dest already | 377                     ++i;  // '.' was copied to dest already | 
| 382                     break; | 378                     break; | 
| 383                 } | 379                 } | 
| 384                 if(toASCII) { | 380                 if(i==labelStart) { | 
| 385                     // Permit an empty label at the end but not elsewhere. | 381                     info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | 
| 386                     if(i==labelStart && i<(srcLength-1)) { | 382                 } | 
| 387                         info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | 383                 if(toASCII && (i-labelStart)>63) { | 
| 388                     } else if((i-labelStart)>63) { | 384                     info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 
| 389                         info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |  | 
| 390                     } |  | 
| 391                 } | 385                 } | 
| 392                 info.errors|=info.labelErrors; | 386                 info.errors|=info.labelErrors; | 
| 393                 info.labelErrors=0; | 387                 info.labelErrors=0; | 
| 394                 labelStart=i+1; | 388                 labelStart=i+1; | 
| 395             } | 389             } | 
| 396         } | 390         } | 
| 397     } | 391     } | 
| 398     info.errors|=info.labelErrors; | 392     info.errors|=info.labelErrors; | 
| 399     dest.releaseBuffer(i); | 393     dest.releaseBuffer(i); | 
| 400     processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode); | 394     processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode); | 
| (...skipping 15 matching lines...) Expand all  Loading... | 
| 416     } | 410     } | 
| 417     const char *srcArray=src.data(); | 411     const char *srcArray=src.data(); | 
| 418     int32_t srcLength=src.length(); | 412     int32_t srcLength=src.length(); | 
| 419     if(srcArray==NULL && srcLength!=0) { | 413     if(srcArray==NULL && srcLength!=0) { | 
| 420         errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 414         errorCode=U_ILLEGAL_ARGUMENT_ERROR; | 
| 421         return; | 415         return; | 
| 422     } | 416     } | 
| 423     // Arguments are fine, reset output values. | 417     // Arguments are fine, reset output values. | 
| 424     info.reset(); | 418     info.reset(); | 
| 425     if(srcLength==0) { | 419     if(srcLength==0) { | 
| 426         if(toASCII) { | 420         info.errors|=UIDNA_ERROR_EMPTY_LABEL; | 
| 427             info.errors|=UIDNA_ERROR_EMPTY_LABEL; |  | 
| 428         } |  | 
| 429         dest.Flush(); | 421         dest.Flush(); | 
| 430         return; | 422         return; | 
| 431     } | 423     } | 
| 432     UnicodeString destString; | 424     UnicodeString destString; | 
| 433     int32_t labelStart=0; | 425     int32_t labelStart=0; | 
| 434     if(srcLength<=256) {  // length of stackArray[] | 426     if(srcLength<=256) {  // length of stackArray[] | 
| 435         // ASCII fastpath | 427         // ASCII fastpath | 
| 436         char stackArray[256]; | 428         char stackArray[256]; | 
| 437         int32_t destCapacity; | 429         int32_t destCapacity; | 
| 438         char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, | 430         char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, | 
| 439                                              stackArray, LENGTHOF(stackArray), &destCapacity); | 431                                              stackArray, UPRV_LENGTHOF(stackArray), &destCapacity); | 
| 440         UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | 432         UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | 
| 441         int32_t i; | 433         int32_t i; | 
| 442         for(i=0;; ++i) { | 434         for(i=0;; ++i) { | 
| 443             if(i==srcLength) { | 435             if(i==srcLength) { | 
| 444                 if(toASCII) { | 436                 if(toASCII) { | 
| 445                     if((i-labelStart)>63) { | 437                     if((i-labelStart)>63) { | 
| 446                         info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 438                         info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 
| 447                     } | 439                     } | 
| 448                     // There is a trailing dot if labelStart==i. | 440                     // There is a trailing dot if labelStart==i. | 
| 449                     if(!isLabel && i>=254 && (i>254 || labelStart<i)) { | 441                     if(!isLabel && i>=254 && (i>254 || labelStart<i)) { | 
| (...skipping 26 matching lines...) Expand all  Loading... | 
| 476                         info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | 468                         info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | 
| 477                     } | 469                     } | 
| 478                     if((i+1)==srcLength || srcArray[i+1]==0x2e) { | 470                     if((i+1)==srcLength || srcArray[i+1]==0x2e) { | 
| 479                         // label ends with "-" | 471                         // label ends with "-" | 
| 480                         info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; | 472                         info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; | 
| 481                     } | 473                     } | 
| 482                 } else if(c==0x2e) {  // dot | 474                 } else if(c==0x2e) {  // dot | 
| 483                     if(isLabel) { | 475                     if(isLabel) { | 
| 484                         break;  // Replacing with U+FFFD can be complicated for toASCII. | 476                         break;  // Replacing with U+FFFD can be complicated for toASCII. | 
| 485                     } | 477                     } | 
| 486                     if(toASCII) { | 478                     if(i==labelStart) { | 
| 487                         // Permit an empty label at the end but not elsewhere. | 479                         info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | 
| 488                         if(i==labelStart && i<(srcLength-1)) { | 480                     } | 
| 489                             info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | 481                     if(toASCII && (i-labelStart)>63) { | 
| 490                         } else if((i-labelStart)>63) { | 482                         info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 
| 491                             info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |  | 
| 492                         } |  | 
| 493                     } | 483                     } | 
| 494                     info.errors|=info.labelErrors; | 484                     info.errors|=info.labelErrors; | 
| 495                     info.labelErrors=0; | 485                     info.labelErrors=0; | 
| 496                     labelStart=i+1; | 486                     labelStart=i+1; | 
| 497                 } | 487                 } | 
| 498             } | 488             } | 
| 499         } | 489         } | 
| 500         info.errors|=info.labelErrors; | 490         info.errors|=info.labelErrors; | 
| 501         // Convert the processed ASCII prefix of the current label to UTF-16. | 491         // Convert the processed ASCII prefix of the current label to UTF-16. | 
| 502         int32_t mappingStart=i-labelStart; | 492         int32_t mappingStart=i-labelStart; | 
| (...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 741         labelString=&fromPunycode; | 731         labelString=&fromPunycode; | 
| 742         label=fromPunycode.getBuffer(); | 732         label=fromPunycode.getBuffer(); | 
| 743         labelStart=0; | 733         labelStart=0; | 
| 744         labelLength=fromPunycode.length(); | 734         labelLength=fromPunycode.length(); | 
| 745     } else { | 735     } else { | 
| 746         wasPunycode=FALSE; | 736         wasPunycode=FALSE; | 
| 747         labelString=&dest; | 737         labelString=&dest; | 
| 748     } | 738     } | 
| 749     // Validity check | 739     // Validity check | 
| 750     if(labelLength==0) { | 740     if(labelLength==0) { | 
| 751         if(toASCII) { | 741         info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | 
| 752             info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; |  | 
| 753         } |  | 
| 754         return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); | 742         return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); | 
| 755     } | 743     } | 
| 756     // labelLength>0 | 744     // labelLength>0 | 
| 757     if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { | 745     if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { | 
| 758         // label starts with "??--" | 746         // label starts with "??--" | 
| 759         info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; | 747         info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; | 
| 760     } | 748     } | 
| 761     if(label[0]==0x2d) { | 749     if(label[0]==0x2d) { | 
| 762         // label starts with "-" | 750         // label starts with "-" | 
| 763         info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | 751         info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | 
| (...skipping 686 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1450     } | 1438     } | 
| 1451     StringPiece src(name, length<0 ? uprv_strlen(name) : length); | 1439     StringPiece src(name, length<0 ? uprv_strlen(name) : length); | 
| 1452     CheckedArrayByteSink sink(dest, capacity); | 1440     CheckedArrayByteSink sink(dest, capacity); | 
| 1453     IDNAInfo info; | 1441     IDNAInfo info; | 
| 1454     reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode); | 1442     reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode); | 
| 1455     idnaInfoToStruct(info, pInfo); | 1443     idnaInfoToStruct(info, pInfo); | 
| 1456     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); | 1444     return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); | 
| 1457 } | 1445 } | 
| 1458 | 1446 | 
| 1459 #endif  // UCONFIG_NO_IDNA | 1447 #endif  // UCONFIG_NO_IDNA | 
| OLD | NEW | 
|---|---|