| OLD | NEW |
| 1 /* | 1 /* |
| 2 ******************************************************************************* | 2 ******************************************************************************* |
| 3 * Copyright (C) 2010-2014, International Business Machines | 3 * Copyright (C) 2010-2015, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* | 5 ******************************************************************************* |
| 6 * file name: uts46.cpp | 6 * file name: uts46.cpp |
| 7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
| 8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
| 9 * indentation:4 | 9 * indentation:4 |
| 10 * | 10 * |
| 11 * created on: 2010mar09 | 11 * created on: 2010mar09 |
| 12 * created by: Markus W. Scherer | 12 * created by: Markus W. Scherer |
| 13 */ | 13 */ |
| (...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 173 | 173 |
| 174 // returns the new label length | 174 // returns the new label length |
| 175 int32_t | 175 int32_t |
| 176 processLabel(UnicodeString &dest, | 176 processLabel(UnicodeString &dest, |
| 177 int32_t labelStart, int32_t labelLength, | 177 int32_t labelStart, int32_t labelLength, |
| 178 UBool toASCII, | 178 UBool toASCII, |
| 179 IDNAInfo &info, UErrorCode &errorCode) const; | 179 IDNAInfo &info, UErrorCode &errorCode) const; |
| 180 int32_t | 180 int32_t |
| 181 markBadACELabel(UnicodeString &dest, | 181 markBadACELabel(UnicodeString &dest, |
| 182 int32_t labelStart, int32_t labelLength, | 182 int32_t labelStart, int32_t labelLength, |
| 183 UBool toASCII, IDNAInfo &info) const; | 183 UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const; |
| 184 | 184 |
| 185 void | 185 void |
| 186 checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const; | 186 checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const; |
| 187 | 187 |
| 188 UBool | 188 UBool |
| 189 isLabelOkContextJ(const UChar *label, int32_t labelLength) const; | 189 isLabelOkContextJ(const UChar *label, int32_t labelLength) const; |
| 190 | 190 |
| 191 void | 191 void |
| 192 checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const; | 192 checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const; |
| 193 | 193 |
| (...skipping 386 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 580 processLabel(dest, labelStart, labelLimit-labelStart, | 580 processLabel(dest, labelStart, labelLimit-labelStart, |
| 581 toASCII, info, errorCode); | 581 toASCII, info, errorCode); |
| 582 info.errors|=info.labelErrors; | 582 info.errors|=info.labelErrors; |
| 583 } | 583 } |
| 584 return dest; | 584 return dest; |
| 585 } | 585 } |
| 586 | 586 |
| 587 int32_t | 587 int32_t |
| 588 UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, | 588 UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, |
| 589 UErrorCode &errorCode) const { | 589 UErrorCode &errorCode) const { |
| 590 if(U_FAILURE(errorCode)) { |
| 591 return 0; |
| 592 } |
| 590 int32_t length=dest.length(); | 593 int32_t length=dest.length(); |
| 591 UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length); | 594 UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length); |
| 592 if(s==NULL) { | 595 if(s==NULL) { |
| 593 errorCode=U_MEMORY_ALLOCATION_ERROR; | 596 errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 594 return length; | 597 return length; |
| 595 } | 598 } |
| 596 int32_t capacity=dest.getCapacity(); | 599 int32_t capacity=dest.getCapacity(); |
| 597 UBool didMapDevChars=FALSE; | 600 UBool didMapDevChars=FALSE; |
| 598 int32_t readIndex=mappingStart, writeIndex=mappingStart; | 601 int32_t readIndex=mappingStart, writeIndex=mappingStart; |
| 599 do { | 602 do { |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 637 } while(writeIndex<length); | 640 } while(writeIndex<length); |
| 638 dest.releaseBuffer(length); | 641 dest.releaseBuffer(length); |
| 639 if(didMapDevChars) { | 642 if(didMapDevChars) { |
| 640 // Mapping deviation characters might have resulted in an un-NFC string. | 643 // Mapping deviation characters might have resulted in an un-NFC string. |
| 641 // We could use either the NFC or the UTS #46 normalizer. | 644 // We could use either the NFC or the UTS #46 normalizer. |
| 642 // By using the UTS #46 normalizer again, we avoid having to load a second .nrm data file. | 645 // By using the UTS #46 normalizer again, we avoid having to load a second .nrm data file. |
| 643 UnicodeString normalized; | 646 UnicodeString normalized; |
| 644 uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode); | 647 uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode); |
| 645 if(U_SUCCESS(errorCode)) { | 648 if(U_SUCCESS(errorCode)) { |
| 646 dest.replace(labelStart, 0x7fffffff, normalized); | 649 dest.replace(labelStart, 0x7fffffff, normalized); |
| 650 if(dest.isBogus()) { |
| 651 errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 652 } |
| 647 return dest.length(); | 653 return dest.length(); |
| 648 } | 654 } |
| 649 } | 655 } |
| 650 return length; | 656 return length; |
| 651 } | 657 } |
| 652 | 658 |
| 653 // Some non-ASCII characters are equivalent to sequences with | 659 // Some non-ASCII characters are equivalent to sequences with |
| 654 // non-LDH ASCII characters. To find them: | 660 // non-LDH ASCII characters. To find them: |
| 655 // grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt) | 661 // grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt) |
| 656 static inline UBool | 662 static inline UBool |
| 657 isNonASCIIDisallowedSTD3Valid(UChar32 c) { | 663 isNonASCIIDisallowedSTD3Valid(UChar32 c) { |
| 658 return c==0x2260 || c==0x226E || c==0x226F; | 664 return c==0x2260 || c==0x226E || c==0x226F; |
| 659 } | 665 } |
| 660 | 666 |
| 661 // Replace the label in dest with the label string, if the label was modified. | 667 // Replace the label in dest with the label string, if the label was modified. |
| 662 // If &label==&dest then the label was modified in-place and labelLength | 668 // If &label==&dest then the label was modified in-place and labelLength |
| 663 // is the new label length, different from label.length(). | 669 // is the new label length, different from label.length(). |
| 664 // If &label!=&dest then labelLength==label.length(). | 670 // If &label!=&dest then labelLength==label.length(). |
| 665 // Returns labelLength (= the new label length). | 671 // Returns labelLength (= the new label length). |
| 666 static int32_t | 672 static int32_t |
| 667 replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength, | 673 replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength, |
| 668 const UnicodeString &label, int32_t labelLength) { | 674 const UnicodeString &label, int32_t labelLength, UErrorCode &errorCode) { |
| 675 if(U_FAILURE(errorCode)) { |
| 676 return 0; |
| 677 } |
| 669 if(&label!=&dest) { | 678 if(&label!=&dest) { |
| 670 dest.replace(destLabelStart, destLabelLength, label); | 679 dest.replace(destLabelStart, destLabelLength, label); |
| 680 if(dest.isBogus()) { |
| 681 errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 682 return 0; |
| 683 } |
| 671 } | 684 } |
| 672 return labelLength; | 685 return labelLength; |
| 673 } | 686 } |
| 674 | 687 |
| 675 int32_t | 688 int32_t |
| 676 UTS46::processLabel(UnicodeString &dest, | 689 UTS46::processLabel(UnicodeString &dest, |
| 677 int32_t labelStart, int32_t labelLength, | 690 int32_t labelStart, int32_t labelLength, |
| 678 UBool toASCII, | 691 UBool toASCII, |
| 679 IDNAInfo &info, UErrorCode &errorCode) const { | 692 IDNAInfo &info, UErrorCode &errorCode) const { |
| 693 if(U_FAILURE(errorCode)) { |
| 694 return 0; |
| 695 } |
| 680 UnicodeString fromPunycode; | 696 UnicodeString fromPunycode; |
| 681 UnicodeString *labelString; | 697 UnicodeString *labelString; |
| 682 const UChar *label=dest.getBuffer()+labelStart; | 698 const UChar *label=dest.getBuffer()+labelStart; |
| 683 int32_t destLabelStart=labelStart; | 699 int32_t destLabelStart=labelStart; |
| 684 int32_t destLabelLength=labelLength; | 700 int32_t destLabelLength=labelLength; |
| 685 UBool wasPunycode; | 701 UBool wasPunycode; |
| 686 if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) { | 702 if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) { |
| 687 // Label starts with "xn--", try to un-Punycode it. | 703 // Label starts with "xn--", try to un-Punycode it. |
| 688 wasPunycode=TRUE; | 704 wasPunycode=TRUE; |
| 689 UChar *unicodeBuffer=fromPunycode.getBuffer(-1); // capacity==-1: most labels should fit | 705 UChar *unicodeBuffer=fromPunycode.getBuffer(-1); // capacity==-1: most labels should fit |
| (...skipping 14 matching lines...) Expand all Loading... |
| 704 return labelLength; | 720 return labelLength; |
| 705 } | 721 } |
| 706 punycodeErrorCode=U_ZERO_ERROR; | 722 punycodeErrorCode=U_ZERO_ERROR; |
| 707 unicodeLength=u_strFromPunycode(label+4, labelLength-4, | 723 unicodeLength=u_strFromPunycode(label+4, labelLength-4, |
| 708 unicodeBuffer, fromPunycode.getCapacity(), | 724 unicodeBuffer, fromPunycode.getCapacity(), |
| 709 NULL, &punycodeErrorCode); | 725 NULL, &punycodeErrorCode); |
| 710 } | 726 } |
| 711 fromPunycode.releaseBuffer(unicodeLength); | 727 fromPunycode.releaseBuffer(unicodeLength); |
| 712 if(U_FAILURE(punycodeErrorCode)) { | 728 if(U_FAILURE(punycodeErrorCode)) { |
| 713 info.labelErrors|=UIDNA_ERROR_PUNYCODE; | 729 info.labelErrors|=UIDNA_ERROR_PUNYCODE; |
| 714 return markBadACELabel(dest, labelStart, labelLength, toASCII, info); | 730 return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); |
| 715 } | 731 } |
| 716 // Check for NFC, and for characters that are not | 732 // Check for NFC, and for characters that are not |
| 717 // valid or deviation characters according to the normalizer. | 733 // valid or deviation characters according to the normalizer. |
| 718 // If there is something wrong, then the string will change. | 734 // If there is something wrong, then the string will change. |
| 719 // Note that the normalizer passes through non-LDH ASCII and deviation characters. | 735 // Note that the normalizer passes through non-LDH ASCII and deviation characters. |
| 720 // Deviation characters are ok in Punycode even in transitional processing. | 736 // Deviation characters are ok in Punycode even in transitional processing. |
| 721 // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES | 737 // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES |
| 722 // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too. | 738 // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too. |
| 723 UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode); | 739 UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode); |
| 724 if(U_FAILURE(errorCode)) { | 740 if(U_FAILURE(errorCode)) { |
| 725 return labelLength; | 741 return labelLength; |
| 726 } | 742 } |
| 727 if(!isValid) { | 743 if(!isValid) { |
| 728 info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; | 744 info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; |
| 729 return markBadACELabel(dest, labelStart, labelLength, toASCII, info); | 745 return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); |
| 730 } | 746 } |
| 731 labelString=&fromPunycode; | 747 labelString=&fromPunycode; |
| 732 label=fromPunycode.getBuffer(); | 748 label=fromPunycode.getBuffer(); |
| 733 labelStart=0; | 749 labelStart=0; |
| 734 labelLength=fromPunycode.length(); | 750 labelLength=fromPunycode.length(); |
| 735 } else { | 751 } else { |
| 736 wasPunycode=FALSE; | 752 wasPunycode=FALSE; |
| 737 labelString=&dest; | 753 labelString=&dest; |
| 738 } | 754 } |
| 739 // Validity check | 755 // Validity check |
| 740 if(labelLength==0) { | 756 if(labelLength==0) { |
| 741 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | 757 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; |
| 742 return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); | 758 return replaceLabel(dest, destLabelStart, destLabelLength, |
| 759 *labelString, labelLength, errorCode); |
| 743 } | 760 } |
| 744 // labelLength>0 | 761 // labelLength>0 |
| 745 if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { | 762 if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { |
| 746 // label starts with "??--" | 763 // label starts with "??--" |
| 747 info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; | 764 info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; |
| 748 } | 765 } |
| 749 if(label[0]==0x2d) { | 766 if(label[0]==0x2d) { |
| 750 // label starts with "-" | 767 // label starts with "-" |
| 751 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | 768 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; |
| 752 } | 769 } |
| (...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 854 } | 871 } |
| 855 punycodeLength+=4; | 872 punycodeLength+=4; |
| 856 punycode.releaseBuffer(punycodeLength); | 873 punycode.releaseBuffer(punycodeLength); |
| 857 if(U_FAILURE(errorCode)) { | 874 if(U_FAILURE(errorCode)) { |
| 858 return destLabelLength; | 875 return destLabelLength; |
| 859 } | 876 } |
| 860 if(punycodeLength>63) { | 877 if(punycodeLength>63) { |
| 861 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 878 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
| 862 } | 879 } |
| 863 return replaceLabel(dest, destLabelStart, destLabelLength, | 880 return replaceLabel(dest, destLabelStart, destLabelLength, |
| 864 punycode, punycodeLength); | 881 punycode, punycodeLength, errorCode); |
| 865 } else { | 882 } else { |
| 866 // all-ASCII label | 883 // all-ASCII label |
| 867 if(labelLength>63) { | 884 if(labelLength>63) { |
| 868 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 885 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
| 869 } | 886 } |
| 870 } | 887 } |
| 871 } | 888 } |
| 872 } else { | 889 } else { |
| 873 // If a Punycode label has severe errors, | 890 // If a Punycode label has severe errors, |
| 874 // then leave it but make sure it does not look valid. | 891 // then leave it but make sure it does not look valid. |
| 875 if(wasPunycode) { | 892 if(wasPunycode) { |
| 876 info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; | 893 info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; |
| 877 return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info); | 894 return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info, errorCode); |
| 878 } | 895 } |
| 879 } | 896 } |
| 880 return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); | 897 return replaceLabel(dest, destLabelStart, destLabelLength, |
| 898 *labelString, labelLength, errorCode); |
| 881 } | 899 } |
| 882 | 900 |
| 883 // Make sure an ACE label does not look valid. | 901 // Make sure an ACE label does not look valid. |
| 884 // Append U+FFFD if the label has only LDH characters. | 902 // Append U+FFFD if the label has only LDH characters. |
| 885 // If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD. | 903 // If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD. |
| 886 int32_t | 904 int32_t |
| 887 UTS46::markBadACELabel(UnicodeString &dest, | 905 UTS46::markBadACELabel(UnicodeString &dest, |
| 888 int32_t labelStart, int32_t labelLength, | 906 int32_t labelStart, int32_t labelLength, |
| 889 UBool toASCII, IDNAInfo &info) const { | 907 UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const { |
| 908 if(U_FAILURE(errorCode)) { |
| 909 return 0; |
| 910 } |
| 890 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | 911 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; |
| 891 UBool isASCII=TRUE; | 912 UBool isASCII=TRUE; |
| 892 UBool onlyLDH=TRUE; | 913 UBool onlyLDH=TRUE; |
| 893 const UChar *label=dest.getBuffer()+labelStart; | 914 const UChar *label=dest.getBuffer()+labelStart; |
| 894 // Ok to cast away const because we own the UnicodeString. | 915 // Ok to cast away const because we own the UnicodeString. |
| 895 UChar *s=(UChar *)label+4; // After the initial "xn--". | 916 UChar *s=(UChar *)label+4; // After the initial "xn--". |
| 896 const UChar *limit=label+labelLength; | 917 const UChar *limit=label+labelLength; |
| 897 do { | 918 do { |
| 898 UChar c=*s; | 919 UChar c=*s; |
| 899 if(c<=0x7f) { | 920 if(c<=0x7f) { |
| 900 if(c==0x2e) { | 921 if(c==0x2e) { |
| 901 info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; | 922 info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; |
| 902 *s=0xfffd; | 923 *s=0xfffd; |
| 903 isASCII=onlyLDH=FALSE; | 924 isASCII=onlyLDH=FALSE; |
| 904 } else if(asciiData[c]<0) { | 925 } else if(asciiData[c]<0) { |
| 905 onlyLDH=FALSE; | 926 onlyLDH=FALSE; |
| 906 if(disallowNonLDHDot) { | 927 if(disallowNonLDHDot) { |
| 907 *s=0xfffd; | 928 *s=0xfffd; |
| 908 isASCII=FALSE; | 929 isASCII=FALSE; |
| 909 } | 930 } |
| 910 } | 931 } |
| 911 } else { | 932 } else { |
| 912 isASCII=onlyLDH=FALSE; | 933 isASCII=onlyLDH=FALSE; |
| 913 } | 934 } |
| 914 } while(++s<limit); | 935 } while(++s<limit); |
| 915 if(onlyLDH) { | 936 if(onlyLDH) { |
| 916 dest.insert(labelStart+labelLength, (UChar)0xfffd); | 937 dest.insert(labelStart+labelLength, (UChar)0xfffd); |
| 938 if(dest.isBogus()) { |
| 939 errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 940 return 0; |
| 941 } |
| 917 ++labelLength; | 942 ++labelLength; |
| 918 } else { | 943 } else { |
| 919 if(toASCII && isASCII && labelLength>63) { | 944 if(toASCII && isASCII && labelLength>63) { |
| 920 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 945 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
| 921 } | 946 } |
| 922 } | 947 } |
| 923 return labelLength; | 948 return labelLength; |
| 924 } | 949 } |
| 925 | 950 |
| 926 const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT); | 951 const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT); |
| (...skipping 511 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1438 } | 1463 } |
| 1439 StringPiece src(name, length<0 ? uprv_strlen(name) : length); | 1464 StringPiece src(name, length<0 ? uprv_strlen(name) : length); |
| 1440 CheckedArrayByteSink sink(dest, capacity); | 1465 CheckedArrayByteSink sink(dest, capacity); |
| 1441 IDNAInfo info; | 1466 IDNAInfo info; |
| 1442 reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode); | 1467 reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode); |
| 1443 idnaInfoToStruct(info, pInfo); | 1468 idnaInfoToStruct(info, pInfo); |
| 1444 return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); | 1469 return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); |
| 1445 } | 1470 } |
| 1446 | 1471 |
| 1447 #endif // UCONFIG_NO_IDNA | 1472 #endif // UCONFIG_NO_IDNA |
| OLD | NEW |