OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2010-2014, International Business Machines | 3 * Copyright (C) 2010-2015, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * file name: uts46.cpp | 6 * file name: uts46.cpp |
7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
9 * indentation:4 | 9 * indentation:4 |
10 * | 10 * |
11 * created on: 2010mar09 | 11 * created on: 2010mar09 |
12 * created by: Markus W. Scherer | 12 * created by: Markus W. Scherer |
13 */ | 13 */ |
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
173 | 173 |
174 // returns the new label length | 174 // returns the new label length |
175 int32_t | 175 int32_t |
176 processLabel(UnicodeString &dest, | 176 processLabel(UnicodeString &dest, |
177 int32_t labelStart, int32_t labelLength, | 177 int32_t labelStart, int32_t labelLength, |
178 UBool toASCII, | 178 UBool toASCII, |
179 IDNAInfo &info, UErrorCode &errorCode) const; | 179 IDNAInfo &info, UErrorCode &errorCode) const; |
180 int32_t | 180 int32_t |
181 markBadACELabel(UnicodeString &dest, | 181 markBadACELabel(UnicodeString &dest, |
182 int32_t labelStart, int32_t labelLength, | 182 int32_t labelStart, int32_t labelLength, |
183 UBool toASCII, IDNAInfo &info) const; | 183 UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const; |
184 | 184 |
185 void | 185 void |
186 checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) cons
t; | 186 checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) cons
t; |
187 | 187 |
188 UBool | 188 UBool |
189 isLabelOkContextJ(const UChar *label, int32_t labelLength) const; | 189 isLabelOkContextJ(const UChar *label, int32_t labelLength) const; |
190 | 190 |
191 void | 191 void |
192 checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info)
const; | 192 checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info)
const; |
193 | 193 |
(...skipping 386 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
580 processLabel(dest, labelStart, labelLimit-labelStart, | 580 processLabel(dest, labelStart, labelLimit-labelStart, |
581 toASCII, info, errorCode); | 581 toASCII, info, errorCode); |
582 info.errors|=info.labelErrors; | 582 info.errors|=info.labelErrors; |
583 } | 583 } |
584 return dest; | 584 return dest; |
585 } | 585 } |
586 | 586 |
587 int32_t | 587 int32_t |
588 UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart
, | 588 UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart
, |
589 UErrorCode &errorCode) const { | 589 UErrorCode &errorCode) const { |
| 590 if(U_FAILURE(errorCode)) { |
| 591 return 0; |
| 592 } |
590 int32_t length=dest.length(); | 593 int32_t length=dest.length(); |
591 UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length); | 594 UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length); |
592 if(s==NULL) { | 595 if(s==NULL) { |
593 errorCode=U_MEMORY_ALLOCATION_ERROR; | 596 errorCode=U_MEMORY_ALLOCATION_ERROR; |
594 return length; | 597 return length; |
595 } | 598 } |
596 int32_t capacity=dest.getCapacity(); | 599 int32_t capacity=dest.getCapacity(); |
597 UBool didMapDevChars=FALSE; | 600 UBool didMapDevChars=FALSE; |
598 int32_t readIndex=mappingStart, writeIndex=mappingStart; | 601 int32_t readIndex=mappingStart, writeIndex=mappingStart; |
599 do { | 602 do { |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
637 } while(writeIndex<length); | 640 } while(writeIndex<length); |
638 dest.releaseBuffer(length); | 641 dest.releaseBuffer(length); |
639 if(didMapDevChars) { | 642 if(didMapDevChars) { |
640 // Mapping deviation characters might have resulted in an un-NFC string. | 643 // Mapping deviation characters might have resulted in an un-NFC string. |
641 // We could use either the NFC or the UTS #46 normalizer. | 644 // We could use either the NFC or the UTS #46 normalizer. |
642 // By using the UTS #46 normalizer again, we avoid having to load a seco
nd .nrm data file. | 645 // By using the UTS #46 normalizer again, we avoid having to load a seco
nd .nrm data file. |
643 UnicodeString normalized; | 646 UnicodeString normalized; |
644 uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCo
de); | 647 uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCo
de); |
645 if(U_SUCCESS(errorCode)) { | 648 if(U_SUCCESS(errorCode)) { |
646 dest.replace(labelStart, 0x7fffffff, normalized); | 649 dest.replace(labelStart, 0x7fffffff, normalized); |
| 650 if(dest.isBogus()) { |
| 651 errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 652 } |
647 return dest.length(); | 653 return dest.length(); |
648 } | 654 } |
649 } | 655 } |
650 return length; | 656 return length; |
651 } | 657 } |
652 | 658 |
653 // Some non-ASCII characters are equivalent to sequences with | 659 // Some non-ASCII characters are equivalent to sequences with |
654 // non-LDH ASCII characters. To find them: | 660 // non-LDH ASCII characters. To find them: |
655 // grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt) | 661 // grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt) |
656 static inline UBool | 662 static inline UBool |
657 isNonASCIIDisallowedSTD3Valid(UChar32 c) { | 663 isNonASCIIDisallowedSTD3Valid(UChar32 c) { |
658 return c==0x2260 || c==0x226E || c==0x226F; | 664 return c==0x2260 || c==0x226E || c==0x226F; |
659 } | 665 } |
660 | 666 |
661 // Replace the label in dest with the label string, if the label was modified. | 667 // Replace the label in dest with the label string, if the label was modified. |
662 // If &label==&dest then the label was modified in-place and labelLength | 668 // If &label==&dest then the label was modified in-place and labelLength |
663 // is the new label length, different from label.length(). | 669 // is the new label length, different from label.length(). |
664 // If &label!=&dest then labelLength==label.length(). | 670 // If &label!=&dest then labelLength==label.length(). |
665 // Returns labelLength (= the new label length). | 671 // Returns labelLength (= the new label length). |
666 static int32_t | 672 static int32_t |
667 replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLengt
h, | 673 replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLengt
h, |
668 const UnicodeString &label, int32_t labelLength) { | 674 const UnicodeString &label, int32_t labelLength, UErrorCode &errorC
ode) { |
| 675 if(U_FAILURE(errorCode)) { |
| 676 return 0; |
| 677 } |
669 if(&label!=&dest) { | 678 if(&label!=&dest) { |
670 dest.replace(destLabelStart, destLabelLength, label); | 679 dest.replace(destLabelStart, destLabelLength, label); |
| 680 if(dest.isBogus()) { |
| 681 errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 682 return 0; |
| 683 } |
671 } | 684 } |
672 return labelLength; | 685 return labelLength; |
673 } | 686 } |
674 | 687 |
675 int32_t | 688 int32_t |
676 UTS46::processLabel(UnicodeString &dest, | 689 UTS46::processLabel(UnicodeString &dest, |
677 int32_t labelStart, int32_t labelLength, | 690 int32_t labelStart, int32_t labelLength, |
678 UBool toASCII, | 691 UBool toASCII, |
679 IDNAInfo &info, UErrorCode &errorCode) const { | 692 IDNAInfo &info, UErrorCode &errorCode) const { |
| 693 if(U_FAILURE(errorCode)) { |
| 694 return 0; |
| 695 } |
680 UnicodeString fromPunycode; | 696 UnicodeString fromPunycode; |
681 UnicodeString *labelString; | 697 UnicodeString *labelString; |
682 const UChar *label=dest.getBuffer()+labelStart; | 698 const UChar *label=dest.getBuffer()+labelStart; |
683 int32_t destLabelStart=labelStart; | 699 int32_t destLabelStart=labelStart; |
684 int32_t destLabelLength=labelLength; | 700 int32_t destLabelLength=labelLength; |
685 UBool wasPunycode; | 701 UBool wasPunycode; |
686 if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && l
abel[3]==0x2d) { | 702 if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && l
abel[3]==0x2d) { |
687 // Label starts with "xn--", try to un-Punycode it. | 703 // Label starts with "xn--", try to un-Punycode it. |
688 wasPunycode=TRUE; | 704 wasPunycode=TRUE; |
689 UChar *unicodeBuffer=fromPunycode.getBuffer(-1); // capacity==-1: most
labels should fit | 705 UChar *unicodeBuffer=fromPunycode.getBuffer(-1); // capacity==-1: most
labels should fit |
(...skipping 14 matching lines...) Expand all Loading... |
704 return labelLength; | 720 return labelLength; |
705 } | 721 } |
706 punycodeErrorCode=U_ZERO_ERROR; | 722 punycodeErrorCode=U_ZERO_ERROR; |
707 unicodeLength=u_strFromPunycode(label+4, labelLength-4, | 723 unicodeLength=u_strFromPunycode(label+4, labelLength-4, |
708 unicodeBuffer, fromPunycode.getCapac
ity(), | 724 unicodeBuffer, fromPunycode.getCapac
ity(), |
709 NULL, &punycodeErrorCode); | 725 NULL, &punycodeErrorCode); |
710 } | 726 } |
711 fromPunycode.releaseBuffer(unicodeLength); | 727 fromPunycode.releaseBuffer(unicodeLength); |
712 if(U_FAILURE(punycodeErrorCode)) { | 728 if(U_FAILURE(punycodeErrorCode)) { |
713 info.labelErrors|=UIDNA_ERROR_PUNYCODE; | 729 info.labelErrors|=UIDNA_ERROR_PUNYCODE; |
714 return markBadACELabel(dest, labelStart, labelLength, toASCII, info)
; | 730 return markBadACELabel(dest, labelStart, labelLength, toASCII, info,
errorCode); |
715 } | 731 } |
716 // Check for NFC, and for characters that are not | 732 // Check for NFC, and for characters that are not |
717 // valid or deviation characters according to the normalizer. | 733 // valid or deviation characters according to the normalizer. |
718 // If there is something wrong, then the string will change. | 734 // If there is something wrong, then the string will change. |
719 // Note that the normalizer passes through non-LDH ASCII and deviation c
haracters. | 735 // Note that the normalizer passes through non-LDH ASCII and deviation c
haracters. |
720 // Deviation characters are ok in Punycode even in transitional processi
ng. | 736 // Deviation characters are ok in Punycode even in transitional processi
ng. |
721 // In the code further below, if we find non-LDH ASCII and we have UIDNA
_USE_STD3_RULES | 737 // In the code further below, if we find non-LDH ASCII and we have UIDNA
_USE_STD3_RULES |
722 // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too. | 738 // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too. |
723 UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode); | 739 UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode); |
724 if(U_FAILURE(errorCode)) { | 740 if(U_FAILURE(errorCode)) { |
725 return labelLength; | 741 return labelLength; |
726 } | 742 } |
727 if(!isValid) { | 743 if(!isValid) { |
728 info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; | 744 info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; |
729 return markBadACELabel(dest, labelStart, labelLength, toASCII, info)
; | 745 return markBadACELabel(dest, labelStart, labelLength, toASCII, info,
errorCode); |
730 } | 746 } |
731 labelString=&fromPunycode; | 747 labelString=&fromPunycode; |
732 label=fromPunycode.getBuffer(); | 748 label=fromPunycode.getBuffer(); |
733 labelStart=0; | 749 labelStart=0; |
734 labelLength=fromPunycode.length(); | 750 labelLength=fromPunycode.length(); |
735 } else { | 751 } else { |
736 wasPunycode=FALSE; | 752 wasPunycode=FALSE; |
737 labelString=&dest; | 753 labelString=&dest; |
738 } | 754 } |
739 // Validity check | 755 // Validity check |
740 if(labelLength==0) { | 756 if(labelLength==0) { |
741 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; | 757 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; |
742 return replaceLabel(dest, destLabelStart, destLabelLength, *labelString,
labelLength); | 758 return replaceLabel(dest, destLabelStart, destLabelLength, |
| 759 *labelString, labelLength, errorCode); |
743 } | 760 } |
744 // labelLength>0 | 761 // labelLength>0 |
745 if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { | 762 if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { |
746 // label starts with "??--" | 763 // label starts with "??--" |
747 info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; | 764 info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; |
748 } | 765 } |
749 if(label[0]==0x2d) { | 766 if(label[0]==0x2d) { |
750 // label starts with "-" | 767 // label starts with "-" |
751 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; | 768 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; |
752 } | 769 } |
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
854 } | 871 } |
855 punycodeLength+=4; | 872 punycodeLength+=4; |
856 punycode.releaseBuffer(punycodeLength); | 873 punycode.releaseBuffer(punycodeLength); |
857 if(U_FAILURE(errorCode)) { | 874 if(U_FAILURE(errorCode)) { |
858 return destLabelLength; | 875 return destLabelLength; |
859 } | 876 } |
860 if(punycodeLength>63) { | 877 if(punycodeLength>63) { |
861 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 878 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
862 } | 879 } |
863 return replaceLabel(dest, destLabelStart, destLabelLength, | 880 return replaceLabel(dest, destLabelStart, destLabelLength, |
864 punycode, punycodeLength); | 881 punycode, punycodeLength, errorCode); |
865 } else { | 882 } else { |
866 // all-ASCII label | 883 // all-ASCII label |
867 if(labelLength>63) { | 884 if(labelLength>63) { |
868 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 885 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
869 } | 886 } |
870 } | 887 } |
871 } | 888 } |
872 } else { | 889 } else { |
873 // If a Punycode label has severe errors, | 890 // If a Punycode label has severe errors, |
874 // then leave it but make sure it does not look valid. | 891 // then leave it but make sure it does not look valid. |
875 if(wasPunycode) { | 892 if(wasPunycode) { |
876 info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; | 893 info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; |
877 return markBadACELabel(dest, destLabelStart, destLabelLength, toASCI
I, info); | 894 return markBadACELabel(dest, destLabelStart, destLabelLength, toASCI
I, info, errorCode); |
878 } | 895 } |
879 } | 896 } |
880 return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, lab
elLength); | 897 return replaceLabel(dest, destLabelStart, destLabelLength, |
| 898 *labelString, labelLength, errorCode); |
881 } | 899 } |
882 | 900 |
883 // Make sure an ACE label does not look valid. | 901 // Make sure an ACE label does not look valid. |
884 // Append U+FFFD if the label has only LDH characters. | 902 // Append U+FFFD if the label has only LDH characters. |
885 // If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD
. | 903 // If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD
. |
886 int32_t | 904 int32_t |
887 UTS46::markBadACELabel(UnicodeString &dest, | 905 UTS46::markBadACELabel(UnicodeString &dest, |
888 int32_t labelStart, int32_t labelLength, | 906 int32_t labelStart, int32_t labelLength, |
889 UBool toASCII, IDNAInfo &info) const { | 907 UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) con
st { |
| 908 if(U_FAILURE(errorCode)) { |
| 909 return 0; |
| 910 } |
890 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; | 911 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; |
891 UBool isASCII=TRUE; | 912 UBool isASCII=TRUE; |
892 UBool onlyLDH=TRUE; | 913 UBool onlyLDH=TRUE; |
893 const UChar *label=dest.getBuffer()+labelStart; | 914 const UChar *label=dest.getBuffer()+labelStart; |
894 // Ok to cast away const because we own the UnicodeString. | 915 // Ok to cast away const because we own the UnicodeString. |
895 UChar *s=(UChar *)label+4; // After the initial "xn--". | 916 UChar *s=(UChar *)label+4; // After the initial "xn--". |
896 const UChar *limit=label+labelLength; | 917 const UChar *limit=label+labelLength; |
897 do { | 918 do { |
898 UChar c=*s; | 919 UChar c=*s; |
899 if(c<=0x7f) { | 920 if(c<=0x7f) { |
900 if(c==0x2e) { | 921 if(c==0x2e) { |
901 info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; | 922 info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; |
902 *s=0xfffd; | 923 *s=0xfffd; |
903 isASCII=onlyLDH=FALSE; | 924 isASCII=onlyLDH=FALSE; |
904 } else if(asciiData[c]<0) { | 925 } else if(asciiData[c]<0) { |
905 onlyLDH=FALSE; | 926 onlyLDH=FALSE; |
906 if(disallowNonLDHDot) { | 927 if(disallowNonLDHDot) { |
907 *s=0xfffd; | 928 *s=0xfffd; |
908 isASCII=FALSE; | 929 isASCII=FALSE; |
909 } | 930 } |
910 } | 931 } |
911 } else { | 932 } else { |
912 isASCII=onlyLDH=FALSE; | 933 isASCII=onlyLDH=FALSE; |
913 } | 934 } |
914 } while(++s<limit); | 935 } while(++s<limit); |
915 if(onlyLDH) { | 936 if(onlyLDH) { |
916 dest.insert(labelStart+labelLength, (UChar)0xfffd); | 937 dest.insert(labelStart+labelLength, (UChar)0xfffd); |
| 938 if(dest.isBogus()) { |
| 939 errorCode=U_MEMORY_ALLOCATION_ERROR; |
| 940 return 0; |
| 941 } |
917 ++labelLength; | 942 ++labelLength; |
918 } else { | 943 } else { |
919 if(toASCII && isASCII && labelLength>63) { | 944 if(toASCII && isASCII && labelLength>63) { |
920 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; | 945 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; |
921 } | 946 } |
922 } | 947 } |
923 return labelLength; | 948 return labelLength; |
924 } | 949 } |
925 | 950 |
926 const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT); | 951 const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT); |
(...skipping 511 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1438 } | 1463 } |
1439 StringPiece src(name, length<0 ? uprv_strlen(name) : length); | 1464 StringPiece src(name, length<0 ? uprv_strlen(name) : length); |
1440 CheckedArrayByteSink sink(dest, capacity); | 1465 CheckedArrayByteSink sink(dest, capacity); |
1441 IDNAInfo info; | 1466 IDNAInfo info; |
1442 reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pE
rrorCode); | 1467 reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pE
rrorCode); |
1443 idnaInfoToStruct(info, pInfo); | 1468 idnaInfoToStruct(info, pInfo); |
1444 return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pError
Code); | 1469 return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pError
Code); |
1445 } | 1470 } |
1446 | 1471 |
1447 #endif // UCONFIG_NO_IDNA | 1472 #endif // UCONFIG_NO_IDNA |
OLD | NEW |