Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(707)

Side by Side Diff: source/common/uts46.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/common/utext.cpp ('k') | source/common/utypes.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * Copyright (C) 2010-2014, International Business Machines 3 * Copyright (C) 2010-2015, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************* 5 *******************************************************************************
6 * file name: uts46.cpp 6 * file name: uts46.cpp
7 * encoding: US-ASCII 7 * encoding: US-ASCII
8 * tab size: 8 (not used) 8 * tab size: 8 (not used)
9 * indentation:4 9 * indentation:4
10 * 10 *
11 * created on: 2010mar09 11 * created on: 2010mar09
12 * created by: Markus W. Scherer 12 * created by: Markus W. Scherer
13 */ 13 */
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after
173 173
174 // returns the new label length 174 // returns the new label length
175 int32_t 175 int32_t
176 processLabel(UnicodeString &dest, 176 processLabel(UnicodeString &dest,
177 int32_t labelStart, int32_t labelLength, 177 int32_t labelStart, int32_t labelLength,
178 UBool toASCII, 178 UBool toASCII,
179 IDNAInfo &info, UErrorCode &errorCode) const; 179 IDNAInfo &info, UErrorCode &errorCode) const;
180 int32_t 180 int32_t
181 markBadACELabel(UnicodeString &dest, 181 markBadACELabel(UnicodeString &dest,
182 int32_t labelStart, int32_t labelLength, 182 int32_t labelStart, int32_t labelLength,
183 UBool toASCII, IDNAInfo &info) const; 183 UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const;
184 184
185 void 185 void
186 checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const; 186 checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const;
187 187
188 UBool 188 UBool
189 isLabelOkContextJ(const UChar *label, int32_t labelLength) const; 189 isLabelOkContextJ(const UChar *label, int32_t labelLength) const;
190 190
191 void 191 void
192 checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const; 192 checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const;
193 193
(...skipping 386 matching lines...) Expand 10 before | Expand all | Expand 10 after
580 processLabel(dest, labelStart, labelLimit-labelStart, 580 processLabel(dest, labelStart, labelLimit-labelStart,
581 toASCII, info, errorCode); 581 toASCII, info, errorCode);
582 info.errors|=info.labelErrors; 582 info.errors|=info.labelErrors;
583 } 583 }
584 return dest; 584 return dest;
585 } 585 }
586 586
587 int32_t 587 int32_t
588 UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, 588 UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart,
589 UErrorCode &errorCode) const { 589 UErrorCode &errorCode) const {
590 if(U_FAILURE(errorCode)) {
591 return 0;
592 }
590 int32_t length=dest.length(); 593 int32_t length=dest.length();
591 UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length); 594 UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length);
592 if(s==NULL) { 595 if(s==NULL) {
593 errorCode=U_MEMORY_ALLOCATION_ERROR; 596 errorCode=U_MEMORY_ALLOCATION_ERROR;
594 return length; 597 return length;
595 } 598 }
596 int32_t capacity=dest.getCapacity(); 599 int32_t capacity=dest.getCapacity();
597 UBool didMapDevChars=FALSE; 600 UBool didMapDevChars=FALSE;
598 int32_t readIndex=mappingStart, writeIndex=mappingStart; 601 int32_t readIndex=mappingStart, writeIndex=mappingStart;
599 do { 602 do {
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
637 } while(writeIndex<length); 640 } while(writeIndex<length);
638 dest.releaseBuffer(length); 641 dest.releaseBuffer(length);
639 if(didMapDevChars) { 642 if(didMapDevChars) {
640 // Mapping deviation characters might have resulted in an un-NFC string. 643 // Mapping deviation characters might have resulted in an un-NFC string.
641 // We could use either the NFC or the UTS #46 normalizer. 644 // We could use either the NFC or the UTS #46 normalizer.
642 // By using the UTS #46 normalizer again, we avoid having to load a second .nrm data file. 645 // By using the UTS #46 normalizer again, we avoid having to load a second .nrm data file.
643 UnicodeString normalized; 646 UnicodeString normalized;
644 uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode); 647 uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode);
645 if(U_SUCCESS(errorCode)) { 648 if(U_SUCCESS(errorCode)) {
646 dest.replace(labelStart, 0x7fffffff, normalized); 649 dest.replace(labelStart, 0x7fffffff, normalized);
650 if(dest.isBogus()) {
651 errorCode=U_MEMORY_ALLOCATION_ERROR;
652 }
647 return dest.length(); 653 return dest.length();
648 } 654 }
649 } 655 }
650 return length; 656 return length;
651 } 657 }
652 658
653 // Some non-ASCII characters are equivalent to sequences with 659 // Some non-ASCII characters are equivalent to sequences with
654 // non-LDH ASCII characters. To find them: 660 // non-LDH ASCII characters. To find them:
655 // grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt) 661 // grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt)
656 static inline UBool 662 static inline UBool
657 isNonASCIIDisallowedSTD3Valid(UChar32 c) { 663 isNonASCIIDisallowedSTD3Valid(UChar32 c) {
658 return c==0x2260 || c==0x226E || c==0x226F; 664 return c==0x2260 || c==0x226E || c==0x226F;
659 } 665 }
660 666
661 // Replace the label in dest with the label string, if the label was modified. 667 // Replace the label in dest with the label string, if the label was modified.
662 // If &label==&dest then the label was modified in-place and labelLength 668 // If &label==&dest then the label was modified in-place and labelLength
663 // is the new label length, different from label.length(). 669 // is the new label length, different from label.length().
664 // If &label!=&dest then labelLength==label.length(). 670 // If &label!=&dest then labelLength==label.length().
665 // Returns labelLength (= the new label length). 671 // Returns labelLength (= the new label length).
666 static int32_t 672 static int32_t
667 replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength, 673 replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength,
668 const UnicodeString &label, int32_t labelLength) { 674 const UnicodeString &label, int32_t labelLength, UErrorCode &errorCode) {
675 if(U_FAILURE(errorCode)) {
676 return 0;
677 }
669 if(&label!=&dest) { 678 if(&label!=&dest) {
670 dest.replace(destLabelStart, destLabelLength, label); 679 dest.replace(destLabelStart, destLabelLength, label);
680 if(dest.isBogus()) {
681 errorCode=U_MEMORY_ALLOCATION_ERROR;
682 return 0;
683 }
671 } 684 }
672 return labelLength; 685 return labelLength;
673 } 686 }
674 687
675 int32_t 688 int32_t
676 UTS46::processLabel(UnicodeString &dest, 689 UTS46::processLabel(UnicodeString &dest,
677 int32_t labelStart, int32_t labelLength, 690 int32_t labelStart, int32_t labelLength,
678 UBool toASCII, 691 UBool toASCII,
679 IDNAInfo &info, UErrorCode &errorCode) const { 692 IDNAInfo &info, UErrorCode &errorCode) const {
693 if(U_FAILURE(errorCode)) {
694 return 0;
695 }
680 UnicodeString fromPunycode; 696 UnicodeString fromPunycode;
681 UnicodeString *labelString; 697 UnicodeString *labelString;
682 const UChar *label=dest.getBuffer()+labelStart; 698 const UChar *label=dest.getBuffer()+labelStart;
683 int32_t destLabelStart=labelStart; 699 int32_t destLabelStart=labelStart;
684 int32_t destLabelLength=labelLength; 700 int32_t destLabelLength=labelLength;
685 UBool wasPunycode; 701 UBool wasPunycode;
686 if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) { 702 if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) {
687 // Label starts with "xn--", try to un-Punycode it. 703 // Label starts with "xn--", try to un-Punycode it.
688 wasPunycode=TRUE; 704 wasPunycode=TRUE;
689 UChar *unicodeBuffer=fromPunycode.getBuffer(-1); // capacity==-1: most labels should fit 705 UChar *unicodeBuffer=fromPunycode.getBuffer(-1); // capacity==-1: most labels should fit
(...skipping 14 matching lines...) Expand all
704 return labelLength; 720 return labelLength;
705 } 721 }
706 punycodeErrorCode=U_ZERO_ERROR; 722 punycodeErrorCode=U_ZERO_ERROR;
707 unicodeLength=u_strFromPunycode(label+4, labelLength-4, 723 unicodeLength=u_strFromPunycode(label+4, labelLength-4,
708 unicodeBuffer, fromPunycode.getCapacity(), 724 unicodeBuffer, fromPunycode.getCapacity(),
709 NULL, &punycodeErrorCode); 725 NULL, &punycodeErrorCode);
710 } 726 }
711 fromPunycode.releaseBuffer(unicodeLength); 727 fromPunycode.releaseBuffer(unicodeLength);
712 if(U_FAILURE(punycodeErrorCode)) { 728 if(U_FAILURE(punycodeErrorCode)) {
713 info.labelErrors|=UIDNA_ERROR_PUNYCODE; 729 info.labelErrors|=UIDNA_ERROR_PUNYCODE;
714 return markBadACELabel(dest, labelStart, labelLength, toASCII, info); 730 return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode);
715 } 731 }
716 // Check for NFC, and for characters that are not 732 // Check for NFC, and for characters that are not
717 // valid or deviation characters according to the normalizer. 733 // valid or deviation characters according to the normalizer.
718 // If there is something wrong, then the string will change. 734 // If there is something wrong, then the string will change.
719 // Note that the normalizer passes through non-LDH ASCII and deviation characters. 735 // Note that the normalizer passes through non-LDH ASCII and deviation characters.
720 // Deviation characters are ok in Punycode even in transitional processing. 736 // Deviation characters are ok in Punycode even in transitional processing.
721 // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES 737 // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES
722 // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too. 738 // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too.
723 UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode); 739 UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode);
724 if(U_FAILURE(errorCode)) { 740 if(U_FAILURE(errorCode)) {
725 return labelLength; 741 return labelLength;
726 } 742 }
727 if(!isValid) { 743 if(!isValid) {
728 info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; 744 info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
729 return markBadACELabel(dest, labelStart, labelLength, toASCII, info); 745 return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode);
730 } 746 }
731 labelString=&fromPunycode; 747 labelString=&fromPunycode;
732 label=fromPunycode.getBuffer(); 748 label=fromPunycode.getBuffer();
733 labelStart=0; 749 labelStart=0;
734 labelLength=fromPunycode.length(); 750 labelLength=fromPunycode.length();
735 } else { 751 } else {
736 wasPunycode=FALSE; 752 wasPunycode=FALSE;
737 labelString=&dest; 753 labelString=&dest;
738 } 754 }
739 // Validity check 755 // Validity check
740 if(labelLength==0) { 756 if(labelLength==0) {
741 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; 757 info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL;
742 return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); 758 return replaceLabel(dest, destLabelStart, destLabelLength,
759 *labelString, labelLength, errorCode);
743 } 760 }
744 // labelLength>0 761 // labelLength>0
745 if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { 762 if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) {
746 // label starts with "??--" 763 // label starts with "??--"
747 info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; 764 info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4;
748 } 765 }
749 if(label[0]==0x2d) { 766 if(label[0]==0x2d) {
750 // label starts with "-" 767 // label starts with "-"
751 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; 768 info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN;
752 } 769 }
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after
854 } 871 }
855 punycodeLength+=4; 872 punycodeLength+=4;
856 punycode.releaseBuffer(punycodeLength); 873 punycode.releaseBuffer(punycodeLength);
857 if(U_FAILURE(errorCode)) { 874 if(U_FAILURE(errorCode)) {
858 return destLabelLength; 875 return destLabelLength;
859 } 876 }
860 if(punycodeLength>63) { 877 if(punycodeLength>63) {
861 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; 878 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
862 } 879 }
863 return replaceLabel(dest, destLabelStart, destLabelLength, 880 return replaceLabel(dest, destLabelStart, destLabelLength,
864 punycode, punycodeLength); 881 punycode, punycodeLength, errorCode);
865 } else { 882 } else {
866 // all-ASCII label 883 // all-ASCII label
867 if(labelLength>63) { 884 if(labelLength>63) {
868 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; 885 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
869 } 886 }
870 } 887 }
871 } 888 }
872 } else { 889 } else {
873 // If a Punycode label has severe errors, 890 // If a Punycode label has severe errors,
874 // then leave it but make sure it does not look valid. 891 // then leave it but make sure it does not look valid.
875 if(wasPunycode) { 892 if(wasPunycode) {
876 info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; 893 info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL;
877 return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info); 894 return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info, errorCode);
878 } 895 }
879 } 896 }
880 return replaceLabel(dest, destLabelStart, destLabelLength, *labelString, labelLength); 897 return replaceLabel(dest, destLabelStart, destLabelLength,
898 *labelString, labelLength, errorCode);
881 } 899 }
882 900
883 // Make sure an ACE label does not look valid. 901 // Make sure an ACE label does not look valid.
884 // Append U+FFFD if the label has only LDH characters. 902 // Append U+FFFD if the label has only LDH characters.
885 // If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD. 903 // If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD.
886 int32_t 904 int32_t
887 UTS46::markBadACELabel(UnicodeString &dest, 905 UTS46::markBadACELabel(UnicodeString &dest,
888 int32_t labelStart, int32_t labelLength, 906 int32_t labelStart, int32_t labelLength,
889 UBool toASCII, IDNAInfo &info) const { 907 UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const {
908 if(U_FAILURE(errorCode)) {
909 return 0;
910 }
890 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; 911 UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0;
891 UBool isASCII=TRUE; 912 UBool isASCII=TRUE;
892 UBool onlyLDH=TRUE; 913 UBool onlyLDH=TRUE;
893 const UChar *label=dest.getBuffer()+labelStart; 914 const UChar *label=dest.getBuffer()+labelStart;
894 // Ok to cast away const because we own the UnicodeString. 915 // Ok to cast away const because we own the UnicodeString.
895 UChar *s=(UChar *)label+4; // After the initial "xn--". 916 UChar *s=(UChar *)label+4; // After the initial "xn--".
896 const UChar *limit=label+labelLength; 917 const UChar *limit=label+labelLength;
897 do { 918 do {
898 UChar c=*s; 919 UChar c=*s;
899 if(c<=0x7f) { 920 if(c<=0x7f) {
900 if(c==0x2e) { 921 if(c==0x2e) {
901 info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; 922 info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT;
902 *s=0xfffd; 923 *s=0xfffd;
903 isASCII=onlyLDH=FALSE; 924 isASCII=onlyLDH=FALSE;
904 } else if(asciiData[c]<0) { 925 } else if(asciiData[c]<0) {
905 onlyLDH=FALSE; 926 onlyLDH=FALSE;
906 if(disallowNonLDHDot) { 927 if(disallowNonLDHDot) {
907 *s=0xfffd; 928 *s=0xfffd;
908 isASCII=FALSE; 929 isASCII=FALSE;
909 } 930 }
910 } 931 }
911 } else { 932 } else {
912 isASCII=onlyLDH=FALSE; 933 isASCII=onlyLDH=FALSE;
913 } 934 }
914 } while(++s<limit); 935 } while(++s<limit);
915 if(onlyLDH) { 936 if(onlyLDH) {
916 dest.insert(labelStart+labelLength, (UChar)0xfffd); 937 dest.insert(labelStart+labelLength, (UChar)0xfffd);
938 if(dest.isBogus()) {
939 errorCode=U_MEMORY_ALLOCATION_ERROR;
940 return 0;
941 }
917 ++labelLength; 942 ++labelLength;
918 } else { 943 } else {
919 if(toASCII && isASCII && labelLength>63) { 944 if(toASCII && isASCII && labelLength>63) {
920 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; 945 info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG;
921 } 946 }
922 } 947 }
923 return labelLength; 948 return labelLength;
924 } 949 }
925 950
926 const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT); 951 const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT);
(...skipping 511 matching lines...) Expand 10 before | Expand all | Expand 10 after
1438 } 1463 }
1439 StringPiece src(name, length<0 ? uprv_strlen(name) : length); 1464 StringPiece src(name, length<0 ? uprv_strlen(name) : length);
1440 CheckedArrayByteSink sink(dest, capacity); 1465 CheckedArrayByteSink sink(dest, capacity);
1441 IDNAInfo info; 1466 IDNAInfo info;
1442 reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode); 1467 reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode);
1443 idnaInfoToStruct(info, pInfo); 1468 idnaInfoToStruct(info, pInfo);
1444 return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); 1469 return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode);
1445 } 1470 }
1446 1471
1447 #endif // UCONFIG_NO_IDNA 1472 #endif // UCONFIG_NO_IDNA
OLDNEW
« no previous file with comments | « source/common/utext.cpp ('k') | source/common/utypes.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698