Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(804)

Side by Side Diff: runtime/vm/unicode_test.cc

Issue 11280150: Add support for surrogates when serializing and deserializing for native ports (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Use iterator reset Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« runtime/vm/unicode.cc ('K') | « runtime/vm/unicode.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/globals.h" 5 #include "vm/globals.h"
6 #include "vm/unicode.h" 6 #include "vm/unicode.h"
7 #include "vm/unit_test.h" 7 #include "vm/unit_test.h"
8 8
9 namespace dart { 9 namespace dart {
10 10
11 TEST_CASE(Utf8Decode) { 11 TEST_CASE(Utf8Decode) {
(...skipping 821 matching lines...) Expand 10 before | Expand all | Expand 10 after
833 833
834 // 5.1 - Single UTF-16 surrogates 834 // 5.1 - Single UTF-16 surrogates
835 835
836 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80" 836 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80"
837 { 837 {
838 const char* src = "\xED\xA0\x80"; 838 const char* src = "\xED\xA0\x80";
839 int32_t expected[] = { 0xD800 }; 839 int32_t expected[] = { 0xD800 };
840 int32_t dst[ARRAY_SIZE(expected)]; 840 int32_t dst[ARRAY_SIZE(expected)];
841 memset(dst, 0, sizeof(dst)); 841 memset(dst, 0, sizeof(dst));
842 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 842 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
843 EXPECT(!is_valid); 843 EXPECT(is_valid);
844 EXPECT(memcmp(expected, dst, sizeof(expected))); 844 EXPECT(!memcmp(expected, dst, sizeof(expected)));
845 } 845 }
846 846
847 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF" 847 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF"
848 { 848 {
849 const char* src = "\xED\xAD\xBF"; 849 const char* src = "\xED\xAD\xBF";
850 int32_t expected[] = { 0xDB7F }; 850 int32_t expected[] = { 0xDB7F };
851 int32_t dst[ARRAY_SIZE(expected)]; 851 int32_t dst[ARRAY_SIZE(expected)];
852 memset(dst, 0, sizeof(dst)); 852 memset(dst, 0, sizeof(dst));
853 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 853 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
854 EXPECT(!is_valid); 854 EXPECT(is_valid);
855 EXPECT(memcmp(expected, dst, sizeof(expected))); 855 EXPECT(!memcmp(expected, dst, sizeof(expected)));
856 } 856 }
857 857
858 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80" 858 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80"
859 { 859 {
860 const char* src = "\xED\xAE\x80"; 860 const char* src = "\xED\xAE\x80";
861 int32_t expected[] = { 0xDB80 }; 861 int32_t expected[] = { 0xDB80 };
862 int32_t dst[ARRAY_SIZE(expected)]; 862 int32_t dst[ARRAY_SIZE(expected)];
863 memset(dst, 0, sizeof(dst)); 863 memset(dst, 0, sizeof(dst));
864 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 864 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
865 EXPECT(!is_valid); 865 EXPECT(is_valid);
866 EXPECT(memcmp(expected, dst, sizeof(expected))); 866 EXPECT(!memcmp(expected, dst, sizeof(expected)));
867 } 867 }
868 868
869 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF" 869 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF"
870 { 870 {
871 const char* src = "\xED\xAF\xBF"; 871 const char* src = "\xED\xAF\xBF";
872 int32_t expected[] = { 0xDBFF }; 872 int32_t expected[] = { 0xDBFF };
873 int32_t dst[ARRAY_SIZE(expected)]; 873 int32_t dst[ARRAY_SIZE(expected)];
874 memset(dst, 0, sizeof(dst)); 874 memset(dst, 0, sizeof(dst));
875 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 875 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
876 EXPECT(!is_valid); 876 EXPECT(is_valid);
877 EXPECT(memcmp(expected, dst, sizeof(expected))); 877 EXPECT(!memcmp(expected, dst, sizeof(expected)));
878 } 878 }
879 879
880 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80" 880 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80"
881 { 881 {
882 const char* src = "\xED\xB0\x80"; 882 const char* src = "\xED\xB0\x80";
883 int32_t expected[] = { 0xDC00 }; 883 int32_t expected[] = { 0xDC00 };
884 int32_t dst[ARRAY_SIZE(expected)]; 884 int32_t dst[ARRAY_SIZE(expected)];
885 memset(dst, 0, sizeof(dst)); 885 memset(dst, 0, sizeof(dst));
886 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 886 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
887 EXPECT(!is_valid); 887 EXPECT(is_valid);
888 EXPECT(memcmp(expected, dst, sizeof(expected))); 888 EXPECT(!memcmp(expected, dst, sizeof(expected)));
889 } 889 }
890 890
891 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80" 891 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80"
892 { 892 {
893 const char* src = "\xED\xBE\x80"; 893 const char* src = "\xED\xBE\x80";
894 int32_t expected[] = { 0xDF80 }; 894 int32_t expected[] = { 0xDF80 };
895 int32_t dst[ARRAY_SIZE(expected)]; 895 int32_t dst[ARRAY_SIZE(expected)];
896 memset(dst, 0, sizeof(dst)); 896 memset(dst, 0, sizeof(dst));
897 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 897 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
898 EXPECT(!is_valid); 898 EXPECT(is_valid);
899 EXPECT(memcmp(expected, dst, sizeof(expected))); 899 EXPECT(!memcmp(expected, dst, sizeof(expected)));
900 } 900 }
901 901
902 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF" 902 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF"
903 { 903 {
904 const char* src = "\xED\xBF\xBF"; 904 const char* src = "\xED\xBF\xBF";
905 int32_t expected[] = { 0xDFFF }; 905 int32_t expected[] = { 0xDFFF };
906 int32_t dst[ARRAY_SIZE(expected)]; 906 int32_t dst[ARRAY_SIZE(expected)];
907 memset(dst, 0, sizeof(dst)); 907 memset(dst, 0, sizeof(dst));
908 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 908 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
909 EXPECT(!is_valid); 909 EXPECT(is_valid);
910 EXPECT(memcmp(expected, dst, sizeof(expected))); 910 EXPECT(!memcmp(expected, dst, sizeof(expected)));
911 } 911 }
912 912
913 // 5.2 Paired UTF-16 surrogates 913 // 5.2 Paired UTF-16 surrogates
914 914
915 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80" 915 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80"
916 { 916 {
917 const char* src = "\xED\xA0\x80\xED\xB0\x80"; 917 const char* src = "\xED\xA0\x80\xED\xB0\x80";
918 int32_t expected[] = { 0xD800, 0xDC00 }; 918 int32_t expected[] = { 0xD800, 0xDC00 };
919 int32_t dst[ARRAY_SIZE(expected)]; 919 int32_t dst[ARRAY_SIZE(expected)];
920 memset(dst, 0, sizeof(dst)); 920 memset(dst, 0, sizeof(dst));
921 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 921 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
922 EXPECT(!is_valid); 922 EXPECT(is_valid);
923 EXPECT(memcmp(expected, dst, sizeof(expected))); 923 EXPECT(!memcmp(expected, dst, sizeof(expected)));
924 } 924 }
925 925
926 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF" 926 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF"
927 { 927 {
928 const char* src = "\xED\xA0\x80\xED\xBF\xBF"; 928 const char* src = "\xED\xA0\x80\xED\xBF\xBF";
929 int32_t expected[] = { 0xD800, 0xDFFF }; 929 int32_t expected[] = { 0xD800, 0xDFFF };
930 int32_t dst[ARRAY_SIZE(expected)]; 930 int32_t dst[ARRAY_SIZE(expected)];
931 memset(dst, 0, sizeof(dst)); 931 memset(dst, 0, sizeof(dst));
932 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 932 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
933 EXPECT(!is_valid); 933 EXPECT(is_valid);
934 EXPECT(memcmp(expected, dst, sizeof(expected))); 934 EXPECT(!memcmp(expected, dst, sizeof(expected)));
935 } 935 }
936 936
937 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80" 937 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80"
938 { 938 {
939 const char* src = "\xED\xAD\xBF\xED\xB0\x80"; 939 const char* src = "\xED\xAD\xBF\xED\xB0\x80";
940 int32_t expected[] = { 0xDB7F, 0xDC00 }; 940 int32_t expected[] = { 0xDB7F, 0xDC00 };
941 int32_t dst[ARRAY_SIZE(expected)]; 941 int32_t dst[ARRAY_SIZE(expected)];
942 memset(dst, 0, sizeof(dst)); 942 memset(dst, 0, sizeof(dst));
943 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 943 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
944 EXPECT(!is_valid); 944 EXPECT(is_valid);
945 EXPECT(memcmp(expected, dst, sizeof(expected))); 945 EXPECT(!memcmp(expected, dst, sizeof(expected)));
946 } 946 }
947 947
948 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF" 948 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF"
949 { 949 {
950 const char* src = "\xED\xAD\xBF\xED\xBF\xBF"; 950 const char* src = "\xED\xAD\xBF\xED\xBF\xBF";
951 int32_t expected[] = { 0xDB7F, 0xDFFF }; 951 int32_t expected[] = { 0xDB7F, 0xDFFF };
952 int32_t dst[ARRAY_SIZE(expected)]; 952 int32_t dst[ARRAY_SIZE(expected)];
953 memset(dst, 0, sizeof(dst)); 953 memset(dst, 0, sizeof(dst));
954 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 954 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
955 EXPECT(!is_valid); 955 EXPECT(is_valid);
956 EXPECT(memcmp(expected, dst, sizeof(expected))); 956 EXPECT(!memcmp(expected, dst, sizeof(expected)));
957 } 957 }
958 958
959 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80" 959 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80"
960 { 960 {
961 const char* src = "\xED\xAE\x80\xED\xB0\x80"; 961 const char* src = "\xED\xAE\x80\xED\xB0\x80";
962 int32_t expected[] = { 0xDB80, 0xDC00 }; 962 int32_t expected[] = { 0xDB80, 0xDC00 };
963 int32_t dst[ARRAY_SIZE(expected)]; 963 int32_t dst[ARRAY_SIZE(expected)];
964 memset(dst, 0, sizeof(dst)); 964 memset(dst, 0, sizeof(dst));
965 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 965 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
966 EXPECT(!is_valid); 966 EXPECT(is_valid);
967 EXPECT(memcmp(expected, dst, sizeof(expected))); 967 EXPECT(!memcmp(expected, dst, sizeof(expected)));
968 } 968 }
969 969
970 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF" 970 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF"
971 { 971 {
972 const char* src = "\xED\xAE\x80\xED\xBF\xBF"; 972 const char* src = "\xED\xAE\x80\xED\xBF\xBF";
973 int32_t expected[] = { 0xDB80, 0xDFFF }; 973 int32_t expected[] = { 0xDB80, 0xDFFF };
974 int32_t dst[ARRAY_SIZE(expected)]; 974 int32_t dst[ARRAY_SIZE(expected)];
975 memset(dst, 0, sizeof(dst)); 975 memset(dst, 0, sizeof(dst));
976 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 976 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
977 EXPECT(!is_valid); 977 EXPECT(is_valid);
978 EXPECT(memcmp(expected, dst, sizeof(expected))); 978 EXPECT(!memcmp(expected, dst, sizeof(expected)));
979 } 979 }
980 980
981 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80" 981 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80"
982 { 982 {
983 const char* src = "\xED\xAF\xBF\xED\xB0\x80"; 983 const char* src = "\xED\xAF\xBF\xED\xB0\x80";
984 int32_t expected[] = { 0xDBFF, 0xDC00 }; 984 int32_t expected[] = { 0xDBFF, 0xDC00 };
985 int32_t dst[ARRAY_SIZE(expected)]; 985 int32_t dst[ARRAY_SIZE(expected)];
986 memset(dst, 0, sizeof(dst)); 986 memset(dst, 0, sizeof(dst));
987 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 987 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
988 EXPECT(!is_valid); 988 EXPECT(is_valid);
989 EXPECT(memcmp(expected, dst, sizeof(expected))); 989 EXPECT(!memcmp(expected, dst, sizeof(expected)));
990 } 990 }
991 991
992 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF" 992 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF"
993 { 993 {
994 const char* src = "\xED\xAF\xBF\xED\xBF\xBF"; 994 const char* src = "\xED\xAF\xBF\xED\xBF\xBF";
995 int32_t expected[] = { 0xDBFF, 0xDFFF }; 995 int32_t expected[] = { 0xDBFF, 0xDFFF };
996 int32_t dst[ARRAY_SIZE(expected)]; 996 int32_t dst[ARRAY_SIZE(expected)];
997 memset(dst, 0, sizeof(dst)); 997 memset(dst, 0, sizeof(dst));
998 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 998 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
999 EXPECT(!is_valid); 999 EXPECT(is_valid);
1000 EXPECT(memcmp(expected, dst, sizeof(expected))); 1000 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1001 } 1001 }
1002 1002
1003 // 5.3 - Other illegal code positions 1003 // 5.3 - Other illegal code positions
1004 1004
1005 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE" 1005 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE"
1006 { 1006 {
1007 const char* src = "\xEF\xBF\xBE"; 1007 const char* src = "\xEF\xBF\xBE";
1008 int32_t expected[] = { 0xFFFE }; 1008 int32_t expected[] = { 0xFFFE };
1009 int32_t dst[ARRAY_SIZE(expected)]; 1009 int32_t dst[ARRAY_SIZE(expected)];
1010 memset(dst, 0, sizeof(dst)); 1010 memset(dst, 0, sizeof(dst));
1011 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 1011 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1012 EXPECT(is_valid); 1012 EXPECT(is_valid);
1013 EXPECT(!memcmp(expected, dst, sizeof(expected))); 1013 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1014 } 1014 }
1015 1015
1016 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF" 1016 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF"
1017 { 1017 {
1018 const char* src = "\xEF\xBF\xBF"; 1018 const char* src = "\xEF\xBF\xBF";
1019 int32_t expected[] = { 0xFFFF }; 1019 int32_t expected[] = { 0xFFFF };
1020 int32_t dst[ARRAY_SIZE(expected)]; 1020 int32_t dst[ARRAY_SIZE(expected)];
1021 memset(dst, 0, sizeof(dst)); 1021 memset(dst, 0, sizeof(dst));
1022 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); 1022 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));
1023 EXPECT(is_valid); 1023 EXPECT(is_valid);
1024 EXPECT(!memcmp(expected, dst, sizeof(expected))); 1024 EXPECT(!memcmp(expected, dst, sizeof(expected)));
1025 } 1025 }
1026 } 1026 }
1027 1027
1028
1029 TEST_CASE(Utf16Iterator) {
1030 // Normal code units.
1031 {
1032 const int len = 4;
1033 const uint16_t src[] = { 32, 33, 255, 256 };
1034 Utf16::CodePointIterator it(src, len);
1035 for (int i = 0; i < len; i++) {
1036 EXPECT(it.Next());
1037 EXPECT_EQ(src[i], it.Current());
1038 }
1039 EXPECT(!it.Next());
1040 }
1041
1042 // Surrogate pairs.
1043 {
1044 const uint16_t src[] = { 0xd800, 0xdc00, 0xd800, 0xdc01 };
1045 Utf16::CodePointIterator it(src, 4);
1046 EXPECT(it.Next());
1047 EXPECT_EQ(0x10000, it.Current());
1048 EXPECT(it.Next());
1049 EXPECT_EQ(0x10001, it.Current());
1050 EXPECT(!it.Next());
1051 }
1052
1053 // Surrogate characters not in pairs.
1054 {
1055 const uint16_t src[] = { 0xdc00, 0xd800 };
1056 Utf16::CodePointIterator it(src, 2);
1057 EXPECT(it.Next());
1058 EXPECT_EQ(0xdc00, it.Current());
1059 EXPECT(it.Next());
1060 EXPECT_EQ(0xd800, it.Current());
1061 EXPECT(!it.Next());
1062 }
1063 }
1064
1028 } // namespace dart 1065 } // namespace dart
OLDNEW
« runtime/vm/unicode.cc ('K') | « runtime/vm/unicode.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698