runtime/vm/unicode_test.cc - Issue 11280150: Add support for surrogates when serializing and deserializing for native ports

Side by Side Diff: runtime/vm/unicode_test.cc

Issue 11280150: Add support for surrogates when serializing and deserializing for native ports (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Use iterator reset Created 8 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 #include "vm/globals.h"	5 #include "vm/globals.h"

6 #include "vm/unicode.h"	6 #include "vm/unicode.h"

7 #include "vm/unit_test.h"	7 #include "vm/unit_test.h"

8	8

9 namespace dart {	9 namespace dart {

10	10

11 TEST_CASE(Utf8Decode) {	11 TEST_CASE(Utf8Decode) {

(...skipping 821 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
833	833

834 // 5.1 - Single UTF-16 surrogates	834 // 5.1 - Single UTF-16 surrogates

835	835

836 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80"	836 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80"

837 {	837 {

838 const char* src = "\xED\xA0\x80";	838 const char* src = "\xED\xA0\x80";

839 int32_t expected[] = { 0xD800 };	839 int32_t expected[] = { 0xD800 };

840 int32_t dst[ARRAY_SIZE(expected)];	840 int32_t dst[ARRAY_SIZE(expected)];

841 memset(dst, 0, sizeof(dst));	841 memset(dst, 0, sizeof(dst));

842 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	842 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

843 EXPECT(!is_valid);	843 EXPECT(is_valid);

844 EXPECT(memcmp(expected, dst, sizeof(expected)));	844 EXPECT(!memcmp(expected, dst, sizeof(expected)));

845 }	845 }

846	846

847 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF"	847 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF"

848 {	848 {

849 const char* src = "\xED\xAD\xBF";	849 const char* src = "\xED\xAD\xBF";

850 int32_t expected[] = { 0xDB7F };	850 int32_t expected[] = { 0xDB7F };

851 int32_t dst[ARRAY_SIZE(expected)];	851 int32_t dst[ARRAY_SIZE(expected)];

852 memset(dst, 0, sizeof(dst));	852 memset(dst, 0, sizeof(dst));

853 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	853 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

854 EXPECT(!is_valid);	854 EXPECT(is_valid);

855 EXPECT(memcmp(expected, dst, sizeof(expected)));	855 EXPECT(!memcmp(expected, dst, sizeof(expected)));

856 }	856 }

857	857

858 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80"	858 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80"

859 {	859 {

860 const char* src = "\xED\xAE\x80";	860 const char* src = "\xED\xAE\x80";

861 int32_t expected[] = { 0xDB80 };	861 int32_t expected[] = { 0xDB80 };

862 int32_t dst[ARRAY_SIZE(expected)];	862 int32_t dst[ARRAY_SIZE(expected)];

863 memset(dst, 0, sizeof(dst));	863 memset(dst, 0, sizeof(dst));

864 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	864 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

865 EXPECT(!is_valid);	865 EXPECT(is_valid);

866 EXPECT(memcmp(expected, dst, sizeof(expected)));	866 EXPECT(!memcmp(expected, dst, sizeof(expected)));

867 }	867 }

868	868

869 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF"	869 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF"

870 {	870 {

871 const char* src = "\xED\xAF\xBF";	871 const char* src = "\xED\xAF\xBF";

872 int32_t expected[] = { 0xDBFF };	872 int32_t expected[] = { 0xDBFF };

873 int32_t dst[ARRAY_SIZE(expected)];	873 int32_t dst[ARRAY_SIZE(expected)];

874 memset(dst, 0, sizeof(dst));	874 memset(dst, 0, sizeof(dst));

875 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	875 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

876 EXPECT(!is_valid);	876 EXPECT(is_valid);

877 EXPECT(memcmp(expected, dst, sizeof(expected)));	877 EXPECT(!memcmp(expected, dst, sizeof(expected)));

878 }	878 }

879	879

880 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80"	880 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80"

881 {	881 {

882 const char* src = "\xED\xB0\x80";	882 const char* src = "\xED\xB0\x80";

883 int32_t expected[] = { 0xDC00 };	883 int32_t expected[] = { 0xDC00 };

884 int32_t dst[ARRAY_SIZE(expected)];	884 int32_t dst[ARRAY_SIZE(expected)];

885 memset(dst, 0, sizeof(dst));	885 memset(dst, 0, sizeof(dst));

886 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	886 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

887 EXPECT(!is_valid);	887 EXPECT(is_valid);

888 EXPECT(memcmp(expected, dst, sizeof(expected)));	888 EXPECT(!memcmp(expected, dst, sizeof(expected)));

889 }	889 }

890	890

891 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80"	891 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80"

892 {	892 {

893 const char* src = "\xED\xBE\x80";	893 const char* src = "\xED\xBE\x80";

894 int32_t expected[] = { 0xDF80 };	894 int32_t expected[] = { 0xDF80 };

895 int32_t dst[ARRAY_SIZE(expected)];	895 int32_t dst[ARRAY_SIZE(expected)];

896 memset(dst, 0, sizeof(dst));	896 memset(dst, 0, sizeof(dst));

897 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	897 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

898 EXPECT(!is_valid);	898 EXPECT(is_valid);

899 EXPECT(memcmp(expected, dst, sizeof(expected)));	899 EXPECT(!memcmp(expected, dst, sizeof(expected)));

900 }	900 }

901	901

902 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF"	902 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF"

903 {	903 {

904 const char* src = "\xED\xBF\xBF";	904 const char* src = "\xED\xBF\xBF";

905 int32_t expected[] = { 0xDFFF };	905 int32_t expected[] = { 0xDFFF };

906 int32_t dst[ARRAY_SIZE(expected)];	906 int32_t dst[ARRAY_SIZE(expected)];

907 memset(dst, 0, sizeof(dst));	907 memset(dst, 0, sizeof(dst));

908 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	908 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

909 EXPECT(!is_valid);	909 EXPECT(is_valid);

910 EXPECT(memcmp(expected, dst, sizeof(expected)));	910 EXPECT(!memcmp(expected, dst, sizeof(expected)));

911 }	911 }

912	912

913 // 5.2 Paired UTF-16 surrogates	913 // 5.2 Paired UTF-16 surrogates

914	914

915 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80"	915 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80"

916 {	916 {

917 const char* src = "\xED\xA0\x80\xED\xB0\x80";	917 const char* src = "\xED\xA0\x80\xED\xB0\x80";

918 int32_t expected[] = { 0xD800, 0xDC00 };	918 int32_t expected[] = { 0xD800, 0xDC00 };

919 int32_t dst[ARRAY_SIZE(expected)];	919 int32_t dst[ARRAY_SIZE(expected)];

920 memset(dst, 0, sizeof(dst));	920 memset(dst, 0, sizeof(dst));

921 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	921 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

922 EXPECT(!is_valid);	922 EXPECT(is_valid);

923 EXPECT(memcmp(expected, dst, sizeof(expected)));	923 EXPECT(!memcmp(expected, dst, sizeof(expected)));

924 }	924 }

925	925

926 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF"	926 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF"

927 {	927 {

928 const char* src = "\xED\xA0\x80\xED\xBF\xBF";	928 const char* src = "\xED\xA0\x80\xED\xBF\xBF";

929 int32_t expected[] = { 0xD800, 0xDFFF };	929 int32_t expected[] = { 0xD800, 0xDFFF };

930 int32_t dst[ARRAY_SIZE(expected)];	930 int32_t dst[ARRAY_SIZE(expected)];

931 memset(dst, 0, sizeof(dst));	931 memset(dst, 0, sizeof(dst));

932 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	932 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

933 EXPECT(!is_valid);	933 EXPECT(is_valid);

934 EXPECT(memcmp(expected, dst, sizeof(expected)));	934 EXPECT(!memcmp(expected, dst, sizeof(expected)));

935 }	935 }

936	936

937 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80"	937 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80"

938 {	938 {

939 const char* src = "\xED\xAD\xBF\xED\xB0\x80";	939 const char* src = "\xED\xAD\xBF\xED\xB0\x80";

940 int32_t expected[] = { 0xDB7F, 0xDC00 };	940 int32_t expected[] = { 0xDB7F, 0xDC00 };

941 int32_t dst[ARRAY_SIZE(expected)];	941 int32_t dst[ARRAY_SIZE(expected)];

942 memset(dst, 0, sizeof(dst));	942 memset(dst, 0, sizeof(dst));

943 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	943 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

944 EXPECT(!is_valid);	944 EXPECT(is_valid);

945 EXPECT(memcmp(expected, dst, sizeof(expected)));	945 EXPECT(!memcmp(expected, dst, sizeof(expected)));

946 }	946 }

947	947

948 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF"	948 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF"

949 {	949 {

950 const char* src = "\xED\xAD\xBF\xED\xBF\xBF";	950 const char* src = "\xED\xAD\xBF\xED\xBF\xBF";

951 int32_t expected[] = { 0xDB7F, 0xDFFF };	951 int32_t expected[] = { 0xDB7F, 0xDFFF };

952 int32_t dst[ARRAY_SIZE(expected)];	952 int32_t dst[ARRAY_SIZE(expected)];

953 memset(dst, 0, sizeof(dst));	953 memset(dst, 0, sizeof(dst));

954 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	954 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

955 EXPECT(!is_valid);	955 EXPECT(is_valid);

956 EXPECT(memcmp(expected, dst, sizeof(expected)));	956 EXPECT(!memcmp(expected, dst, sizeof(expected)));

957 }	957 }

958	958

959 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80"	959 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80"

960 {	960 {

961 const char* src = "\xED\xAE\x80\xED\xB0\x80";	961 const char* src = "\xED\xAE\x80\xED\xB0\x80";

962 int32_t expected[] = { 0xDB80, 0xDC00 };	962 int32_t expected[] = { 0xDB80, 0xDC00 };

963 int32_t dst[ARRAY_SIZE(expected)];	963 int32_t dst[ARRAY_SIZE(expected)];

964 memset(dst, 0, sizeof(dst));	964 memset(dst, 0, sizeof(dst));

965 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	965 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

966 EXPECT(!is_valid);	966 EXPECT(is_valid);

967 EXPECT(memcmp(expected, dst, sizeof(expected)));	967 EXPECT(!memcmp(expected, dst, sizeof(expected)));

968 }	968 }

969	969

970 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF"	970 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF"

971 {	971 {

972 const char* src = "\xED\xAE\x80\xED\xBF\xBF";	972 const char* src = "\xED\xAE\x80\xED\xBF\xBF";

973 int32_t expected[] = { 0xDB80, 0xDFFF };	973 int32_t expected[] = { 0xDB80, 0xDFFF };

974 int32_t dst[ARRAY_SIZE(expected)];	974 int32_t dst[ARRAY_SIZE(expected)];

975 memset(dst, 0, sizeof(dst));	975 memset(dst, 0, sizeof(dst));

976 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	976 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

977 EXPECT(!is_valid);	977 EXPECT(is_valid);

978 EXPECT(memcmp(expected, dst, sizeof(expected)));	978 EXPECT(!memcmp(expected, dst, sizeof(expected)));

979 }	979 }

980	980

981 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80"	981 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80"

982 {	982 {

983 const char* src = "\xED\xAF\xBF\xED\xB0\x80";	983 const char* src = "\xED\xAF\xBF\xED\xB0\x80";

984 int32_t expected[] = { 0xDBFF, 0xDC00 };	984 int32_t expected[] = { 0xDBFF, 0xDC00 };

985 int32_t dst[ARRAY_SIZE(expected)];	985 int32_t dst[ARRAY_SIZE(expected)];

986 memset(dst, 0, sizeof(dst));	986 memset(dst, 0, sizeof(dst));

987 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	987 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

988 EXPECT(!is_valid);	988 EXPECT(is_valid);

989 EXPECT(memcmp(expected, dst, sizeof(expected)));	989 EXPECT(!memcmp(expected, dst, sizeof(expected)));

990 }	990 }

991	991

992 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF"	992 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF"

993 {	993 {

994 const char* src = "\xED\xAF\xBF\xED\xBF\xBF";	994 const char* src = "\xED\xAF\xBF\xED\xBF\xBF";

995 int32_t expected[] = { 0xDBFF, 0xDFFF };	995 int32_t expected[] = { 0xDBFF, 0xDFFF };

996 int32_t dst[ARRAY_SIZE(expected)];	996 int32_t dst[ARRAY_SIZE(expected)];

997 memset(dst, 0, sizeof(dst));	997 memset(dst, 0, sizeof(dst));

998 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	998 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

999 EXPECT(!is_valid);	999 EXPECT(is_valid);

1000 EXPECT(memcmp(expected, dst, sizeof(expected)));	1000 EXPECT(!memcmp(expected, dst, sizeof(expected)));

1001 }	1001 }

1002	1002

1003 // 5.3 - Other illegal code positions	1003 // 5.3 - Other illegal code positions

1004	1004

1005 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE"	1005 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE"

1006 {	1006 {

1007 const char* src = "\xEF\xBF\xBE";	1007 const char* src = "\xEF\xBF\xBE";

1008 int32_t expected[] = { 0xFFFE };	1008 int32_t expected[] = { 0xFFFE };

1009 int32_t dst[ARRAY_SIZE(expected)];	1009 int32_t dst[ARRAY_SIZE(expected)];

1010 memset(dst, 0, sizeof(dst));	1010 memset(dst, 0, sizeof(dst));

1011 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	1011 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

1012 EXPECT(is_valid);	1012 EXPECT(is_valid);

1013 EXPECT(!memcmp(expected, dst, sizeof(expected)));	1013 EXPECT(!memcmp(expected, dst, sizeof(expected)));

1014 }	1014 }

1015	1015

1016 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF"	1016 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF"

1017 {	1017 {

1018 const char* src = "\xEF\xBF\xBF";	1018 const char* src = "\xEF\xBF\xBF";

1019 int32_t expected[] = { 0xFFFF };	1019 int32_t expected[] = { 0xFFFF };

1020 int32_t dst[ARRAY_SIZE(expected)];	1020 int32_t dst[ARRAY_SIZE(expected)];

1021 memset(dst, 0, sizeof(dst));	1021 memset(dst, 0, sizeof(dst));

1022 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));	1022 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst));

1023 EXPECT(is_valid);	1023 EXPECT(is_valid);

1024 EXPECT(!memcmp(expected, dst, sizeof(expected)));	1024 EXPECT(!memcmp(expected, dst, sizeof(expected)));

1025 }	1025 }

1026 }	1026 }

1027	1027

	1028

	1029 TEST_CASE(Utf16Iterator) {

	1030 // Normal code units.

	1031 {

	1032 const int len = 4;

	1033 const uint16_t src[] = { 32, 33, 255, 256 };

	1034 Utf16::CodePointIterator it(src, len);

	1035 for (int i = 0; i < len; i++) {

	1036 EXPECT(it.Next());

	1037 EXPECT_EQ(src[i], it.Current());

	1038 }

	1039 EXPECT(!it.Next());

	1040 }

	1041

	1042 // Surrogate pairs.

	1043 {

	1044 const uint16_t src[] = { 0xd800, 0xdc00, 0xd800, 0xdc01 };

	1045 Utf16::CodePointIterator it(src, 4);

	1046 EXPECT(it.Next());

	1047 EXPECT_EQ(0x10000, it.Current());

	1048 EXPECT(it.Next());

	1049 EXPECT_EQ(0x10001, it.Current());

	1050 EXPECT(!it.Next());

	1051 }

	1052

	1053 // Surrogate characters not in pairs.

	1054 {

	1055 const uint16_t src[] = { 0xdc00, 0xd800 };

	1056 Utf16::CodePointIterator it(src, 2);

	1057 EXPECT(it.Next());

	1058 EXPECT_EQ(0xdc00, it.Current());

	1059 EXPECT(it.Next());

	1060 EXPECT_EQ(0xd800, it.Current());

	1061 EXPECT(!it.Next());

	1062 }

	1063 }

	1064

1028 } // namespace dart	1065 } // namespace dart

OLD	NEW

« runtime/vm/unicode.cc ('K') | « runtime/vm/unicode.cc ('k') | no next file » | no next file with comments »