| OLD | NEW |
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #include "vm/globals.h" | 5 #include "vm/globals.h" |
| 6 #include "vm/unicode.h" | 6 #include "vm/unicode.h" |
| 7 #include "vm/unit_test.h" | 7 #include "vm/unit_test.h" |
| 8 | 8 |
| 9 namespace dart { | 9 namespace dart { |
| 10 | 10 |
| 11 TEST_CASE(Utf8Decode) { | 11 TEST_CASE(Utf8Decode) { |
| 12 // Examples from the Unicode specification, chapter 3 | 12 // Examples from the Unicode specification, chapter 3 |
| 13 { | 13 { |
| 14 const char* src = "\x41\xC3\xB1\x42"; | 14 const char* src = "\x41\xC3\xB1\x42"; |
| 15 int32_t expected[] = { 0x41, 0xF1, 0x42 }; | 15 int32_t expected[] = {0x41, 0xF1, 0x42}; |
| 16 int32_t dst[ARRAY_SIZE(expected)]; | 16 int32_t dst[ARRAY_SIZE(expected)]; |
| 17 memset(dst, 0, sizeof(dst)); | 17 memset(dst, 0, sizeof(dst)); |
| 18 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 18 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 19 EXPECT(is_valid); | 19 EXPECT(is_valid); |
| 20 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 20 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 21 } | 21 } |
| 22 | 22 |
| 23 { | 23 { |
| 24 const char* src = "\x4D"; | 24 const char* src = "\x4D"; |
| 25 int32_t expected[] = { 0x4D }; | 25 int32_t expected[] = {0x4D}; |
| 26 int32_t dst[ARRAY_SIZE(expected)]; | 26 int32_t dst[ARRAY_SIZE(expected)]; |
| 27 memset(dst, 0, sizeof(dst)); | 27 memset(dst, 0, sizeof(dst)); |
| 28 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 28 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 29 EXPECT(is_valid); | 29 EXPECT(is_valid); |
| 30 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 30 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 31 } | 31 } |
| 32 | 32 |
| 33 { | 33 { |
| 34 const char* src = "\xD0\xB0"; | 34 const char* src = "\xD0\xB0"; |
| 35 int32_t expected[] = { 0x430 }; | 35 int32_t expected[] = {0x430}; |
| 36 int32_t dst[ARRAY_SIZE(expected)]; | 36 int32_t dst[ARRAY_SIZE(expected)]; |
| 37 memset(dst, 0, sizeof(dst)); | 37 memset(dst, 0, sizeof(dst)); |
| 38 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 38 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 39 EXPECT(is_valid); | 39 EXPECT(is_valid); |
| 40 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 40 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 41 } | 41 } |
| 42 | 42 |
| 43 { | 43 { |
| 44 const char* src = "\xE4\xBA\x8C"; | 44 const char* src = "\xE4\xBA\x8C"; |
| 45 int32_t expected[] = { 0x4E8C }; | 45 int32_t expected[] = {0x4E8C}; |
| 46 int32_t dst[ARRAY_SIZE(expected)]; | 46 int32_t dst[ARRAY_SIZE(expected)]; |
| 47 memset(dst, 0, sizeof(dst)); | 47 memset(dst, 0, sizeof(dst)); |
| 48 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 48 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 49 EXPECT(is_valid); | 49 EXPECT(is_valid); |
| 50 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 50 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 51 } | 51 } |
| 52 | 52 |
| 53 { | 53 { |
| 54 const char* src = "\xF0\x90\x8C\x82"; | 54 const char* src = "\xF0\x90\x8C\x82"; |
| 55 int32_t expected[] = { 0x10302 }; | 55 int32_t expected[] = {0x10302}; |
| 56 int32_t dst[ARRAY_SIZE(expected)]; | 56 int32_t dst[ARRAY_SIZE(expected)]; |
| 57 memset(dst, 0, sizeof(dst)); | 57 memset(dst, 0, sizeof(dst)); |
| 58 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 58 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 59 EXPECT(is_valid); | 59 EXPECT(is_valid); |
| 60 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 60 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 61 } | 61 } |
| 62 | 62 |
| 63 { | 63 { |
| 64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82"; | 64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82"; |
| 65 int32_t expected[] = { 0x4D, 0x430, 0x4E8C, 0x10302 }; | 65 int32_t expected[] = {0x4D, 0x430, 0x4E8C, 0x10302}; |
| 66 int32_t dst[ARRAY_SIZE(expected)]; | 66 int32_t dst[ARRAY_SIZE(expected)]; |
| 67 memset(dst, 0, sizeof(dst)); | 67 memset(dst, 0, sizeof(dst)); |
| 68 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 68 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 69 EXPECT(is_valid); | 69 EXPECT(is_valid); |
| 70 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 70 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 71 } | 71 } |
| 72 | 72 |
| 73 // Mixture of non-ASCII and ASCII characters | 73 // Mixture of non-ASCII and ASCII characters |
| 74 { | 74 { |
| 75 const char* src = "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93" | 75 const char* src = |
| 76 "\x20" | 76 "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93" |
| 77 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94"; | 77 "\x20" |
| 78 int32_t expected[] = { 0x5D2, 0x5DC, 0x5E2, 0x5D3, | 78 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94"; |
| 79 0x20, | 79 int32_t expected[] = {0x5D2, 0x5DC, 0x5E2, 0x5D3, 0x20, |
| 80 0x5D1, 0x5E8, 0x5DB, 0x5D4 }; | 80 0x5D1, 0x5E8, 0x5DB, 0x5D4}; |
| 81 int32_t dst[ARRAY_SIZE(expected)]; | 81 int32_t dst[ARRAY_SIZE(expected)]; |
| 82 memset(dst, 0, sizeof(dst)); | 82 memset(dst, 0, sizeof(dst)); |
| 83 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 83 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 84 EXPECT(is_valid); | 84 EXPECT(is_valid); |
| 85 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 85 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 86 } | 86 } |
| 87 | 87 |
| 88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt | 88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt |
| 89 | 89 |
| 90 // 1 - Some correct UTF-8 text | 90 // 1 - Some correct UTF-8 text |
| 91 { | 91 { |
| 92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"; | 92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"; |
| 93 int32_t expected[] = { 0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5 }; | 93 int32_t expected[] = {0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5}; |
| 94 int32_t dst[ARRAY_SIZE(expected)]; | 94 int32_t dst[ARRAY_SIZE(expected)]; |
| 95 memset(dst, 0, sizeof(dst)); | 95 memset(dst, 0, sizeof(dst)); |
| 96 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 96 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 97 EXPECT(is_valid); | 97 EXPECT(is_valid); |
| 98 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 98 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 99 } | 99 } |
| 100 | 100 |
| 101 // 2 - Boundary condition test cases | 101 // 2 - Boundary condition test cases |
| 102 | 102 |
| 103 // 2.1 - First possible sequence of a certain length | 103 // 2.1 - First possible sequence of a certain length |
| 104 | 104 |
| 105 // 2.1.1 - 1 byte (U-00000000): "\x00" | 105 // 2.1.1 - 1 byte (U-00000000): "\x00" |
| 106 { | 106 { |
| 107 const char* src = "\x00"; | 107 const char* src = "\x00"; |
| 108 int32_t expected[] = { 0x0 }; | 108 int32_t expected[] = {0x0}; |
| 109 int32_t dst[ARRAY_SIZE(expected)]; | 109 int32_t dst[ARRAY_SIZE(expected)]; |
| 110 memset(dst, 0xFF, sizeof(dst)); | 110 memset(dst, 0xFF, sizeof(dst)); |
| 111 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 111 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 112 EXPECT(is_valid); | 112 EXPECT(is_valid); |
| 113 EXPECT(memcmp(expected, dst, sizeof(expected))); | 113 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 114 } | 114 } |
| 115 | 115 |
| 116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80" | 116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80" |
| 117 { | 117 { |
| 118 const char* src = "\xC2\x80"; | 118 const char* src = "\xC2\x80"; |
| 119 int32_t expected[] = { 0x80 }; | 119 int32_t expected[] = {0x80}; |
| 120 int32_t dst[ARRAY_SIZE(expected)]; | 120 int32_t dst[ARRAY_SIZE(expected)]; |
| 121 memset(dst, 0, sizeof(dst)); | 121 memset(dst, 0, sizeof(dst)); |
| 122 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 122 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 123 EXPECT(is_valid); | 123 EXPECT(is_valid); |
| 124 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 124 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 125 } | 125 } |
| 126 | 126 |
| 127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80" | 127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80" |
| 128 { | 128 { |
| 129 const char* src = "\xE0\xA0\x80"; | 129 const char* src = "\xE0\xA0\x80"; |
| 130 int32_t expected[] = { 0x800 }; | 130 int32_t expected[] = {0x800}; |
| 131 int32_t dst[ARRAY_SIZE(expected)]; | 131 int32_t dst[ARRAY_SIZE(expected)]; |
| 132 memset(dst, 0, sizeof(dst)); | 132 memset(dst, 0, sizeof(dst)); |
| 133 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 133 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 134 EXPECT(is_valid); | 134 EXPECT(is_valid); |
| 135 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 135 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 136 } | 136 } |
| 137 | 137 |
| 138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80" | 138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80" |
| 139 { | 139 { |
| 140 const char* src = "\xF0\x90\x80\x80"; | 140 const char* src = "\xF0\x90\x80\x80"; |
| 141 int32_t expected[] = { 0x10000 }; | 141 int32_t expected[] = {0x10000}; |
| 142 int32_t dst[ARRAY_SIZE(expected)]; | 142 int32_t dst[ARRAY_SIZE(expected)]; |
| 143 memset(dst, 0, sizeof(dst)); | 143 memset(dst, 0, sizeof(dst)); |
| 144 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 144 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 145 EXPECT(is_valid); | 145 EXPECT(is_valid); |
| 146 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 146 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 147 } | 147 } |
| 148 | 148 |
| 149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80" | 149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80" |
| 150 { | 150 { |
| 151 const char* src = "\xF8\x88\x80\x80\x80"; | 151 const char* src = "\xF8\x88\x80\x80\x80"; |
| 152 int32_t expected[] = { 0x200000 }; | 152 int32_t expected[] = {0x200000}; |
| 153 int32_t dst[ARRAY_SIZE(expected)]; | 153 int32_t dst[ARRAY_SIZE(expected)]; |
| 154 memset(dst, 0, sizeof(dst)); | 154 memset(dst, 0, sizeof(dst)); |
| 155 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 155 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 156 EXPECT(!is_valid); | 156 EXPECT(!is_valid); |
| 157 EXPECT(memcmp(expected, dst, sizeof(expected))); | 157 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 158 } | 158 } |
| 159 | 159 |
| 160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80" | 160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80" |
| 161 { | 161 { |
| 162 const char* src = "\xFC\x84\x80\x80\x80\x80"; | 162 const char* src = "\xFC\x84\x80\x80\x80\x80"; |
| 163 int32_t expected[] = { 0x400000 }; | 163 int32_t expected[] = {0x400000}; |
| 164 int32_t dst[ARRAY_SIZE(expected)]; | 164 int32_t dst[ARRAY_SIZE(expected)]; |
| 165 memset(dst, 0, sizeof(dst)); | 165 memset(dst, 0, sizeof(dst)); |
| 166 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 166 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 167 EXPECT(!is_valid); | 167 EXPECT(!is_valid); |
| 168 EXPECT(memcmp(expected, dst, sizeof(expected))); | 168 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 169 } | 169 } |
| 170 | 170 |
| 171 // 2.2 - Last possible sequence of a certain length | 171 // 2.2 - Last possible sequence of a certain length |
| 172 | 172 |
| 173 // 2.2.1 - 1 byte (U-0000007F): "\x7F" | 173 // 2.2.1 - 1 byte (U-0000007F): "\x7F" |
| 174 { | 174 { |
| 175 const char* src = "\x7F"; | 175 const char* src = "\x7F"; |
| 176 int32_t expected[] = { 0x7F }; | 176 int32_t expected[] = {0x7F}; |
| 177 int32_t dst[ARRAY_SIZE(expected)]; | 177 int32_t dst[ARRAY_SIZE(expected)]; |
| 178 memset(dst, 0, sizeof(dst)); | 178 memset(dst, 0, sizeof(dst)); |
| 179 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 179 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 180 EXPECT(is_valid); | 180 EXPECT(is_valid); |
| 181 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 181 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 182 } | 182 } |
| 183 | 183 |
| 184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF" | 184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF" |
| 185 { | 185 { |
| 186 const char* src = "\xDF\xBF"; | 186 const char* src = "\xDF\xBF"; |
| 187 int32_t expected[] = { 0x7FF }; | 187 int32_t expected[] = {0x7FF}; |
| 188 int32_t dst[ARRAY_SIZE(expected)]; | 188 int32_t dst[ARRAY_SIZE(expected)]; |
| 189 memset(dst, 0, sizeof(dst)); | 189 memset(dst, 0, sizeof(dst)); |
| 190 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 190 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 191 EXPECT(is_valid); | 191 EXPECT(is_valid); |
| 192 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 192 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 193 } | 193 } |
| 194 | 194 |
| 195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF" | 195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF" |
| 196 { | 196 { |
| 197 const char* src = "\xEF\xBF\xBF"; | 197 const char* src = "\xEF\xBF\xBF"; |
| 198 int32_t expected[] = { 0xFFFF }; | 198 int32_t expected[] = {0xFFFF}; |
| 199 int32_t dst[ARRAY_SIZE(expected)]; | 199 int32_t dst[ARRAY_SIZE(expected)]; |
| 200 memset(dst, 0, sizeof(dst)); | 200 memset(dst, 0, sizeof(dst)); |
| 201 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 201 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 202 EXPECT(is_valid); | 202 EXPECT(is_valid); |
| 203 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 203 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 204 } | 204 } |
| 205 | 205 |
| 206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF" | 206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF" |
| 207 { | 207 { |
| 208 const char* src = "\xF7\xBF\xBF\xBF"; | 208 const char* src = "\xF7\xBF\xBF\xBF"; |
| 209 int32_t expected[] = { 0x1FFFF }; | 209 int32_t expected[] = {0x1FFFF}; |
| 210 int32_t dst[ARRAY_SIZE(expected)]; | 210 int32_t dst[ARRAY_SIZE(expected)]; |
| 211 memset(dst, 0, sizeof(dst)); | 211 memset(dst, 0, sizeof(dst)); |
| 212 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 212 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 213 EXPECT(!is_valid); | 213 EXPECT(!is_valid); |
| 214 EXPECT(memcmp(expected, dst, sizeof(expected))); | 214 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 215 } | 215 } |
| 216 | 216 |
| 217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF" | 217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF" |
| 218 { | 218 { |
| 219 const char* src = "\xFB\xBF\xBF\xBF\xBF"; | 219 const char* src = "\xFB\xBF\xBF\xBF\xBF"; |
| 220 int32_t expected[] = { 0x3FFFFFF }; | 220 int32_t expected[] = {0x3FFFFFF}; |
| 221 int32_t dst[ARRAY_SIZE(expected)]; | 221 int32_t dst[ARRAY_SIZE(expected)]; |
| 222 memset(dst, 0, sizeof(dst)); | 222 memset(dst, 0, sizeof(dst)); |
| 223 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 223 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 224 EXPECT(!is_valid); | 224 EXPECT(!is_valid); |
| 225 EXPECT(memcmp(expected, dst, sizeof(expected))); | 225 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 226 } | 226 } |
| 227 | 227 |
| 228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF" | 228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF" |
| 229 { | 229 { |
| 230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF"; | 230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF"; |
| 231 int32_t expected[] = { 0x7FFFFFF }; | 231 int32_t expected[] = {0x7FFFFFF}; |
| 232 int32_t dst[ARRAY_SIZE(expected)]; | 232 int32_t dst[ARRAY_SIZE(expected)]; |
| 233 memset(dst, 0, sizeof(dst)); | 233 memset(dst, 0, sizeof(dst)); |
| 234 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 234 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 235 EXPECT(!is_valid); | 235 EXPECT(!is_valid); |
| 236 EXPECT(memcmp(expected, dst, sizeof(expected))); | 236 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 237 } | 237 } |
| 238 | 238 |
| 239 // 2.3 - Other boundary conditions | 239 // 2.3 - Other boundary conditions |
| 240 | 240 |
| 241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF" | 241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF" |
| 242 { | 242 { |
| 243 const char* src = "\xED\x9F\xBF"; | 243 const char* src = "\xED\x9F\xBF"; |
| 244 int32_t expected[] = { 0xD7FF }; | 244 int32_t expected[] = {0xD7FF}; |
| 245 int32_t dst[ARRAY_SIZE(expected)]; | 245 int32_t dst[ARRAY_SIZE(expected)]; |
| 246 memset(dst, 0, sizeof(dst)); | 246 memset(dst, 0, sizeof(dst)); |
| 247 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 247 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 248 EXPECT(is_valid); | 248 EXPECT(is_valid); |
| 249 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 249 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 250 } | 250 } |
| 251 | 251 |
| 252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80" | 252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80" |
| 253 { | 253 { |
| 254 const char* src = "\xEE\x80\x80"; | 254 const char* src = "\xEE\x80\x80"; |
| 255 int32_t expected[] = { 0xE000 }; | 255 int32_t expected[] = {0xE000}; |
| 256 int32_t dst[ARRAY_SIZE(expected)]; | 256 int32_t dst[ARRAY_SIZE(expected)]; |
| 257 memset(dst, 0, sizeof(dst)); | 257 memset(dst, 0, sizeof(dst)); |
| 258 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 258 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 259 EXPECT(is_valid); | 259 EXPECT(is_valid); |
| 260 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 260 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 261 } | 261 } |
| 262 | 262 |
| 263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD" | 263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD" |
| 264 { | 264 { |
| 265 const char* src = "\xEF\xBF\xBD"; | 265 const char* src = "\xEF\xBF\xBD"; |
| 266 int32_t expected[] = { 0xFFFD }; | 266 int32_t expected[] = {0xFFFD}; |
| 267 int32_t dst[ARRAY_SIZE(expected)]; | 267 int32_t dst[ARRAY_SIZE(expected)]; |
| 268 memset(dst, 0, sizeof(dst)); | 268 memset(dst, 0, sizeof(dst)); |
| 269 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 269 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 270 EXPECT(is_valid); | 270 EXPECT(is_valid); |
| 271 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 271 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 272 } | 272 } |
| 273 | 273 |
| 274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF" | 274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF" |
| 275 { | 275 { |
| 276 const char* src = "\xF4\x8F\xBF\xBF"; | 276 const char* src = "\xF4\x8F\xBF\xBF"; |
| 277 int32_t expected[] = { 0x10FFFF }; | 277 int32_t expected[] = {0x10FFFF}; |
| 278 int32_t dst[ARRAY_SIZE(expected)]; | 278 int32_t dst[ARRAY_SIZE(expected)]; |
| 279 memset(dst, 0, sizeof(dst)); | 279 memset(dst, 0, sizeof(dst)); |
| 280 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 280 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 281 EXPECT(is_valid); | 281 EXPECT(is_valid); |
| 282 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 282 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 283 } | 283 } |
| 284 | 284 |
| 285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80" | 285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80" |
| 286 { | 286 { |
| 287 const char* src = "\xF4\x90\x80\x80"; | 287 const char* src = "\xF4\x90\x80\x80"; |
| 288 int32_t expected[] = { 0x110000 }; | 288 int32_t expected[] = {0x110000}; |
| 289 int32_t dst[ARRAY_SIZE(expected)]; | 289 int32_t dst[ARRAY_SIZE(expected)]; |
| 290 memset(dst, 0, sizeof(dst)); | 290 memset(dst, 0, sizeof(dst)); |
| 291 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 291 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 292 EXPECT(!is_valid); | 292 EXPECT(!is_valid); |
| 293 EXPECT(memcmp(expected, dst, sizeof(expected))); | 293 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 294 } | 294 } |
| 295 | 295 |
| 296 // 3 - Malformed sequences | 296 // 3 - Malformed sequences |
| 297 | 297 |
| 298 // 3.1 - Unexpected continuation bytes | 298 // 3.1 - Unexpected continuation bytes |
| 299 | 299 |
| 300 // 3.1.1 - First continuation byte 0x80: "\x80" | 300 // 3.1.1 - First continuation byte 0x80: "\x80" |
| 301 { | 301 { |
| 302 const char* src = "\x80"; | 302 const char* src = "\x80"; |
| 303 int32_t expected[] = { 0x80 }; | 303 int32_t expected[] = {0x80}; |
| 304 int32_t dst[ARRAY_SIZE(expected)]; | 304 int32_t dst[ARRAY_SIZE(expected)]; |
| 305 memset(dst, 0, sizeof(dst)); | 305 memset(dst, 0, sizeof(dst)); |
| 306 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 306 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 307 EXPECT(!is_valid); | 307 EXPECT(!is_valid); |
| 308 EXPECT(memcmp(expected, dst, sizeof(expected))); | 308 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 309 } | 309 } |
| 310 | 310 |
| 311 // 3.1.2 - Last continuation byte 0xbf: "\xBF" | 311 // 3.1.2 - Last continuation byte 0xbf: "\xBF" |
| 312 { | 312 { |
| 313 const char* src = "\xBF"; | 313 const char* src = "\xBF"; |
| 314 int32_t expected[] = { 0xBF }; | 314 int32_t expected[] = {0xBF}; |
| 315 int32_t dst[ARRAY_SIZE(expected)]; | 315 int32_t dst[ARRAY_SIZE(expected)]; |
| 316 memset(dst, 0, sizeof(dst)); | 316 memset(dst, 0, sizeof(dst)); |
| 317 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 317 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 318 EXPECT(!is_valid); | 318 EXPECT(!is_valid); |
| 319 EXPECT(memcmp(expected, dst, sizeof(expected))); | 319 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 320 } | 320 } |
| 321 | 321 |
| 322 // 3.1.3 - 2 continuation bytes: "\x80\xBF" | 322 // 3.1.3 - 2 continuation bytes: "\x80\xBF" |
| 323 { | 323 { |
| 324 const char* src = "\x80\xBF"; | 324 const char* src = "\x80\xBF"; |
| 325 int32_t expected[] = { 0x80, 0xBF }; | 325 int32_t expected[] = {0x80, 0xBF}; |
| 326 int32_t dst[ARRAY_SIZE(expected)]; | 326 int32_t dst[ARRAY_SIZE(expected)]; |
| 327 memset(dst, 0, sizeof(dst)); | 327 memset(dst, 0, sizeof(dst)); |
| 328 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 328 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 329 EXPECT(!is_valid); | 329 EXPECT(!is_valid); |
| 330 EXPECT(memcmp(expected, dst, sizeof(expected))); | 330 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 331 } | 331 } |
| 332 | 332 |
| 333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80" | 333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80" |
| 334 { | 334 { |
| 335 const char* src = "\x80\xBF\x80"; | 335 const char* src = "\x80\xBF\x80"; |
| 336 int32_t expected[] = { 0x80, 0xBF, 0x80 }; | 336 int32_t expected[] = {0x80, 0xBF, 0x80}; |
| 337 int32_t dst[ARRAY_SIZE(expected)]; | 337 int32_t dst[ARRAY_SIZE(expected)]; |
| 338 memset(dst, 0, sizeof(dst)); | 338 memset(dst, 0, sizeof(dst)); |
| 339 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 339 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 340 EXPECT(!is_valid); | 340 EXPECT(!is_valid); |
| 341 EXPECT(memcmp(expected, dst, sizeof(expected))); | 341 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 342 } | 342 } |
| 343 | 343 |
| 344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF" | 344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF" |
| 345 { | 345 { |
| 346 const char* src = "\x80\xBF\x80\xBF"; | 346 const char* src = "\x80\xBF\x80\xBF"; |
| 347 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF }; | 347 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF}; |
| 348 int32_t dst[ARRAY_SIZE(expected)]; | 348 int32_t dst[ARRAY_SIZE(expected)]; |
| 349 memset(dst, 0, sizeof(dst)); | 349 memset(dst, 0, sizeof(dst)); |
| 350 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 350 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 351 EXPECT(!is_valid); | 351 EXPECT(!is_valid); |
| 352 EXPECT(memcmp(expected, dst, sizeof(expected))); | 352 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 353 } | 353 } |
| 354 | 354 |
| 355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80" | 355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80" |
| 356 { | 356 { |
| 357 const char* src = "\x80\xBF\x80\xBF\x80"; | 357 const char* src = "\x80\xBF\x80\xBF\x80"; |
| 358 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80 }; | 358 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80}; |
| 359 int32_t dst[ARRAY_SIZE(expected)]; | 359 int32_t dst[ARRAY_SIZE(expected)]; |
| 360 memset(dst, 0, sizeof(dst)); | 360 memset(dst, 0, sizeof(dst)); |
| 361 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 361 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 362 EXPECT(!is_valid); | 362 EXPECT(!is_valid); |
| 363 EXPECT(memcmp(expected, dst, sizeof(expected))); | 363 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 364 } | 364 } |
| 365 | 365 |
| 366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF" | 366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF" |
| 367 { | 367 { |
| 368 const char* src = "\x80\xBF\x80\xBF\x80\xBF"; | 368 const char* src = "\x80\xBF\x80\xBF\x80\xBF"; |
| 369 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF }; | 369 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}; |
| 370 int32_t dst[ARRAY_SIZE(expected)]; | 370 int32_t dst[ARRAY_SIZE(expected)]; |
| 371 memset(dst, 0, sizeof(dst)); | 371 memset(dst, 0, sizeof(dst)); |
| 372 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 372 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 373 EXPECT(!is_valid); | 373 EXPECT(!is_valid); |
| 374 EXPECT(memcmp(expected, dst, sizeof(expected))); | 374 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 375 } | 375 } |
| 376 | 376 |
| 377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80" | 377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80" |
| 378 { | 378 { |
| 379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80"; | 379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80"; |
| 380 int32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 }; | 380 int32_t expected[] = {0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80}; |
| 381 int32_t dst[ARRAY_SIZE(expected)]; | 381 int32_t dst[ARRAY_SIZE(expected)]; |
| 382 memset(dst, 0, sizeof(dst)); | 382 memset(dst, 0, sizeof(dst)); |
| 383 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 383 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 384 EXPECT(!is_valid); | 384 EXPECT(!is_valid); |
| 385 EXPECT(memcmp(expected, dst, sizeof(expected))); | 385 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 386 } | 386 } |
| 387 | 387 |
| 388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf): | 388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf): |
| 389 { | 389 { |
| 390 const char* src = "\x80\x81\x82\x83\x84\x85\x86\x87" | 390 const char* src = |
| 391 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" | 391 "\x80\x81\x82\x83\x84\x85\x86\x87" |
| 392 "\x90\x91\x92\x93\x94\x95\x96\x97" | 392 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" |
| 393 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" | 393 "\x90\x91\x92\x93\x94\x95\x96\x97" |
| 394 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" | 394 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" |
| 395 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" | 395 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" |
| 396 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" | 396 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" |
| 397 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"; | 397 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" |
| 398 int32_t expected[] = { 0x0 }; | 398 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"; |
| 399 int32_t expected[] = {0x0}; |
| 399 int32_t dst[ARRAY_SIZE(expected)]; | 400 int32_t dst[ARRAY_SIZE(expected)]; |
| 400 for (size_t i = 0; i < strlen(src); ++i) { | 401 for (size_t i = 0; i < strlen(src); ++i) { |
| 401 memset(dst, 0xFF, sizeof(dst)); | 402 memset(dst, 0xFF, sizeof(dst)); |
| 402 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 403 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
| 403 EXPECT(!is_valid); | 404 EXPECT(!is_valid); |
| 404 EXPECT(memcmp(expected, dst, sizeof(expected))); | 405 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 405 } | 406 } |
| 406 } | 407 } |
| 407 | 408 |
| 408 // 3.2 - Lonely start character | 409 // 3.2 - Lonely start character |
| 409 | 410 |
| 410 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each | 411 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each |
| 411 // followed by a space character: | 412 // followed by a space character: |
| 412 { | 413 { |
| 413 const char* src = "\xC0\x20\xC1\x20\xC2\x20\xC3\x20" | 414 const char* src = |
| 414 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20" | 415 "\xC0\x20\xC1\x20\xC2\x20\xC3\x20" |
| 415 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20" | 416 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20" |
| 416 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20" | 417 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20" |
| 417 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20" | 418 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20" |
| 418 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20" | 419 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20" |
| 419 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20" | 420 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20" |
| 420 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20"; | 421 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20" |
| 421 int32_t expected[] = { 0x0 }; | 422 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20"; |
| 423 int32_t expected[] = {0x0}; |
| 422 int32_t dst[ARRAY_SIZE(expected)]; | 424 int32_t dst[ARRAY_SIZE(expected)]; |
| 423 for (size_t i = 0; i < strlen(src); i += 2) { | 425 for (size_t i = 0; i < strlen(src); i += 2) { |
| 424 memset(dst, 0xFF, sizeof(dst)); | 426 memset(dst, 0xFF, sizeof(dst)); |
| 425 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 427 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
| 426 EXPECT(!is_valid); | 428 EXPECT(!is_valid); |
| 427 EXPECT(memcmp(expected, dst, sizeof(expected))); | 429 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 428 } | 430 } |
| 429 } | 431 } |
| 430 | 432 |
| 431 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each | 433 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each |
| 432 // followed by a space character: | 434 // followed by a space character: |
| 433 { | 435 { |
| 434 const char* src = "\xE0\x20\xE1\x20\xE2\x20\xE3\x20" | 436 const char* src = |
| 435 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20" | 437 "\xE0\x20\xE1\x20\xE2\x20\xE3\x20" |
| 436 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20" | 438 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20" |
| 437 "\xEC\x20\xED\x20\xEE\x20\xEF\x20"; | 439 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20" |
| 438 int32_t expected[] = { 0x0 }; | 440 "\xEC\x20\xED\x20\xEE\x20\xEF\x20"; |
| 441 int32_t expected[] = {0x0}; |
| 439 int32_t dst[ARRAY_SIZE(expected)]; | 442 int32_t dst[ARRAY_SIZE(expected)]; |
| 440 for (size_t i = 0; i < strlen(src); i += 2) { | 443 for (size_t i = 0; i < strlen(src); i += 2) { |
| 441 memset(dst, 0xFF, sizeof(dst)); | 444 memset(dst, 0xFF, sizeof(dst)); |
| 442 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 445 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
| 443 EXPECT(!is_valid); | 446 EXPECT(!is_valid); |
| 444 EXPECT(memcmp(expected, dst, sizeof(expected))); | 447 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 445 } | 448 } |
| 446 } | 449 } |
| 447 | 450 |
| 448 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each | 451 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each |
| 449 // followed by a space character: | 452 // followed by a space character: |
| 450 { | 453 { |
| 451 const char* src = "\xF0\x20\xF1\x20\xF2\x20\xF3\x20" | 454 const char* src = |
| 452 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20"; | 455 "\xF0\x20\xF1\x20\xF2\x20\xF3\x20" |
| 453 int32_t expected[] = { 0x0 }; | 456 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20"; |
| 457 int32_t expected[] = {0x0}; |
| 454 int32_t dst[ARRAY_SIZE(expected)]; | 458 int32_t dst[ARRAY_SIZE(expected)]; |
| 455 for (size_t i = 0; i < strlen(src); i += 2) { | 459 for (size_t i = 0; i < strlen(src); i += 2) { |
| 456 memset(dst, 0xFF, sizeof(dst)); | 460 memset(dst, 0xFF, sizeof(dst)); |
| 457 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 461 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
| 458 EXPECT(!is_valid); | 462 EXPECT(!is_valid); |
| 459 EXPECT(memcmp(expected, dst, sizeof(expected))); | 463 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 460 } | 464 } |
| 461 } | 465 } |
| 462 | 466 |
| 463 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each | 467 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each |
| 464 // followed by a space character: | 468 // followed by a space character: |
| 465 { | 469 { |
| 466 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20"; | 470 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20"; |
| 467 int32_t expected[] = { 0x0 }; | 471 int32_t expected[] = {0x0}; |
| 468 int32_t dst[ARRAY_SIZE(expected)]; | 472 int32_t dst[ARRAY_SIZE(expected)]; |
| 469 for (size_t i = 0; i < strlen(src); i += 2) { | 473 for (size_t i = 0; i < strlen(src); i += 2) { |
| 470 memset(dst, 0xFF, sizeof(dst)); | 474 memset(dst, 0xFF, sizeof(dst)); |
| 471 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 475 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
| 472 EXPECT(!is_valid); | 476 EXPECT(!is_valid); |
| 473 EXPECT(memcmp(expected, dst, sizeof(expected))); | 477 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 474 } | 478 } |
| 475 } | 479 } |
| 476 | 480 |
| 477 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each | 481 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each |
| 478 // followed by a space character: | 482 // followed by a space character: |
| 479 { | 483 { |
| 480 const char* src = "\xFC\x20\xFD\x20"; | 484 const char* src = "\xFC\x20\xFD\x20"; |
| 481 int32_t expected[] = { 0x0 }; | 485 int32_t expected[] = {0x0}; |
| 482 int32_t dst[ARRAY_SIZE(expected)]; | 486 int32_t dst[ARRAY_SIZE(expected)]; |
| 483 for (size_t i = 0; i < strlen(src); i += 2) { | 487 for (size_t i = 0; i < strlen(src); i += 2) { |
| 484 memset(dst, 0xFF, sizeof(dst)); | 488 memset(dst, 0xFF, sizeof(dst)); |
| 485 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); | 489 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
| 486 EXPECT(!is_valid); | 490 EXPECT(!is_valid); |
| 487 EXPECT(memcmp(expected, dst, sizeof(expected))); | 491 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 488 } | 492 } |
| 489 } | 493 } |
| 490 | 494 |
| 491 // 3.3 - Sequences with last continuation byte missing | 495 // 3.3 - Sequences with last continuation byte missing |
| 492 | 496 |
| 493 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0" | 497 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0" |
| 494 { | 498 { |
| 495 const char* src = "\xC0"; | 499 const char* src = "\xC0"; |
| 496 int32_t expected[] = { 0x0 }; | 500 int32_t expected[] = {0x0}; |
| 497 int32_t dst[ARRAY_SIZE(expected)]; | 501 int32_t dst[ARRAY_SIZE(expected)]; |
| 498 memset(dst, 0xFF, sizeof(dst)); | 502 memset(dst, 0xFF, sizeof(dst)); |
| 499 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 503 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 500 EXPECT(!is_valid); | 504 EXPECT(!is_valid); |
| 501 EXPECT(memcmp(expected, dst, sizeof(expected))); | 505 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 502 } | 506 } |
| 503 | 507 |
| 504 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80" | 508 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80" |
| 505 { | 509 { |
| 506 const char* src = "\xE0\x80"; | 510 const char* src = "\xE0\x80"; |
| 507 int32_t expected[] = { 0x0 }; | 511 int32_t expected[] = {0x0}; |
| 508 int32_t dst[ARRAY_SIZE(expected)]; | 512 int32_t dst[ARRAY_SIZE(expected)]; |
| 509 memset(dst, 0xFF, sizeof(dst)); | 513 memset(dst, 0xFF, sizeof(dst)); |
| 510 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 514 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 511 EXPECT(!is_valid); | 515 EXPECT(!is_valid); |
| 512 EXPECT(memcmp(expected, dst, sizeof(expected))); | 516 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 513 } | 517 } |
| 514 | 518 |
| 515 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80" | 519 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80" |
| 516 { | 520 { |
| 517 const char* src = "\xF0\x80\x80"; | 521 const char* src = "\xF0\x80\x80"; |
| 518 int32_t expected[] = { 0x0 }; | 522 int32_t expected[] = {0x0}; |
| 519 int32_t dst[ARRAY_SIZE(expected)]; | 523 int32_t dst[ARRAY_SIZE(expected)]; |
| 520 memset(dst, 0xFF, sizeof(dst)); | 524 memset(dst, 0xFF, sizeof(dst)); |
| 521 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 525 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 522 EXPECT(!is_valid); | 526 EXPECT(!is_valid); |
| 523 EXPECT(memcmp(expected, dst, sizeof(expected))); | 527 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 524 } | 528 } |
| 525 | 529 |
| 526 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80" | 530 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80" |
| 527 { | 531 { |
| 528 const char* src = "\xF8\x80\x80\x80"; | 532 const char* src = "\xF8\x80\x80\x80"; |
| 529 int32_t expected[] = { 0x0 }; | 533 int32_t expected[] = {0x0}; |
| 530 int32_t dst[ARRAY_SIZE(expected)]; | 534 int32_t dst[ARRAY_SIZE(expected)]; |
| 531 memset(dst, 0xFF, sizeof(dst)); | 535 memset(dst, 0xFF, sizeof(dst)); |
| 532 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 536 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 533 EXPECT(!is_valid); | 537 EXPECT(!is_valid); |
| 534 EXPECT(memcmp(expected, dst, sizeof(expected))); | 538 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 535 } | 539 } |
| 536 | 540 |
| 537 // 3.3.5 - 6-byte sequence with last byte missing (U+0000): | 541 // 3.3.5 - 6-byte sequence with last byte missing (U+0000): |
| 538 // "\xFC\x80\x80\x80\x80" | 542 // "\xFC\x80\x80\x80\x80" |
| 539 { | 543 { |
| 540 const char* src = "\xFC\x80\x80\x80\x80"; | 544 const char* src = "\xFC\x80\x80\x80\x80"; |
| 541 int32_t expected[] = { 0x0 }; | 545 int32_t expected[] = {0x0}; |
| 542 int32_t dst[ARRAY_SIZE(expected)]; | 546 int32_t dst[ARRAY_SIZE(expected)]; |
| 543 memset(dst, 0xFF, sizeof(dst)); | 547 memset(dst, 0xFF, sizeof(dst)); |
| 544 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 548 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 545 EXPECT(!is_valid); | 549 EXPECT(!is_valid); |
| 546 EXPECT(memcmp(expected, dst, sizeof(expected))); | 550 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 547 } | 551 } |
| 548 | 552 |
| 549 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF" | 553 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF" |
| 550 { | 554 { |
| 551 const char* src = "\xDF"; | 555 const char* src = "\xDF"; |
| 552 int32_t expected[] = { 0x0 }; | 556 int32_t expected[] = {0x0}; |
| 553 int32_t dst[ARRAY_SIZE(expected)]; | 557 int32_t dst[ARRAY_SIZE(expected)]; |
| 554 memset(dst, 0xFF, sizeof(dst)); | 558 memset(dst, 0xFF, sizeof(dst)); |
| 555 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 559 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 556 EXPECT(!is_valid); | 560 EXPECT(!is_valid); |
| 557 EXPECT(memcmp(expected, dst, sizeof(expected))); | 561 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 558 } | 562 } |
| 559 | 563 |
| 560 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF" | 564 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF" |
| 561 { | 565 { |
| 562 const char* src = "\xEF\xBF"; | 566 const char* src = "\xEF\xBF"; |
| 563 int32_t expected[] = { 0x0 }; | 567 int32_t expected[] = {0x0}; |
| 564 int32_t dst[ARRAY_SIZE(expected)]; | 568 int32_t dst[ARRAY_SIZE(expected)]; |
| 565 memset(dst, 0xFF, sizeof(dst)); | 569 memset(dst, 0xFF, sizeof(dst)); |
| 566 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 570 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 567 EXPECT(!is_valid); | 571 EXPECT(!is_valid); |
| 568 EXPECT(memcmp(expected, dst, sizeof(expected))); | 572 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 569 } | 573 } |
| 570 | 574 |
| 571 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF" | 575 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF" |
| 572 { | 576 { |
| 573 const char* src = "\xF7\xBF\xBF"; | 577 const char* src = "\xF7\xBF\xBF"; |
| 574 int32_t expected[] = { 0x0 }; | 578 int32_t expected[] = {0x0}; |
| 575 int32_t dst[ARRAY_SIZE(expected)]; | 579 int32_t dst[ARRAY_SIZE(expected)]; |
| 576 memset(dst, 0xFF, sizeof(dst)); | 580 memset(dst, 0xFF, sizeof(dst)); |
| 577 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 581 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 578 EXPECT(!is_valid); | 582 EXPECT(!is_valid); |
| 579 EXPECT(memcmp(expected, dst, sizeof(expected))); | 583 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 580 } | 584 } |
| 581 | 585 |
| 582 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF): | 586 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF): |
| 583 // "\xFB\xBF\xBF\xBF" | 587 // "\xFB\xBF\xBF\xBF" |
| 584 { | 588 { |
| 585 const char* src = "\xFB\xBF\xBF\xBF"; | 589 const char* src = "\xFB\xBF\xBF\xBF"; |
| 586 int32_t expected[] = { 0x0 }; | 590 int32_t expected[] = {0x0}; |
| 587 int32_t dst[ARRAY_SIZE(expected)]; | 591 int32_t dst[ARRAY_SIZE(expected)]; |
| 588 memset(dst, 0xFF, sizeof(dst)); | 592 memset(dst, 0xFF, sizeof(dst)); |
| 589 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 593 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 590 EXPECT(!is_valid); | 594 EXPECT(!is_valid); |
| 591 EXPECT(memcmp(expected, dst, sizeof(expected))); | 595 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 592 } | 596 } |
| 593 | 597 |
| 594 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF): | 598 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF): |
| 595 // "\xFD\xBF\xBF\xBF\xBF" | 599 // "\xFD\xBF\xBF\xBF\xBF" |
| 596 { | 600 { |
| 597 const char* src = "\xFD\xBF\xBF\xBF\xBF"; | 601 const char* src = "\xFD\xBF\xBF\xBF\xBF"; |
| 598 int32_t expected[] = { 0x0 }; | 602 int32_t expected[] = {0x0}; |
| 599 int32_t dst[ARRAY_SIZE(expected)]; | 603 int32_t dst[ARRAY_SIZE(expected)]; |
| 600 memset(dst, 0xFF, sizeof(dst)); | 604 memset(dst, 0xFF, sizeof(dst)); |
| 601 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 605 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 602 EXPECT(!is_valid); | 606 EXPECT(!is_valid); |
| 603 EXPECT(memcmp(expected, dst, sizeof(expected))); | 607 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 604 } | 608 } |
| 605 | 609 |
| 606 // 3.4 - Concatenation of incomplete sequences | 610 // 3.4 - Concatenation of incomplete sequences |
| 607 { | 611 { |
| 608 const char* src = "\xC0\xE0\x80\xF0\x80\x80" | 612 const char* src = |
| 609 "\xF8\x80\x80\x80\xFC\x80" | 613 "\xC0\xE0\x80\xF0\x80\x80" |
| 610 "\x80\x80\x80\xDF\xEF\xBF" | 614 "\xF8\x80\x80\x80\xFC\x80" |
| 611 "\xF7\xBF\xBF\xFB\xBF\xBF" | 615 "\x80\x80\x80\xDF\xEF\xBF" |
| 612 "\xBF\xFD\xBF\xBF\xBF\xBF"; | 616 "\xF7\xBF\xBF\xFB\xBF\xBF" |
| 613 int32_t expected[] = { 0x0 }; | 617 "\xBF\xFD\xBF\xBF\xBF\xBF"; |
| 618 int32_t expected[] = {0x0}; |
| 614 int32_t dst[ARRAY_SIZE(expected)]; | 619 int32_t dst[ARRAY_SIZE(expected)]; |
| 615 for (size_t i = 0; i < strlen(src); ++i) { | 620 for (size_t i = 0; i < strlen(src); ++i) { |
| 616 for (size_t j = 1; j < (strlen(src) - i); ++j) { | 621 for (size_t j = 1; j < (strlen(src) - i); ++j) { |
| 617 memset(dst, 0xFF, sizeof(dst)); | 622 memset(dst, 0xFF, sizeof(dst)); |
| 618 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], | 623 bool is_valid = |
| 619 dst, ARRAY_SIZE(dst)); | 624 Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
| 620 EXPECT(!is_valid); | 625 EXPECT(!is_valid); |
| 621 EXPECT(memcmp(expected, dst, sizeof(expected))); | 626 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 622 } | 627 } |
| 623 } | 628 } |
| 624 } | 629 } |
| 625 | 630 |
| 626 // 3.5 - Impossible bytes | 631 // 3.5 - Impossible bytes |
| 627 | 632 |
| 628 // 3.5.1 - fe = "\xFE" | 633 // 3.5.1 - fe = "\xFE" |
| 629 { | 634 { |
| 630 const char* src = "\xFE"; | 635 const char* src = "\xFE"; |
| 631 int32_t expected[] = { 0xFE }; | 636 int32_t expected[] = {0xFE}; |
| 632 int32_t dst[ARRAY_SIZE(expected)]; | 637 int32_t dst[ARRAY_SIZE(expected)]; |
| 633 memset(dst, 0, sizeof(dst)); | 638 memset(dst, 0, sizeof(dst)); |
| 634 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 639 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 635 EXPECT(!is_valid); | 640 EXPECT(!is_valid); |
| 636 EXPECT(memcmp(expected, dst, sizeof(expected))); | 641 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 637 } | 642 } |
| 638 | 643 |
| 639 // 3.5.2 - ff = "\xFF" | 644 // 3.5.2 - ff = "\xFF" |
| 640 { | 645 { |
| 641 const char* src = "\xFF"; | 646 const char* src = "\xFF"; |
| 642 int32_t expected[] = { 0xFF }; | 647 int32_t expected[] = {0xFF}; |
| 643 int32_t dst[ARRAY_SIZE(expected)]; | 648 int32_t dst[ARRAY_SIZE(expected)]; |
| 644 memset(dst, 0, sizeof(dst)); | 649 memset(dst, 0, sizeof(dst)); |
| 645 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 650 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 646 EXPECT(!is_valid); | 651 EXPECT(!is_valid); |
| 647 EXPECT(memcmp(expected, dst, sizeof(expected))); | 652 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 648 } | 653 } |
| 649 | 654 |
| 650 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF" | 655 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF" |
| 651 { | 656 { |
| 652 const char* src = "\xFE\xFE\xFF\xFF"; | 657 const char* src = "\xFE\xFE\xFF\xFF"; |
| 653 int32_t expected[] = { 0xFF }; | 658 int32_t expected[] = {0xFF}; |
| 654 int32_t dst[ARRAY_SIZE(expected)]; | 659 int32_t dst[ARRAY_SIZE(expected)]; |
| 655 memset(dst, 0, sizeof(dst)); | 660 memset(dst, 0, sizeof(dst)); |
| 656 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 661 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 657 EXPECT(!is_valid); | 662 EXPECT(!is_valid); |
| 658 EXPECT(memcmp(expected, dst, sizeof(expected))); | 663 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 659 } | 664 } |
| 660 | 665 |
| 661 // 4 - Overlong sequences | 666 // 4 - Overlong sequences |
| 662 | 667 |
| 663 // 4.1 - Examples of an overlong ASCII character | 668 // 4.1 - Examples of an overlong ASCII character |
| 664 | 669 |
| 665 // 4.1.1 - U+002F = c0 af = "\xC0\xAF" | 670 // 4.1.1 - U+002F = c0 af = "\xC0\xAF" |
| 666 { | 671 { |
| 667 const char* src = "\xC0\xAF"; | 672 const char* src = "\xC0\xAF"; |
| 668 int32_t expected[] = { 0x2F }; | 673 int32_t expected[] = {0x2F}; |
| 669 int32_t dst[ARRAY_SIZE(expected)]; | 674 int32_t dst[ARRAY_SIZE(expected)]; |
| 670 memset(dst, 0, sizeof(dst)); | 675 memset(dst, 0, sizeof(dst)); |
| 671 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 676 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 672 EXPECT(!is_valid); | 677 EXPECT(!is_valid); |
| 673 EXPECT(memcmp(expected, dst, sizeof(expected))); | 678 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 674 } | 679 } |
| 675 | 680 |
| 676 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF" | 681 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF" |
| 677 { | 682 { |
| 678 const char* src = "\xE0\x80\xAF"; | 683 const char* src = "\xE0\x80\xAF"; |
| 679 int32_t expected[] = { 0x2F }; | 684 int32_t expected[] = {0x2F}; |
| 680 int32_t dst[ARRAY_SIZE(expected)]; | 685 int32_t dst[ARRAY_SIZE(expected)]; |
| 681 memset(dst, 0, sizeof(dst)); | 686 memset(dst, 0, sizeof(dst)); |
| 682 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 687 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 683 EXPECT(!is_valid); | 688 EXPECT(!is_valid); |
| 684 EXPECT(memcmp(expected, dst, sizeof(expected))); | 689 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 685 } | 690 } |
| 686 | 691 |
| 687 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF" | 692 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF" |
| 688 { | 693 { |
| 689 const char* src = "\xF0\x80\x80\xAF"; | 694 const char* src = "\xF0\x80\x80\xAF"; |
| 690 int32_t expected[] = { 0x2F }; | 695 int32_t expected[] = {0x2F}; |
| 691 int32_t dst[ARRAY_SIZE(expected)]; | 696 int32_t dst[ARRAY_SIZE(expected)]; |
| 692 memset(dst, 0, sizeof(dst)); | 697 memset(dst, 0, sizeof(dst)); |
| 693 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 698 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 694 EXPECT(!is_valid); | 699 EXPECT(!is_valid); |
| 695 EXPECT(memcmp(expected, dst, sizeof(expected))); | 700 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 696 } | 701 } |
| 697 | 702 |
| 698 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF" | 703 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF" |
| 699 { | 704 { |
| 700 const char* src = "\xF8\x80\x80\x80\xAF"; | 705 const char* src = "\xF8\x80\x80\x80\xAF"; |
| 701 int32_t expected[] = { 0x2F }; | 706 int32_t expected[] = {0x2F}; |
| 702 int32_t dst[ARRAY_SIZE(expected)]; | 707 int32_t dst[ARRAY_SIZE(expected)]; |
| 703 memset(dst, 0, sizeof(dst)); | 708 memset(dst, 0, sizeof(dst)); |
| 704 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 709 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 705 EXPECT(!is_valid); | 710 EXPECT(!is_valid); |
| 706 EXPECT(memcmp(expected, dst, sizeof(expected))); | 711 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 707 } | 712 } |
| 708 | 713 |
| 709 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF" | 714 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF" |
| 710 { | 715 { |
| 711 const char* src = "\xFC\x80\x80\x80\x80\xAF"; | 716 const char* src = "\xFC\x80\x80\x80\x80\xAF"; |
| 712 int32_t expected[] = { 0x2F }; | 717 int32_t expected[] = {0x2F}; |
| 713 int32_t dst[ARRAY_SIZE(expected)]; | 718 int32_t dst[ARRAY_SIZE(expected)]; |
| 714 memset(dst, 0, sizeof(dst)); | 719 memset(dst, 0, sizeof(dst)); |
| 715 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 720 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 716 EXPECT(!is_valid); | 721 EXPECT(!is_valid); |
| 717 EXPECT(memcmp(expected, dst, sizeof(expected))); | 722 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 718 } | 723 } |
| 719 | 724 |
| 720 // 4.2 Maximum overlong sequences | 725 // 4.2 Maximum overlong sequences |
| 721 | 726 |
| 722 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF" | 727 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF" |
| 723 { | 728 { |
| 724 const char* src = "\xC1\xBF"; | 729 const char* src = "\xC1\xBF"; |
| 725 int32_t expected[] = { 0x7F }; | 730 int32_t expected[] = {0x7F}; |
| 726 int32_t dst[ARRAY_SIZE(expected)]; | 731 int32_t dst[ARRAY_SIZE(expected)]; |
| 727 memset(dst, 0, sizeof(dst)); | 732 memset(dst, 0, sizeof(dst)); |
| 728 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 733 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 729 EXPECT(!is_valid); | 734 EXPECT(!is_valid); |
| 730 EXPECT(memcmp(expected, dst, sizeof(expected))); | 735 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 731 } | 736 } |
| 732 | 737 |
| 733 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF" | 738 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF" |
| 734 { | 739 { |
| 735 const char* src = "\xE0\x9F\xBF"; | 740 const char* src = "\xE0\x9F\xBF"; |
| 736 int32_t expected[] = { 0x7FF }; | 741 int32_t expected[] = {0x7FF}; |
| 737 int32_t dst[ARRAY_SIZE(expected)]; | 742 int32_t dst[ARRAY_SIZE(expected)]; |
| 738 memset(dst, 0, sizeof(dst)); | 743 memset(dst, 0, sizeof(dst)); |
| 739 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 744 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 740 EXPECT(!is_valid); | 745 EXPECT(!is_valid); |
| 741 EXPECT(memcmp(expected, dst, sizeof(expected))); | 746 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 742 } | 747 } |
| 743 | 748 |
| 744 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF" | 749 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF" |
| 745 { | 750 { |
| 746 const char* src = "\xF0\x8F\xBF\xBF"; | 751 const char* src = "\xF0\x8F\xBF\xBF"; |
| 747 int32_t expected[] = { 0xFFFF }; | 752 int32_t expected[] = {0xFFFF}; |
| 748 int32_t dst[ARRAY_SIZE(expected)]; | 753 int32_t dst[ARRAY_SIZE(expected)]; |
| 749 memset(dst, 0, sizeof(dst)); | 754 memset(dst, 0, sizeof(dst)); |
| 750 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 755 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 751 EXPECT(!is_valid); | 756 EXPECT(!is_valid); |
| 752 EXPECT(memcmp(expected, dst, sizeof(expected))); | 757 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 753 } | 758 } |
| 754 | 759 |
| 755 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF" | 760 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF" |
| 756 { | 761 { |
| 757 const char* src = "\xF8\x87\xBF\xBF\xBF"; | 762 const char* src = "\xF8\x87\xBF\xBF\xBF"; |
| 758 int32_t expected[] = { 0x1FFFFF }; | 763 int32_t expected[] = {0x1FFFFF}; |
| 759 int32_t dst[ARRAY_SIZE(expected)]; | 764 int32_t dst[ARRAY_SIZE(expected)]; |
| 760 memset(dst, 0, sizeof(dst)); | 765 memset(dst, 0, sizeof(dst)); |
| 761 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 766 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 762 EXPECT(!is_valid); | 767 EXPECT(!is_valid); |
| 763 EXPECT(memcmp(expected, dst, sizeof(expected))); | 768 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 764 } | 769 } |
| 765 | 770 |
| 766 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF" | 771 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF" |
| 767 { | 772 { |
| 768 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF"; | 773 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF"; |
| 769 int32_t expected[] = { 0x3FFFFFF }; | 774 int32_t expected[] = {0x3FFFFFF}; |
| 770 int32_t dst[ARRAY_SIZE(expected)]; | 775 int32_t dst[ARRAY_SIZE(expected)]; |
| 771 memset(dst, 0, sizeof(dst)); | 776 memset(dst, 0, sizeof(dst)); |
| 772 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 777 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 773 EXPECT(!is_valid); | 778 EXPECT(!is_valid); |
| 774 EXPECT(memcmp(expected, dst, sizeof(expected))); | 779 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 775 } | 780 } |
| 776 | 781 |
| 777 // 4.3 - Overlong representation of the NUL character | 782 // 4.3 - Overlong representation of the NUL character |
| 778 | 783 |
| 779 // 4.3.1 - U+0000 = "\xC0\x80" | 784 // 4.3.1 - U+0000 = "\xC0\x80" |
| 780 { | 785 { |
| 781 const char* src = "\xC0\x80"; | 786 const char* src = "\xC0\x80"; |
| 782 int32_t expected[] = { 0x0 }; | 787 int32_t expected[] = {0x0}; |
| 783 int32_t dst[ARRAY_SIZE(expected)]; | 788 int32_t dst[ARRAY_SIZE(expected)]; |
| 784 memset(dst, 0xFF, sizeof(dst)); | 789 memset(dst, 0xFF, sizeof(dst)); |
| 785 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 790 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 786 EXPECT(!is_valid); | 791 EXPECT(!is_valid); |
| 787 EXPECT(memcmp(expected, dst, sizeof(expected))); | 792 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 788 } | 793 } |
| 789 | 794 |
| 790 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80" | 795 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80" |
| 791 { | 796 { |
| 792 const char* src = "\xE0\x80\x80"; | 797 const char* src = "\xE0\x80\x80"; |
| 793 int32_t expected[] = { 0x0 }; | 798 int32_t expected[] = {0x0}; |
| 794 int32_t dst[ARRAY_SIZE(expected)]; | 799 int32_t dst[ARRAY_SIZE(expected)]; |
| 795 memset(dst, 0xFF, sizeof(dst)); | 800 memset(dst, 0xFF, sizeof(dst)); |
| 796 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 801 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 797 EXPECT(!is_valid); | 802 EXPECT(!is_valid); |
| 798 EXPECT(memcmp(expected, dst, sizeof(expected))); | 803 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 799 } | 804 } |
| 800 | 805 |
| 801 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80" | 806 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80" |
| 802 { | 807 { |
| 803 const char* src = "\xF0\x80\x80\x80"; | 808 const char* src = "\xF0\x80\x80\x80"; |
| 804 int32_t expected[] = { 0x0 }; | 809 int32_t expected[] = {0x0}; |
| 805 int32_t dst[ARRAY_SIZE(expected)]; | 810 int32_t dst[ARRAY_SIZE(expected)]; |
| 806 memset(dst, 0xFF, sizeof(dst)); | 811 memset(dst, 0xFF, sizeof(dst)); |
| 807 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 812 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 808 EXPECT(!is_valid); | 813 EXPECT(!is_valid); |
| 809 EXPECT(memcmp(expected, dst, sizeof(expected))); | 814 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 810 } | 815 } |
| 811 | 816 |
| 812 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80" | 817 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80" |
| 813 { | 818 { |
| 814 const char* src = "\xF8\x80\x80\x80\x80"; | 819 const char* src = "\xF8\x80\x80\x80\x80"; |
| 815 int32_t expected[] = { 0x0 }; | 820 int32_t expected[] = {0x0}; |
| 816 int32_t dst[ARRAY_SIZE(expected)]; | 821 int32_t dst[ARRAY_SIZE(expected)]; |
| 817 memset(dst, 0xFF, sizeof(dst)); | 822 memset(dst, 0xFF, sizeof(dst)); |
| 818 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 823 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 819 EXPECT(!is_valid); | 824 EXPECT(!is_valid); |
| 820 EXPECT(memcmp(expected, dst, sizeof(expected))); | 825 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 821 } | 826 } |
| 822 | 827 |
| 823 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80" | 828 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80" |
| 824 { | 829 { |
| 825 const char* src = "\xFC\x80\x80\x80\x80\x80"; | 830 const char* src = "\xFC\x80\x80\x80\x80\x80"; |
| 826 int32_t expected[] = { 0x0 }; | 831 int32_t expected[] = {0x0}; |
| 827 int32_t dst[ARRAY_SIZE(expected)]; | 832 int32_t dst[ARRAY_SIZE(expected)]; |
| 828 memset(dst, 0xFF, sizeof(dst)); | 833 memset(dst, 0xFF, sizeof(dst)); |
| 829 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 834 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 830 EXPECT(!is_valid); | 835 EXPECT(!is_valid); |
| 831 EXPECT(memcmp(expected, dst, sizeof(expected))); | 836 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 832 } | 837 } |
| 833 | 838 |
| 834 // 5.1 - Single UTF-16 surrogates | 839 // 5.1 - Single UTF-16 surrogates |
| 835 // UTF-8 suggests single surrogates are invalid, but both JS and | 840 // UTF-8 suggests single surrogates are invalid, but both JS and |
| 836 // Dart allow them and make use of them. | 841 // Dart allow them and make use of them. |
| 837 | 842 |
| 838 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80" | 843 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80" |
| 839 { | 844 { |
| 840 const char* src = "\xED\xA0\x80"; | 845 const char* src = "\xED\xA0\x80"; |
| 841 int32_t expected[] = { 0xD800 }; | 846 int32_t expected[] = {0xD800}; |
| 842 int32_t dst[ARRAY_SIZE(expected)]; | 847 int32_t dst[ARRAY_SIZE(expected)]; |
| 843 memset(dst, 0, sizeof(dst)); | 848 memset(dst, 0, sizeof(dst)); |
| 844 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 849 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 845 EXPECT(is_valid); | 850 EXPECT(is_valid); |
| 846 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 851 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 847 } | 852 } |
| 848 | 853 |
| 849 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF" | 854 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF" |
| 850 { | 855 { |
| 851 const char* src = "\xED\xAD\xBF"; | 856 const char* src = "\xED\xAD\xBF"; |
| 852 int32_t expected[] = { 0xDB7F }; | 857 int32_t expected[] = {0xDB7F}; |
| 853 int32_t dst[ARRAY_SIZE(expected)]; | 858 int32_t dst[ARRAY_SIZE(expected)]; |
| 854 memset(dst, 0, sizeof(dst)); | 859 memset(dst, 0, sizeof(dst)); |
| 855 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 860 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 856 EXPECT(is_valid); | 861 EXPECT(is_valid); |
| 857 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 862 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 858 } | 863 } |
| 859 | 864 |
| 860 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80" | 865 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80" |
| 861 { | 866 { |
| 862 const char* src = "\xED\xAE\x80"; | 867 const char* src = "\xED\xAE\x80"; |
| 863 int32_t expected[] = { 0xDB80 }; | 868 int32_t expected[] = {0xDB80}; |
| 864 int32_t dst[ARRAY_SIZE(expected)]; | 869 int32_t dst[ARRAY_SIZE(expected)]; |
| 865 memset(dst, 0, sizeof(dst)); | 870 memset(dst, 0, sizeof(dst)); |
| 866 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 871 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 867 EXPECT(is_valid); | 872 EXPECT(is_valid); |
| 868 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 873 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 869 } | 874 } |
| 870 | 875 |
| 871 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF" | 876 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF" |
| 872 { | 877 { |
| 873 const char* src = "\xED\xAF\xBF"; | 878 const char* src = "\xED\xAF\xBF"; |
| 874 int32_t expected[] = { 0xDBFF }; | 879 int32_t expected[] = {0xDBFF}; |
| 875 int32_t dst[ARRAY_SIZE(expected)]; | 880 int32_t dst[ARRAY_SIZE(expected)]; |
| 876 memset(dst, 0, sizeof(dst)); | 881 memset(dst, 0, sizeof(dst)); |
| 877 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 882 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 878 EXPECT(is_valid); | 883 EXPECT(is_valid); |
| 879 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 884 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 880 } | 885 } |
| 881 | 886 |
| 882 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80" | 887 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80" |
| 883 { | 888 { |
| 884 const char* src = "\xED\xB0\x80"; | 889 const char* src = "\xED\xB0\x80"; |
| 885 int32_t expected[] = { 0xDC00 }; | 890 int32_t expected[] = {0xDC00}; |
| 886 int32_t dst[ARRAY_SIZE(expected)]; | 891 int32_t dst[ARRAY_SIZE(expected)]; |
| 887 memset(dst, 0, sizeof(dst)); | 892 memset(dst, 0, sizeof(dst)); |
| 888 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 893 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 889 EXPECT(is_valid); | 894 EXPECT(is_valid); |
| 890 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 895 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 891 } | 896 } |
| 892 | 897 |
| 893 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80" | 898 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80" |
| 894 { | 899 { |
| 895 const char* src = "\xED\xBE\x80"; | 900 const char* src = "\xED\xBE\x80"; |
| 896 int32_t expected[] = { 0xDF80 }; | 901 int32_t expected[] = {0xDF80}; |
| 897 int32_t dst[ARRAY_SIZE(expected)]; | 902 int32_t dst[ARRAY_SIZE(expected)]; |
| 898 memset(dst, 0, sizeof(dst)); | 903 memset(dst, 0, sizeof(dst)); |
| 899 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 904 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 900 EXPECT(is_valid); | 905 EXPECT(is_valid); |
| 901 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 906 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 902 } | 907 } |
| 903 | 908 |
| 904 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF" | 909 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF" |
| 905 { | 910 { |
| 906 const char* src = "\xED\xBF\xBF"; | 911 const char* src = "\xED\xBF\xBF"; |
| 907 int32_t expected[] = { 0xDFFF }; | 912 int32_t expected[] = {0xDFFF}; |
| 908 int32_t dst[ARRAY_SIZE(expected)]; | 913 int32_t dst[ARRAY_SIZE(expected)]; |
| 909 memset(dst, 0, sizeof(dst)); | 914 memset(dst, 0, sizeof(dst)); |
| 910 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 915 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 911 EXPECT(is_valid); | 916 EXPECT(is_valid); |
| 912 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 917 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 913 } | 918 } |
| 914 | 919 |
| 915 // 5.2 Paired UTF-16 surrogates | 920 // 5.2 Paired UTF-16 surrogates |
| 916 // Also not a valid string, but accepted in Dart, even if it doesn't make | 921 // Also not a valid string, but accepted in Dart, even if it doesn't make |
| 917 // sense. e.g. | 922 // sense. e.g. |
| 918 // var s = new String.fromCharCodes([0xd800, 0xDC00]); | 923 // var s = new String.fromCharCodes([0xd800, 0xDC00]); |
| 919 // print(s.runes); // (65536) (0x10000) | 924 // print(s.runes); // (65536) (0x10000) |
| 920 // print(s.codeUnits); // [55296, 56320] | 925 // print(s.codeUnits); // [55296, 56320] |
| 921 | 926 |
| 922 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80" | 927 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80" |
| 923 { | 928 { |
| 924 const char* src = "\xED\xA0\x80\xED\xB0\x80"; | 929 const char* src = "\xED\xA0\x80\xED\xB0\x80"; |
| 925 int32_t expected[] = { 0xD800, 0xDC00 }; | 930 int32_t expected[] = {0xD800, 0xDC00}; |
| 926 int32_t dst[ARRAY_SIZE(expected)]; | 931 int32_t dst[ARRAY_SIZE(expected)]; |
| 927 memset(dst, 0, sizeof(dst)); | 932 memset(dst, 0, sizeof(dst)); |
| 928 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 933 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 929 EXPECT(is_valid); | 934 EXPECT(is_valid); |
| 930 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 935 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 931 } | 936 } |
| 932 | 937 |
| 933 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF" | 938 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF" |
| 934 { | 939 { |
| 935 const char* src = "\xED\xA0\x80\xED\xBF\xBF"; | 940 const char* src = "\xED\xA0\x80\xED\xBF\xBF"; |
| 936 int32_t expected[] = { 0xD800, 0xDFFF }; | 941 int32_t expected[] = {0xD800, 0xDFFF}; |
| 937 int32_t dst[ARRAY_SIZE(expected)]; | 942 int32_t dst[ARRAY_SIZE(expected)]; |
| 938 memset(dst, 0, sizeof(dst)); | 943 memset(dst, 0, sizeof(dst)); |
| 939 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 944 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 940 EXPECT(is_valid); | 945 EXPECT(is_valid); |
| 941 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 946 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 942 } | 947 } |
| 943 | 948 |
| 944 // 5.2.3 - U+DB7F U+DC00 = ed a0 80 ed bf bf = "\xED\xAD\xBF\xED\xB0\x80" | 949 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80" |
| 945 { | 950 { |
| 946 const char* src = "\xED\xAD\xBF\xED\xB0\x80"; | 951 const char* src = "\xED\xAD\xBF\xED\xB0\x80"; |
| 947 int32_t expected[] = { 0xDB7F, 0xDC00 }; | 952 int32_t expected[] = {0xDB7F, 0xDC00}; |
| 948 int32_t dst[ARRAY_SIZE(expected)]; | 953 int32_t dst[ARRAY_SIZE(expected)]; |
| 949 memset(dst, 0, sizeof(dst)); | 954 memset(dst, 0, sizeof(dst)); |
| 950 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 955 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 951 EXPECT(is_valid); | 956 EXPECT(is_valid); |
| 952 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 957 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 953 } | 958 } |
| 954 | 959 |
| 955 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF" | 960 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF" |
| 956 { | 961 { |
| 957 const char* src = "\xED\xAD\xBF\xED\xBF\xBF"; | 962 const char* src = "\xED\xAD\xBF\xED\xBF\xBF"; |
| 958 int32_t expected[] = { 0xDB7F, 0xDFFF }; | 963 int32_t expected[] = {0xDB7F, 0xDFFF}; |
| 959 int32_t dst[ARRAY_SIZE(expected)]; | 964 int32_t dst[ARRAY_SIZE(expected)]; |
| 960 memset(dst, 0, sizeof(dst)); | 965 memset(dst, 0, sizeof(dst)); |
| 961 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 966 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 962 EXPECT(is_valid); | 967 EXPECT(is_valid); |
| 963 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 968 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 964 } | 969 } |
| 965 | 970 |
| 966 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80" | 971 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80" |
| 967 { | 972 { |
| 968 const char* src = "\xED\xAE\x80\xED\xB0\x80"; | 973 const char* src = "\xED\xAE\x80\xED\xB0\x80"; |
| 969 int32_t expected[] = { 0xDB80, 0xDC00 }; | 974 int32_t expected[] = {0xDB80, 0xDC00}; |
| 970 int32_t dst[ARRAY_SIZE(expected)]; | 975 int32_t dst[ARRAY_SIZE(expected)]; |
| 971 memset(dst, 0, sizeof(dst)); | 976 memset(dst, 0, sizeof(dst)); |
| 972 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 977 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 973 EXPECT(is_valid); | 978 EXPECT(is_valid); |
| 974 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 979 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 975 } | 980 } |
| 976 | 981 |
| 977 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF" | 982 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF" |
| 978 { | 983 { |
| 979 const char* src = "\xED\xAE\x80\xED\xBF\xBF"; | 984 const char* src = "\xED\xAE\x80\xED\xBF\xBF"; |
| 980 int32_t expected[] = { 0xDB80, 0xDFFF }; | 985 int32_t expected[] = {0xDB80, 0xDFFF}; |
| 981 int32_t dst[ARRAY_SIZE(expected)]; | 986 int32_t dst[ARRAY_SIZE(expected)]; |
| 982 memset(dst, 0, sizeof(dst)); | 987 memset(dst, 0, sizeof(dst)); |
| 983 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 988 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 984 EXPECT(is_valid); | 989 EXPECT(is_valid); |
| 985 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 990 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 986 } | 991 } |
| 987 | 992 |
| 988 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80" | 993 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80" |
| 989 { | 994 { |
| 990 const char* src = "\xED\xAF\xBF\xED\xB0\x80"; | 995 const char* src = "\xED\xAF\xBF\xED\xB0\x80"; |
| 991 int32_t expected[] = { 0xDBFF, 0xDC00 }; | 996 int32_t expected[] = {0xDBFF, 0xDC00}; |
| 992 int32_t dst[ARRAY_SIZE(expected)]; | 997 int32_t dst[ARRAY_SIZE(expected)]; |
| 993 memset(dst, 0, sizeof(dst)); | 998 memset(dst, 0, sizeof(dst)); |
| 994 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 999 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 995 EXPECT(is_valid); | 1000 EXPECT(is_valid); |
| 996 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 1001 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 997 } | 1002 } |
| 998 | 1003 |
| 999 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF" | 1004 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF" |
| 1000 { | 1005 { |
| 1001 const char* src = "\xED\xAF\xBF\xED\xBF\xBF"; | 1006 const char* src = "\xED\xAF\xBF\xED\xBF\xBF"; |
| 1002 int32_t expected[] = { 0xDBFF, 0xDFFF }; | 1007 int32_t expected[] = {0xDBFF, 0xDFFF}; |
| 1003 int32_t dst[ARRAY_SIZE(expected)]; | 1008 int32_t dst[ARRAY_SIZE(expected)]; |
| 1004 memset(dst, 0, sizeof(dst)); | 1009 memset(dst, 0, sizeof(dst)); |
| 1005 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 1010 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 1006 EXPECT(is_valid); | 1011 EXPECT(is_valid); |
| 1007 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 1012 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 1008 } | 1013 } |
| 1009 | 1014 |
| 1010 // 5.3 - Other illegal code positions | 1015 // 5.3 - Other illegal code positions |
| 1011 | 1016 |
| 1012 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE" | 1017 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE" |
| 1013 { | 1018 { |
| 1014 const char* src = "\xEF\xBF\xBE"; | 1019 const char* src = "\xEF\xBF\xBE"; |
| 1015 int32_t expected[] = { 0xFFFE }; | 1020 int32_t expected[] = {0xFFFE}; |
| 1016 int32_t dst[ARRAY_SIZE(expected)]; | 1021 int32_t dst[ARRAY_SIZE(expected)]; |
| 1017 memset(dst, 0, sizeof(dst)); | 1022 memset(dst, 0, sizeof(dst)); |
| 1018 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 1023 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 1019 EXPECT(is_valid); | 1024 EXPECT(is_valid); |
| 1020 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 1025 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 1021 } | 1026 } |
| 1022 | 1027 |
| 1023 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF" | 1028 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF" |
| 1024 { | 1029 { |
| 1025 const char* src = "\xEF\xBF\xBF"; | 1030 const char* src = "\xEF\xBF\xBF"; |
| 1026 int32_t expected[] = { 0xFFFF }; | 1031 int32_t expected[] = {0xFFFF}; |
| 1027 int32_t dst[ARRAY_SIZE(expected)]; | 1032 int32_t dst[ARRAY_SIZE(expected)]; |
| 1028 memset(dst, 0, sizeof(dst)); | 1033 memset(dst, 0, sizeof(dst)); |
| 1029 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); | 1034 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 1030 EXPECT(is_valid); | 1035 EXPECT(is_valid); |
| 1031 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 1036 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 1032 } | 1037 } |
| 1033 } | 1038 } |
| 1034 | 1039 |
| 1035 } // namespace dart | 1040 } // namespace dart |
| OLD | NEW |