| OLD | NEW |
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #include "vm/globals.h" | 5 #include "vm/globals.h" |
| 6 #include "vm/unicode.h" | 6 #include "vm/unicode.h" |
| 7 #include "vm/unit_test.h" | 7 #include "vm/unit_test.h" |
| 8 | 8 |
| 9 namespace dart { | 9 namespace dart { |
| 10 | 10 |
| 11 TEST_CASE(Utf8Decode) { | 11 TEST_CASE(Utf8Decode) { |
| 12 // Examples from the Unicode specification, chapter 3 | 12 // Examples from the Unicode specification, chapter 3 |
| 13 { | 13 { |
| 14 const char* src = "\x41\xC3\xB1\x42"; | 14 const char* src = "\x41\xC3\xB1\x42"; |
| 15 uint32_t expected[] = { 0x41, 0xF1, 0x42 }; | 15 uint32_t expected[] = { 0x41, 0xF1, 0x42 }; |
| 16 uint32_t dst[ARRAY_SIZE(expected)]; | 16 uint32_t dst[ARRAY_SIZE(expected)]; |
| 17 memset(dst, 0, sizeof(dst)); | 17 memset(dst, 0, sizeof(dst)); |
| 18 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 18 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 19 EXPECT(is_valid); | 19 EXPECT(is_valid); |
| 20 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 20 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 21 } | 21 } |
| 22 | 22 |
| 23 { | 23 { |
| 24 const char* src = "\x4D"; | 24 const char* src = "\x4D"; |
| 25 uint32_t expected[] = { 0x4D }; | 25 uint32_t expected[] = { 0x4D }; |
| 26 uint32_t dst[ARRAY_SIZE(expected)]; | 26 uint32_t dst[ARRAY_SIZE(expected)]; |
| 27 memset(dst, 0, sizeof(dst)); | 27 memset(dst, 0, sizeof(dst)); |
| 28 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 28 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 29 EXPECT(is_valid); | 29 EXPECT(is_valid); |
| 30 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 30 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 31 } | 31 } |
| 32 | 32 |
| 33 { | 33 { |
| 34 const char* src = "\xD0\xB0"; | 34 const char* src = "\xD0\xB0"; |
| 35 uint32_t expected[] = { 0x430 }; | 35 uint32_t expected[] = { 0x430 }; |
| 36 uint32_t dst[ARRAY_SIZE(expected)]; | 36 uint32_t dst[ARRAY_SIZE(expected)]; |
| 37 memset(dst, 0, sizeof(dst)); | 37 memset(dst, 0, sizeof(dst)); |
| 38 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 38 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 39 EXPECT(is_valid); | 39 EXPECT(is_valid); |
| 40 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 40 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 41 } | 41 } |
| 42 | 42 |
| 43 { | 43 { |
| 44 const char* src = "\xE4\xBA\x8C"; | 44 const char* src = "\xE4\xBA\x8C"; |
| 45 uint32_t expected[] = { 0x4E8C }; | 45 uint32_t expected[] = { 0x4E8C }; |
| 46 uint32_t dst[ARRAY_SIZE(expected)]; | 46 uint32_t dst[ARRAY_SIZE(expected)]; |
| 47 memset(dst, 0, sizeof(dst)); | 47 memset(dst, 0, sizeof(dst)); |
| 48 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 48 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 49 EXPECT(is_valid); | 49 EXPECT(is_valid); |
| 50 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 50 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 51 } | 51 } |
| 52 | 52 |
| 53 { | 53 { |
| 54 const char* src = "\xF0\x90\x8C\x82"; | 54 const char* src = "\xF0\x90\x8C\x82"; |
| 55 uint32_t expected[] = { 0x10302 }; | 55 uint32_t expected[] = { 0x10302 }; |
| 56 uint32_t dst[ARRAY_SIZE(expected)]; | 56 uint32_t dst[ARRAY_SIZE(expected)]; |
| 57 memset(dst, 0, sizeof(dst)); | 57 memset(dst, 0, sizeof(dst)); |
| 58 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 58 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 59 EXPECT(is_valid); | 59 EXPECT(is_valid); |
| 60 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 60 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 61 } | 61 } |
| 62 | 62 |
| 63 { | 63 { |
| 64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82"; | 64 const char* src = "\x4D\xD0\xB0\xE4\xBA\x8C\xF0\x90\x8C\x82"; |
| 65 uint32_t expected[] = { 0x4D, 0x430, 0x4E8C, 0x10302 }; | 65 uint32_t expected[] = { 0x4D, 0x430, 0x4E8C, 0x10302 }; |
| 66 uint32_t dst[ARRAY_SIZE(expected)]; | 66 uint32_t dst[ARRAY_SIZE(expected)]; |
| 67 memset(dst, 0, sizeof(dst)); | 67 memset(dst, 0, sizeof(dst)); |
| 68 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 68 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 69 EXPECT(is_valid); | 69 EXPECT(is_valid); |
| 70 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 70 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 71 } | 71 } |
| 72 | 72 |
| 73 // Mixture of non-ASCII and ASCII characters | 73 // Mixture of non-ASCII and ASCII characters |
| 74 { | 74 { |
| 75 const char* src = "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93" | 75 const char* src = "\xD7\x92\xD7\x9C\xD7\xA2\xD7\x93" |
| 76 "\x20" | 76 "\x20" |
| 77 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94"; | 77 "\xD7\x91\xD7\xA8\xD7\x9B\xD7\x94"; |
| 78 uint32_t expected[] = { 0x5D2, 0x5DC, 0x5E2, 0x5D3, | 78 uint32_t expected[] = { 0x5D2, 0x5DC, 0x5E2, 0x5D3, |
| 79 0x20, | 79 0x20, |
| 80 0x5D1, 0x5E8, 0x5DB, 0x5D4 }; | 80 0x5D1, 0x5E8, 0x5DB, 0x5D4 }; |
| 81 uint32_t dst[ARRAY_SIZE(expected)]; | 81 uint32_t dst[ARRAY_SIZE(expected)]; |
| 82 memset(dst, 0, sizeof(dst)); | 82 memset(dst, 0, sizeof(dst)); |
| 83 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 83 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 84 EXPECT(is_valid); | 84 EXPECT(is_valid); |
| 85 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 85 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 86 } | 86 } |
| 87 | 87 |
| 88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt | 88 // http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt |
| 89 | 89 |
| 90 // 1 - Some correct UTF-8 text | 90 // 1 - Some correct UTF-8 text |
| 91 { | 91 { |
| 92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"; | 92 const char* src = "\xCE\xBA\xE1\xBD\xB9\xCF\x83\xCE\xBC\xCE\xB5"; |
| 93 uint32_t expected[] = { 0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5 }; | 93 uint32_t expected[] = { 0x3BA, 0x1F79, 0x3C3, 0x3BC, 0x3B5 }; |
| 94 uint32_t dst[ARRAY_SIZE(expected)]; | 94 uint32_t dst[ARRAY_SIZE(expected)]; |
| 95 memset(dst, 0, sizeof(dst)); | 95 memset(dst, 0, sizeof(dst)); |
| 96 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 96 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 97 EXPECT(is_valid); | 97 EXPECT(is_valid); |
| 98 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 98 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 99 } | 99 } |
| 100 | 100 |
| 101 // 2 - Boundary condition test cases | 101 // 2 - Boundary condition test cases |
| 102 | 102 |
| 103 // 2.1 - First possible sequence of a certain length | 103 // 2.1 - First possible sequence of a certain length |
| 104 | 104 |
| 105 // 2.1.1 - 1 byte (U-00000000): "\x00" | 105 // 2.1.1 - 1 byte (U-00000000): "\x00" |
| 106 { | 106 { |
| 107 const char* src = "\x00"; | 107 const char* src = "\x00"; |
| 108 uint32_t expected[] = { 0x0 }; | 108 uint32_t expected[] = { 0x0 }; |
| 109 uint32_t dst[ARRAY_SIZE(expected)]; | 109 uint32_t dst[ARRAY_SIZE(expected)]; |
| 110 memset(dst, 0xFF, sizeof(dst)); | 110 memset(dst, 0xFF, sizeof(dst)); |
| 111 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 111 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 112 EXPECT(is_valid); | 112 EXPECT(is_valid); |
| 113 EXPECT(memcmp(expected, dst, sizeof(expected))); | 113 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 114 } | 114 } |
| 115 | 115 |
| 116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80" | 116 // 2.1.2 - 2 bytes (U-00000080): "\xC2\x80" |
| 117 { | 117 { |
| 118 const char* src = "\xC2\x80"; | 118 const char* src = "\xC2\x80"; |
| 119 uint32_t expected[] = { 0x80 }; | 119 uint32_t expected[] = { 0x80 }; |
| 120 uint32_t dst[ARRAY_SIZE(expected)]; | 120 uint32_t dst[ARRAY_SIZE(expected)]; |
| 121 memset(dst, 0, sizeof(dst)); | 121 memset(dst, 0, sizeof(dst)); |
| 122 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 122 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 123 EXPECT(is_valid); | 123 EXPECT(is_valid); |
| 124 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 124 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 125 } | 125 } |
| 126 | 126 |
| 127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80" | 127 // 2.1.3 - 3 bytes (U-00000800): "\xE0\xA0\x80" |
| 128 { | 128 { |
| 129 const char* src = "\xE0\xA0\x80"; | 129 const char* src = "\xE0\xA0\x80"; |
| 130 uint32_t expected[] = { 0x800 }; | 130 uint32_t expected[] = { 0x800 }; |
| 131 uint32_t dst[ARRAY_SIZE(expected)]; | 131 uint32_t dst[ARRAY_SIZE(expected)]; |
| 132 memset(dst, 0, sizeof(dst)); | 132 memset(dst, 0, sizeof(dst)); |
| 133 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 133 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 134 EXPECT(is_valid); | 134 EXPECT(is_valid); |
| 135 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 135 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 136 } | 136 } |
| 137 | 137 |
| 138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80" | 138 // 2.1.4 - 4 bytes (U-00010000): "\xF0\x90\x80\x80" |
| 139 { | 139 { |
| 140 const char* src = "\xF0\x90\x80\x80"; | 140 const char* src = "\xF0\x90\x80\x80"; |
| 141 uint32_t expected[] = { 0x10000 }; | 141 uint32_t expected[] = { 0x10000 }; |
| 142 uint32_t dst[ARRAY_SIZE(expected)]; | 142 uint32_t dst[ARRAY_SIZE(expected)]; |
| 143 memset(dst, 0, sizeof(dst)); | 143 memset(dst, 0, sizeof(dst)); |
| 144 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 144 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 145 EXPECT(is_valid); | 145 EXPECT(is_valid); |
| 146 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 146 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 147 } | 147 } |
| 148 | 148 |
| 149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80" | 149 // 2.1.5 - 5 bytes (U-00200000): "\xF8\x88\x80\x80\x80" |
| 150 { | 150 { |
| 151 const char* src = "\xF8\x88\x80\x80\x80"; | 151 const char* src = "\xF8\x88\x80\x80\x80"; |
| 152 uint32_t expected[] = { 0x200000 }; | 152 uint32_t expected[] = { 0x200000 }; |
| 153 uint32_t dst[ARRAY_SIZE(expected)]; | 153 uint32_t dst[ARRAY_SIZE(expected)]; |
| 154 memset(dst, 0, sizeof(dst)); | 154 memset(dst, 0, sizeof(dst)); |
| 155 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 155 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 156 EXPECT(!is_valid); | 156 EXPECT(!is_valid); |
| 157 EXPECT(memcmp(expected, dst, sizeof(expected))); | 157 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 158 } | 158 } |
| 159 | 159 |
| 160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80" | 160 // 2.1.6 - 6 bytes (U-04000000): "\xFC\x84\x80\x80\x80\x80" |
| 161 { | 161 { |
| 162 const char* src = "\xFC\x84\x80\x80\x80\x80"; | 162 const char* src = "\xFC\x84\x80\x80\x80\x80"; |
| 163 uint32_t expected[] = { 0x400000 }; | 163 uint32_t expected[] = { 0x4000000 };  // U-04000000, the value named in the case label. |
| 164 uint32_t dst[ARRAY_SIZE(expected)]; | 164 uint32_t dst[ARRAY_SIZE(expected)]; |
| 165 memset(dst, 0, sizeof(dst)); | 165 memset(dst, 0, sizeof(dst)); |
| 166 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 166 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 167 EXPECT(!is_valid); | 167 EXPECT(!is_valid); |
| 168 EXPECT(memcmp(expected, dst, sizeof(expected))); | 168 EXPECT(memcmp(expected, dst, sizeof(expected)));  // dst must differ from the code point. |
| 169 } | 169 } |
| 170 | 170 |
| 171 // 2.2 - Last possible sequence of a certain length | 171 // 2.2 - Last possible sequence of a certain length |
| 172 | 172 |
| 173 // 2.2.1 - 1 byte (U-0000007F): "\x7F" | 173 // 2.2.1 - 1 byte (U-0000007F): "\x7F" |
| 174 { | 174 { |
| 175 const char* src = "\x7F"; | 175 const char* src = "\x7F"; |
| 176 uint32_t expected[] = { 0x7F }; | 176 uint32_t expected[] = { 0x7F }; |
| 177 uint32_t dst[ARRAY_SIZE(expected)]; | 177 uint32_t dst[ARRAY_SIZE(expected)]; |
| 178 memset(dst, 0, sizeof(dst)); | 178 memset(dst, 0, sizeof(dst)); |
| 179 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 179 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 180 EXPECT(is_valid); | 180 EXPECT(is_valid); |
| 181 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 181 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 182 } | 182 } |
| 183 | 183 |
| 184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF" | 184 // 2.2.2 - 2 bytes (U-000007FF): "\xDF\xBF" |
| 185 { | 185 { |
| 186 const char* src = "\xDF\xBF"; | 186 const char* src = "\xDF\xBF"; |
| 187 uint32_t expected[] = { 0x7FF }; | 187 uint32_t expected[] = { 0x7FF }; |
| 188 uint32_t dst[ARRAY_SIZE(expected)]; | 188 uint32_t dst[ARRAY_SIZE(expected)]; |
| 189 memset(dst, 0, sizeof(dst)); | 189 memset(dst, 0, sizeof(dst)); |
| 190 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 190 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 191 EXPECT(is_valid); | 191 EXPECT(is_valid); |
| 192 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 192 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 193 } | 193 } |
| 194 | 194 |
| 195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF" | 195 // 2.2.3 - 3 bytes (U-0000FFFF): "\xEF\xBF\xBF" |
| 196 { | 196 { |
| 197 const char* src = "\xEF\xBF\xBF"; | 197 const char* src = "\xEF\xBF\xBF"; |
| 198 uint32_t expected[] = { 0xFFFF }; | 198 uint32_t expected[] = { 0xFFFF }; |
| 199 uint32_t dst[ARRAY_SIZE(expected)]; | 199 uint32_t dst[ARRAY_SIZE(expected)]; |
| 200 memset(dst, 0, sizeof(dst)); | 200 memset(dst, 0, sizeof(dst)); |
| 201 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 201 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 202 EXPECT(is_valid); | 202 EXPECT(is_valid); |
| 203 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 203 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 204 } | 204 } |
| 205 | 205 |
| 206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF" | 206 // 2.2.4 - 4 bytes (U-001FFFFF): "\xF7\xBF\xBF\xBF" |
| 207 { | 207 { |
| 208 const char* src = "\xF7\xBF\xBF\xBF"; | 208 const char* src = "\xF7\xBF\xBF\xBF"; |
| 209 uint32_t expected[] = { 0x1FFFF }; | 209 uint32_t expected[] = { 0x1FFFFF };  // U-001FFFFF, the value named in the case label. |
| 210 uint32_t dst[ARRAY_SIZE(expected)]; | 210 uint32_t dst[ARRAY_SIZE(expected)]; |
| 211 memset(dst, 0, sizeof(dst)); | 211 memset(dst, 0, sizeof(dst)); |
| 212 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 212 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 213 EXPECT(!is_valid); | 213 EXPECT(!is_valid); |
| 214 EXPECT(memcmp(expected, dst, sizeof(expected))); | 214 EXPECT(memcmp(expected, dst, sizeof(expected)));  // dst must differ from the code point. |
| 215 } | 215 } |
| 216 | 216 |
| 217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF" | 217 // 2.2.5 - 5 bytes (U-03FFFFFF): "\xFB\xBF\xBF\xBF\xBF" |
| 218 { | 218 { |
| 219 const char* src = "\xFB\xBF\xBF\xBF\xBF"; | 219 const char* src = "\xFB\xBF\xBF\xBF\xBF"; |
| 220 uint32_t expected[] = { 0x3FFFFFF }; | 220 uint32_t expected[] = { 0x3FFFFFF }; |
| 221 uint32_t dst[ARRAY_SIZE(expected)]; | 221 uint32_t dst[ARRAY_SIZE(expected)]; |
| 222 memset(dst, 0, sizeof(dst)); | 222 memset(dst, 0, sizeof(dst)); |
| 223 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 223 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 224 EXPECT(!is_valid); | 224 EXPECT(!is_valid); |
| 225 EXPECT(memcmp(expected, dst, sizeof(expected))); | 225 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 226 } | 226 } |
| 227 | 227 |
| 228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF" | 228 // 2.2.6 - 6 bytes (U-7FFFFFFF): "\xFD\xBF\xBF\xBF\xBF\xBF" |
| 229 { | 229 { |
| 230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF"; | 230 const char* src = "\xFD\xBF\xBF\xBF\xBF\xBF"; |
| 231 uint32_t expected[] = { 0x7FFFFFF }; | 231 uint32_t expected[] = { 0x7FFFFFFF };  // U-7FFFFFFF, the value named in the case label. |
| 232 uint32_t dst[ARRAY_SIZE(expected)]; | 232 uint32_t dst[ARRAY_SIZE(expected)]; |
| 233 memset(dst, 0, sizeof(dst)); | 233 memset(dst, 0, sizeof(dst)); |
| 234 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 234 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 235 EXPECT(!is_valid); | 235 EXPECT(!is_valid); |
| 236 EXPECT(memcmp(expected, dst, sizeof(expected))); | 236 EXPECT(memcmp(expected, dst, sizeof(expected)));  // dst must differ from the code point. |
| 237 } | 237 } |
| 238 | 238 |
| 239 // 2.3 - Other boundary conditions | 239 // 2.3 - Other boundary conditions |
| 240 | 240 |
| 241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF" | 241 // 2.3.1 - U-0000D7FF = ed 9f bf = "\xED\x9F\xBF" |
| 242 { | 242 { |
| 243 const char* src = "\xED\x9F\xBF"; | 243 const char* src = "\xED\x9F\xBF"; |
| 244 uint32_t expected[] = { 0xD7FF }; | 244 uint32_t expected[] = { 0xD7FF }; |
| 245 uint32_t dst[ARRAY_SIZE(expected)]; | 245 uint32_t dst[ARRAY_SIZE(expected)]; |
| 246 memset(dst, 0, sizeof(dst)); | 246 memset(dst, 0, sizeof(dst)); |
| 247 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 247 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 248 EXPECT(is_valid); | 248 EXPECT(is_valid); |
| 249 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 249 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 250 } | 250 } |
| 251 | 251 |
| 252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80" | 252 // 2.3.2 - U-0000E000 = ee 80 80 = "\xEE\x80\x80" |
| 253 { | 253 { |
| 254 const char* src = "\xEE\x80\x80"; | 254 const char* src = "\xEE\x80\x80"; |
| 255 uint32_t expected[] = { 0xE000 }; | 255 uint32_t expected[] = { 0xE000 }; |
| 256 uint32_t dst[ARRAY_SIZE(expected)]; | 256 uint32_t dst[ARRAY_SIZE(expected)]; |
| 257 memset(dst, 0, sizeof(dst)); | 257 memset(dst, 0, sizeof(dst)); |
| 258 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 258 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 259 EXPECT(is_valid); | 259 EXPECT(is_valid); |
| 260 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 260 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 261 } | 261 } |
| 262 | 262 |
| 263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD" | 263 // 2.3.3 - U-0000FFFD = ef bf bd = "\xEF\xBF\xBD" |
| 264 { | 264 { |
| 265 const char* src = "\xEF\xBF\xBD"; | 265 const char* src = "\xEF\xBF\xBD"; |
| 266 uint32_t expected[] = { 0xFFFD }; | 266 uint32_t expected[] = { 0xFFFD }; |
| 267 uint32_t dst[ARRAY_SIZE(expected)]; | 267 uint32_t dst[ARRAY_SIZE(expected)]; |
| 268 memset(dst, 0, sizeof(dst)); | 268 memset(dst, 0, sizeof(dst)); |
| 269 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 269 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 270 EXPECT(is_valid); | 270 EXPECT(is_valid); |
| 271 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 271 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 272 } | 272 } |
| 273 | 273 |
| 274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF" | 274 // 2.3.4 - U-0010FFFF = f4 8f bf bf = "\xF4\x8F\xBF\xBF" |
| 275 { | 275 { |
| 276 const char* src = "\xF4\x8F\xBF\xBF"; | 276 const char* src = "\xF4\x8F\xBF\xBF"; |
| 277 uint32_t expected[] = { 0x10FFFF }; | 277 uint32_t expected[] = { 0x10FFFF }; |
| 278 uint32_t dst[ARRAY_SIZE(expected)]; | 278 uint32_t dst[ARRAY_SIZE(expected)]; |
| 279 memset(dst, 0, sizeof(dst)); | 279 memset(dst, 0, sizeof(dst)); |
| 280 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 280 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 281 EXPECT(is_valid); | 281 EXPECT(is_valid); |
| 282 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 282 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 283 } | 283 } |
| 284 | 284 |
| 285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80" | 285 // 2.3.5 - U-00110000 = f4 90 80 80 = "\xF4\x90\x80\x80" |
| 286 { | 286 { |
| 287 const char* src = "\xF4\x90\x80\x80"; | 287 const char* src = "\xF4\x90\x80\x80"; |
| 288 uint32_t expected[] = { 0x110000 }; | 288 uint32_t expected[] = { 0x110000 }; |
| 289 uint32_t dst[ARRAY_SIZE(expected)]; | 289 uint32_t dst[ARRAY_SIZE(expected)]; |
| 290 memset(dst, 0, sizeof(dst)); | 290 memset(dst, 0, sizeof(dst)); |
| 291 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 291 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 292 EXPECT(!is_valid); | 292 EXPECT(!is_valid); |
| 293 EXPECT(memcmp(expected, dst, sizeof(expected))); | 293 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 294 } | 294 } |
| 295 | 295 |
| 296 // 3 - Malformed sequences | 296 // 3 - Malformed sequences |
| 297 | 297 |
| 298 // 3.1 - Unexpected continuation bytes | 298 // 3.1 - Unexpected continuation bytes |
| 299 | 299 |
| 300 // 3.1.1 - First continuation byte 0x80: "\x80" | 300 // 3.1.1 - First continuation byte 0x80: "\x80" |
| 301 { | 301 { |
| 302 const char* src = "\x80"; | 302 const char* src = "\x80"; |
| 303 uint32_t expected[] = { 0x80 }; | 303 uint32_t expected[] = { 0x80 }; |
| 304 uint32_t dst[ARRAY_SIZE(expected)]; | 304 uint32_t dst[ARRAY_SIZE(expected)]; |
| 305 memset(dst, 0, sizeof(dst)); | 305 memset(dst, 0, sizeof(dst)); |
| 306 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 306 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 307 EXPECT(!is_valid); | 307 EXPECT(!is_valid); |
| 308 EXPECT(memcmp(expected, dst, sizeof(expected))); | 308 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 309 } | 309 } |
| 310 | 310 |
| 311 // 3.1.2 - Last continuation byte 0xbf: "\xBF" | 311 // 3.1.2 - Last continuation byte 0xbf: "\xBF" |
| 312 { | 312 { |
| 313 const char* src = "\xBF"; | 313 const char* src = "\xBF"; |
| 314 uint32_t expected[] = { 0xBF }; | 314 uint32_t expected[] = { 0xBF }; |
| 315 uint32_t dst[ARRAY_SIZE(expected)]; | 315 uint32_t dst[ARRAY_SIZE(expected)]; |
| 316 memset(dst, 0, sizeof(dst)); | 316 memset(dst, 0, sizeof(dst)); |
| 317 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 317 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 318 EXPECT(!is_valid); | 318 EXPECT(!is_valid); |
| 319 EXPECT(memcmp(expected, dst, sizeof(expected))); | 319 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 320 } | 320 } |
| 321 | 321 |
| 322 // 3.1.3 - 2 continuation bytes: "\x80\xBF" | 322 // 3.1.3 - 2 continuation bytes: "\x80\xBF" |
| 323 { | 323 { |
| 324 const char* src = "\x80\xBF"; | 324 const char* src = "\x80\xBF"; |
| 325 uint32_t expected[] = { 0x80, 0xBF }; | 325 uint32_t expected[] = { 0x80, 0xBF }; |
| 326 uint32_t dst[ARRAY_SIZE(expected)]; | 326 uint32_t dst[ARRAY_SIZE(expected)]; |
| 327 memset(dst, 0, sizeof(dst)); | 327 memset(dst, 0, sizeof(dst)); |
| 328 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 328 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 329 EXPECT(!is_valid); | 329 EXPECT(!is_valid); |
| 330 EXPECT(memcmp(expected, dst, sizeof(expected))); | 330 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 331 } | 331 } |
| 332 | 332 |
| 333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80" | 333 // 3.1.4 - 3 continuation bytes: "\x80\xBF\x80" |
| 334 { | 334 { |
| 335 const char* src = "\x80\xBF\x80"; | 335 const char* src = "\x80\xBF\x80"; |
| 336 uint32_t expected[] = { 0x80, 0xBF, 0x80 }; | 336 uint32_t expected[] = { 0x80, 0xBF, 0x80 }; |
| 337 uint32_t dst[ARRAY_SIZE(expected)]; | 337 uint32_t dst[ARRAY_SIZE(expected)]; |
| 338 memset(dst, 0, sizeof(dst)); | 338 memset(dst, 0, sizeof(dst)); |
| 339 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 339 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 340 EXPECT(!is_valid); | 340 EXPECT(!is_valid); |
| 341 EXPECT(memcmp(expected, dst, sizeof(expected))); | 341 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 342 } | 342 } |
| 343 | 343 |
| 344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF" | 344 // 3.1.5 - 4 continuation bytes: "\x80\xBF\x80\xBF" |
| 345 { | 345 { |
| 346 const char* src = "\x80\xBF\x80\xBF"; | 346 const char* src = "\x80\xBF\x80\xBF"; |
| 347 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF }; | 347 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF }; |
| 348 uint32_t dst[ARRAY_SIZE(expected)]; | 348 uint32_t dst[ARRAY_SIZE(expected)]; |
| 349 memset(dst, 0, sizeof(dst)); | 349 memset(dst, 0, sizeof(dst)); |
| 350 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 350 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 351 EXPECT(!is_valid); | 351 EXPECT(!is_valid); |
| 352 EXPECT(memcmp(expected, dst, sizeof(expected))); | 352 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 353 } | 353 } |
| 354 | 354 |
| 355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80" | 355 // 3.1.6 - 5 continuation bytes: "\x80\xBF\x80\xBF\x80" |
| 356 { | 356 { |
| 357 const char* src = "\x80\xBF\x80\xBF\x80"; | 357 const char* src = "\x80\xBF\x80\xBF\x80"; |
| 358 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80 }; | 358 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80 }; |
| 359 uint32_t dst[ARRAY_SIZE(expected)]; | 359 uint32_t dst[ARRAY_SIZE(expected)]; |
| 360 memset(dst, 0, sizeof(dst)); | 360 memset(dst, 0, sizeof(dst)); |
| 361 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 361 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 362 EXPECT(!is_valid); | 362 EXPECT(!is_valid); |
| 363 EXPECT(memcmp(expected, dst, sizeof(expected))); | 363 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 364 } | 364 } |
| 365 | 365 |
| 366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF" | 366 // 3.1.7 - 6 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF" |
| 367 { | 367 { |
| 368 const char* src = "\x80\xBF\x80\xBF\x80\xBF"; | 368 const char* src = "\x80\xBF\x80\xBF\x80\xBF"; |
| 369 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF }; | 369 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF }; |
| 370 uint32_t dst[ARRAY_SIZE(expected)]; | 370 uint32_t dst[ARRAY_SIZE(expected)]; |
| 371 memset(dst, 0, sizeof(dst)); | 371 memset(dst, 0, sizeof(dst)); |
| 372 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 372 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 373 EXPECT(!is_valid); | 373 EXPECT(!is_valid); |
| 374 EXPECT(memcmp(expected, dst, sizeof(expected))); | 374 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 375 } | 375 } |
| 376 | 376 |
| 377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80" | 377 // 3.1.8 - 7 continuation bytes: "\x80\xBF\x80\xBF\x80\xBF\x80" |
| 378 { | 378 { |
| 379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80"; | 379 const char* src = "\x80\xBF\x80\xBF\x80\xBF\x80"; |
| 380 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 }; | 380 uint32_t expected[] = { 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF, 0x80 }; |
| 381 uint32_t dst[ARRAY_SIZE(expected)]; | 381 uint32_t dst[ARRAY_SIZE(expected)]; |
| 382 memset(dst, 0, sizeof(dst)); | 382 memset(dst, 0, sizeof(dst)); |
| 383 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 383 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 384 EXPECT(!is_valid); | 384 EXPECT(!is_valid); |
| 385 EXPECT(memcmp(expected, dst, sizeof(expected))); | 385 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 386 } | 386 } |
| 387 | 387 |
| 388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf): | 388 // 3.1.9 - Sequence of all 64 possible continuation bytes (0x80-0xbf): |
| 389 { | 389 { |
| 390 const char* src = "\x80\x81\x82\x83\x84\x85\x86\x87" | 390 const char* src = "\x80\x81\x82\x83\x84\x85\x86\x87" |
| 391 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" | 391 "\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F" |
| 392 "\x90\x91\x92\x93\x94\x95\x96\x97" | 392 "\x90\x91\x92\x93\x94\x95\x96\x97" |
| 393 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" | 393 "\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F" |
| 394 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" | 394 "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7" |
| 395 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" | 395 "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" |
| 396 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" | 396 "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7" |
| 397 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"; | 397 "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"; |
| 398 uint32_t expected[] = { 0x0 }; | 398 uint32_t expected[] = { 0x0 }; |
| 399 uint32_t dst[ARRAY_SIZE(expected)]; | 399 uint32_t dst[ARRAY_SIZE(expected)]; |
| 400 for (size_t i = 0; i < strlen(src); ++i) { | 400 for (size_t i = 0; i < strlen(src); ++i) { |
| 401 memset(dst, 0xFF, sizeof(dst)); | 401 memset(dst, 0xFF, sizeof(dst)); |
| 402 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); | 402 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
| 403 EXPECT(!is_valid); | 403 EXPECT(!is_valid); |
| 404 EXPECT(memcmp(expected, dst, sizeof(expected))); | 404 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 405 } | 405 } |
| 406 } | 406 } |
| 407 | 407 |
| 408 // 3.2 - Lonely start character | 408 // 3.2 - Lonely start character |
| 409 | 409 |
| 410 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each | 410 // 3.2.1 - All 32 first bytes of 2-byte sequences (0xc0-0xdf), each |
| 411 // followed by a space character: | 411 // followed by a space character: |
| 412 { | 412 { |
| 413 const char* src = "\xC0\x20\xC1\x20\xC2\x20\xC3\x20" | 413 const char* src = "\xC0\x20\xC1\x20\xC2\x20\xC3\x20" |
| 414 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20" | 414 "\xC4\x20\xC5\x20\xC6\x20\xC7\x20" |
| 415 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20" | 415 "\xC8\x20\xC9\x20\xCA\x20\xCB\x20" |
| 416 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20" | 416 "\xCC\x20\xCD\x20\xCE\x20\xCF\x20" |
| 417 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20" | 417 "\xD0\x20\xD1\x20\xD2\x20\xD3\x20" |
| 418 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20" | 418 "\xD4\x20\xD5\x20\xD6\x20\xD7\x20" |
| 419 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20" | 419 "\xD8\x20\xD9\x20\xDA\x20\xDB\x20" |
| 420 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20"; | 420 "\xDC\x20\xDD\x20\xDE\x20\xDF\x20"; |
| 421 uint32_t expected[] = { 0x0 }; | 421 uint32_t expected[] = { 0x0 }; |
| 422 uint32_t dst[ARRAY_SIZE(expected)]; | 422 uint32_t dst[ARRAY_SIZE(expected)]; |
| 423 for (size_t i = 0; i < strlen(src); i += 2) { | 423 for (size_t i = 0; i < strlen(src); i += 2) { |
| 424 memset(dst, 0xFF, sizeof(dst)); | 424 memset(dst, 0xFF, sizeof(dst)); |
| 425 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); | 425 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
| 426 EXPECT(!is_valid); | 426 EXPECT(!is_valid); |
| 427 EXPECT(memcmp(expected, dst, sizeof(expected))); | 427 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 428 } | 428 } |
| 429 } | 429 } |
| 430 | 430 |
| 431 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each | 431 // 3.2.2 - All 16 first bytes of 3-byte sequences (0xe0-0xef), each |
| 432 // followed by a space character: | 432 // followed by a space character: |
| 433 { | 433 { |
| 434 const char* src = "\xE0\x20\xE1\x20\xE2\x20\xE3\x20" | 434 const char* src = "\xE0\x20\xE1\x20\xE2\x20\xE3\x20" |
| 435 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20" | 435 "\xE4\x20\xE5\x20\xE6\x20\xE7\x20" |
| 436 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20" | 436 "\xE8\x20\xE9\x20\xEA\x20\xEB\x20" |
| 437 "\xEC\x20\xED\x20\xEE\x20\xEF\x20"; | 437 "\xEC\x20\xED\x20\xEE\x20\xEF\x20"; |
| 438 uint32_t expected[] = { 0x0 }; | 438 uint32_t expected[] = { 0x0 }; |
| 439 uint32_t dst[ARRAY_SIZE(expected)]; | 439 uint32_t dst[ARRAY_SIZE(expected)]; |
| 440 for (size_t i = 0; i < strlen(src); i += 2) { | 440 for (size_t i = 0; i < strlen(src); i += 2) { |
| 441 memset(dst, 0xFF, sizeof(dst)); | 441 memset(dst, 0xFF, sizeof(dst)); |
| 442 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); | 442 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
| 443 EXPECT(!is_valid); | 443 EXPECT(!is_valid); |
| 444 EXPECT(memcmp(expected, dst, sizeof(expected))); | 444 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 445 } | 445 } |
| 446 } | 446 } |
| 447 | 447 |
| 448 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each | 448 // 3.2.3 - All 8 first bytes of 4-byte sequences (0xf0-0xf7), each |
| 449 // followed by a space character: | 449 // followed by a space character: |
| 450 { | 450 { |
| 451 const char* src = "\xF0\x20\xF1\x20\xF2\x20\xF3\x20" | 451 const char* src = "\xF0\x20\xF1\x20\xF2\x20\xF3\x20" |
| 452 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20"; | 452 "\xF4\x20\xF5\x20\xF6\x20\xF7\x20"; |
| 453 uint32_t expected[] = { 0x0 }; | 453 uint32_t expected[] = { 0x0 }; |
| 454 uint32_t dst[ARRAY_SIZE(expected)]; | 454 uint32_t dst[ARRAY_SIZE(expected)]; |
| 455 for (size_t i = 0; i < strlen(src); i += 2) { | 455 for (size_t i = 0; i < strlen(src); i += 2) { |
| 456 memset(dst, 0xFF, sizeof(dst)); | 456 memset(dst, 0xFF, sizeof(dst)); |
| 457 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); | 457 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
| 458 EXPECT(!is_valid); | 458 EXPECT(!is_valid); |
| 459 EXPECT(memcmp(expected, dst, sizeof(expected))); | 459 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 460 } | 460 } |
| 461 } | 461 } |
| 462 | 462 |
| 463 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each | 463 // 3.2.4 - All 4 first bytes of 5-byte sequences (0xf8-0xfb), each |
| 464 // followed by a space character: | 464 // followed by a space character: |
| 465 { | 465 { |
| 466 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20"; | 466 const char* src = "\xF8\x20\xF9\x20\xFA\x20\xFB\x20"; |
| 467 uint32_t expected[] = { 0x0 }; | 467 uint32_t expected[] = { 0x0 }; |
| 468 uint32_t dst[ARRAY_SIZE(expected)]; | 468 uint32_t dst[ARRAY_SIZE(expected)]; |
| 469 for (size_t i = 0; i < strlen(src); i += 2) { | 469 for (size_t i = 0; i < strlen(src); i += 2) { |
| 470 memset(dst, 0xFF, sizeof(dst)); | 470 memset(dst, 0xFF, sizeof(dst)); |
| 471 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); | 471 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
| 472 EXPECT(!is_valid); | 472 EXPECT(!is_valid); |
| 473 EXPECT(memcmp(expected, dst, sizeof(expected))); | 473 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 474 } | 474 } |
| 475 } | 475 } |
| 476 | 476 |
| 477 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each | 477 // 3.2.5 - All 2 first bytes of 6-byte sequences (0xfc-0xfd), each |
| 478 // followed by a space character: | 478 // followed by a space character: |
| 479 { | 479 { |
| 480 const char* src = "\xFC\x20\xFD\x20"; | 480 const char* src = "\xFC\x20\xFD\x20"; |
| 481 uint32_t expected[] = { 0x0 }; | 481 uint32_t expected[] = { 0x0 }; |
| 482 uint32_t dst[ARRAY_SIZE(expected)]; | 482 uint32_t dst[ARRAY_SIZE(expected)]; |
| 483 for (size_t i = 0; i < strlen(src); i += 2) { | 483 for (size_t i = 0; i < strlen(src); i += 2) { |
| 484 memset(dst, 0xFF, sizeof(dst)); | 484 memset(dst, 0xFF, sizeof(dst)); |
| 485 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); | 485 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], dst, ARRAY_SIZE(dst)); |
| 486 EXPECT(!is_valid); | 486 EXPECT(!is_valid); |
| 487 EXPECT(memcmp(expected, dst, sizeof(expected))); | 487 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 488 } | 488 } |
| 489 } | 489 } |
| 490 | 490 |
| 491 // 3.3 - Sequences with last continuation byte missing | 491 // 3.3 - Sequences with last continuation byte missing |
| 492 | 492 |
| 493 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0" | 493 // 3.3.1 - 2-byte sequence with last byte missing (U+0000): "\xC0" |
| 494 { | 494 { |
| 495 const char* src = "\xC0"; | 495 const char* src = "\xC0"; |
| 496 uint32_t expected[] = { 0x0 }; | 496 uint32_t expected[] = { 0x0 }; |
| 497 uint32_t dst[ARRAY_SIZE(expected)]; | 497 uint32_t dst[ARRAY_SIZE(expected)]; |
| 498 memset(dst, 0xFF, sizeof(dst)); | 498 memset(dst, 0xFF, sizeof(dst)); |
| 499 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 499 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 500 EXPECT(!is_valid); | 500 EXPECT(!is_valid); |
| 501 EXPECT(memcmp(expected, dst, sizeof(expected))); | 501 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 502 } | 502 } |
| 503 | 503 |
| 504 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80" | 504 // 3.3.2 - 3-byte sequence with last byte missing (U+0000): "\xE0\x80" |
| 505 { | 505 { |
| 506 const char* src = "\xE0\x80"; | 506 const char* src = "\xE0\x80"; |
| 507 uint32_t expected[] = { 0x0 }; | 507 uint32_t expected[] = { 0x0 }; |
| 508 uint32_t dst[ARRAY_SIZE(expected)]; | 508 uint32_t dst[ARRAY_SIZE(expected)]; |
| 509 memset(dst, 0xFF, sizeof(dst)); | 509 memset(dst, 0xFF, sizeof(dst)); |
| 510 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 510 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 511 EXPECT(!is_valid); | 511 EXPECT(!is_valid); |
| 512 EXPECT(memcmp(expected, dst, sizeof(expected))); | 512 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 513 } | 513 } |
| 514 | 514 |
| 515 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80" | 515 // 3.3.3 - 4-byte sequence with last byte missing (U+0000): "\xF0\x80\x80" |
| 516 { | 516 { |
| 517 const char* src = "\xF0\x80\x80"; | 517 const char* src = "\xF0\x80\x80"; |
| 518 uint32_t expected[] = { 0x0 }; | 518 uint32_t expected[] = { 0x0 }; |
| 519 uint32_t dst[ARRAY_SIZE(expected)]; | 519 uint32_t dst[ARRAY_SIZE(expected)]; |
| 520 memset(dst, 0xFF, sizeof(dst)); | 520 memset(dst, 0xFF, sizeof(dst)); |
| 521 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 521 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 522 EXPECT(!is_valid); | 522 EXPECT(!is_valid); |
| 523 EXPECT(memcmp(expected, dst, sizeof(expected))); | 523 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 524 } | 524 } |
| 525 | 525 |
| 526 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80" | 526 // 3.3.4 - 5-byte sequence with last byte missing (U+0000): "\xF8\x80\x80\x80" |
| 527 { | 527 { |
| 528 const char* src = "\xF8\x80\x80\x80"; | 528 const char* src = "\xF8\x80\x80\x80"; |
| 529 uint32_t expected[] = { 0x0 }; | 529 uint32_t expected[] = { 0x0 }; |
| 530 uint32_t dst[ARRAY_SIZE(expected)]; | 530 uint32_t dst[ARRAY_SIZE(expected)]; |
| 531 memset(dst, 0xFF, sizeof(dst)); | 531 memset(dst, 0xFF, sizeof(dst)); |
| 532 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 532 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 533 EXPECT(!is_valid); | 533 EXPECT(!is_valid); |
| 534 EXPECT(memcmp(expected, dst, sizeof(expected))); | 534 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 535 } | 535 } |
| 536 | 536 |
| 537 // 3.3.5 - 6-byte sequence with last byte missing (U+0000): | 537 // 3.3.5 - 6-byte sequence with last byte missing (U+0000): |
| 538 // "\xFC\x80\x80\x80\x80" | 538 // "\xFC\x80\x80\x80\x80" |
| 539 { | 539 { |
| 540 const char* src = "\xFC\x80\x80\x80\x80"; | 540 const char* src = "\xFC\x80\x80\x80\x80"; |
| 541 uint32_t expected[] = { 0x0 }; | 541 uint32_t expected[] = { 0x0 }; |
| 542 uint32_t dst[ARRAY_SIZE(expected)]; | 542 uint32_t dst[ARRAY_SIZE(expected)]; |
| 543 memset(dst, 0xFF, sizeof(dst)); | 543 memset(dst, 0xFF, sizeof(dst)); |
| 544 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 544 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 545 EXPECT(!is_valid); | 545 EXPECT(!is_valid); |
| 546 EXPECT(memcmp(expected, dst, sizeof(expected))); | 546 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 547 } | 547 } |
| 548 | 548 |
| 549 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF" | 549 // 3.3.6 - 2-byte sequence with last byte missing (U-000007FF): "\xDF" |
| 550 { | 550 { |
| 551 const char* src = "\xDF"; | 551 const char* src = "\xDF"; |
| 552 uint32_t expected[] = { 0x0 }; | 552 uint32_t expected[] = { 0x0 }; |
| 553 uint32_t dst[ARRAY_SIZE(expected)]; | 553 uint32_t dst[ARRAY_SIZE(expected)]; |
| 554 memset(dst, 0xFF, sizeof(dst)); | 554 memset(dst, 0xFF, sizeof(dst)); |
| 555 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 555 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 556 EXPECT(!is_valid); | 556 EXPECT(!is_valid); |
| 557 EXPECT(memcmp(expected, dst, sizeof(expected))); | 557 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 558 } | 558 } |
| 559 | 559 |
| 560 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF" | 560 // 3.3.7 - 3-byte sequence with last byte missing (U-0000FFFF): "\xEF\xBF" |
| 561 { | 561 { |
| 562 const char* src = "\xEF\xBF"; | 562 const char* src = "\xEF\xBF"; |
| 563 uint32_t expected[] = { 0x0 }; | 563 uint32_t expected[] = { 0x0 }; |
| 564 uint32_t dst[ARRAY_SIZE(expected)]; | 564 uint32_t dst[ARRAY_SIZE(expected)]; |
| 565 memset(dst, 0xFF, sizeof(dst)); | 565 memset(dst, 0xFF, sizeof(dst)); |
| 566 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 566 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 567 EXPECT(!is_valid); | 567 EXPECT(!is_valid); |
| 568 EXPECT(memcmp(expected, dst, sizeof(expected))); | 568 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 569 } | 569 } |
| 570 | 570 |
| 571 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF" | 571 // 3.3.8 - 4-byte sequence with last byte missing (U-001FFFFF): "\xF7\xBF\xBF" |
| 572 { | 572 { |
| 573 const char* src = "\xF7\xBF\xBF"; | 573 const char* src = "\xF7\xBF\xBF"; |
| 574 uint32_t expected[] = { 0x0 }; | 574 uint32_t expected[] = { 0x0 }; |
| 575 uint32_t dst[ARRAY_SIZE(expected)]; | 575 uint32_t dst[ARRAY_SIZE(expected)]; |
| 576 memset(dst, 0xFF, sizeof(dst)); | 576 memset(dst, 0xFF, sizeof(dst)); |
| 577 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 577 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 578 EXPECT(!is_valid); | 578 EXPECT(!is_valid); |
| 579 EXPECT(memcmp(expected, dst, sizeof(expected))); | 579 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 580 } | 580 } |
| 581 | 581 |
| 582 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF): | 582 // 3.3.9 - 5-byte sequence with last byte missing (U-03FFFFFF): |
| 583 // "\xFB\xBF\xBF\xBF" | 583 // "\xFB\xBF\xBF\xBF" |
| 584 { | 584 { |
| 585 const char* src = "\xFB\xBF\xBF\xBF"; | 585 const char* src = "\xFB\xBF\xBF\xBF"; |
| 586 uint32_t expected[] = { 0x0 }; | 586 uint32_t expected[] = { 0x0 }; |
| 587 uint32_t dst[ARRAY_SIZE(expected)]; | 587 uint32_t dst[ARRAY_SIZE(expected)]; |
| 588 memset(dst, 0xFF, sizeof(dst)); | 588 memset(dst, 0xFF, sizeof(dst)); |
| 589 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 589 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 590 EXPECT(!is_valid); | 590 EXPECT(!is_valid); |
| 591 EXPECT(memcmp(expected, dst, sizeof(expected))); | 591 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 592 } | 592 } |
| 593 | 593 |
| 594 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF): | 594 // 3.3.10 - 6-byte sequence with last byte missing (U-7FFFFFFF): |
| 595 // "\xFD\xBF\xBF\xBF\xBF" | 595 // "\xFD\xBF\xBF\xBF\xBF" |
| 596 { | 596 { |
| 597 const char* src = "\xFD\xBF\xBF\xBF\xBF"; | 597 const char* src = "\xFD\xBF\xBF\xBF\xBF"; |
| 598 uint32_t expected[] = { 0x0 }; | 598 uint32_t expected[] = { 0x0 }; |
| 599 uint32_t dst[ARRAY_SIZE(expected)]; | 599 uint32_t dst[ARRAY_SIZE(expected)]; |
| 600 memset(dst, 0xFF, sizeof(dst)); | 600 memset(dst, 0xFF, sizeof(dst)); |
| 601 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 601 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 602 EXPECT(!is_valid); | 602 EXPECT(!is_valid); |
| 603 EXPECT(memcmp(expected, dst, sizeof(expected))); | 603 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 604 } | 604 } |
| 605 | 605 |
| 606 // 3.4 - Concatenation of incomplete sequences | 606 // 3.4 - Concatenation of incomplete sequences |
| 607 { | 607 { |
| 608 const char* src = "\xC0\xE0\x80\xF0\x80\x80" | 608 const char* src = "\xC0\xE0\x80\xF0\x80\x80" |
| 609 "\xF8\x80\x80\x80\xFC\x80" | 609 "\xF8\x80\x80\x80\xFC\x80" |
| 610 "\x80\x80\x80\xDF\xEF\xBF" | 610 "\x80\x80\x80\xDF\xEF\xBF" |
| 611 "\xF7\xBF\xBF\xFB\xBF\xBF" | 611 "\xF7\xBF\xBF\xFB\xBF\xBF" |
| 612 "\xBF\xFD\xBF\xBF\xBF\xBF"; | 612 "\xBF\xFD\xBF\xBF\xBF\xBF"; |
| 613 uint32_t expected[] = { 0x0 }; | 613 uint32_t expected[] = { 0x0 }; |
| 614 uint32_t dst[ARRAY_SIZE(expected)]; | 614 uint32_t dst[ARRAY_SIZE(expected)]; |
| 615 for (size_t i = 0; i < strlen(src); ++i) { | 615 for (size_t i = 0; i < strlen(src); ++i) { |
| 616 for (size_t j = 1; j < (strlen(src) - i); ++j) { | 616 for (size_t j = 1; j < (strlen(src) - i); ++j) { |
| 617 memset(dst, 0xFF, sizeof(dst)); | 617 memset(dst, 0xFF, sizeof(dst)); |
| 618 bool is_valid = Utf8::Decode(&src[i], dst, ARRAY_SIZE(dst)); | 618 bool is_valid = Utf8::DecodeCStringToUTF32(&src[i], |
| 619 dst, ARRAY_SIZE(dst)); |
| 619 EXPECT(!is_valid); | 620 EXPECT(!is_valid); |
| 620 EXPECT(memcmp(expected, dst, sizeof(expected))); | 621 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 621 } | 622 } |
| 622 } | 623 } |
| 623 } | 624 } |
| 624 | 625 |
| 625 // 3.5 - Impossible bytes | 626 // 3.5 - Impossible bytes |
| 626 | 627 |
| 627 // 3.5.1 - fe = "\xFE" | 628 // 3.5.1 - fe = "\xFE" |
| 628 { | 629 { |
| 629 const char* src = "\xFE"; | 630 const char* src = "\xFE"; |
| 630 uint32_t expected[] = { 0xFE }; | 631 uint32_t expected[] = { 0xFE }; |
| 631 uint32_t dst[ARRAY_SIZE(expected)]; | 632 uint32_t dst[ARRAY_SIZE(expected)]; |
| 632 memset(dst, 0, sizeof(dst)); | 633 memset(dst, 0, sizeof(dst)); |
| 633 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 634 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 634 EXPECT(!is_valid); | 635 EXPECT(!is_valid); |
| 635 EXPECT(memcmp(expected, dst, sizeof(expected))); | 636 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 636 } | 637 } |
| 637 | 638 |
| 638 // 3.5.2 - ff = "\xFF" | 639 // 3.5.2 - ff = "\xFF" |
| 639 { | 640 { |
| 640 const char* src = "\xFF"; | 641 const char* src = "\xFF"; |
| 641 uint32_t expected[] = { 0xFF }; | 642 uint32_t expected[] = { 0xFF }; |
| 642 uint32_t dst[ARRAY_SIZE(expected)]; | 643 uint32_t dst[ARRAY_SIZE(expected)]; |
| 643 memset(dst, 0, sizeof(dst)); | 644 memset(dst, 0, sizeof(dst)); |
| 644 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 645 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 645 EXPECT(!is_valid); | 646 EXPECT(!is_valid); |
| 646 EXPECT(memcmp(expected, dst, sizeof(expected))); | 647 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 647 } | 648 } |
| 648 | 649 |
| 649 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF" | 650 // 3.5.3 - fe fe ff ff = "\xFE\xFE\xFF\xFF" |
| 650 { | 651 { |
| 651 const char* src = "\xFE\xFE\xFF\xFF"; | 652 const char* src = "\xFE\xFE\xFF\xFF"; |
| 652 uint32_t expected[] = { 0xFF }; | 653 uint32_t expected[] = { 0xFF }; |
| 653 uint32_t dst[ARRAY_SIZE(expected)]; | 654 uint32_t dst[ARRAY_SIZE(expected)]; |
| 654 memset(dst, 0, sizeof(dst)); | 655 memset(dst, 0, sizeof(dst)); |
| 655 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 656 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 656 EXPECT(!is_valid); | 657 EXPECT(!is_valid); |
| 657 EXPECT(memcmp(expected, dst, sizeof(expected))); | 658 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 658 } | 659 } |
| 659 | 660 |
| 660 // 4 - Overlong sequences | 661 // 4 - Overlong sequences |
| 661 | 662 |
| 662 // 4.1 - Examples of an overlong ASCII character | 663 // 4.1 - Examples of an overlong ASCII character |
| 663 | 664 |
| 664 // 4.1.1 - U+002F = c0 af = "\xC0\xAF" | 665 // 4.1.1 - U+002F = c0 af = "\xC0\xAF" |
| 665 { | 666 { |
| 666 const char* src = "\xC0\xAF"; | 667 const char* src = "\xC0\xAF"; |
| 667 uint32_t expected[] = { 0x2F }; | 668 uint32_t expected[] = { 0x2F }; |
| 668 uint32_t dst[ARRAY_SIZE(expected)]; | 669 uint32_t dst[ARRAY_SIZE(expected)]; |
| 669 memset(dst, 0, sizeof(dst)); | 670 memset(dst, 0, sizeof(dst)); |
| 670 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 671 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 671 EXPECT(!is_valid); | 672 EXPECT(!is_valid); |
| 672 EXPECT(memcmp(expected, dst, sizeof(expected))); | 673 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 673 } | 674 } |
| 674 | 675 |
| 675 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF" | 676 // 4.1.2 - U+002F = e0 80 af = "\xE0\x80\xAF" |
| 676 { | 677 { |
| 677 const char* src = "\xE0\x80\xAF"; | 678 const char* src = "\xE0\x80\xAF"; |
| 678 uint32_t expected[] = { 0x2F }; | 679 uint32_t expected[] = { 0x2F }; |
| 679 uint32_t dst[ARRAY_SIZE(expected)]; | 680 uint32_t dst[ARRAY_SIZE(expected)]; |
| 680 memset(dst, 0, sizeof(dst)); | 681 memset(dst, 0, sizeof(dst)); |
| 681 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 682 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 682 EXPECT(!is_valid); | 683 EXPECT(!is_valid); |
| 683 EXPECT(memcmp(expected, dst, sizeof(expected))); | 684 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 684 } | 685 } |
| 685 | 686 |
| 686 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF" | 687 // 4.1.3 - U+002F = f0 80 80 af = "\xF0\x80\x80\xAF" |
| 687 { | 688 { |
| 688 const char* src = "\xF0\x80\x80\xAF"; | 689 const char* src = "\xF0\x80\x80\xAF"; |
| 689 uint32_t expected[] = { 0x2F }; | 690 uint32_t expected[] = { 0x2F }; |
| 690 uint32_t dst[ARRAY_SIZE(expected)]; | 691 uint32_t dst[ARRAY_SIZE(expected)]; |
| 691 memset(dst, 0, sizeof(dst)); | 692 memset(dst, 0, sizeof(dst)); |
| 692 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 693 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 693 EXPECT(!is_valid); | 694 EXPECT(!is_valid); |
| 694 EXPECT(memcmp(expected, dst, sizeof(expected))); | 695 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 695 } | 696 } |
| 696 | 697 |
| 697 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF" | 698 // 4.1.4 - U+002F = f8 80 80 80 af = "\xF8\x80\x80\x80\xAF" |
| 698 { | 699 { |
| 699 const char* src = "\xF8\x80\x80\x80\xAF"; | 700 const char* src = "\xF8\x80\x80\x80\xAF"; |
| 700 uint32_t expected[] = { 0x2F }; | 701 uint32_t expected[] = { 0x2F }; |
| 701 uint32_t dst[ARRAY_SIZE(expected)]; | 702 uint32_t dst[ARRAY_SIZE(expected)]; |
| 702 memset(dst, 0, sizeof(dst)); | 703 memset(dst, 0, sizeof(dst)); |
| 703 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 704 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 704 EXPECT(!is_valid); | 705 EXPECT(!is_valid); |
| 705 EXPECT(memcmp(expected, dst, sizeof(expected))); | 706 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 706 } | 707 } |
| 707 | 708 |
| 708 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF" | 709 // 4.1.5 - U+002F = fc 80 80 80 80 af = "\xFC\x80\x80\x80\x80\xAF" |
| 709 { | 710 { |
| 710 const char* src = "\xFC\x80\x80\x80\x80\xAF"; | 711 const char* src = "\xFC\x80\x80\x80\x80\xAF"; |
| 711 uint32_t expected[] = { 0x2F }; | 712 uint32_t expected[] = { 0x2F }; |
| 712 uint32_t dst[ARRAY_SIZE(expected)]; | 713 uint32_t dst[ARRAY_SIZE(expected)]; |
| 713 memset(dst, 0, sizeof(dst)); | 714 memset(dst, 0, sizeof(dst)); |
| 714 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 715 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 715 EXPECT(!is_valid); | 716 EXPECT(!is_valid); |
| 716 EXPECT(memcmp(expected, dst, sizeof(expected))); | 717 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 717 } | 718 } |
| 718 | 719 |
| 719 // 4.2 Maximum overlong sequences | 720 // 4.2 Maximum overlong sequences |
| 720 | 721 |
| 721 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF" | 722 // 4.2.1 - U-0000007F = c1 bf = "\xC1\xBF" |
| 722 { | 723 { |
| 723 const char* src = "\xC1\xBF"; | 724 const char* src = "\xC1\xBF"; |
| 724 uint32_t expected[] = { 0x7F }; | 725 uint32_t expected[] = { 0x7F }; |
| 725 uint32_t dst[ARRAY_SIZE(expected)]; | 726 uint32_t dst[ARRAY_SIZE(expected)]; |
| 726 memset(dst, 0, sizeof(dst)); | 727 memset(dst, 0, sizeof(dst)); |
| 727 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 728 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 728 EXPECT(!is_valid); | 729 EXPECT(!is_valid); |
| 729 EXPECT(memcmp(expected, dst, sizeof(expected))); | 730 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 730 } | 731 } |
| 731 | 732 |
| 732 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF" | 733 // 4.2.2 U+000007FF = e0 9f bf = "\xE0\x9F\xBF" |
| 733 { | 734 { |
| 734 const char* src = "\xE0\x9F\xBF"; | 735 const char* src = "\xE0\x9F\xBF"; |
| 735 uint32_t expected[] = { 0x7FF }; | 736 uint32_t expected[] = { 0x7FF }; |
| 736 uint32_t dst[ARRAY_SIZE(expected)]; | 737 uint32_t dst[ARRAY_SIZE(expected)]; |
| 737 memset(dst, 0, sizeof(dst)); | 738 memset(dst, 0, sizeof(dst)); |
| 738 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 739 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 739 EXPECT(!is_valid); | 740 EXPECT(!is_valid); |
| 740 EXPECT(memcmp(expected, dst, sizeof(expected))); | 741 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 741 } | 742 } |
| 742 | 743 |
| 743 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF" | 744 // 4.2.3 - U+0000FFFF = f0 8f bf bf = "\xF0\x8F\xBF\xBF" |
| 744 { | 745 { |
| 745 const char* src = "\xF0\x8F\xBF\xBF"; | 746 const char* src = "\xF0\x8F\xBF\xBF"; |
| 746 uint32_t expected[] = { 0xFFFF }; | 747 uint32_t expected[] = { 0xFFFF }; |
| 747 uint32_t dst[ARRAY_SIZE(expected)]; | 748 uint32_t dst[ARRAY_SIZE(expected)]; |
| 748 memset(dst, 0, sizeof(dst)); | 749 memset(dst, 0, sizeof(dst)); |
| 749 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 750 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 750 EXPECT(!is_valid); | 751 EXPECT(!is_valid); |
| 751 EXPECT(memcmp(expected, dst, sizeof(expected))); | 752 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 752 } | 753 } |
| 753 | 754 |
| 754 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF" | 755 // 4.2.4 U-001FFFFF = f8 87 bf bf bf = "\xF8\x87\xBF\xBF\xBF" |
| 755 { | 756 { |
| 756 const char* src = "\xF8\x87\xBF\xBF\xBF"; | 757 const char* src = "\xF8\x87\xBF\xBF\xBF"; |
| 757 uint32_t expected[] = { 0x1FFFFF }; | 758 uint32_t expected[] = { 0x1FFFFF }; |
| 758 uint32_t dst[ARRAY_SIZE(expected)]; | 759 uint32_t dst[ARRAY_SIZE(expected)]; |
| 759 memset(dst, 0, sizeof(dst)); | 760 memset(dst, 0, sizeof(dst)); |
| 760 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 761 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 761 EXPECT(!is_valid); | 762 EXPECT(!is_valid); |
| 762 EXPECT(memcmp(expected, dst, sizeof(expected))); | 763 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 763 } | 764 } |
| 764 | 765 |
| 765 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF" | 766 // 4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "\xFC\x83\xBF\xBF\xBF\xBF" |
| 766 { | 767 { |
| 767 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF"; | 768 const char* src = "\xFC\x83\xBF\xBF\xBF\xBF"; |
| 768 uint32_t expected[] = { 0x3FFFFFF }; | 769 uint32_t expected[] = { 0x3FFFFFF }; |
| 769 uint32_t dst[ARRAY_SIZE(expected)]; | 770 uint32_t dst[ARRAY_SIZE(expected)]; |
| 770 memset(dst, 0, sizeof(dst)); | 771 memset(dst, 0, sizeof(dst)); |
| 771 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 772 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 772 EXPECT(!is_valid); | 773 EXPECT(!is_valid); |
| 773 EXPECT(memcmp(expected, dst, sizeof(expected))); | 774 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 774 } | 775 } |
| 775 | 776 |
| 776 // 4.3 - Overlong representation of the NUL character | 777 // 4.3 - Overlong representation of the NUL character |
| 777 | 778 |
| 778 // 4.3.1 - U+0000 = "\xC0\x80" | 779 // 4.3.1 - U+0000 = "\xC0\x80" |
| 779 { | 780 { |
| 780 const char* src = "\xC0\x80"; | 781 const char* src = "\xC0\x80"; |
| 781 uint32_t expected[] = { 0x0 }; | 782 uint32_t expected[] = { 0x0 }; |
| 782 uint32_t dst[ARRAY_SIZE(expected)]; | 783 uint32_t dst[ARRAY_SIZE(expected)]; |
| 783 memset(dst, 0xFF, sizeof(dst)); | 784 memset(dst, 0xFF, sizeof(dst)); |
| 784 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 785 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 785 EXPECT(!is_valid); | 786 EXPECT(!is_valid); |
| 786 EXPECT(memcmp(expected, dst, sizeof(expected))); | 787 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 787 } | 788 } |
| 788 | 789 |
| 789 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80" | 790 // 4.3.2 U+0000 = e0 80 80 = "\xE0\x80\x80" |
| 790 { | 791 { |
| 791 const char* src = "\xE0\x80\x80"; | 792 const char* src = "\xE0\x80\x80"; |
| 792 uint32_t expected[] = { 0x0 }; | 793 uint32_t expected[] = { 0x0 }; |
| 793 uint32_t dst[ARRAY_SIZE(expected)]; | 794 uint32_t dst[ARRAY_SIZE(expected)]; |
| 794 memset(dst, 0xFF, sizeof(dst)); | 795 memset(dst, 0xFF, sizeof(dst)); |
| 795 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 796 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 796 EXPECT(!is_valid); | 797 EXPECT(!is_valid); |
| 797 EXPECT(memcmp(expected, dst, sizeof(expected))); | 798 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 798 } | 799 } |
| 799 | 800 |
| 800 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80" | 801 // 4.3.3 U+0000 = f0 80 80 80 = "\xF0\x80\x80\x80" |
| 801 { | 802 { |
| 802 const char* src = "\xF0\x80\x80\x80"; | 803 const char* src = "\xF0\x80\x80\x80"; |
| 803 uint32_t expected[] = { 0x0 }; | 804 uint32_t expected[] = { 0x0 }; |
| 804 uint32_t dst[ARRAY_SIZE(expected)]; | 805 uint32_t dst[ARRAY_SIZE(expected)]; |
| 805 memset(dst, 0xFF, sizeof(dst)); | 806 memset(dst, 0xFF, sizeof(dst)); |
| 806 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 807 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 807 EXPECT(!is_valid); | 808 EXPECT(!is_valid); |
| 808 EXPECT(memcmp(expected, dst, sizeof(expected))); | 809 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 809 } | 810 } |
| 810 | 811 |
| 811 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80" | 812 // 4.3.4 U+0000 = f8 80 80 80 80 = "\xF8\x80\x80\x80\x80" |
| 812 { | 813 { |
| 813 const char* src = "\xF8\x80\x80\x80\x80"; | 814 const char* src = "\xF8\x80\x80\x80\x80"; |
| 814 uint32_t expected[] = { 0x0 }; | 815 uint32_t expected[] = { 0x0 }; |
| 815 uint32_t dst[ARRAY_SIZE(expected)]; | 816 uint32_t dst[ARRAY_SIZE(expected)]; |
| 816 memset(dst, 0xFF, sizeof(dst)); | 817 memset(dst, 0xFF, sizeof(dst)); |
| 817 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 818 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 818 EXPECT(!is_valid); | 819 EXPECT(!is_valid); |
| 819 EXPECT(memcmp(expected, dst, sizeof(expected))); | 820 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 820 } | 821 } |
| 821 | 822 |
| 822 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80" | 823 // 4.3.5 U+0000 = fc 80 80 80 80 80 = "\xFC\x80\x80\x80\x80\x80" |
| 823 { | 824 { |
| 824 const char* src = "\xFC\x80\x80\x80\x80\x80"; | 825 const char* src = "\xFC\x80\x80\x80\x80\x80"; |
| 825 uint32_t expected[] = { 0x0 }; | 826 uint32_t expected[] = { 0x0 }; |
| 826 uint32_t dst[ARRAY_SIZE(expected)]; | 827 uint32_t dst[ARRAY_SIZE(expected)]; |
| 827 memset(dst, 0xFF, sizeof(dst)); | 828 memset(dst, 0xFF, sizeof(dst)); |
| 828 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 829 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 829 EXPECT(!is_valid); | 830 EXPECT(!is_valid); |
| 830 EXPECT(memcmp(expected, dst, sizeof(expected))); | 831 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 831 } | 832 } |
| 832 | 833 |
| 833 // 5.1 - Single UTF-16 surrogates | 834 // 5.1 - Single UTF-16 surrogates |
| 834 | 835 |
| 835 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80" | 836 // 5.1.1 - U+D800 = ed a0 80 = "\xED\xA0\x80" |
| 836 { | 837 { |
| 837 const char* src = "\xED\xA0\x80"; | 838 const char* src = "\xED\xA0\x80"; |
| 838 uint32_t expected[] = { 0xD800 }; | 839 uint32_t expected[] = { 0xD800 }; |
| 839 uint32_t dst[ARRAY_SIZE(expected)]; | 840 uint32_t dst[ARRAY_SIZE(expected)]; |
| 840 memset(dst, 0, sizeof(dst)); | 841 memset(dst, 0, sizeof(dst)); |
| 841 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 842 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 842 EXPECT(!is_valid); | 843 EXPECT(!is_valid); |
| 843 EXPECT(memcmp(expected, dst, sizeof(expected))); | 844 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 844 } | 845 } |
| 845 | 846 |
| 846 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF" | 847 // 5.1.2 - U+DB7F = ed ad bf = "\xED\xAD\xBF" |
| 847 { | 848 { |
| 848 const char* src = "\xED\xAD\xBF"; | 849 const char* src = "\xED\xAD\xBF"; |
| 849 uint32_t expected[] = { 0xDB7F }; | 850 uint32_t expected[] = { 0xDB7F }; |
| 850 uint32_t dst[ARRAY_SIZE(expected)]; | 851 uint32_t dst[ARRAY_SIZE(expected)]; |
| 851 memset(dst, 0, sizeof(dst)); | 852 memset(dst, 0, sizeof(dst)); |
| 852 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 853 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 853 EXPECT(!is_valid); | 854 EXPECT(!is_valid); |
| 854 EXPECT(memcmp(expected, dst, sizeof(expected))); | 855 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 855 } | 856 } |
| 856 | 857 |
| 857 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80" | 858 // 5.1.3 - U+DB80 = ed ae 80 = "\xED\xAE\x80" |
| 858 { | 859 { |
| 859 const char* src = "\xED\xAE\x80"; | 860 const char* src = "\xED\xAE\x80"; |
| 860 uint32_t expected[] = { 0xDB80 }; | 861 uint32_t expected[] = { 0xDB80 }; |
| 861 uint32_t dst[ARRAY_SIZE(expected)]; | 862 uint32_t dst[ARRAY_SIZE(expected)]; |
| 862 memset(dst, 0, sizeof(dst)); | 863 memset(dst, 0, sizeof(dst)); |
| 863 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 864 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 864 EXPECT(!is_valid); | 865 EXPECT(!is_valid); |
| 865 EXPECT(memcmp(expected, dst, sizeof(expected))); | 866 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 866 } | 867 } |
| 867 | 868 |
| 868 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF" | 869 // 5.1.4 - U+DBFF = ed af bf = "\xED\xAF\xBF" |
| 869 { | 870 { |
| 870 const char* src = "\xED\xAF\xBF"; | 871 const char* src = "\xED\xAF\xBF"; |
| 871 uint32_t expected[] = { 0xDBFF }; | 872 uint32_t expected[] = { 0xDBFF }; |
| 872 uint32_t dst[ARRAY_SIZE(expected)]; | 873 uint32_t dst[ARRAY_SIZE(expected)]; |
| 873 memset(dst, 0, sizeof(dst)); | 874 memset(dst, 0, sizeof(dst)); |
| 874 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 875 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 875 EXPECT(!is_valid); | 876 EXPECT(!is_valid); |
| 876 EXPECT(memcmp(expected, dst, sizeof(expected))); | 877 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 877 } | 878 } |
| 878 | 879 |
| 879 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80" | 880 // 5.1.5 - U+DC00 = ed b0 80 = "\xED\xB0\x80" |
| 880 { | 881 { |
| 881 const char* src = "\xED\xB0\x80"; | 882 const char* src = "\xED\xB0\x80"; |
| 882 uint32_t expected[] = { 0xDC00 }; | 883 uint32_t expected[] = { 0xDC00 }; |
| 883 uint32_t dst[ARRAY_SIZE(expected)]; | 884 uint32_t dst[ARRAY_SIZE(expected)]; |
| 884 memset(dst, 0, sizeof(dst)); | 885 memset(dst, 0, sizeof(dst)); |
| 885 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 886 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 886 EXPECT(!is_valid); | 887 EXPECT(!is_valid); |
| 887 EXPECT(memcmp(expected, dst, sizeof(expected))); | 888 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 888 } | 889 } |
| 889 | 890 |
| 890 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80" | 891 // 5.1.6 - U+DF80 = ed be 80 = "\xED\xBE\x80" |
| 891 { | 892 { |
| 892 const char* src = "\xED\xBE\x80"; | 893 const char* src = "\xED\xBE\x80"; |
| 893 uint32_t expected[] = { 0xDF80 }; | 894 uint32_t expected[] = { 0xDF80 }; |
| 894 uint32_t dst[ARRAY_SIZE(expected)]; | 895 uint32_t dst[ARRAY_SIZE(expected)]; |
| 895 memset(dst, 0, sizeof(dst)); | 896 memset(dst, 0, sizeof(dst)); |
| 896 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 897 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 897 EXPECT(!is_valid); | 898 EXPECT(!is_valid); |
| 898 EXPECT(memcmp(expected, dst, sizeof(expected))); | 899 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 899 } | 900 } |
| 900 | 901 |
| 901 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF" | 902 // 5.1.7 - U+DFFF = ed bf bf = "\xED\xBF\xBF" |
| 902 { | 903 { |
| 903 const char* src = "\xED\xBF\xBF"; | 904 const char* src = "\xED\xBF\xBF"; |
| 904 uint32_t expected[] = { 0xDFFF }; | 905 uint32_t expected[] = { 0xDFFF }; |
| 905 uint32_t dst[ARRAY_SIZE(expected)]; | 906 uint32_t dst[ARRAY_SIZE(expected)]; |
| 906 memset(dst, 0, sizeof(dst)); | 907 memset(dst, 0, sizeof(dst)); |
| 907 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 908 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 908 EXPECT(!is_valid); | 909 EXPECT(!is_valid); |
| 909 EXPECT(memcmp(expected, dst, sizeof(expected))); | 910 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 910 } | 911 } |
| 911 | 912 |
| 912 // 5.2 Paired UTF-16 surrogates | 913 // 5.2 Paired UTF-16 surrogates |
| 913 | 914 |
| 914 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80" | 915 // 5.2.1 - U+D800 U+DC00 = ed a0 80 ed b0 80 = "\xED\xA0\x80\xED\xB0\x80" |
| 915 { | 916 { |
| 916 const char* src = "\xED\xA0\x80\xED\xB0\x80"; | 917 const char* src = "\xED\xA0\x80\xED\xB0\x80"; |
| 917 uint32_t expected[] = { 0xD800, 0xDC00 }; | 918 uint32_t expected[] = { 0xD800, 0xDC00 }; |
| 918 uint32_t dst[ARRAY_SIZE(expected)]; | 919 uint32_t dst[ARRAY_SIZE(expected)]; |
| 919 memset(dst, 0, sizeof(dst)); | 920 memset(dst, 0, sizeof(dst)); |
| 920 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 921 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 921 EXPECT(!is_valid); | 922 EXPECT(!is_valid); |
| 922 EXPECT(memcmp(expected, dst, sizeof(expected))); | 923 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 923 } | 924 } |
| 924 | 925 |
| 925 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF" | 926 // 5.2.2 - U+D800 U+DFFF = ed a0 80 ed bf bf = "\xED\xA0\x80\xED\xBF\xBF" |
| 926 { | 927 { |
| 927 const char* src = "\xED\xA0\x80\xED\xBF\xBF"; | 928 const char* src = "\xED\xA0\x80\xED\xBF\xBF"; |
| 928 uint32_t expected[] = { 0xD800, 0xDFFF }; | 929 uint32_t expected[] = { 0xD800, 0xDFFF }; |
| 929 uint32_t dst[ARRAY_SIZE(expected)]; | 930 uint32_t dst[ARRAY_SIZE(expected)]; |
| 930 memset(dst, 0, sizeof(dst)); | 931 memset(dst, 0, sizeof(dst)); |
| 931 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 932 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 932 EXPECT(!is_valid); | 933 EXPECT(!is_valid); |
| 933 EXPECT(memcmp(expected, dst, sizeof(expected))); | 934 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 934 } | 935 } |
| 935 | 936 |
| 936 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80" | 937 // 5.2.3 - U+DB7F U+DC00 = ed ad bf ed b0 80 = "\xED\xAD\xBF\xED\xB0\x80" |
| 937 { | 938 { |
| 938 const char* src = "\xED\xAD\xBF\xED\xB0\x80"; | 939 const char* src = "\xED\xAD\xBF\xED\xB0\x80"; |
| 939 uint32_t expected[] = { 0xDB7F, 0xDC00 }; | 940 uint32_t expected[] = { 0xDB7F, 0xDC00 }; |
| 940 uint32_t dst[ARRAY_SIZE(expected)]; | 941 uint32_t dst[ARRAY_SIZE(expected)]; |
| 941 memset(dst, 0, sizeof(dst)); | 942 memset(dst, 0, sizeof(dst)); |
| 942 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 943 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 943 EXPECT(!is_valid); | 944 EXPECT(!is_valid); |
| 944 EXPECT(memcmp(expected, dst, sizeof(expected))); | 945 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 945 } | 946 } |
| 946 | 947 |
| 947 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF" | 948 // 5.2.4 - U+DB7F U+DFFF = ed ad bf ed bf bf = "\xED\xAD\xBF\xED\xBF\xBF" |
| 948 { | 949 { |
| 949 const char* src = "\xED\xAD\xBF\xED\xBF\xBF"; | 950 const char* src = "\xED\xAD\xBF\xED\xBF\xBF"; |
| 950 uint32_t expected[] = { 0xDB7F, 0xDFFF }; | 951 uint32_t expected[] = { 0xDB7F, 0xDFFF }; |
| 951 uint32_t dst[ARRAY_SIZE(expected)]; | 952 uint32_t dst[ARRAY_SIZE(expected)]; |
| 952 memset(dst, 0, sizeof(dst)); | 953 memset(dst, 0, sizeof(dst)); |
| 953 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 954 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 954 EXPECT(!is_valid); | 955 EXPECT(!is_valid); |
| 955 EXPECT(memcmp(expected, dst, sizeof(expected))); | 956 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 956 } | 957 } |
| 957 | 958 |
| 958 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80" | 959 // 5.2.5 - U+DB80 U+DC00 = ed ae 80 ed b0 80 = "\xED\xAE\x80\xED\xB0\x80" |
| 959 { | 960 { |
| 960 const char* src = "\xED\xAE\x80\xED\xB0\x80"; | 961 const char* src = "\xED\xAE\x80\xED\xB0\x80"; |
| 961 uint32_t expected[] = { 0xDB80, 0xDC00 }; | 962 uint32_t expected[] = { 0xDB80, 0xDC00 }; |
| 962 uint32_t dst[ARRAY_SIZE(expected)]; | 963 uint32_t dst[ARRAY_SIZE(expected)]; |
| 963 memset(dst, 0, sizeof(dst)); | 964 memset(dst, 0, sizeof(dst)); |
| 964 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 965 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 965 EXPECT(!is_valid); | 966 EXPECT(!is_valid); |
| 966 EXPECT(memcmp(expected, dst, sizeof(expected))); | 967 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 967 } | 968 } |
| 968 | 969 |
| 969 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF" | 970 // 5.2.6 - U+DB80 U+DFFF = ed ae 80 ed bf bf = "\xED\xAE\x80\xED\xBF\xBF" |
| 970 { | 971 { |
| 971 const char* src = "\xED\xAE\x80\xED\xBF\xBF"; | 972 const char* src = "\xED\xAE\x80\xED\xBF\xBF"; |
| 972 uint32_t expected[] = { 0xDB80, 0xDFFF }; | 973 uint32_t expected[] = { 0xDB80, 0xDFFF }; |
| 973 uint32_t dst[ARRAY_SIZE(expected)]; | 974 uint32_t dst[ARRAY_SIZE(expected)]; |
| 974 memset(dst, 0, sizeof(dst)); | 975 memset(dst, 0, sizeof(dst)); |
| 975 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 976 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 976 EXPECT(!is_valid); | 977 EXPECT(!is_valid); |
| 977 EXPECT(memcmp(expected, dst, sizeof(expected))); | 978 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 978 } | 979 } |
| 979 | 980 |
| 980 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80" | 981 // 5.2.7 - U+DBFF U+DC00 = ed af bf ed b0 80 = "\xED\xAF\xBF\xED\xB0\x80" |
| 981 { | 982 { |
| 982 const char* src = "\xED\xAF\xBF\xED\xB0\x80"; | 983 const char* src = "\xED\xAF\xBF\xED\xB0\x80"; |
| 983 uint32_t expected[] = { 0xDBFF, 0xDC00 }; | 984 uint32_t expected[] = { 0xDBFF, 0xDC00 }; |
| 984 uint32_t dst[ARRAY_SIZE(expected)]; | 985 uint32_t dst[ARRAY_SIZE(expected)]; |
| 985 memset(dst, 0, sizeof(dst)); | 986 memset(dst, 0, sizeof(dst)); |
| 986 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 987 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 987 EXPECT(!is_valid); | 988 EXPECT(!is_valid); |
| 988 EXPECT(memcmp(expected, dst, sizeof(expected))); | 989 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 989 } | 990 } |
| 990 | 991 |
| 991 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF" | 992 // 5.2.8 - U+DBFF U+DFFF = ed af bf ed bf bf = "\xED\xAF\xBF\xED\xBF\xBF" |
| 992 { | 993 { |
| 993 const char* src = "\xED\xAF\xBF\xED\xBF\xBF"; | 994 const char* src = "\xED\xAF\xBF\xED\xBF\xBF"; |
| 994 uint32_t expected[] = { 0xDBFF, 0xDFFF }; | 995 uint32_t expected[] = { 0xDBFF, 0xDFFF }; |
| 995 uint32_t dst[ARRAY_SIZE(expected)]; | 996 uint32_t dst[ARRAY_SIZE(expected)]; |
| 996 memset(dst, 0, sizeof(dst)); | 997 memset(dst, 0, sizeof(dst)); |
| 997 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 998 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 998 EXPECT(!is_valid); | 999 EXPECT(!is_valid); |
| 999 EXPECT(memcmp(expected, dst, sizeof(expected))); | 1000 EXPECT(memcmp(expected, dst, sizeof(expected))); |
| 1000 } | 1001 } |
| 1001 | 1002 |
| 1002 // 5.3 - Other illegal code positions | 1003 // 5.3 - Other illegal code positions |
| 1003 | 1004 |
| 1004 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE" | 1005 // 5.3.1 - U+FFFE = ef bf be = "\xEF\xBF\xBE" |
| 1005 { | 1006 { |
| 1006 const char* src = "\xEF\xBF\xBE"; | 1007 const char* src = "\xEF\xBF\xBE"; |
| 1007 uint32_t expected[] = { 0xFFFE }; | 1008 uint32_t expected[] = { 0xFFFE }; |
| 1008 uint32_t dst[ARRAY_SIZE(expected)]; | 1009 uint32_t dst[ARRAY_SIZE(expected)]; |
| 1009 memset(dst, 0, sizeof(dst)); | 1010 memset(dst, 0, sizeof(dst)); |
| 1010 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 1011 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 1011 EXPECT(is_valid); | 1012 EXPECT(is_valid); |
| 1012 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 1013 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 1013 } | 1014 } |
| 1014 | 1015 |
| 1015 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF" | 1016 // 5.3.2 - U+FFFF = ef bf bf = "\xEF\xBF\xBF" |
| 1016 { | 1017 { |
| 1017 const char* src = "\xEF\xBF\xBF"; | 1018 const char* src = "\xEF\xBF\xBF"; |
| 1018 uint32_t expected[] = { 0xFFFF }; | 1019 uint32_t expected[] = { 0xFFFF }; |
| 1019 uint32_t dst[ARRAY_SIZE(expected)]; | 1020 uint32_t dst[ARRAY_SIZE(expected)]; |
| 1020 memset(dst, 0, sizeof(dst)); | 1021 memset(dst, 0, sizeof(dst)); |
| 1021 bool is_valid = Utf8::Decode(src, dst, ARRAY_SIZE(dst)); | 1022 bool is_valid = Utf8::DecodeCStringToUTF32(src, dst, ARRAY_SIZE(dst)); |
| 1022 EXPECT(is_valid); | 1023 EXPECT(is_valid); |
| 1023 EXPECT(!memcmp(expected, dst, sizeof(expected))); | 1024 EXPECT(!memcmp(expected, dst, sizeof(expected))); |
| 1024 } | 1025 } |
| 1025 } | 1026 } |
| 1026 | 1027 |
| 1027 } // namespace dart | 1028 } // namespace dart |
| OLD | NEW |