OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 import 'package:expect/expect.dart'; |
| 6 import 'dart:convert'; |
| 7 |
/// ASCII-only sample sentence.
const String testEnglishPhrase =
    "The quick brown fox jumps over the lazy dog.";

/// Expected UTF-8 bytes of [testEnglishPhrase]: one byte per character.
const List<int> testEnglishUtf8 = const <int>[
  0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63,
  0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20,
  0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70,
  0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74,
  0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20,
  0x64, 0x6f, 0x67, 0x2e
];
| 18 |
/// Danish sample sentence; contains the two-byte characters æ, ø and å.
const String testDanishPhrase = "Quizdeltagerne spiste jordbær med "
    "fløde mens cirkusklovnen Wolther spillede på xylofon.";

/// Expected UTF-8 bytes of [testDanishPhrase].
const List<int> testDanishUtf8 = const <int>[
  0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74,
  0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73,
  0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f,
  0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d,
  0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64,
  0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63,
  0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f,
  0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c,
  0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69,
  0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3,
  0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f,
  0x6e, 0x2e
];
| 35 |
/// Hebrew sample sentence (right-to-left script, two bytes per letter).
// The declaration is deliberately split across two lines: editors interact
// strangely with the text direction when the literal shares the first line.
const String
    testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";

/// Expected UTF-8 bytes of [testHebrewPhrase].
const List<int> testHebrewUtf8 = const <int>[
  0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7,
  0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9,
  0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7,
  0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95,
  0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7,
  0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7,
  0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90,
  0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97,
  0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7,
  0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94,
  0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98,
  0xd7, 0x94
];
| 53 |
/// Russian (Cyrillic) sample sentence, two bytes per letter.
const String testRussianPhrase = "Съешь же ещё этих мягких "
    "французских булок да выпей чаю";

/// Expected UTF-8 bytes of [testRussianPhrase].
const List<int> testRussianUtf8 = const <int>[
  0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88,
  0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20,
  0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1,
  0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
  0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba,
  0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1,
  0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1,
  0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0,
  0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83,
  0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0,
  0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b,
  0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1,
  0x87, 0xd0, 0xb0, 0xd1, 0x8e
];
| 71 |
/// Polytonic Greek sample sentence; mixes two-byte and three-byte characters.
const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ "
    "στὸ χρυσαφὶ ξέφωτο";

/// Expected UTF-8 bytes of [testGreekPhrase].
const List<int> testGreekUtf8 = const <int>[
  0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad,
  0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce,
  0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf,
  0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1,
  0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1,
  0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1,
  0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1,
  0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1,
  0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1,
  0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf,
  0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1,
  0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf,
  0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf
];
| 89 |
/// Katakana sample text; every kana occupies three bytes in UTF-8.
const String testKatakanaPhrase = "イロハニホヘト チリヌルヲ ワカヨタレソ "
    "ツネナラム ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン";

/// Expected UTF-8 bytes of [testKatakanaPhrase].
const List<int> testKatakanaUtf8 = const <int>[
  0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83,
  0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3,
  0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83,
  0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3,
  0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83,
  0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3,
  0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd,
  0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3,
  0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0,
  0x20, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3,
  0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf,
  0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3,
  0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3,
  0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3,
  0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad,
  0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83,
  0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1,
  0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82,
  0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3
];
| 113 |
/// Entry point: runs the two decoding tests, then the encoding test.
void main() {
  final List<Function> suites = <Function>[
    testUtf8bytesToCodepoints,
    testUtf8BytesToString,
    testEncodeToUtf8
  ];
  for (final Function runSuite in suites) {
    runSuite();
  }
}
| 119 |
/// Encodes [str] as a list of UTF-8 bytes using the `dart:convert` codec.
List<int> encodeUtf8(String str) {
  return UTF8.encode(str);
}
/// Decodes the UTF-8 bytes in [codeUnits] and returns the resulting runes.
///
/// Decoding is done with `allowMalformed: true`, so invalid input does not
/// throw.
List<int> utf8ToRunes(List<int> codeUnits) {
  final String decoded = UTF8.decode(codeUnits, allowMalformed: true);
  return decoded.runes.toList();
}
/// Decodes the UTF-8 bytes in [codeUnits] into a string.
///
/// Uses the decoder's default settings (no `allowMalformed` argument).
String decodeUtf8(List<int> codeUnits) {
  return UTF8.decode(codeUnits);
}
| 125 |
/// Checks that every sample phrase encodes to its expected UTF-8 byte list.
void testEncodeToUtf8() {
  // Encodes [phrase] and compares the bytes with [expectedBytes]; [reason] is
  // the message reported on mismatch.
  void expectEncoding(List<int> expectedBytes, String phrase, String reason) {
    final List<int> actualBytes = encodeUtf8(phrase);
    Expect.listEquals(expectedBytes, actualBytes, reason);
  }

  expectEncoding(testEnglishUtf8, testEnglishPhrase, "english to utf8");
  expectEncoding(testDanishUtf8, testDanishPhrase, "encode danish to utf8");
  expectEncoding(testHebrewUtf8, testHebrewPhrase, "Hebrew to utf8");
  expectEncoding(testRussianUtf8, testRussianPhrase, "Russian to utf8");
  expectEncoding(testGreekUtf8, testGreekPhrase, "Greek to utf8");
  expectEncoding(testKatakanaUtf8, testKatakanaPhrase, "Katakana to utf8");
}
| 145 |
/// Decodes byte sequences with [utf8ToRunes] and checks the resulting runes.
///
/// Covers well-formed boundary values as well as malformed input: stray
/// continuation bytes, lead bytes without continuation, truncated sequences,
/// impossible bytes and overlong encodings. Malformed input is expected to
/// decode to [UNICODE_REPLACEMENT_CHARACTER_RUNE] (U+FFFD).
void testUtf8bytesToCodepoints() {
  // A list holding [count] copies of the replacement character rune.
  List<int> replacements(int count) =>
      new List<int>.filled(count, UNICODE_REPLACEMENT_CHARACTER_RUNE);

  // Decodes [bytes] and compares the runes with [expected]; [reason] is the
  // message reported on mismatch.
  void expectRunes(List<int> expected, List<int> bytes, String reason) {
    Expect.listEquals(expected, utf8ToRunes(bytes), reason);
  }

  // Follows every byte in the range [first, limit) with a space and expects
  // each pair to decode to a replacement character followed by that space.
  void expectLeadBytesRejected(int first, int limit, String reason) {
    List<int> input = <int>[];
    List<int> expected = <int>[];
    for (int lead = first; lead < limit; lead++) {
      input.addAll([lead, 0x20]);
      expected.addAll([UNICODE_REPLACEMENT_CHARACTER_RUNE, 0x20]);
    }
    expectRunes(expected, input, reason);
  }

  expectRunes(
      [954, 972, 963, 956, 949],
      [0xce, 0xba, 0xcf, 0x8c, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5],
      "κόσμε");

  // Boundary conditions: first possible sequence of a certain length.
  // The 5- and 6-byte forms are expected to yield one replacement per byte.
  expectRunes([], [], "no input");
  expectRunes([0x0], [0x0], "0");
  expectRunes([0x80], [0xc2, 0x80], "80");
  expectRunes([0x800], [0xe0, 0xa0, 0x80], "800");
  expectRunes([0x10000], [0xf0, 0x90, 0x80, 0x80], "10000");
  expectRunes(replacements(5), [0xf8, 0x88, 0x80, 0x80, 0x80], "200000");
  expectRunes(
      replacements(6), [0xfc, 0x84, 0x80, 0x80, 0x80, 0x80], "4000000");

  // Boundary conditions: last possible sequence of a certain length.
  expectRunes([0x7f], [0x7f], "7f");
  expectRunes([0x7ff], [0xdf, 0xbf], "7ff");
  expectRunes([0xffff], [0xef, 0xbf, 0xbf], "ffff");
  expectRunes(replacements(4), [0xf7, 0xbf, 0xbf, 0xbf], "1fffff");
  expectRunes(replacements(5), [0xfb, 0xbf, 0xbf, 0xbf, 0xbf], "3ffffff");
  // Label fixed: this case was previously tagged "4000000", duplicating the
  // first-possible 6-byte case above; fd bf bf bf bf bf stands for 0x7fffffff.
  expectRunes(
      replacements(6), [0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf], "7fffffff");

  // Other boundary conditions.
  expectRunes([0xd7ff], [0xed, 0x9f, 0xbf], "d7ff");
  expectRunes([0xe000], [0xee, 0x80, 0x80], "e000");
  expectRunes(replacements(1), [0xef, 0xbf, 0xbd], "fffd");
  expectRunes([0x10ffff], [0xf4, 0x8f, 0xbf, 0xbf], "10ffff");
  expectRunes(replacements(1), [0xf4, 0x90, 0x80, 0x80], "110000");

  // Unexpected continuation bytes.
  expectRunes(replacements(1), [0x80], "80 => replacement character");
  expectRunes(replacements(1), [0xbf], "bf => replacement character");

  List<int> allContinuationBytes = <int>[];
  for (int byte = 0x80; byte < 0xc0; byte++) {
    allContinuationBytes.add(byte);
  }
  expectRunes(replacements(allContinuationBytes.length), allContinuationBytes,
      "80 - bf => replacement character x 64");

  // Lead bytes of 2- to 6-byte sequences, each followed by a space instead of
  // a continuation byte.
  expectLeadBytesRejected(0xc0, 0xe0,
      "c0 - df + space => replacement character + space x 32");
  expectLeadBytesRejected(0xe0, 0xf0,
      "e0 - ef + space => replacement character x 16");
  expectLeadBytesRejected(0xf0, 0xf8,
      "f0 - f7 + space => replacement character x 8");
  expectLeadBytesRejected(0xf8, 0xfc,
      "f8 - fb + space => replacement character x 4");
  expectLeadBytesRejected(0xfc, 0xfe,
      "fc - fd + space => replacement character x 2");

  // Sequences with last continuation byte missing.
  expectRunes(replacements(1), [0xc2],
      "2-byte sequence with last byte missing");
  expectRunes(replacements(1), [0xe0, 0x80],
      "3-byte sequence with last byte missing");
  expectRunes(replacements(1), [0xf0, 0x80, 0x80],
      "4-byte sequence with last byte missing");
  expectRunes(replacements(4), [0xf8, 0x88, 0x80, 0x80],
      "5-byte sequence with last byte missing");
  expectRunes(replacements(5), [0xfc, 0x80, 0x80, 0x80, 0x80],
      "6-byte sequence with last byte missing");

  expectRunes(replacements(1), [0xdf],
      "2-byte sequence with last byte missing (hi)");
  expectRunes(replacements(1), [0xef, 0xbf],
      "3-byte sequence with last byte missing (hi)");
  expectRunes(replacements(3), [0xf7, 0xbf, 0xbf],
      "4-byte sequence with last byte missing (hi)");
  expectRunes(replacements(4), [0xfb, 0xbf, 0xbf, 0xbf],
      "5-byte sequence with last byte missing (hi)");
  expectRunes(replacements(5), [0xfd, 0xbf, 0xbf, 0xbf, 0xbf],
      "6-byte sequence with last byte missing (hi)");

  // Concatenation of incomplete sequences: the ten truncated inputs above,
  // back to back. 26 is the sum of their individual expectations
  // (1 + 1 + 1 + 4 + 5, twice over with 3 instead of the third 1).
  expectRunes(
      replacements(26),
      [ 0xc2,
        0xe0, 0x80,
        0xf0, 0x80, 0x80,
        0xf8, 0x88, 0x80, 0x80,
        0xfc, 0x80, 0x80, 0x80, 0x80,
        0xdf,
        0xef, 0xbf,
        0xf7, 0xbf, 0xbf,
        0xfb, 0xbf, 0xbf, 0xbf,
        0xfd, 0xbf, 0xbf, 0xbf, 0xbf ],
      "Concatenation of incomplete sequences");

  // Impossible bytes.
  expectRunes(replacements(1), [0xfe], "fe");
  expectRunes(replacements(1), [0xff], "ff");
  expectRunes(replacements(4), [0xfe, 0xfe, 0xff, 0xff], "fe fe ff ff");

  // Overlong sequences: the 2- to 4-byte forms are expected to collapse to a
  // single replacement character, the 5- and 6-byte forms to one per byte.
  expectRunes(replacements(1), [0xc0, 0xaf], "c0 af");
  expectRunes(replacements(1), [0xe0, 0x80, 0xaf], "e0 80 af");
  expectRunes(replacements(1), [0xf0, 0x80, 0x80, 0xaf], "f0 80 80 af");
  expectRunes(
      replacements(5), [0xf8, 0x80, 0x80, 0x80, 0xaf], "f8 80 80 80 af");
  expectRunes(replacements(6), [0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf],
      "fc 80 80 80 80 af");

  expectRunes(replacements(1), [0xc1, 0xbf], "c1 bf");
  expectRunes(replacements(1), [0xe0, 0x9f, 0xbf], "e0 9f bf");
  expectRunes(replacements(1), [0xf0, 0x8f, 0xbf, 0xbf], "f0 8f bf bf");
  expectRunes(
      replacements(5), [0xf8, 0x87, 0xbf, 0xbf, 0xbf], "f8 87 bf bf bf");
  expectRunes(replacements(6), [0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf],
      "fc 83 bf bf bf bf");

  expectRunes(replacements(1), [0xc0, 0x80], "c0 80");
  expectRunes(replacements(1), [0xe0, 0x80, 0x80], "e0 80 80");
  expectRunes(replacements(1), [0xf0, 0x80, 0x80, 0x80], "f0 80 80 80");
  expectRunes(
      replacements(5), [0xf8, 0x80, 0x80, 0x80, 0x80], "f8 80 80 80 80");
  expectRunes(replacements(6), [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80],
      "fc 80 80 80 80 80");

  // U+FFFE and U+FFFF: expected to be decoded as-is rather than replaced.
  expectRunes([0xfffe], [0xef, 0xbf, 0xbe], "U+FFFE");
  expectRunes([0xffff], [0xef, 0xbf, 0xbf], "U+FFFF");
}
| 421 |
/// Checks that every sample byte list decodes back to its phrase.
void testUtf8BytesToString() {
  // Decodes [bytes] and compares the string with [expectedPhrase]; [reason] is
  // the message reported on mismatch.
  void expectDecoding(String expectedPhrase, List<int> bytes, String reason) {
    final String actualPhrase = decodeUtf8(bytes);
    Expect.stringEquals(expectedPhrase, actualPhrase, reason);
  }

  expectDecoding(testEnglishPhrase, testEnglishUtf8, "English");
  expectDecoding(testDanishPhrase, testDanishUtf8, "Danish");
  expectDecoding(testHebrewPhrase, testHebrewUtf8, "Hebrew");
  expectDecoding(testRussianPhrase, testRussianUtf8, "Russian");
  expectDecoding(testGreekPhrase, testGreekUtf8, "Greek");
  expectDecoding(testKatakanaPhrase, testKatakanaUtf8, "Katakana");
}
OLD | NEW |