utils/tests/string_encoding/utf8_test.dart - Issue 68563004: Move unicode tests to utf package.

Side by Side Diff: utils/tests/string_encoding/utf8_test.dart

Issue 68563004: Move unicode tests to utf package. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Simplify test. Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 #!/usr/bin/env dart

2 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

3 // for details. All rights reserved. Use of this source code is governed by a

4 // BSD-style license that can be found in the LICENSE file.

5

6 library utf8_tests;

7 import 'dunit.dart';

8 import '../../../lib/convert/convert.dart';

9

/// Entry point: creates a [TestSuite], registers [Utf8Tests] with it and
/// runs the suite.
void main() {
  new TestSuite()
    ..registerTestClass(new Utf8Tests())
    ..run();
}

15

16 class Utf8Tests extends TestClass {

/// English sample sentence (ASCII only).
static const String testEnglishPhrase =
    "The quick brown fox jumps over the lazy dog.";

/// Expected UTF-8 bytes of [testEnglishPhrase], laid out one word per line.
static const List<int> testEnglishUtf8 = const <int>[
  0x54, 0x68, 0x65, 0x20, // The
  0x71, 0x75, 0x69, 0x63, 0x6b, 0x20, // quick
  0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, // brown
  0x66, 0x6f, 0x78, 0x20, // fox
  0x6a, 0x75, 0x6d, 0x70, 0x73, 0x20, // jumps
  0x6f, 0x76, 0x65, 0x72, 0x20, // over
  0x74, 0x68, 0x65, 0x20, // the
  0x6c, 0x61, 0x7a, 0x79, 0x20, // lazy
  0x64, 0x6f, 0x67, 0x2e // dog.
];

27

/// Danish sample sentence; contains the non-ASCII letters æ, ø and å.
static const String testDanishPhrase =
    "Quizdeltagerne spiste jordbær med "
    "fløde mens cirkusklovnen Wolther spillede på xylofon.";

/// Expected UTF-8 bytes of [testDanishPhrase], grouped by word.
static const List<int> testDanishUtf8 = const <int>[
  // Quizdeltagerne
  0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74,
  0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20,
  // spiste
  0x73, 0x70, 0x69, 0x73, 0x74, 0x65, 0x20,
  // jordbær (æ = c3 a6)
  0x6a, 0x6f, 0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20,
  // med
  0x6d, 0x65, 0x64, 0x20,
  // fløde (ø = c3 b8)
  0x66, 0x6c, 0xc3, 0xb8, 0x64, 0x65, 0x20,
  // mens
  0x6d, 0x65, 0x6e, 0x73, 0x20,
  // cirkusklovnen
  0x63, 0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b,
  0x6c, 0x6f, 0x76, 0x6e, 0x65, 0x6e, 0x20,
  // Wolther
  0x57, 0x6f, 0x6c, 0x74, 0x68, 0x65, 0x72, 0x20,
  // spillede
  0x73, 0x70, 0x69, 0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20,
  // på (å = c3 a5)
  0x70, 0xc3, 0xa5, 0x20,
  // xylofon.
  0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f, 0x6e, 0x2e
];

44

/// Hebrew sample sentence (right-to-left text).
// The declaration is split before the name on purpose: editors interact
// oddly with the text direction of the literal.
static const String
    testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";

/// Expected UTF-8 bytes of [testHebrewPhrase].
///
/// One word per line; each letter is a two-byte sequence starting with 0xd7
/// and words are separated by 0x20.
static const List<int> testHebrewUtf8 = const <int>[
  0xd7, 0x93, 0xd7, 0x92, 0x20,
  0xd7, 0xa1, 0xd7, 0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20,
  0xd7, 0xa9, 0xd7, 0x98, 0x20,
  0xd7, 0x91, 0xd7, 0x99, 0xd7, 0x9d, 0x20,
  0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95, 0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20,
  0xd7, 0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7, 0xa2, 0x20,
  0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90, 0x20,
  0xd7, 0x9c, 0xd7, 0x95, 0x20,
  0xd7, 0x97, 0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20,
  0xd7, 0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20,
  0xd7, 0x94, 0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98, 0xd7, 0x94
];

62

/// Russian (Cyrillic) sample sentence.
static const String testRussianPhrase =
    "Съешь же ещё этих мягких "
    "французских булок да выпей чаю";

/// Expected UTF-8 bytes of [testRussianPhrase], grouped by word.
///
/// Every letter is a two-byte sequence; words are separated by 0x20.
static const List<int> testRussianUtf8 = const <int>[
  // Съешь
  0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88, 0xd1, 0x8c, 0x20,
  // же
  0xd0, 0xb6, 0xd0, 0xb5, 0x20,
  // ещё
  0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20,
  // этих
  0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
  // мягких
  0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
  // французских
  0xd1, 0x84, 0xd1, 0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1, 0x83,
  0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
  // булок
  0xd0, 0xb1, 0xd1, 0x83, 0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20,
  // да
  0xd0, 0xb4, 0xd0, 0xb0, 0x20,
  // выпей
  0xd0, 0xb2, 0xd1, 0x8b, 0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20,
  // чаю
  0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x8e
];

80

/// Greek sample sentence; includes accented letters that need three bytes.
static const String testGreekPhrase =
    "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ "
    "στὸ χρυσαφὶ ξέφωτο";

/// Expected UTF-8 bytes of [testGreekPhrase], grouped by word.
///
/// Sequences starting with 0xce/0xcf are two bytes long, those starting
/// with 0xe1 are three bytes long; words are separated by 0x20.
static const List<int> testGreekUtf8 = const <int>[
  // Γαζέες
  0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad, 0xce, 0xb5, 0xcf, 0x82, 0x20,
  // καὶ
  0xce, 0xba, 0xce, 0xb1, 0xe1, 0xbd, 0xb6, 0x20,
  // μυρτιὲς
  0xce, 0xbc, 0xcf, 0x85, 0xcf, 0x81, 0xcf, 0x84,
  0xce, 0xb9, 0xe1, 0xbd, 0xb2, 0xcf, 0x82, 0x20,
  // δὲν
  0xce, 0xb4, 0xe1, 0xbd, 0xb2, 0xce, 0xbd, 0x20,
  // θὰ
  0xce, 0xb8, 0xe1, 0xbd, 0xb0, 0x20,
  // βρῶ
  0xce, 0xb2, 0xcf, 0x81, 0xe1, 0xbf, 0xb6, 0x20,
  // πιὰ
  0xcf, 0x80, 0xce, 0xb9, 0xe1, 0xbd, 0xb0, 0x20,
  // στὸ
  0xcf, 0x83, 0xcf, 0x84, 0xe1, 0xbd, 0xb8, 0x20,
  // χρυσαφὶ
  0xcf, 0x87, 0xcf, 0x81, 0xcf, 0x85, 0xcf, 0x83,
  0xce, 0xb1, 0xcf, 0x86, 0xe1, 0xbd, 0xb6, 0x20,
  // ξέφωτο
  0xce, 0xbe, 0xce, 0xad, 0xcf, 0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf
];

98

/// Katakana sample text spanning two lines.
///
/// Words are separated by single spaces and the two lines by one newline,
/// which is what the 0x20 and 0x0a bytes in [testKatakanaUtf8] encode. The
/// line break directly after the opening `"""` is not part of the string.
static const String testKatakanaPhrase = """
イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン""";

102

/// Expected UTF-8 bytes for [testKatakanaPhrase].
///
/// Laid out one word per line with three bytes per character. Words end
/// with a 0x20 separator, except the fourth, which ends with 0x0a (the line
/// break), and the last, which has no trailing byte.
static const List<int> testKatakanaUtf8 = const <int>[
  // イロハニホヘト
  0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83, 0x8f, 0xe3, 0x83, 0x8b,
  0xe3, 0x83, 0x9b, 0xe3, 0x83, 0x98, 0xe3, 0x83, 0x88, 0x20,
  // チリヌルヲ
  0xe3, 0x83, 0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3, 0x83, 0xab,
  0xe3, 0x83, 0xb2, 0x20,
  // ワカヨタレソ
  0xe3, 0x83, 0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3, 0x82, 0xbf,
  0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd, 0x20,
  // ツネナラム
  0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3, 0x83, 0x8a, 0xe3, 0x83, 0xa9,
  0xe3, 0x83, 0xa0, 0x0a,
  // ウヰノオクヤマ
  0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3, 0x83, 0x8e, 0xe3, 0x82, 0xaa,
  0xe3, 0x82, 0xaf, 0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20,
  // ケフコエテ
  0xe3, 0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3, 0xe3, 0x82, 0xa8,
  0xe3, 0x83, 0x86, 0x20,
  // アサキユメミシ
  0xe3, 0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad, 0xe3, 0x83, 0xa6,
  0xe3, 0x83, 0xa1, 0xe3, 0x83, 0x9f, 0xe3, 0x82, 0xb7, 0x20,
  // ヱヒモセスン
  0xe3, 0x83, 0xb1, 0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82, 0xbb,
  0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3
];

123

/// Registers the four test methods of this class with [suite], each under
/// its "Utf8Tests."-prefixed name.
void registerTests(TestSuite suite) {
  // Forwards to `register`, always targeting the same suite.
  void add(String name, void test()) {
    register(name, test, suite);
  }

  add("Utf8Tests.testUtf8bytesToCodepoints", testUtf8bytesToCodepoints);
  add("Utf8Tests.testUtf8BytesToString", testUtf8BytesToString);
  add("Utf8Tests.testEncodeToUtf8", testEncodeToUtf8);
  add("Utf8Tests.testIterableMethods", testIterableMethods);
}

131

/// Encodes each sample phrase with [encodeUtf8] and compares the result with
/// the matching expected byte table.
void testEncodeToUtf8() {
  // One comparison: [wanted] is the byte table, [phrase] the text to encode.
  void check(List<int> wanted, String phrase, String reason) {
    Expect.listEquals(wanted, encodeUtf8(phrase), reason);
  }

  check(testEnglishUtf8, testEnglishPhrase, "english to utf8");
  check(testDanishUtf8, testDanishPhrase, "encode danish to utf8");
  check(testHebrewUtf8, testHebrewPhrase, "Hebrew to utf8");
  check(testRussianUtf8, testRussianPhrase, "Russian to utf8");
  check(testGreekUtf8, testGreekPhrase, "Greek to utf8");
  check(testKatakanaUtf8, testKatakanaPhrase, "Katakana to utf8");
}

151

/// Encodes [text] to UTF-8 bytes using the `UTF8` codec.
List encodeUtf8(String text) => UTF8.encode(text);

/// Decodes the UTF-8 [bytes] to a string using the `UTF8` codec.
String decodeUtf8(List bytes) => UTF8.decode(bytes);

/// Decodes the UTF-8 [bytes] and returns the resulting code points (runes).
///
/// Decoding is lenient (`allowMalformed: true`): the tests in this class
/// pass malformed byte sequences to this helper and expect replacement code
/// points in the result, whereas strict decoding would throw instead.
// NOTE(review): how many replacement characters are produced per malformed
// sequence is decided by the codec; confirm that it matches the expected
// lists in the tests.
List utf8ToCodePoints(List bytes) =>
    UTF8.decode(bytes, allowMalformed: true).runes.toList();

/// Lower-case-"p" spelling of [utf8ToCodePoints]; forwards to it so that
/// callers using either spelling resolve to the same helper.
List utf8ToCodepoints(List bytes) => utf8ToCodePoints(bytes);

155

/// Checks byte-to-code-point decoding for well-formed boundary cases and for
/// many kinds of malformed input.
///
/// Malformed input is expected to decode to
/// [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT] rather than to fail.
// NOTE(review): UNICODE_REPLACEMENT_CHARACTER_CODEPOINT is not declared in
// this file; presumably it comes from an import - confirm.
void testUtf8bytesToCodepoints() {
  final int bad = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT;

  // Decodes [bytes] with the helper declared in this class (its declared
  // name is `utf8ToCodePoints`, capital "P") and compares with [expected].
  void check(List<int> expected, List<int> bytes, String reason) {
    Expect.listEquals(expected, utf8ToCodePoints(bytes), reason);
  }

  // Follows every lead byte in [from, to) with a space (0x20) and expects
  // exactly one replacement code point per lead byte.
  void checkLeadBytes(int from, int to, String reason) {
    List<int> input = <int>[];
    List<int> expected = <int>[];
    for (int lead = from; lead < to; lead++) {
      input.addAll([lead, 0x20]);
      expected.add(bad);
    }
    check(expected, input, reason);
  }

  check([954, 972, 963, 956, 949],
      [0xce, 0xba, 0xcf, 0x8c, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5], "κόσμε");

  // boundary conditions: First possible sequence of a certain length
  check([], [], "no input");
  check([0x0], [0x0], "0");
  check([0x80], [0xc2, 0x80], "80");
  check([0x800], [0xe0, 0xa0, 0x80], "800");
  check([0x10000], [0xf0, 0x90, 0x80, 0x80], "10000");
  check([bad], [0xf8, 0x88, 0x80, 0x80, 0x80], "200000");
  check([bad], [0xfc, 0x84, 0x80, 0x80, 0x80, 0x80], "4000000");

  // boundary conditions: Last possible sequence of a certain length
  check([0x7f], [0x7f], "7f");
  check([0x7ff], [0xdf, 0xbf], "7ff");
  check([0xffff], [0xef, 0xbf, 0xbf], "ffff");
  check([bad], [0xf7, 0xbf, 0xbf, 0xbf], "1fffff");
  check([bad], [0xfb, 0xbf, 0xbf, 0xbf, 0xbf], "3ffffff");
  // The label names the value the six bytes would nominally encode.
  check([bad], [0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf], "7fffffff");

  // other boundary conditions
  check([0xd7ff], [0xed, 0x9f, 0xbf], "d7ff");
  check([0xe000], [0xee, 0x80, 0x80], "e000");
  check([bad], [0xef, 0xbf, 0xbd], "fffd");
  check([0x10ffff], [0xf4, 0x8f, 0xbf, 0xbf], "10ffff");
  check([bad], [0xf4, 0x90, 0x80, 0x80], "110000");

  // unexpected continuation bytes
  check([bad], [0x80], "80 => replacement character");
  check([bad], [0xbf], "bf => replacement character");

  // All 64 continuation bytes in a row: one replacement per byte.
  List<int> allContinuationBytes = <int>[];
  List<int> matchingReplacementChars = <int>[];
  for (int b = 0x80; b < 0xc0; b++) {
    allContinuationBytes.add(b);
    matchingReplacementChars.add(bad);
  }
  check(matchingReplacementChars, allContinuationBytes,
      "80 - bf => replacement character x 64");

  // Lead bytes of 2- to 6-byte sequences, each followed by a space.
  checkLeadBytes(0xc0, 0xe0,
      "c0 - df + space => replacement character + space x 32");
  checkLeadBytes(0xe0, 0xf0,
      "e0 - ef + space => replacement character x 16");
  checkLeadBytes(0xf0, 0xf8,
      "f0 - f7 + space => replacement character x 8");
  checkLeadBytes(0xf8, 0xfc,
      "f8 - fb + space => replacement character x 4");
  checkLeadBytes(0xfc, 0xfe,
      "fc - fd + space => replacement character x 2");

  // Sequences with last continuation byte missing
  check([bad], [0xc2], "2-byte sequence with last byte missing");
  check([bad], [0xe0, 0x80], "3-byte sequence with last byte missing");
  check([bad], [0xf0, 0x80, 0x80], "4-byte sequence with last byte missing");
  check([bad], [0xf8, 0x88, 0x80, 0x80],
      "5-byte sequence with last byte missing");
  check([bad], [0xfc, 0x80, 0x80, 0x80, 0x80],
      "6-byte sequence with last byte missing");

  check([bad], [0xdf], "2-byte sequence with last byte missing (hi)");
  check([bad], [0xef, 0xbf], "3-byte sequence with last byte missing (hi)");
  check([bad], [0xf7, 0xbf, 0xbf],
      "4-byte sequence with last byte missing (hi)");
  check([bad], [0xfb, 0xbf, 0xbf, 0xbf],
      "5-byte sequence with last byte missing (hi)");
  check([bad], [0xfd, 0xbf, 0xbf, 0xbf, 0xbf],
      "6-byte sequence with last byte missing (hi)");

  // Concatenation of incomplete sequences
  check(
      [bad, bad, bad, bad, bad, bad, bad, bad, bad, bad],
      [
        0xc2,
        0xe0, 0x80,
        0xf0, 0x80, 0x80,
        0xf8, 0x88, 0x80, 0x80,
        0xfc, 0x80, 0x80, 0x80, 0x80,
        0xdf,
        0xef, 0xbf,
        0xf7, 0xbf, 0xbf,
        0xfb, 0xbf, 0xbf, 0xbf,
        0xfd, 0xbf, 0xbf, 0xbf, 0xbf
      ],
      "Concatenation of incomplete sequences");

  // Impossible bytes
  check([bad], [0xfe], "fe");
  check([bad], [0xff], "ff");
  check([bad, bad, bad, bad], [0xfe, 0xfe, 0xff, 0xff], "fe fe ff ff");

  // Overlong sequences
  check([bad], [0xc0, 0xaf], "c0 af");
  check([bad], [0xe0, 0x80, 0xaf], "e0 80 af");
  check([bad], [0xf0, 0x80, 0x80, 0xaf], "f0 80 80 af");
  check([bad], [0xf8, 0x80, 0x80, 0x80, 0xaf], "f8 80 80 80 af");
  check([bad], [0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf], "fc 80 80 80 80 af");

  check([bad], [0xc1, 0xbf], "c1 bf");
  check([bad], [0xe0, 0x9f, 0xbf], "e0 9f bf");
  check([bad], [0xf0, 0x8f, 0xbf, 0xbf], "f0 8f bf bf");
  check([bad], [0xf8, 0x87, 0xbf, 0xbf, 0xbf], "f8 87 bf bf bf");
  check([bad], [0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf], "fc 83 bf bf bf bf");

  check([bad], [0xc0, 0x80], "c0 80");
  check([bad], [0xe0, 0x80, 0x80], "e0 80 80");
  check([bad], [0xf0, 0x80, 0x80, 0x80], "f0 80 80 80");
  check([bad], [0xf8, 0x80, 0x80, 0x80, 0x80], "f8 80 80 80 80");
  check([bad], [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80], "fc 80 80 80 80 80");

  // Illegal code positions
  check([bad], [0xed, 0xa0, 0x80], "U+D800");
  check([bad], [0xed, 0xad, 0xbf], "U+DB7F");
  check([bad], [0xed, 0xae, 0x80], "U+DB80");
  check([bad], [0xed, 0xaf, 0xbf], "U+DBFF");
  check([bad], [0xed, 0xb0, 0x80], "U+DC00");
  check([bad], [0xed, 0xbe, 0x80], "U+DF80");
  check([bad], [0xed, 0xbf, 0xbf], "U+DFFF");

  // Paired UTF-16 surrogates
  check([bad, bad], [0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80], "U+D800 U+DC00");
  check([bad, bad], [0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf], "U+D800 U+DFFF");
  check([bad, bad], [0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80], "U+DB7F U+DC00");
  check([bad, bad], [0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf], "U+DB7F U+DFFF");
  check([bad, bad], [0xed, 0xae, 0x80, 0xed, 0xb0, 0x80], "U+DB80 U+DC00");
  check([bad, bad], [0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf], "U+DB80 U+DFFF");
  check([bad, bad], [0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80], "U+DBFF U+DC00");
  check([bad, bad], [0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf], "U+DBFF U+DFFF");

  // Other illegal code positions (???)
  check([0xfffe], [0xef, 0xbf, 0xbe], "U+FFFE");
  check([0xffff], [0xef, 0xbf, 0xbf], "U+FFFF");
}

442

/// Decodes each expected byte table with [decodeUtf8] and compares the
/// result with the matching sample phrase.
void testUtf8BytesToString() {
  // One comparison: [bytes] is decoded and must equal [phrase].
  void check(String phrase, List<int> bytes, String reason) {
    Expect.stringEquals(phrase, decodeUtf8(bytes), reason);
  }

  check(testEnglishPhrase, testEnglishUtf8, "English");
  check(testDanishPhrase, testDanishUtf8, "Danish");
  check(testHebrewPhrase, testHebrewUtf8, "Hebrew");
  check(testRussianPhrase, testRussianUtf8, "Russian");
  check(testGreekPhrase, testGreekUtf8, "Greek");
  check(testKatakanaPhrase, testKatakanaUtf8, "Katakana");
}

462

/// Exercises the iterable decoder on the English and Katakana samples:
/// first its `first` element, then the complete decoded text.
// NOTE(review): IterableUtf8Decoder and decodeUtf8AsIterable are not
// declared in this file; presumably they come from an import - confirm.
void testIterableMethods() {
  // English: the expected first element is the first byte of the table.
  IterableUtf8Decoder english = decodeUtf8AsIterable(testEnglishUtf8);
  Expect.equals(testEnglishUtf8[0], english.first);
  // Drain the decoder through the Iterable interface and rebuild the text.
  List<int> englishCodes = new List<int>.from(english);
  Expect.stringEquals(
      testEnglishPhrase, new String.fromCharCodes(englishCodes));

  // Katakana: the expected first element is the phrase's first code unit.
  IterableUtf8Decoder katakana = decodeUtf8AsIterable(testKatakanaUtf8);
  Expect.equals(testKatakanaPhrase.codeUnits[0], katakana.first);
  // Drain the decoder through the Iterable interface and rebuild the text.
  List<int> katakanaCodes = new List<int>.from(katakana);
  Expect.stringEquals(
      testKatakanaPhrase, new String.fromCharCodes(katakanaCodes));
}

478 }

OLD	NEW

« pkg/utf/test/utf32_test.dart ('K') | « utils/tests/string_encoding/utf8_benchmarks.dart ('k') | no next file » | no next file with comments »