pkg/utf/test/utf82_test.dart - Issue 68563004: Move unicode tests to utf package.

Side by Side Diff: pkg/utf/test/utf82_test.dart

Issue 68563004: Move unicode tests to utf package. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Simplify test. Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

	2 // for details. All rights reserved. Use of this source code is governed by a

	3 // BSD-style license that can be found in the LICENSE file.

	4

	5 import 'package:expect/expect.dart';

	6 import 'package:utf/utf.dart';

	7

	/// ASCII-only pangram; each character maps to one byte in
	/// [testEnglishUtf8].
	const String testEnglishPhrase =
	    "The quick brown fox jumps over the lazy dog.";

	/// Expected UTF-8 encoding of [testEnglishPhrase] (44 bytes).
	const List<int> testEnglishUtf8 = const <int>[
	  0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63,
	  0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20,
	  0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70,
	  0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74,
	  0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20,
	  0x64, 0x6f, 0x67, 0x2e
	];

	18

	/// Danish pangram; the letters ae-ligature, o-slash and a-ring each take
	/// two bytes in [testDanishUtf8].
	const String testDanishPhrase =
	    "Quizdeltagerne spiste jordbær med "
	    "fløde mens cirkusklovnen Wolther spillede på xylofon.";

	/// Expected UTF-8 encoding of [testDanishPhrase] (90 bytes).
	const List<int> testDanishUtf8 = const <int>[
	  0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74,
	  0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73,
	  0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f,
	  0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d,
	  0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64,
	  0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63,
	  0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f,
	  0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c,
	  0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69,
	  0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3,
	  0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f,
	  0x6e, 0x2e
	];

	35

	/// Hebrew sample sentence (right-to-left script).
	///
	/// The declaration is deliberately split before the identifier: some
	/// editors reorder the line when right-to-left text follows the name.
	const String
	    testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";

	/// Expected UTF-8 encoding of [testHebrewPhrase] (92 bytes).
	const List<int> testHebrewUtf8 = const <int>[
	  0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7,
	  0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9,
	  0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7,
	  0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95,
	  0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7,
	  0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7,
	  0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90,
	  0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97,
	  0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7,
	  0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94,
	  0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98,
	  0xd7, 0x94
	];

	53

	/// Russian (Cyrillic) pangram; every letter takes two bytes in
	/// [testRussianUtf8].
	const String testRussianPhrase =
	    "Съешь же ещё этих мягких "
	    "французских булок да выпей чаю";

	/// Expected UTF-8 encoding of [testRussianPhrase] (101 bytes).
	const List<int> testRussianUtf8 = const <int>[
	  0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88,
	  0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20,
	  0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1,
	  0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
	  0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba,
	  0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1,
	  0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1,
	  0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0,
	  0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83,
	  0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0,
	  0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b,
	  0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1,
	  0x87, 0xd0, 0xb0, 0xd1, 0x8e
	];

	71

	/// Polytonic Greek sample; mixes two-byte letters with three-byte
	/// accented letters (the 0xe1-led sequences in [testGreekUtf8]).
	const String testGreekPhrase =
	    "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ "
	    "στὸ χρυσαφὶ ξέφωτο";

	/// Expected UTF-8 encoding of [testGreekPhrase] (103 bytes).
	const List<int> testGreekUtf8 = const <int>[
	  0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad,
	  0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce,
	  0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf,
	  0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1,
	  0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1,
	  0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1,
	  0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1,
	  0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1,
	  0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1,
	  0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf,
	  0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1,
	  0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf,
	  0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf
	];

	89

	/// Katakana sample: two lines of four space-separated groups each.
	///
	/// Written as adjacent single-line literals with an explicit `\n` so the
	/// word separators and the line break cannot be lost or altered by
	/// re-indentation or whitespace-collapsing tools. The value must stay in
	/// sync with [testKatakanaUtf8], which contains six 0x20 bytes and one
	/// 0x0a byte between the three-byte kana sequences.
	const String testKatakanaPhrase =
	    "イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム\n"
	    "ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン";

	/// Expected UTF-8 encoding of [testKatakanaPhrase] (151 bytes:
	/// 48 kana x 3 bytes, 6 spaces and 1 line feed).
	const List<int> testKatakanaUtf8 = const <int>[
	  0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83,
	  0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3,
	  0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83,
	  0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3,
	  0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83,
	  0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3,
	  0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd,
	  0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3,
	  0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0,
	  0x0a, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3,
	  0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf,
	  0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3,
	  0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3,
	  0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3,
	  0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad,
	  0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83,
	  0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1,
	  0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82,
	  0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3
	];

	114

	/// Entry point: runs every UTF-8 test group defined in this file.
	void main() {
	  // The order is kept as-is so the first reported failure stays the same.
	  final List<Function> testGroups = <Function>[
	    testUtf8bytesToCodepoints,
	    testUtf8BytesToString,
	    testEncodeToUtf8,
	    testIterableMethods
	  ];
	  for (Function runGroup in testGroups) {
	    runGroup();
	  }
	}

	121

	/// Checks that [encodeUtf8] turns each sample phrase into its expected
	/// byte table.
	void testEncodeToUtf8() {
	  // Encodes [phrase] and compares the result against [expectedBytes].
	  void expectEncoding(List<int> expectedBytes, String phrase, String reason) {
	    Expect.listEquals(expectedBytes, encodeUtf8(phrase), reason);
	  }

	  expectEncoding(testEnglishUtf8, testEnglishPhrase, "english to utf8");
	  expectEncoding(testDanishUtf8, testDanishPhrase, "encode danish to utf8");
	  expectEncoding(testHebrewUtf8, testHebrewPhrase, "Hebrew to utf8");
	  expectEncoding(testRussianUtf8, testRussianPhrase, "Russian to utf8");
	  expectEncoding(testGreekUtf8, testGreekPhrase, "Greek to utf8");
	  expectEncoding(testKatakanaUtf8, testKatakanaPhrase, "Katakana to utf8");
	}

	141

	/// Returns a list holding [count] copies of
	/// [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT].
	List<int> _replacementChars(int count) =>
	    new List<int>.filled(count, UNICODE_REPLACEMENT_CHARACTER_CODEPOINT);

	/// Expects [bytes] to decode to exactly [count] replacement characters.
	void _expectReplacements(int count, List<int> bytes, String reason) {
	  Expect.listEquals(
	      _replacementChars(count), utf8ToCodepoints(bytes), reason);
	}

	/// Feeds every lead byte from [first] (inclusive) to [limit] (exclusive),
	/// each followed by a space (0x20) where a continuation byte would be
	/// required, and expects one replacement character per lead/space pair.
	void _expectLeadBytesBeforeSpace(int first, int limit, String reason) {
	  List<int> input = <int>[];
	  for (int lead = first; lead < limit; lead++) {
	    input.add(lead);
	    input.add(0x20);
	  }
	  _expectReplacements(limit - first, input, reason);
	}

	/// Checks [utf8ToCodepoints] on well-formed input, on sequence-length
	/// boundaries and on malformed input (stray continuation bytes, truncated
	/// sequences, impossible bytes, overlong forms and encoded surrogates).
	///
	/// Malformed input is expected to decode to
	/// [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT] rather than to throw.
	void testUtf8bytesToCodepoints() {
	  Expect.listEquals(
	      [954, 972, 963, 956, 949],
	      utf8ToCodepoints(
	          [0xce, 0xba, 0xcf, 0x8c, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5]),
	      "κόσμε");

	  // Boundary conditions: first possible sequence of each length.
	  Expect.listEquals([], utf8ToCodepoints([]), "no input");
	  Expect.listEquals([0x0], utf8ToCodepoints([0x0]), "0");
	  Expect.listEquals([0x80], utf8ToCodepoints([0xc2, 0x80]), "80");
	  Expect.listEquals(
	      [0x800], utf8ToCodepoints([0xe0, 0xa0, 0x80]), "800");
	  Expect.listEquals(
	      [0x10000], utf8ToCodepoints([0xf0, 0x90, 0x80, 0x80]), "10000");
	  _expectReplacements(1, [0xf8, 0x88, 0x80, 0x80, 0x80], "200000");
	  _expectReplacements(1, [0xfc, 0x84, 0x80, 0x80, 0x80, 0x80], "4000000");

	  // Boundary conditions: last possible sequence of each length.
	  Expect.listEquals([0x7f], utf8ToCodepoints([0x7f]), "7f");
	  Expect.listEquals([0x7ff], utf8ToCodepoints([0xdf, 0xbf]), "7ff");
	  Expect.listEquals(
	      [0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]), "ffff");
	  _expectReplacements(1, [0xf7, 0xbf, 0xbf, 0xbf], "1fffff");
	  _expectReplacements(1, [0xfb, 0xbf, 0xbf, 0xbf, 0xbf], "3ffffff");
	  // Label fixed: fd bf bf bf bf bf would encode 0x7fffffff; the old label
	  // duplicated the "4000000" of the first-six-byte-sequence case above.
	  _expectReplacements(1, [0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf], "7fffffff");

	  // Other boundary conditions.
	  Expect.listEquals(
	      [0xd7ff], utf8ToCodepoints([0xed, 0x9f, 0xbf]), "d7ff");
	  Expect.listEquals(
	      [0xe000], utf8ToCodepoints([0xee, 0x80, 0x80]), "e000");
	  // ef bf bd is the well-formed encoding of U+FFFD itself.
	  _expectReplacements(1, [0xef, 0xbf, 0xbd], "fffd");
	  Expect.listEquals(
	      [0x10ffff], utf8ToCodepoints([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff");
	  _expectReplacements(1, [0xf4, 0x90, 0x80, 0x80], "110000");

	  // Unexpected continuation bytes.
	  _expectReplacements(1, [0x80], "80 => replacement character");
	  _expectReplacements(1, [0xbf], "bf => replacement character");

	  List<int> allContinuationBytes = <int>[];
	  for (int i = 0x80; i < 0xc0; i++) {
	    allContinuationBytes.add(i);
	  }
	  _expectReplacements(allContinuationBytes.length, allContinuationBytes,
	      "80 - bf => replacement character x 64");

	  // Lead bytes followed by a space instead of a continuation byte.
	  // The first label used to say "replacement character + space", which
	  // contradicted the expectation (replacement characters only).
	  _expectLeadBytesBeforeSpace(
	      0xc0, 0xe0, "c0 - df + space => replacement character x 32");
	  _expectLeadBytesBeforeSpace(
	      0xe0, 0xf0, "e0 - ef + space => replacement character x 16");
	  _expectLeadBytesBeforeSpace(
	      0xf0, 0xf8, "f0 - f7 + space => replacement character x 8");
	  _expectLeadBytesBeforeSpace(
	      0xf8, 0xfc, "f8 - fb + space => replacement character x 4");
	  _expectLeadBytesBeforeSpace(
	      0xfc, 0xfe, "fc - fd + space => replacement character x 2");

	  // Sequences with the last continuation byte missing.
	  _expectReplacements(1, [0xc2],
	      "2-byte sequence with last byte missing");
	  _expectReplacements(1, [0xe0, 0x80],
	      "3-byte sequence with last byte missing");
	  _expectReplacements(1, [0xf0, 0x80, 0x80],
	      "4-byte sequence with last byte missing");
	  _expectReplacements(1, [0xf8, 0x88, 0x80, 0x80],
	      "5-byte sequence with last byte missing");
	  _expectReplacements(1, [0xfc, 0x80, 0x80, 0x80, 0x80],
	      "6-byte sequence with last byte missing");

	  _expectReplacements(1, [0xdf],
	      "2-byte sequence with last byte missing (hi)");
	  _expectReplacements(1, [0xef, 0xbf],
	      "3-byte sequence with last byte missing (hi)");
	  _expectReplacements(1, [0xf7, 0xbf, 0xbf],
	      "4-byte sequence with last byte missing (hi)");
	  _expectReplacements(1, [0xfb, 0xbf, 0xbf, 0xbf],
	      "5-byte sequence with last byte missing (hi)");
	  _expectReplacements(1, [0xfd, 0xbf, 0xbf, 0xbf, 0xbf],
	      "6-byte sequence with last byte missing (hi)");

	  // Concatenation of the ten incomplete sequences above: one replacement
	  // character is expected per truncated sequence.
	  _expectReplacements(
	      10,
	      [
	        0xc2,
	        0xe0, 0x80,
	        0xf0, 0x80, 0x80,
	        0xf8, 0x88, 0x80, 0x80,
	        0xfc, 0x80, 0x80, 0x80, 0x80,
	        0xdf,
	        0xef, 0xbf,
	        0xf7, 0xbf, 0xbf,
	        0xfb, 0xbf, 0xbf, 0xbf,
	        0xfd, 0xbf, 0xbf, 0xbf, 0xbf
	      ],
	      "Concatenation of incomplete sequences");

	  // Impossible bytes.
	  _expectReplacements(1, [0xfe], "fe");
	  _expectReplacements(1, [0xff], "ff");
	  _expectReplacements(4, [0xfe, 0xfe, 0xff, 0xff], "fe fe ff ff");

	  // Overlong sequences.
	  _expectReplacements(1, [0xc0, 0xaf], "c0 af");
	  _expectReplacements(1, [0xe0, 0x80, 0xaf], "e0 80 af");
	  _expectReplacements(1, [0xf0, 0x80, 0x80, 0xaf], "f0 80 80 af");
	  _expectReplacements(1, [0xf8, 0x80, 0x80, 0x80, 0xaf],
	      "f8 80 80 80 af");
	  _expectReplacements(1, [0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf],
	      "fc 80 80 80 80 af");

	  _expectReplacements(1, [0xc1, 0xbf], "c1 bf");
	  _expectReplacements(1, [0xe0, 0x9f, 0xbf], "e0 9f bf");
	  _expectReplacements(1, [0xf0, 0x8f, 0xbf, 0xbf], "f0 8f bf bf");
	  _expectReplacements(1, [0xf8, 0x87, 0xbf, 0xbf, 0xbf],
	      "f8 87 bf bf bf");
	  _expectReplacements(1, [0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf],
	      "fc 83 bf bf bf bf");

	  _expectReplacements(1, [0xc0, 0x80], "c0 80");
	  _expectReplacements(1, [0xe0, 0x80, 0x80], "e0 80 80");
	  _expectReplacements(1, [0xf0, 0x80, 0x80, 0x80], "f0 80 80 80");
	  _expectReplacements(1, [0xf8, 0x80, 0x80, 0x80, 0x80],
	      "f8 80 80 80 80");
	  _expectReplacements(1, [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80],
	      "fc 80 80 80 80 80");

	  // Illegal code positions: UTF-16 surrogates encoded as UTF-8.
	  _expectReplacements(1, [0xed, 0xa0, 0x80], "U+D800");
	  _expectReplacements(1, [0xed, 0xad, 0xbf], "U+DB7F");
	  _expectReplacements(1, [0xed, 0xae, 0x80], "U+DB80");
	  _expectReplacements(1, [0xed, 0xaf, 0xbf], "U+DBFF");
	  _expectReplacements(1, [0xed, 0xb0, 0x80], "U+DC00");
	  _expectReplacements(1, [0xed, 0xbe, 0x80], "U+DF80");
	  _expectReplacements(1, [0xed, 0xbf, 0xbf], "U+DFFF");

	  // Paired UTF-16 surrogates: each half is replaced on its own.
	  _expectReplacements(2, [0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80],
	      "U+D800 U+DC00");
	  _expectReplacements(2, [0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf],
	      "U+D800 U+DFFF");
	  _expectReplacements(2, [0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80],
	      "U+DB7F U+DC00");
	  _expectReplacements(2, [0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf],
	      "U+DB7F U+DFFF");
	  _expectReplacements(2, [0xed, 0xae, 0x80, 0xed, 0xb0, 0x80],
	      "U+DB80 U+DC00");
	  _expectReplacements(2, [0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf],
	      "U+DB80 U+DFFF");
	  _expectReplacements(2, [0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80],
	      "U+DBFF U+DC00");
	  _expectReplacements(2, [0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf],
	      "U+DBFF U+DFFF");

	  // Other illegal code positions (???): these are currently expected to
	  // decode to themselves, not to the replacement character.
	  Expect.listEquals(
	      [0xfffe], utf8ToCodepoints([0xef, 0xbf, 0xbe]), "U+FFFE");
	  Expect.listEquals(
	      [0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]), "U+FFFF");
	}

	428

	/// Checks that [decodeUtf8] turns each byte table back into its sample
	/// phrase.
	void testUtf8BytesToString() {
	  // Decodes [bytes] and compares the result against [expectedPhrase].
	  void expectDecoding(String expectedPhrase, List<int> bytes, String reason) {
	    Expect.stringEquals(expectedPhrase, decodeUtf8(bytes), reason);
	  }

	  expectDecoding(testEnglishPhrase, testEnglishUtf8, "English");
	  expectDecoding(testDanishPhrase, testDanishUtf8, "Danish");
	  expectDecoding(testHebrewPhrase, testHebrewUtf8, "Hebrew");
	  expectDecoding(testRussianPhrase, testRussianUtf8, "Russian");
	  expectDecoding(testGreekPhrase, testGreekUtf8, "Greek");
	  expectDecoding(testKatakanaPhrase, testKatakanaUtf8, "Katakana");
	}

	448

	/// Exercises the [Iterable] interface of the decoder returned by
	/// [decodeUtf8AsIterable]: `first`, then a full traversal.
	void testIterableMethods() {
	  // English: the input is pure ASCII, so the first decoded code point
	  // equals the first input byte.
	  IterableUtf8Decoder englishCodepoints =
	      decodeUtf8AsIterable(testEnglishUtf8);
	  Expect.equals(testEnglishUtf8[0], englishCodepoints.first);
	  // Drain the iterable and rebuild the whole phrase from it.
	  List<int> englishAll = new List<int>.from(englishCodepoints);
	  Expect.stringEquals(
	      testEnglishPhrase, new String.fromCharCodes(englishAll));

	  // Katakana: the first code point takes three bytes, so compare against
	  // the phrase's first code unit instead of the first byte.
	  IterableUtf8Decoder katakanaCodepoints =
	      decodeUtf8AsIterable(testKatakanaUtf8);
	  Expect.equals(testKatakanaPhrase.codeUnits[0], katakanaCodepoints.first);
	  // Drain the iterable and rebuild the whole phrase from it.
	  List<int> katakanaAll = new List<int>.from(katakanaCodepoints);
	  Expect.stringEquals(
	      testKatakanaPhrase, new String.fromCharCodes(katakanaAll));
	}

OLD	NEW

« pkg/utf/test/utf32_test.dart ('K') | « pkg/utf/test/utf32_test.dart ('k') | tests/corelib/unicode2_test.dart » ('j') | no next file with comments »