packages/utf/test/utf82_test.dart - Issue 1400473008: Roll Observatory packages and add a roll script

Side by Side Diff: packages/utf/test/utf82_test.dart

Issue 1400473008: Roll Observatory packages and add a roll script (Closed) Base URL: git@github.com:dart-lang/observatory_pub_packages.git@master

Patch Set: Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

	2 // for details. All rights reserved. Use of this source code is governed by a

	3 // BSD-style license that can be found in the LICENSE file.

	4

	5 library utf.utf82_test;

	6

	7 import 'package:expect/expect.dart';

	8 import 'package:utf/utf.dart';

	9

	10 const String testEnglishPhrase =

	11 "The quick brown fox jumps over the lazy dog.";

	12

	13 const List<int> testEnglishUtf8 = const<int> [

	14 0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63,

	15 0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20,

	16 0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70,

	17 0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74,

	18 0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20,

	19 0x64, 0x6f, 0x67, 0x2e];

	20

	21 const String testDanishPhrase = "Quizdeltagerne spiste jordbær med "

	22 "fløde mens cirkusklovnen Wolther spillede på xylofon.";

	23

	24 const List<int> testDanishUtf8 = const<int>[

	25 0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74,

	26 0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73,

	27 0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f,

	28 0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d,

	29 0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64,

	30 0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63,

	31 0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f,

	32 0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c,

	33 0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69,

	34 0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3,

	35 0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f,

	36 0x6e, 0x2e];

	37

	38 // unusual formatting due to strange editor interaction w/ text direction.

	39 const String

	40 testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";

	41

	42 const List<int> testHebrewUtf8 = const<int>[

	43 0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7,

	44 0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9,

	45 0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7,

	46 0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95,

	47 0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7,

	48 0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7,

	49 0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90,

	50 0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97,

	51 0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7,

	52 0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94,

	53 0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98,

	54 0xd7, 0x94];

	55

	56 const String testRussianPhrase = "Съешь же ещё этих мягких "

	57 "французских булок да выпей чаю";

	58

	59 const List<int> testRussianUtf8 = const<int>[

	60 0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88,

	61 0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20,

	62 0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1,

	63 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,

	64 0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba,

	65 0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1,

	66 0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1,

	67 0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0,

	68 0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83,

	69 0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0,

	70 0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b,

	71 0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1,

	72 0x87, 0xd0, 0xb0, 0xd1, 0x8e];

	73

	74 const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ "

	75 "στὸ χρυσαφὶ ξέφωτο";

	76

	77 const List<int> testGreekUtf8 = const<int>[

	78 0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad,

	79 0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce,

	80 0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf,

	81 0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1,

	82 0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1,

	83 0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1,

	84 0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1,

	85 0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1,

	86 0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1,

	87 0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf,

	88 0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1,

	89 0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf,

	90 0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf];

	91

	92 const String testKatakanaPhrase = """

	93 イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム

	94 ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン""";

	95

	96 const List<int> testKatakanaUtf8 = const<int>[

	97 0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83,

	98 0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3,

	99 0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83,

	100 0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3,

	101 0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83,

	102 0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3,

	103 0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd,

	104 0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3,

	105 0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0,

	106 0x0a, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3,

	107 0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf,

	108 0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3,

	109 0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3,

	110 0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3,

	111 0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad,

	112 0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83,

	113 0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1,

	114 0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82,

	115 0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3];

	116

	117 void main() {

	118 testUtf8bytesToCodepoints();

	119 testUtf8BytesToString();

	120 testEncodeToUtf8();

	121 testIterableMethods();

	122 }

	123

	124 void testEncodeToUtf8() {

	125 Expect.listEquals(testEnglishUtf8, encodeUtf8(testEnglishPhrase),

	126 "english to utf8");

	127

	128 Expect.listEquals(testDanishUtf8, encodeUtf8(testDanishPhrase),

	129 "encode danish to utf8");

	130

	131 Expect.listEquals(testHebrewUtf8, encodeUtf8(testHebrewPhrase),

	132 "Hebrew to utf8");

	133

	134 Expect.listEquals(testRussianUtf8, encodeUtf8(testRussianPhrase),

	135 "Russian to utf8");

	136

	137 Expect.listEquals(testGreekUtf8, encodeUtf8(testGreekPhrase),

	138 "Greek to utf8");

	139

	140 Expect.listEquals(testKatakanaUtf8, encodeUtf8(testKatakanaPhrase),

	141 "Katakana to utf8");

	142 }

	143

	144 void testUtf8bytesToCodepoints() {

	145 Expect.listEquals([954, 972, 963, 956, 949],

	146 utf8ToCodepoints([0xce, 0xba, 0xcf, 0x8c, 0xcf,

	147 0x83, 0xce, 0xbc, 0xce, 0xb5]), "κόσμε");

	148

	149 // boundary conditions: First possible sequence of a certain length

	150 Expect.listEquals([], utf8ToCodepoints([]), "no input");

	151 Expect.listEquals([0x0], utf8ToCodepoints([0x0]), "0");

	152 Expect.listEquals([0x80], utf8ToCodepoints([0xc2, 0x80]), "80");

	153 Expect.listEquals([0x800],

	154 utf8ToCodepoints([0xe0, 0xa0, 0x80]), "800");

	155 Expect.listEquals([0x10000],

	156 utf8ToCodepoints([0xf0, 0x90, 0x80, 0x80]), "10000");

	157 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	158 utf8ToCodepoints([0xf8, 0x88, 0x80, 0x80, 0x80]), "200000");

	159 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	160 utf8ToCodepoints([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80]),

	161 "4000000");

	162

	163 // boundary conditions: Last possible sequence of a certain length

	164 Expect.listEquals([0x7f], utf8ToCodepoints([0x7f]), "7f");

	165 Expect.listEquals([0x7ff], utf8ToCodepoints([0xdf, 0xbf]), "7ff");

	166 Expect.listEquals([0xffff],

	167 utf8ToCodepoints([0xef, 0xbf, 0xbf]), "ffff");

	168 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	169 utf8ToCodepoints([0xf7, 0xbf, 0xbf, 0xbf]), "1fffff");

	170 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	171 utf8ToCodepoints([0xfb, 0xbf, 0xbf, 0xbf, 0xbf]), "3ffffff");

	172 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	173 utf8ToCodepoints([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf]),

	174 "7fffffff");

	175

	176 // other boundary conditions

	177 Expect.listEquals([0xd7ff],

	178 utf8ToCodepoints([0xed, 0x9f, 0xbf]), "d7ff");

	179 Expect.listEquals([0xe000],

	180 utf8ToCodepoints([0xee, 0x80, 0x80]), "e000");

	181 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	182 utf8ToCodepoints([0xef, 0xbf, 0xbd]), "fffd");

	183 Expect.listEquals([0x10ffff],

	184 utf8ToCodepoints([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff");

	185 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	186 utf8ToCodepoints([0xf4, 0x90, 0x80, 0x80]), "110000");

	187

	188 // unexpected continuation bytes

	189 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	190 utf8ToCodepoints([0x80]), "80 => replacement character");

	191 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	192 utf8ToCodepoints([0xbf]), "bf => replacement character");

	193

	194 List<int> allContinuationBytes = <int>[];

	195 List<int> matchingReplacementChars = <int>[];

	196 for (int i = 0x80; i < 0xc0; i++) {

	197 allContinuationBytes.add(i);

	198 matchingReplacementChars.add(UNICODE_REPLACEMENT_CHARACTER_CODEPOINT);

	199 }

	200 Expect.listEquals(matchingReplacementChars,

	201 utf8ToCodepoints(allContinuationBytes),

	202 "80 - bf => replacement character x 64");

	203

	204 List<int> allFirstTwoByteSeq = <int>[];

	205 matchingReplacementChars = <int>[];

	206 for (int i = 0xc0; i < 0xe0; i++) {

	207 allFirstTwoByteSeq.addAll([i, 0x20]);

	208 matchingReplacementChars.addAll(

	209 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);

	210 }

	211 Expect.listEquals(matchingReplacementChars,

	212 utf8ToCodepoints(allFirstTwoByteSeq),

	213 "c0 - df + space => replacement character + space x 32");

	214

	215 List<int> allFirstThreeByteSeq = <int>[];

	216 matchingReplacementChars = <int>[];

	217 for (int i = 0xe0; i < 0xf0; i++) {

	218 allFirstThreeByteSeq.addAll([i, 0x20]);

	219 matchingReplacementChars.addAll(

	220 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);

	221 }

	222 Expect.listEquals(matchingReplacementChars,

	223 utf8ToCodepoints(allFirstThreeByteSeq),

	224 "e0 - ef + space => replacement character x 16");

	225

	226 List<int> allFirstFourByteSeq = <int>[];

	227 matchingReplacementChars = <int>[];

	228 for (int i = 0xf0; i < 0xf8; i++) {

	229 allFirstFourByteSeq.addAll([i, 0x20]);

	230 matchingReplacementChars.addAll(

	231 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);

	232 }

	233 Expect.listEquals(matchingReplacementChars,

	234 utf8ToCodepoints(allFirstFourByteSeq),

	235 "f0 - f7 + space => replacement character x 8");

	236

	237 List<int> allFirstFiveByteSeq = <int>[];

	238 matchingReplacementChars = <int>[];

	239 for (int i = 0xf8; i < 0xfc; i++) {

	240 allFirstFiveByteSeq.addAll([i, 0x20]);

	241 matchingReplacementChars.addAll(

	242 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);

	243 }

	244 Expect.listEquals(matchingReplacementChars,

	245 utf8ToCodepoints(allFirstFiveByteSeq),

	246 "f8 - fb + space => replacement character x 4");

	247

	248 List<int> allFirstSixByteSeq = <int>[];

	249 matchingReplacementChars = <int>[];

	250 for (int i = 0xfc; i < 0xfe; i++) {

	251 allFirstSixByteSeq.addAll([i, 0x20]);

	252 matchingReplacementChars.addAll(

	253 [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);

	254 }

	255 Expect.listEquals(matchingReplacementChars,

	256 utf8ToCodepoints(allFirstSixByteSeq),

	257 "fc - fd + space => replacement character x 2");

	258

	259 // Sequences with last continuation byte missing

	260 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	261 utf8ToCodepoints([0xc2]),

	262 "2-byte sequence with last byte missing");

	263 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	264 utf8ToCodepoints([0xe0, 0x80]),

	265 "3-byte sequence with last byte missing");

	266 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	267 utf8ToCodepoints([0xf0, 0x80, 0x80]),

	268 "4-byte sequence with last byte missing");

	269 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	270 utf8ToCodepoints([0xf8, 0x88, 0x80, 0x80]),

	271 "5-byte sequence with last byte missing");

	272 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	273 utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80]),

	274 "6-byte sequence with last byte missing");

	275

	276 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	277 utf8ToCodepoints([0xdf]),

	278 "2-byte sequence with last byte missing (hi)");

	279 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	280 utf8ToCodepoints([0xef, 0xbf]),

	281 "3-byte sequence with last byte missing (hi)");

	282 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	283 utf8ToCodepoints([0xf7, 0xbf, 0xbf]),

	284 "4-byte sequence with last byte missing (hi)");

	285 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	286 utf8ToCodepoints([0xfb, 0xbf, 0xbf, 0xbf]),

	287 "5-byte sequence with last byte missing (hi)");

	288 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	289 utf8ToCodepoints([0xfd, 0xbf, 0xbf, 0xbf, 0xbf]),

	290 "6-byte sequence with last byte missing (hi)");

	291

	292 // Concatenation of incomplete sequences

	293 Expect.listEquals(

	294 [ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	295 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	296 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	297 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	298 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	299 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	300 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	301 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	302 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	303 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT ],

	304 utf8ToCodepoints(

	305 [ 0xc2,

	306 0xe0, 0x80,

	307 0xf0, 0x80, 0x80,

	308 0xf8, 0x88, 0x80, 0x80,

	309 0xfc, 0x80, 0x80, 0x80, 0x80,

	310 0xdf,

	311 0xef, 0xbf,

	312 0xf7, 0xbf, 0xbf,

	313 0xfb, 0xbf, 0xbf, 0xbf,

	314 0xfd, 0xbf, 0xbf, 0xbf, 0xbf ]),

	315 "Concatenation of incomplete sequences");

	316

	317 // Impossible bytes

	318 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	319 utf8ToCodepoints([0xfe]), "fe");

	320 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	321 utf8ToCodepoints([0xff]), "ff");

	322 Expect.listEquals([

	323 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	324 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	325 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	326 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	327 utf8ToCodepoints([0xfe, 0xfe, 0xff, 0xff]), "fe fe ff ff");

	328

	329 // Overlong sequences

	330 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	331 utf8ToCodepoints([0xc0, 0xaf]), "c0 af");

	332 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	333 utf8ToCodepoints([0xe0, 0x80, 0xaf]), "e0 80 af");

	334 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	335 utf8ToCodepoints([0xf0, 0x80, 0x80, 0xaf]), "f0 80 80 af");

	336 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	337 utf8ToCodepoints([0xf8, 0x80, 0x80, 0x80, 0xaf]), "f8 80 80 80 af");

	338 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	339 utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf]),

	340 "fc 80 80 80 80 af");

	341

	342 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	343 utf8ToCodepoints([0xc1, 0xbf]), "c1 bf");

	344 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	345 utf8ToCodepoints([0xe0, 0x9f, 0xbf]), "e0 9f bf");

	346 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	347 utf8ToCodepoints([0xf0, 0x8f, 0xbf, 0xbf]), "f0 8f bf bf");

	348 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	349 utf8ToCodepoints([0xf8, 0x87, 0xbf, 0xbf, 0xbf]), "f8 87 bf bf bf");

	350 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	351 utf8ToCodepoints([0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf]),

	352 "fc 83 bf bf bf bf");

	353

	354 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	355 utf8ToCodepoints([0xc0, 0x80]), "c0 80");

	356 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	357 utf8ToCodepoints([0xe0, 0x80, 0x80]), "e0 80 80");

	358 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	359 utf8ToCodepoints([0xf0, 0x80, 0x80, 0x80]), "f0 80 80 80");

	360 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	361 utf8ToCodepoints([0xf8, 0x80, 0x80, 0x80, 0x80]), "f8 80 80 80 80");

	362 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	363 utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80, 0x80]),

	364 "fc 80 80 80 80 80");

	365

	366 // Illegal code positions

	367 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	368 utf8ToCodepoints([0xed, 0xa0, 0x80]), "U+D800");

	369 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	370 utf8ToCodepoints([0xed, 0xad, 0xbf]), "U+DB7F");

	371 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	372 utf8ToCodepoints([0xed, 0xae, 0x80]), "U+DB80");

	373 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	374 utf8ToCodepoints([0xed, 0xaf, 0xbf]), "U+DBFF");

	375 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	376 utf8ToCodepoints([0xed, 0xb0, 0x80]), "U+DC00");

	377 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	378 utf8ToCodepoints([0xed, 0xbe, 0x80]), "U+DF80");

	379 Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	380 utf8ToCodepoints([0xed, 0xbf, 0xbf]), "U+DFFF");

	381

	382 // Paired UTF-16 surrogates

	383 Expect.listEquals([

	384 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	385 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	386 utf8ToCodepoints([0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80]),

	387 "U+D800 U+DC00");

	388 Expect.listEquals([

	389 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	390 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	391 utf8ToCodepoints([0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf]),

	392 "U+D800 U+DFFF");

	393 Expect.listEquals([

	394 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	395 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	396 utf8ToCodepoints([0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80]),

	397 "U+DB7F U+DC00");

	398 Expect.listEquals([

	399 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	400 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	401 utf8ToCodepoints([0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf]),

	402 "U+DB7F U+DFFF");

	403 Expect.listEquals([

	404 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	405 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	406 utf8ToCodepoints([0xed, 0xae, 0x80, 0xed, 0xb0, 0x80]),

	407 "U+DB80 U+DC00");

	408 Expect.listEquals([

	409 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	410 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	411 utf8ToCodepoints([0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf]),

	412 "U+DB80 U+DFFF");

	413 Expect.listEquals([

	414 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	415 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	416 utf8ToCodepoints([0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80]),

	417 "U+DBFF U+DC00");

	418 Expect.listEquals([

	419 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,

	420 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],

	421 utf8ToCodepoints([0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf]),

	422 "U+DBFF U+DFFF");

	423

	424 // Other illegal code positions (???)

	425 Expect.listEquals([0xfffe], utf8ToCodepoints([0xef, 0xbf, 0xbe]),

	426 "U+FFFE");

	427 Expect.listEquals([0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]),

	428 "U+FFFF");

	429 }

	430

	431 void testUtf8BytesToString() {

	432 Expect.stringEquals(testEnglishPhrase,

	433 decodeUtf8(testEnglishUtf8), "English");

	434

	435 Expect.stringEquals(testDanishPhrase,

	436 decodeUtf8(testDanishUtf8), "Danish");

	437

	438 Expect.stringEquals(testHebrewPhrase,

	439 decodeUtf8(testHebrewUtf8), "Hebrew");

	440

	441 Expect.stringEquals(testRussianPhrase,

	442 decodeUtf8(testRussianUtf8), "Russian");

	443

	444 Expect.stringEquals(testGreekPhrase,

	445 decodeUtf8(testGreekUtf8), "Greek");

	446

	447 Expect.stringEquals(testKatakanaPhrase,

	448 decodeUtf8(testKatakanaUtf8), "Katakana");

	449 }

	450

	451 void testIterableMethods() {

	452 IterableUtf8Decoder englishDecoder = decodeUtf8AsIterable(testEnglishUtf8);

	453 // get the first character

	454 Expect.equals(testEnglishUtf8[0], englishDecoder.first);

	455 // get the whole translation using the Iterable interface

	456 Expect.stringEquals(testEnglishPhrase,

	457 new String.fromCharCodes(new List<int>.from(englishDecoder)));

	458

	459 IterableUtf8Decoder kataDecoder = decodeUtf8AsIterable(testKatakanaUtf8);

	460 // get the first character

	461 Expect.equals(testKatakanaPhrase.codeUnits[0], kataDecoder.first);

	462 // get the whole translation using the Iterable interface

	463 Expect.stringEquals(testKatakanaPhrase,

	464 new String.fromCharCodes(new List<int>.from(kataDecoder)));

	465 }

OLD	NEW

« no previous file with comments | « packages/utf/test/utf32_test.dart ('k') | packages/utf/test/utf8_test.dart » ('j') | no next file with comments »