| OLD | NEW | 
| (Empty) |  | 
 |    1 // Copyright (c) 2012, the Dart project authors.  Please see the AUTHORS file | 
 |    2 // for details. All rights reserved. Use of this source code is governed by a | 
 |    3 // BSD-style license that can be found in the LICENSE file. | 
 |    4  | 
 |    5 library utf.utf82_test; | 
 |    6  | 
 |    7 import 'package:expect/expect.dart'; | 
 |    8 import 'package:utf/utf.dart'; | 
 |    9  | 
 |   10 const String testEnglishPhrase = | 
 |   11     "The quick brown fox jumps over the lazy dog."; | 
 |   12  | 
 |   13 const List<int> testEnglishUtf8 = const<int> [ | 
 |   14     0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, | 
 |   15     0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, | 
 |   16     0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70, | 
 |   17     0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74, | 
 |   18     0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20, | 
 |   19     0x64, 0x6f, 0x67, 0x2e]; | 
 |   20  | 
 |   21 const String testDanishPhrase = "Quizdeltagerne spiste jordbær med " | 
 |   22     "fløde mens cirkusklovnen Wolther spillede på xylofon."; | 
 |   23  | 
 |   24 const List<int> testDanishUtf8 = const<int>[ | 
 |   25     0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74, | 
 |   26     0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73, | 
 |   27     0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f, | 
 |   28     0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d, | 
 |   29     0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64, | 
 |   30     0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63, | 
 |   31     0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f, | 
 |   32     0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c, | 
 |   33     0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69, | 
 |   34     0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3, | 
 |   35     0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f, | 
 |   36     0x6e, 0x2e]; | 
 |   37  | 
 |   38 // unusual formatting due to strange editor interaction w/ text direction. | 
 |   39 const String | 
 |   40     testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה"; | 
 |   41  | 
 |   42 const List<int> testHebrewUtf8 = const<int>[ | 
 |   43     0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7, | 
 |   44     0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9, | 
 |   45     0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7, | 
 |   46     0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95, | 
 |   47     0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7, | 
 |   48     0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7, | 
 |   49     0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90, | 
 |   50     0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97, | 
 |   51     0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7, | 
 |   52     0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94, | 
 |   53     0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98, | 
 |   54     0xd7, 0x94]; | 
 |   55  | 
 |   56 const String testRussianPhrase = "Съешь же ещё этих мягких " | 
 |   57     "французских булок да выпей чаю"; | 
 |   58  | 
 |   59 const List<int> testRussianUtf8 = const<int>[ | 
 |   60     0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88, | 
 |   61     0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20, | 
 |   62     0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1, | 
 |   63     0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20, | 
 |   64     0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba, | 
 |   65     0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1, | 
 |   66     0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1, | 
 |   67     0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0, | 
 |   68     0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83, | 
 |   69     0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0, | 
 |   70     0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b, | 
 |   71     0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1, | 
 |   72     0x87, 0xd0, 0xb0, 0xd1, 0x8e]; | 
 |   73  | 
 |   74 const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ " | 
 |   75     "στὸ χρυσαφὶ ξέφωτο"; | 
 |   76  | 
 |   77 const List<int> testGreekUtf8 = const<int>[ | 
 |   78     0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad, | 
 |   79     0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce, | 
 |   80     0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf, | 
 |   81     0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1, | 
 |   82     0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1, | 
 |   83     0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1, | 
 |   84     0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1, | 
 |   85     0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1, | 
 |   86     0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1, | 
 |   87     0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf, | 
 |   88     0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1, | 
 |   89     0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf, | 
 |   90     0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf]; | 
 |   91  | 
 |   92 const String testKatakanaPhrase = """ | 
 |   93 イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム | 
 |   94 ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン"""; | 
 |   95  | 
 |   96 const List<int> testKatakanaUtf8 = const<int>[ | 
 |   97     0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83, | 
 |   98     0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3, | 
 |   99     0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83, | 
 |  100     0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3, | 
 |  101     0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83, | 
 |  102     0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3, | 
 |  103     0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd, | 
 |  104     0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3, | 
 |  105     0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0, | 
 |  106     0x0a, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3, | 
 |  107     0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf, | 
 |  108     0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3, | 
 |  109     0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3, | 
 |  110     0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3, | 
 |  111     0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad, | 
 |  112     0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83, | 
 |  113     0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1, | 
 |  114     0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82, | 
 |  115     0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3]; | 
 |  116  | 
 |  117 void main() { | 
 |  118   testUtf8bytesToCodepoints(); | 
 |  119   testUtf8BytesToString(); | 
 |  120   testEncodeToUtf8(); | 
 |  121   testIterableMethods(); | 
 |  122 } | 
 |  123  | 
 |  124 void testEncodeToUtf8() { | 
 |  125   Expect.listEquals(testEnglishUtf8, encodeUtf8(testEnglishPhrase), | 
 |  126       "english to utf8"); | 
 |  127  | 
 |  128   Expect.listEquals(testDanishUtf8, encodeUtf8(testDanishPhrase), | 
 |  129       "encode danish to utf8"); | 
 |  130  | 
 |  131   Expect.listEquals(testHebrewUtf8, encodeUtf8(testHebrewPhrase), | 
 |  132       "Hebrew to utf8"); | 
 |  133  | 
 |  134   Expect.listEquals(testRussianUtf8, encodeUtf8(testRussianPhrase), | 
 |  135       "Russian to utf8"); | 
 |  136  | 
 |  137   Expect.listEquals(testGreekUtf8, encodeUtf8(testGreekPhrase), | 
 |  138       "Greek to utf8"); | 
 |  139  | 
 |  140   Expect.listEquals(testKatakanaUtf8, encodeUtf8(testKatakanaPhrase), | 
 |  141       "Katakana to utf8"); | 
 |  142 } | 
 |  143  | 
 |  144 void testUtf8bytesToCodepoints() { | 
 |  145   Expect.listEquals([954, 972, 963, 956, 949], | 
 |  146       utf8ToCodepoints([0xce, 0xba, 0xcf, 0x8c, 0xcf, | 
 |  147       0x83, 0xce, 0xbc, 0xce, 0xb5]), "κόσμε"); | 
 |  148  | 
 |  149   // boundary conditions: First possible sequence of a certain length | 
 |  150   Expect.listEquals([], utf8ToCodepoints([]), "no input"); | 
 |  151   Expect.listEquals([0x0], utf8ToCodepoints([0x0]), "0"); | 
 |  152   Expect.listEquals([0x80], utf8ToCodepoints([0xc2, 0x80]), "80"); | 
 |  153   Expect.listEquals([0x800], | 
 |  154       utf8ToCodepoints([0xe0, 0xa0, 0x80]), "800"); | 
 |  155   Expect.listEquals([0x10000], | 
 |  156       utf8ToCodepoints([0xf0, 0x90, 0x80, 0x80]), "10000"); | 
 |  157   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  158       utf8ToCodepoints([0xf8, 0x88, 0x80, 0x80, 0x80]), "200000"); | 
 |  159   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  160       utf8ToCodepoints([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80]), | 
 |  161       "4000000"); | 
 |  162  | 
 |  163   // boundary conditions: Last possible sequence of a certain length | 
 |  164   Expect.listEquals([0x7f], utf8ToCodepoints([0x7f]), "7f"); | 
 |  165   Expect.listEquals([0x7ff], utf8ToCodepoints([0xdf, 0xbf]), "7ff"); | 
 |  166   Expect.listEquals([0xffff], | 
 |  167       utf8ToCodepoints([0xef, 0xbf, 0xbf]), "ffff"); | 
 |  168   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  169       utf8ToCodepoints([0xf7, 0xbf, 0xbf, 0xbf]), "1fffff"); | 
 |  170   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  171       utf8ToCodepoints([0xfb, 0xbf, 0xbf, 0xbf, 0xbf]), "3ffffff"); | 
 |  172   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  173       utf8ToCodepoints([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf]), | 
 |  174       "4000000"); | 
 |  175  | 
 |  176   // other boundary conditions | 
 |  177   Expect.listEquals([0xd7ff], | 
 |  178       utf8ToCodepoints([0xed, 0x9f, 0xbf]), "d7ff"); | 
 |  179   Expect.listEquals([0xe000], | 
 |  180       utf8ToCodepoints([0xee, 0x80, 0x80]), "e000"); | 
 |  181   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  182       utf8ToCodepoints([0xef, 0xbf, 0xbd]), "fffd"); | 
 |  183   Expect.listEquals([0x10ffff], | 
 |  184       utf8ToCodepoints([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff"); | 
 |  185   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  186       utf8ToCodepoints([0xf4, 0x90, 0x80, 0x80]), "110000"); | 
 |  187  | 
 |  188   // unexpected continuation bytes | 
 |  189   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  190       utf8ToCodepoints([0x80]), "80 => replacement character"); | 
 |  191   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  192       utf8ToCodepoints([0xbf]), "bf => replacement character"); | 
 |  193  | 
 |  194   List<int> allContinuationBytes = <int>[]; | 
 |  195   List<int> matchingReplacementChars = <int>[]; | 
 |  196   for (int i = 0x80; i < 0xc0; i++) { | 
 |  197     allContinuationBytes.add(i); | 
 |  198     matchingReplacementChars.add(UNICODE_REPLACEMENT_CHARACTER_CODEPOINT); | 
 |  199   } | 
 |  200   Expect.listEquals(matchingReplacementChars, | 
 |  201       utf8ToCodepoints(allContinuationBytes), | 
 |  202       "80 - bf => replacement character x 64"); | 
 |  203  | 
 |  204   List<int> allFirstTwoByteSeq = <int>[]; | 
 |  205   matchingReplacementChars = <int>[]; | 
 |  206   for (int i = 0xc0; i < 0xe0; i++) { | 
 |  207     allFirstTwoByteSeq.addAll([i, 0x20]); | 
 |  208     matchingReplacementChars.addAll( | 
 |  209         [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); | 
 |  210   } | 
 |  211   Expect.listEquals(matchingReplacementChars, | 
 |  212       utf8ToCodepoints(allFirstTwoByteSeq), | 
 |  213       "c0 - df + space => replacement character + space x 32"); | 
 |  214  | 
 |  215   List<int> allFirstThreeByteSeq = <int>[]; | 
 |  216   matchingReplacementChars = <int>[]; | 
 |  217   for (int i = 0xe0; i < 0xf0; i++) { | 
 |  218     allFirstThreeByteSeq.addAll([i, 0x20]); | 
 |  219     matchingReplacementChars.addAll( | 
 |  220         [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); | 
 |  221   } | 
 |  222   Expect.listEquals(matchingReplacementChars, | 
 |  223       utf8ToCodepoints(allFirstThreeByteSeq), | 
 |  224       "e0 - ef + space => replacement character x 16"); | 
 |  225  | 
 |  226   List<int> allFirstFourByteSeq = <int>[]; | 
 |  227   matchingReplacementChars = <int>[]; | 
 |  228   for (int i = 0xf0; i < 0xf8; i++) { | 
 |  229     allFirstFourByteSeq.addAll([i, 0x20]); | 
 |  230     matchingReplacementChars.addAll( | 
 |  231         [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); | 
 |  232   } | 
 |  233   Expect.listEquals(matchingReplacementChars, | 
 |  234       utf8ToCodepoints(allFirstFourByteSeq), | 
 |  235       "f0 - f7 + space => replacement character x 8"); | 
 |  236  | 
 |  237   List<int> allFirstFiveByteSeq = <int>[]; | 
 |  238   matchingReplacementChars = <int>[]; | 
 |  239   for (int i = 0xf8; i < 0xfc; i++) { | 
 |  240     allFirstFiveByteSeq.addAll([i, 0x20]); | 
 |  241     matchingReplacementChars.addAll( | 
 |  242         [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); | 
 |  243   } | 
 |  244   Expect.listEquals(matchingReplacementChars, | 
 |  245       utf8ToCodepoints(allFirstFiveByteSeq), | 
 |  246       "f8 - fb + space => replacement character x 4"); | 
 |  247  | 
 |  248   List<int> allFirstSixByteSeq = <int>[]; | 
 |  249   matchingReplacementChars = <int>[]; | 
 |  250   for (int i = 0xfc; i < 0xfe; i++) { | 
 |  251     allFirstSixByteSeq.addAll([i, 0x20]); | 
 |  252     matchingReplacementChars.addAll( | 
 |  253         [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]); | 
 |  254   } | 
 |  255   Expect.listEquals(matchingReplacementChars, | 
 |  256       utf8ToCodepoints(allFirstSixByteSeq), | 
 |  257       "fc - fd + space => replacement character x 2"); | 
 |  258  | 
 |  259   // Sequences with last continuation byte missing | 
 |  260   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  261       utf8ToCodepoints([0xc2]), | 
 |  262       "2-byte sequence with last byte missing"); | 
 |  263   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  264       utf8ToCodepoints([0xe0, 0x80]), | 
 |  265       "3-byte sequence with last byte missing"); | 
 |  266   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  267       utf8ToCodepoints([0xf0, 0x80, 0x80]), | 
 |  268       "4-byte sequence with last byte missing"); | 
 |  269   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  270       utf8ToCodepoints([0xf8, 0x88, 0x80, 0x80]), | 
 |  271       "5-byte sequence with last byte missing"); | 
 |  272   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  273       utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80]), | 
 |  274       "6-byte sequence with last byte missing"); | 
 |  275  | 
 |  276   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  277       utf8ToCodepoints([0xdf]), | 
 |  278       "2-byte sequence with last byte missing (hi)"); | 
 |  279   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  280       utf8ToCodepoints([0xef, 0xbf]), | 
 |  281       "3-byte sequence with last byte missing (hi)"); | 
 |  282   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  283       utf8ToCodepoints([0xf7, 0xbf, 0xbf]), | 
 |  284       "4-byte sequence with last byte missing (hi)"); | 
 |  285   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  286       utf8ToCodepoints([0xfb, 0xbf, 0xbf, 0xbf]), | 
 |  287       "5-byte sequence with last byte missing (hi)"); | 
 |  288   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  289       utf8ToCodepoints([0xfd, 0xbf, 0xbf, 0xbf, 0xbf]), | 
 |  290       "6-byte sequence with last byte missing (hi)"); | 
 |  291  | 
 |  292   // Concatenation of incomplete sequences | 
 |  293   Expect.listEquals( | 
 |  294       [ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  295         UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  296         UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  297         UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  298         UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  299         UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  300         UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  301         UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  302         UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  303         UNICODE_REPLACEMENT_CHARACTER_CODEPOINT ], | 
 |  304       utf8ToCodepoints( | 
 |  305           [ 0xc2, | 
 |  306             0xe0, 0x80, | 
 |  307             0xf0, 0x80, 0x80, | 
 |  308             0xf8, 0x88, 0x80, 0x80, | 
 |  309             0xfc, 0x80, 0x80, 0x80, 0x80, | 
 |  310             0xdf, | 
 |  311             0xef, 0xbf, | 
 |  312             0xf7, 0xbf, 0xbf, | 
 |  313             0xfb, 0xbf, 0xbf, 0xbf, | 
 |  314             0xfd, 0xbf, 0xbf, 0xbf, 0xbf ]), | 
 |  315           "Concatenation of incomplete sequences"); | 
 |  316  | 
 |  317   // Impossible bytes | 
 |  318   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  319       utf8ToCodepoints([0xfe]), "fe"); | 
 |  320   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  321       utf8ToCodepoints([0xff]), "ff"); | 
 |  322   Expect.listEquals([ | 
 |  323       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  324       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  325       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  326       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  327       utf8ToCodepoints([0xfe, 0xfe, 0xff, 0xff]), "fe fe ff ff"); | 
 |  328  | 
 |  329   // Overlong sequences | 
 |  330   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  331       utf8ToCodepoints([0xc0, 0xaf]), "c0 af"); | 
 |  332   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  333       utf8ToCodepoints([0xe0, 0x80, 0xaf]), "e0 80 af"); | 
 |  334   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  335       utf8ToCodepoints([0xf0, 0x80, 0x80, 0xaf]), "f0 80 80 af"); | 
 |  336   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  337       utf8ToCodepoints([0xf8, 0x80, 0x80, 0x80, 0xaf]), "f8 80 80 80 af"); | 
 |  338   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  339       utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf]), | 
 |  340       "fc 80 80 80 80 af"); | 
 |  341  | 
 |  342   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  343       utf8ToCodepoints([0xc1, 0xbf]), "c1 bf"); | 
 |  344   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  345       utf8ToCodepoints([0xe0, 0x9f, 0xbf]), "e0 9f bf"); | 
 |  346   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  347       utf8ToCodepoints([0xf0, 0x8f, 0xbf, 0xbf]), "f0 8f bf bf"); | 
 |  348   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  349       utf8ToCodepoints([0xf8, 0x87, 0xbf, 0xbf, 0xbf]), "f8 87 bf bf bf"); | 
 |  350   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  351       utf8ToCodepoints([0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf]), | 
 |  352       "fc 83 bf bf bf bf"); | 
 |  353  | 
 |  354   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  355       utf8ToCodepoints([0xc0, 0x80]), "c0 80"); | 
 |  356   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  357       utf8ToCodepoints([0xe0, 0x80, 0x80]), "e0 80 80"); | 
 |  358   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  359       utf8ToCodepoints([0xf0, 0x80, 0x80, 0x80]), "f0 80 80 80"); | 
 |  360   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  361       utf8ToCodepoints([0xf8, 0x80, 0x80, 0x80, 0x80]), "f8 80 80 80 80"); | 
 |  362   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  363       utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80, 0x80]), | 
 |  364       "fc 80 80 80 80 80"); | 
 |  365  | 
 |  366   // Illegal code positions | 
 |  367   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  368       utf8ToCodepoints([0xed, 0xa0, 0x80]), "U+D800"); | 
 |  369   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  370       utf8ToCodepoints([0xed, 0xad, 0xbf]), "U+DB7F"); | 
 |  371   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  372       utf8ToCodepoints([0xed, 0xae, 0x80]), "U+DB80"); | 
 |  373   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  374       utf8ToCodepoints([0xed, 0xaf, 0xbf]), "U+DBFF"); | 
 |  375   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  376       utf8ToCodepoints([0xed, 0xb0, 0x80]), "U+DC00"); | 
 |  377   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  378       utf8ToCodepoints([0xed, 0xbe, 0x80]), "U+DF80"); | 
 |  379   Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  380       utf8ToCodepoints([0xed, 0xbf, 0xbf]), "U+DFFF"); | 
 |  381  | 
 |  382   // Paired UTF-16 surrogates | 
 |  383   Expect.listEquals([ | 
 |  384       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  385       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  386       utf8ToCodepoints([0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80]), | 
 |  387       "U+D800 U+DC00"); | 
 |  388   Expect.listEquals([ | 
 |  389       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  390       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  391       utf8ToCodepoints([0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf]), | 
 |  392       "U+D800 U+DFFF"); | 
 |  393   Expect.listEquals([ | 
 |  394       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  395       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  396       utf8ToCodepoints([0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80]), | 
 |  397       "U+DB7F U+DC00"); | 
 |  398   Expect.listEquals([ | 
 |  399       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  400       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  401       utf8ToCodepoints([0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf]), | 
 |  402       "U+DB7F U+DFFF"); | 
 |  403   Expect.listEquals([ | 
 |  404       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  405       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  406       utf8ToCodepoints([0xed, 0xae, 0x80, 0xed, 0xb0, 0x80]), | 
 |  407       "U+DB80 U+DC00"); | 
 |  408   Expect.listEquals([ | 
 |  409       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  410       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  411       utf8ToCodepoints([0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf]), | 
 |  412       "U+DB80 U+DFFF"); | 
 |  413   Expect.listEquals([ | 
 |  414       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  415       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  416       utf8ToCodepoints([0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80]), | 
 |  417       "U+DBFF U+DC00"); | 
 |  418   Expect.listEquals([ | 
 |  419       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT, | 
 |  420       UNICODE_REPLACEMENT_CHARACTER_CODEPOINT], | 
 |  421       utf8ToCodepoints([0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf]), | 
 |  422       "U+DBFF U+DFFF"); | 
 |  423  | 
 |  424   // Other illegal code positions (???) | 
 |  425   Expect.listEquals([0xfffe], utf8ToCodepoints([0xef, 0xbf, 0xbe]), | 
 |  426       "U+FFFE"); | 
 |  427   Expect.listEquals([0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]), | 
 |  428       "U+FFFF"); | 
 |  429 } | 
 |  430  | 
 |  431 void testUtf8BytesToString() { | 
 |  432   Expect.stringEquals(testEnglishPhrase, | 
 |  433       decodeUtf8(testEnglishUtf8), "English"); | 
 |  434  | 
 |  435   Expect.stringEquals(testDanishPhrase, | 
 |  436       decodeUtf8(testDanishUtf8), "Danish"); | 
 |  437  | 
 |  438   Expect.stringEquals(testHebrewPhrase, | 
 |  439       decodeUtf8(testHebrewUtf8), "Hebrew"); | 
 |  440  | 
 |  441   Expect.stringEquals(testRussianPhrase, | 
 |  442       decodeUtf8(testRussianUtf8), "Russian"); | 
 |  443  | 
 |  444   Expect.stringEquals(testGreekPhrase, | 
 |  445       decodeUtf8(testGreekUtf8), "Greek"); | 
 |  446  | 
 |  447   Expect.stringEquals(testKatakanaPhrase, | 
 |  448       decodeUtf8(testKatakanaUtf8), "Katakana"); | 
 |  449 } | 
 |  450  | 
 |  451 void testIterableMethods() { | 
 |  452   IterableUtf8Decoder englishDecoder = decodeUtf8AsIterable(testEnglishUtf8); | 
 |  453   // get the first character | 
 |  454   Expect.equals(testEnglishUtf8[0], englishDecoder.first); | 
 |  455   // get the whole translation using the Iterable interface | 
 |  456   Expect.stringEquals(testEnglishPhrase, | 
 |  457       new String.fromCharCodes(new List<int>.from(englishDecoder))); | 
 |  458  | 
 |  459   IterableUtf8Decoder kataDecoder = decodeUtf8AsIterable(testKatakanaUtf8); | 
 |  460   // get the first character | 
 |  461   Expect.equals(testKatakanaPhrase.codeUnits[0], kataDecoder.first); | 
 |  462   // get the whole translation using the Iterable interface | 
 |  463   Expect.stringEquals(testKatakanaPhrase, | 
 |  464       new String.fromCharCodes(new List<int>.from(kataDecoder))); | 
 |  465 } | 
| OLD | NEW |