OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 library utf8_test; |
| 6 import "package:expect/expect.dart"; |
| 7 import 'dart:convert'; |
| 8 |
/// Decodes [bytes] as UTF-8 by feeding a chunked [Utf8Decoder] at most
/// [chunkSize] bytes at a time and returning the accumulated string.
///
/// The decoder is created with default settings; the tests in this file
/// expect it to throw a [FormatException] on malformed input.
///
/// Throws an [ArgumentError] if [chunkSize] is not positive, because the
/// chunking loop could otherwise never advance.
String decode(List<int> bytes, int chunkSize) {
  if (chunkSize < 1) {
    throw new ArgumentError("chunkSize must be positive: $chunkSize");
  }
  StringBuffer buffer = new StringBuffer();
  // Keep the sink typed as a String sink so it can be handed to
  // startChunkedConversion without an implicit downcast.
  StringConversionSink stringSink =
      new StringConversionSink.fromStringSink(buffer);
  var byteSink = new Utf8Decoder().startChunkedConversion(stringSink);
  for (int start = 0; start < bytes.length; start += chunkSize) {
    int end = start + chunkSize;
    // The final chunk may be shorter than chunkSize.
    if (end > bytes.length) end = bytes.length;
    // sublist preserves the List<int> element type the byte sink requires.
    byteSink.add(bytes.sublist(start, end));
  }
  // Closing flushes the decoder; an unfinished sequence is reported here.
  byteSink.close();
  return buffer.toString();
}
| 28 |
/// Decodes [bytes] as UTF-8 with `allowMalformed: true`, feeding the chunked
/// decoder at most [chunkSize] bytes at a time and returning the accumulated
/// string.
///
/// The tests in this file expect malformed input to come back as U+FFFD
/// replacement characters rather than as an exception.
///
/// Throws an [ArgumentError] if [chunkSize] is not positive, because the
/// chunking loop could otherwise never advance.
String decodeAllowMalformed(List<int> bytes, int chunkSize) {
  if (chunkSize < 1) {
    throw new ArgumentError("chunkSize must be positive: $chunkSize");
  }
  StringBuffer buffer = new StringBuffer();
  // Keep the sink typed as a String sink so it can be handed to
  // startChunkedConversion without an implicit downcast.
  StringConversionSink stringSink =
      new StringConversionSink.fromStringSink(buffer);
  var decoder = new Utf8Decoder(allowMalformed: true);
  var byteSink = decoder.startChunkedConversion(stringSink);
  for (int start = 0; start < bytes.length; start += chunkSize) {
    int end = start + chunkSize;
    // The final chunk may be shorter than chunkSize.
    if (end > bytes.length) end = bytes.length;
    // sublist preserves the List<int> element type the byte sink requires.
    byteSink.add(bytes.sublist(start, end));
  }
  // Closing flushes the decoder so a trailing unfinished sequence is emitted.
  byteSink.close();
  return buffer.toString();
}
| 49 |
// Byte sequences that are each malformed UTF-8 on their own. main() embeds
// every entry in several surrounding contexts before decoding it.
final TESTS = <List<int>>[
  // Multi-byte sequences that end before all continuation bytes arrive.
  <int>[0xC3],
  <int>[0xE2, 0x82],
  <int>[0xF0, 0xA4, 0xAD],
  // The euro sign encoded in four bytes (overlong).
  <int>[0xF0, 0x82, 0x82, 0xAC],
  // Lone 0xC0 / 0xC1 lead bytes.
  <int>[0xC0],
  <int>[0xC1],
]
  // Lone bytes 0xF5 through 0xFF, one single-byte entry per value.
  ..addAll(new List<List<int>>.generate(11, (i) => <int>[0xF5 + i]))
  ..addAll(<List<int>>[
    // 0xC0 / 0xC1 followed by a continuation byte.
    <int>[0xC0, 0x80],
    <int>[0xC1, 0x80],
    // Encodes a value outside the valid range.
    <int>[0xF4, 0xBF, 0xBF, 0xBF],
  ]);
| 75 |
// Pairs of [bytes, pattern]. In the pattern each "X" marks one expected
// replacement character; main() substitutes U+FFFD for it.
final TESTS2 = <List<Object>>[
  // 0xC0 / 0xC1 followed by 0x80 must not swallow the character after it.
  <Object>[<int>[0xC0, 0x80, 0x61], "Xa"],
  <Object>[<int>[0xC1, 0x80, 0x61], "Xa"],
]
  // 0xF5 .. 0xFF never appear in valid UTF-8 sequences.
  ..addAll(_invalidLeadCases(<int>[0x80], "XX"))
  ..addAll(_invalidLeadCases(<int>[0x80, 0x61], "XXa"))
  // Characters outside the valid range.
  ..addAll(_invalidLeadCases(<int>[0x80, 0x80, 0x61], "XXXa"));

// Builds one [bytes, expected] pair for each lead byte 0xF5 through 0xFF,
// where bytes is the lead byte followed by [tail].
List<List<Object>> _invalidLeadCases(List<int> tail, String expected) {
  var cases = <List<Object>>[];
  for (int lead = 0xF5; lead <= 0xFF; lead++) {
    cases.add(<Object>[<int>[lead]..addAll(tail), expected]);
  }
  return cases;
}
| 115 |
/// Runs every malformed-input case through both decoders at chunk sizes 1-4.
///
/// For each case the strict decoder must throw a [FormatException], and the
/// allow-malformed decoder must produce the expected string containing
/// U+FFFD replacement characters.
main() {
  var allTests = TESTS.expand((test) {
    // Pairs of test and expected string output when malformed strings are
    // allowed. Replacement character: U+FFFD
    //
    // Each malformed sequence is placed alone, next to ASCII "a", next to
    // the two-byte character "å", and next to a second copy of itself. The
    // lists are built as List<int> so they satisfy the typed `bytes`
    // variable in the loop below.
    return [
      [test, "\u{FFFD}"],
      [new List<int>.from([0x61])..addAll(test), "a\u{FFFD}"],
      [new List<int>.from([0x61])..addAll(test)..add(0x61), "a\u{FFFD}a"],
      [new List<int>.from(test)..add(0x61), "\u{FFFD}a"],
      [new List<int>.from(test)..addAll(test), "\u{FFFD}\u{FFFD}"],
      [
        new List<int>.from(test)..add(0x61)..addAll(test),
        "\u{FFFD}a\u{FFFD}"
      ],
      [new List<int>.from([0xc3, 0xa5])..addAll(test), "å\u{FFFD}"],
      [
        new List<int>.from([0xc3, 0xa5])..addAll(test)..addAll([0xc3, 0xa5]),
        "å\u{FFFD}å"
      ],
      [new List<int>.from(test)..addAll([0xc3, 0xa5]), "\u{FFFD}å"],
      [
        new List<int>.from(test)..addAll([0xc3, 0xa5])..addAll(test),
        "\u{FFFD}å\u{FFFD}"
      ]
    ];
  });

  var allTests2 = TESTS2.map((test) {
    // Pairs of test and expected string output when malformed strings are
    // allowed. Replacement character: U+FFFD
    String expected = (test[1] as String).replaceAll("X", "\u{FFFD}");
    return [test[0], expected];
  });

  for (var test in []..addAll(allTests)..addAll(allTests2)) {
    List<int> bytes = test[0];
    String expected = test[1];
    // Vary the chunk size so malformed sequences are split across chunk
    // boundaries in different places.
    for (int chunkSize = 1; chunkSize <= 4; chunkSize++) {
      Expect.throws(
          () => decode(bytes, chunkSize), (e) => e is FormatException);
      Expect.equals(expected, decodeAllowMalformed(bytes, chunkSize));
    }
  }
}
OLD | NEW |