OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 library convert.percent.decoder; |
| 6 |
| 7 import 'dart:convert'; |
| 8 |
| 9 import 'package:charcode/ascii.dart'; |
| 10 import 'package:typed_data/typed_data.dart'; |
| 11 |
| 12 import '../utils.dart'; |
| 13 |
/// The shared, canonical [PercentDecoder] instance.
const PercentDecoder percentDecoder = const PercentDecoder._();

/// Sentinel used in place of a pending digit when the input seen so far ends
/// immediately after a `%`, before either hexadecimal digit has been read.
const int _lastPercent = -1;
| 18 |
/// Converts percent-encoded strings into lists of bytes.
///
/// The decoder is deliberately permissive: every percent-encoded byte is
/// decoded, and every code unit other than `%` is passed through unchanged.
/// In particular `+` is kept as `0x2B`; it is not turned into the `0x20` that
/// [Uri.encodeQueryComponent] uses it for.
///
/// A [FormatException] is thrown when the input stops in the middle of a
/// percent-encoded byte, or when it contains non-ASCII code units.
class PercentDecoder extends Converter<String, List<int>> {
  const PercentDecoder._();

  /// Decodes all of [string] at once and returns the resulting bytes.
  List<int> convert(String string) {
    var bytes = new Uint8Buffer();
    var pending = _decode(string.codeUnits, 0, string.length, bytes);

    // A `null` result means the input did not stop inside a `%XX` sequence.
    if (pending == null) {
      return bytes.buffer.asUint8List(0, bytes.length);
    }

    throw new FormatException(
        "Input ended with incomplete encoded byte.",
        string, string.length);
  }

  /// Returns a sink that decodes string chunks and forwards the decoded bytes
  /// to [sink].
  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
    return new _PercentDecoderSink(sink);
  }
}
| 47 |
/// The sink used for chunked decoding of percent-encoded strings.
class _PercentDecoderSink extends StringConversionSinkBase {
  /// Where the decoded bytes are sent.
  final Sink<List<int>> _sink;

  /// State carried over from the end of the previous chunk.
  ///
  /// * `null`: the previous chunk ended on a complete percent-encoded byte or
  ///   on a literal character.
  /// * [_lastPercent]: the previous chunk ended with a bare `%`.
  /// * anything else: the previous chunk ended with `%` plus one hexadecimal
  ///   digit, and this is that digit already multiplied by 16 (it is the most
  ///   significant digit of the byte).
  int _lastDigit;

  _PercentDecoderSink(this._sink);

  void addSlice(String string, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, string.length);

    var units = string.codeUnits;
    var position = start;

    // The previous chunk ended with `%`: the first code unit of a non-empty
    // slice is the high digit of the pending byte.
    if (position != end && _lastDigit == _lastPercent) {
      _lastDigit = 16 * digitForCodeUnit(units, position);
      position++;
    }

    // Either the slice was empty to begin with, or the high digit was all it
    // contained. Nothing is emitted in both cases.
    if (position == end) {
      if (isLast) _close(string, end);
      return;
    }

    var decoded = new Uint8Buffer();

    // A high digit is pending, so the next code unit completes the byte.
    if (_lastDigit != null) {
      decoded.add(_lastDigit + digitForCodeUnit(units, position));
      position++;
    }

    _lastDigit = _decode(units, position, end, decoded);

    _sink.add(decoded.buffer.asUint8List(0, decoded.length));
    if (isLast) _close(string, end);
  }

  /// Returns a byte sink that decodes into the same underlying sink.
  ///
  /// The returned sink is freshly constructed; this sink's pending-digit
  /// state is not handed over to it.
  ByteConversionSink asUtf8Sink(bool allowMalformed) {
    return new _PercentDecoderByteSink(_sink);
  }

  void close() {
    _close();
  }

  /// Does the work of [close]. If a [FormatException] has to be thrown,
  /// [string] and [index] are attached to it as source and offset.
  void _close([String string, int index]) {
    if (_lastDigit == null) {
      _sink.close();
      return;
    }

    throw new FormatException(
        "Input ended with incomplete encoded byte.", string, index);
  }
}
| 111 |
/// The sink used for chunked decoding of percent-encoded text that arrives as
/// UTF-8 bytes.
class _PercentDecoderByteSink extends ByteConversionSinkBase {
  /// Where the decoded bytes are sent.
  final Sink<List<int>> _sink;

  /// State carried over from the end of the previous chunk.
  ///
  /// * `null`: the previous chunk ended on a complete percent-encoded byte or
  ///   on a literal character.
  /// * [_lastPercent]: the previous chunk ended with a bare `%`.
  /// * anything else: the previous chunk ended with `%` plus one hexadecimal
  ///   digit, and this is that digit already multiplied by 16 (it is the most
  ///   significant digit of the byte).
  int _lastDigit;

  _PercentDecoderByteSink(this._sink);

  void add(List<int> chunk) {
    addSlice(chunk, 0, chunk.length, false);
  }

  void addSlice(List<int> chunk, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, chunk.length);

    var position = start;

    // The previous chunk ended with `%`: the first byte of a non-empty slice
    // is the high digit of the pending byte.
    if (position != end && _lastDigit == _lastPercent) {
      _lastDigit = 16 * digitForCodeUnit(chunk, position);
      position++;
    }

    // Either the slice was empty to begin with, or the high digit was all it
    // contained. Nothing is emitted in both cases.
    if (position == end) {
      if (isLast) _close(chunk, end);
      return;
    }

    var decoded = new Uint8Buffer();

    // A high digit is pending, so the next byte completes the encoded byte.
    if (_lastDigit != null) {
      decoded.add(_lastDigit + digitForCodeUnit(chunk, position));
      position++;
    }

    _lastDigit = _decode(chunk, position, end, decoded);

    _sink.add(decoded.buffer.asUint8List(0, decoded.length));
    if (isLast) _close(chunk, end);
  }

  void close() {
    _close();
  }

  /// Does the work of [close]. If a [FormatException] has to be thrown,
  /// [chunk] and [index] are attached to it as source and offset.
  void _close([List<int> chunk, int index]) {
    if (_lastDigit == null) {
      _sink.close();
      return;
    }

    throw new FormatException(
        "Input ended with incomplete encoded byte.", chunk, index);
  }
}
| 173 |
/// Decodes the percent-encoded [codeUnits] and appends the result to [buffer].
///
/// Only the code units from [start] (inclusive) to [end] (exclusive) are read.
/// Runs of code units other than `%` are copied to [buffer] unchanged, after
/// being checked by [_checkForInvalidCodeUnit], which throws a
/// [FormatException] for anything outside the ASCII range. Each `%` followed
/// by two digits is replaced by the single byte `16 * first + second`, with
/// the digit values obtained from `digitForCodeUnit`.
///
/// If the range ends in the middle of a percent-encoded byte, this returns the
/// pending state: [_lastPercent] when the range ends right after a `%`, or the
/// first digit multiplied by 16 when it ends after that digit. Otherwise it
/// returns `null`.
int _decode(List<int> codeUnits, int start, int end, Uint8Buffer buffer) {
  // A bitwise OR of all literal code units seen so far. This allows us to
  // check for out-of-range code units without adding more branches than
  // necessary to the core loop.
  var codeUnitOr = 0;

  // The beginning of the current slice of adjacent non-% characters. We can
  // add all of these to the buffer at once.
  var sliceStart = start;
  for (var i = start; i < end; i++) {
    // First, loop through non-% characters.
    var codeUnit = codeUnits[i];
    if (codeUnit != $percent) {
      codeUnitOr |= codeUnit;
      continue;
    }

    // We found a %. The slice from `sliceStart` to `i` represents characters
    // that can be copied to the buffer as-is.
    if (i > sliceStart) {
      _checkForInvalidCodeUnit(codeUnitOr, codeUnits, sliceStart, i);
      buffer.addAll(codeUnits.getRange(sliceStart, i));
    }

    // Now decode the percent-encoded byte and add it as well.
    i++;
    if (i >= end) return _lastPercent;

    var firstDigit = digitForCodeUnit(codeUnits, i);
    i++;
    if (i >= end) return 16 * firstDigit;

    var secondDigit = digitForCodeUnit(codeUnits, i);
    buffer.add(16 * firstDigit + secondDigit);

    // The next iteration will look for non-% characters again.
    sliceStart = i + 1;
  }

  if (end > sliceStart) {
    _checkForInvalidCodeUnit(codeUnitOr, codeUnits, sliceStart, end);
    if (sliceStart == 0 && end == codeUnits.length) {
      // The remaining slice is the entire list, so it can be added directly
      // without creating a range view.
      buffer.addAll(codeUnits);
    } else {
      // Only the requested range may be copied: callers pass sub-ranges, both
      // through `addSlice` and after consuming a digit left over from the
      // previous chunk.
      buffer.addAll(codeUnits.getRange(sliceStart, end));
    }
  }

  return null;
}
| 231 |
/// Throws a [FormatException] for the first non-ASCII code unit found in
/// [codeUnits] between [start] (inclusive) and [end] (exclusive).
///
/// [codeUnitOr] is used as a shortcut: when it lies within `0..0x7f` the
/// function returns immediately without looking at [codeUnits] at all.
void _checkForInvalidCodeUnit(int codeUnitOr, List<int> codeUnits, int start,
    int end) {
  var orIsAscii = codeUnitOr >= 0 && codeUnitOr <= 0x7f;
  if (orIsAscii) return;

  // Something is out of range; scan to find the offending position so that it
  // can be reported in the exception.
  var index = start;
  while (index < end) {
    var unit = codeUnits[index];
    if (unit < 0 || unit > 0x7f) {
      var hex = unit.toRadixString(16).padLeft(4, '0');
      throw new FormatException(
          "Non-ASCII code unit "
          "U+${hex}",
          codeUnits, index);
    }
    index++;
  }
}
OLD | NEW |