Index: lib/src/percent/decoder.dart |
diff --git a/lib/src/percent/decoder.dart b/lib/src/percent/decoder.dart |
new file mode 100644 |
index 0000000000000000000000000000000000000000..07406361680363ff1a7f197f2831883dc1bc73cf |
--- /dev/null |
+++ b/lib/src/percent/decoder.dart |
@@ -0,0 +1,244 @@ |
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
+// for details. All rights reserved. Use of this source code is governed by a |
+// BSD-style license that can be found in the LICENSE file. |
+ |
+library convert.percent.decoder; |
+ |
+import 'dart:convert'; |
+ |
+import 'package:charcode/ascii.dart'; |
+import 'package:typed_data/typed_data.dart'; |
+ |
+import '../utils.dart'; |
+ |
+/// The canonical instance of [PercentDecoder]. |
+const percentDecoder = const PercentDecoder._(); |
+ |
+const _lastPercent = -1; |
+ |
+/// A converter that decodes percent-encoded strings into byte arrays. |
+/// |
+/// To be maximally flexible, this will decode any percent-encoded byte and |
+/// will allow any non-percent-encoded byte other than `%`. By default, it |
+/// interprets `+` as `0x2B` rather than `0x20` as emitted by |
+/// [Uri.encodeQueryComponent]. |
+/// |
+/// This will throw a [FormatException] if the input string has an incomplete |
+/// percent-encoding, or if it contains non-ASCII code units. |
+class PercentDecoder extends Converter<String, List<int>> { |
+ const PercentDecoder._(); |
+ |
+ List<int> convert(String string) { |
+ var buffer = new Uint8Buffer(); |
+ var lastDigit = _decode(string.codeUnits, 0, string.length, buffer); |
+ |
+ if (lastDigit != null) { |
+ throw new FormatException( |
+ "Input ended with incomplete encoded byte.", |
+ string, string.length); |
+ } |
+ |
+ return buffer.buffer.asUint8List(0, buffer.length); |
+ } |
+ |
+ StringConversionSink startChunkedConversion(Sink<List<int>> sink) => |
+ new _PercentDecoderSink(sink); |
+} |
+ |
+/// A conversion sink for chunked percent-encoded decoding. |
+class _PercentDecoderSink extends StringConversionSinkBase { |
+ /// The underlying sink to which decoded byte arrays will be passed. |
+ final Sink<List<int>> _sink; |
+ |
+ /// The trailing digit from the previous string. |
+ /// |
+ /// This is `null` if the previous string ended with a complete |
+ /// percent-encoded byte or a literal character. It's [_lastPercent] if the |
+ /// most recent string ended with `%`. Otherwise, the most recent string ended |
+ /// with a `%` followed by a hexadecimal digit, and this is that digit. Since |
+ /// it's the most significant digit, it's always a multiple of 16. |
+ int _lastDigit; |
+ |
+ _PercentDecoderSink(this._sink); |
+ |
+ void addSlice(String string, int start, int end, bool isLast) { |
+ RangeError.checkValidRange(start, end, string.length); |
+ |
+ if (start == end) { |
+ if (isLast) _close(string, end); |
+ return; |
+ } |
+ |
+ var buffer = new Uint8Buffer(); |
+ var codeUnits = string.codeUnits; |
+ if (_lastDigit == _lastPercent) { |
+ _lastDigit = 16 * digitForCodeUnit(codeUnits, start); |
+ start++; |
+ |
+ if (start == end) { |
+ if (isLast) _close(string, end); |
+ return; |
+ } |
+ } |
+ |
+ if (_lastDigit != null) { |
+ buffer.add(_lastDigit + digitForCodeUnit(codeUnits, start)); |
+ start++; |
+ } |
+ |
+ _lastDigit = _decode(codeUnits, start, end, buffer); |
+ |
+ _sink.add(buffer.buffer.asUint8List(0, buffer.length)); |
+ if (isLast) _close(string, end); |
+ } |
+ |
+ ByteConversionSink asUtf8Sink(bool allowMalformed) => |
+ new _PercentDecoderByteSink(_sink); |
+ |
+ void close() => _close(); |
+ |
+ /// Like [close], but includes [string] and [index] in the [FormatException] |
+ /// if one is thrown. |
+ void _close([String string, int index]) { |
+ if (_lastDigit != null) { |
+ throw new FormatException( |
+ "Input ended with incomplete encoded byte.", string, index); |
+ } |
+ |
+ _sink.close(); |
+ } |
+} |
+ |
+/// A conversion sink for chunked percent-encoded decoding from UTF-8 bytes. |
+class _PercentDecoderByteSink extends ByteConversionSinkBase { |
+ /// The underlying sink to which decoded byte arrays will be passed. |
+ final Sink<List<int>> _sink; |
+ |
+ /// The trailing digit from the previous string. |
+ /// |
+ /// This is `null` if the previous string ended with a complete |
+ /// percent-encoded byte or a literal character. It's [_lastPercent] if the |
+ /// most recent string ended with `%`. Otherwise, the most recent string ended |
+ /// with a `%` followed by a hexadecimal digit, and this is that digit. Since |
+ /// it's the most significant digit, it's always a multiple of 16. |
+ int _lastDigit; |
+ |
+ _PercentDecoderByteSink(this._sink); |
+ |
+ void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false); |
+ |
+ void addSlice(List<int> chunk, int start, int end, bool isLast) { |
+ RangeError.checkValidRange(start, end, chunk.length); |
+ |
+ if (start == end) { |
+ if (isLast) _close(chunk, end); |
+ return; |
+ } |
+ |
+ var buffer = new Uint8Buffer(); |
+ if (_lastDigit == _lastPercent) { |
+ _lastDigit = 16 * digitForCodeUnit(chunk, start); |
+ start++; |
+ |
+ if (start == end) { |
+ if (isLast) _close(chunk, end); |
+ return; |
+ } |
+ } |
+ |
+ if (_lastDigit != null) { |
+ buffer.add(_lastDigit + digitForCodeUnit(chunk, start)); |
+ start++; |
+ } |
+ |
+ _lastDigit = _decode(chunk, start, end, buffer); |
+ |
+ _sink.add(buffer.buffer.asUint8List(0, buffer.length)); |
+ if (isLast) _close(chunk, end); |
+ } |
+ |
+ void close() => _close(); |
+ |
+ /// Like [close], but includes [chunk] and [index] in the [FormatException] |
+ /// if one is thrown. |
+ void _close([List<int> chunk, int index]) { |
+ if (_lastDigit != null) { |
+ throw new FormatException( |
+ "Input ended with incomplete encoded byte.", chunk, index); |
+ } |
+ |
+ _sink.close(); |
+ } |
+} |
+ |
+/// Decodes [codeUnits] and writes the result into [destination]. |
+/// |
+/// This reads from [codeUnits] between [sourceStart] and [sourceEnd]. It writes |
+/// the result into [destination] starting at [destinationStart]. |
+/// |
+/// If there's a leftover digit at the end of the decoding, this returns that |
+/// digit. Otherwise it returns `null`. |
+int _decode(List<int> codeUnits, int start, int end, Uint8Buffer buffer) { |
+ // A bitwise OR of all code units in [codeUnits]. This allows us to check for |
+ // out-of-range code units without adding more branches than necessary to the |
+ // core loop. |
+ var codeUnitOr = 0; |
+ |
+ // The beginning of the current slice of adjacent non-% characters. We can add |
+ // all of these to the buffer at once. |
+ var sliceStart = start; |
+ for (var i = start; i < end; i++) { |
+ // First, loop through non-% characters. |
+ var codeUnit = codeUnits[i]; |
+ if (codeUnits[i] != $percent) { |
+ codeUnitOr |= codeUnit; |
+ continue; |
+ } |
+ |
+ // We found a %. The slice from `sliceStart` to `i` represents characters |
+ // than can be copied to the buffer as-is. |
+ if (i > sliceStart) { |
+ _checkForInvalidCodeUnit(codeUnitOr, codeUnits, sliceStart, i); |
+ buffer.addAll(codeUnits.getRange(sliceStart, i)); |
+ } |
+ |
+ // Now decode the percent-encoded byte and add it as well. |
+ i++; |
+ if (i >= end) return _lastPercent; |
+ |
+ var firstDigit = digitForCodeUnit(codeUnits, i); |
+ i++; |
+ if (i >= end) return 16 * firstDigit; |
+ |
+ var secondDigit = digitForCodeUnit(codeUnits, i); |
+ buffer.add(16 * firstDigit + secondDigit); |
+ |
+ // The next iteration will look for non-% characters again. |
+ sliceStart = i + 1; |
+ } |
+ |
+ if (end > sliceStart) { |
+ _checkForInvalidCodeUnit(codeUnitOr, codeUnits, sliceStart, end); |
+ if (start == sliceStart) { |
+ buffer.addAll(codeUnits); |
+ } else { |
+ buffer.addAll(codeUnits.getRange(sliceStart, end)); |
+ } |
+ } |
+ |
+ return null; |
+} |
+ |
+void _checkForInvalidCodeUnit(int codeUnitOr, List<int> codeUnits, int start, |
+ int end) { |
+ if (codeUnitOr >= 0 && codeUnitOr <= 0x7f) return; |
+ |
+ for (var i = start; i < end; i++) { |
+ var codeUnit = codeUnits[i]; |
+ if (codeUnit >= 0 && codeUnit <= 0x7f) continue; |
+ throw new FormatException( |
+ "Non-ASCII code unit " |
+ "U+${codeUnit.toRadixString(16).padLeft(4, '0')}", |
+ codeUnits, i); |
+ } |
+} |