Index: lib/src/percent/decoder.dart |
diff --git a/lib/src/percent/decoder.dart b/lib/src/percent/decoder.dart |
new file mode 100644 |
index 0000000000000000000000000000000000000000..bab945c1c84b9da0abebabce01b1182c92a039e6 |
--- /dev/null |
+++ b/lib/src/percent/decoder.dart |
@@ -0,0 +1,198 @@ |
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
+// for details. All rights reserved. Use of this source code is governed by a |
+// BSD-style license that can be found in the LICENSE file. |
+ |
+library convert.percent.decoder; |
+ |
+import 'dart:convert'; |
+ |
+import 'package:charcode/ascii.dart'; |
+import 'package:typed_data/typed_data.dart'; |
+ |
+import '../utils.dart'; |
+ |
+/// The canonical instance of [PercentDecoder]. |
+/// |
+/// [PercentDecoder]'s only constructor is private, so this constant is how the |
+/// decoder is obtained. |
+const percentDecoder = const PercentDecoder._(); |
Lasse Reichstein Nielsen
2015/10/08 10:30:36
Consider typing the constant.
nweiz
2015/10/08 20:44:51
I really don't like doing this for final/const fie
Lasse Reichstein Nielsen
2015/10/09 09:07:13
Will the dartdoc show the correct type for the con
nweiz
2015/10/09 19:52:55
It should; if not, that's an issue with dartdoc.
|
+ |
+/// A sentinel stored in place of a pending digit when the input seen so far |
+/// ended with a bare `%`, before either digit of the escape was read. |
+const _lastPercent = -1; |
+ |
+/// A converter that decodes percent-encoded strings into byte arrays. |
Lasse Reichstein Nielsen
2015/10/08 10:30:36
percentadecimal? I'm guessing "percent encoded".
R
nweiz
2015/10/08 20:44:52
I started this file as a search-replace of the hex
|
+/// |
+/// To be maximally flexible, this will decode any percent-encoded byte and |
+/// will allow any non-percent-encoded byte other than `%`. By default, it |
+/// interprets `+` as `0x2B` rather than `0x20` as emitted by |
+/// [Uri.encodeQueryComponent]. |
+/// |
+/// This will throw a [FormatException] if the input string has an incomplete |
+/// percent-encoding, or if it contains non-ASCII code units. |
+class PercentDecoder extends Converter<String, List<int>> { |
+  const PercentDecoder._(); |
+ |
+  /// Decodes all of [string] in one pass and returns the resulting bytes. |
+  /// |
+  /// Throws a [FormatException] if [string] stops partway through a |
+  /// percent-encoded byte. |
+  List<int> convert(String string) { |
+    var bytes = new Uint8Buffer(); |
+    var pending = _decode(string.codeUnits, 0, string.length, bytes); |
+    if (pending == null) return bytes.buffer.asUint8List(0, bytes.length); |
+ |
+    // A non-null result means the input stopped inside a `%XX` escape. |
+    throw new FormatException( |
+        "Input ended with incomplete encoded byte.", |
+        string, string.length); |
+  } |
+ |
+  /// Returns a sink that decodes string chunks and forwards bytes to [sink]. |
+  StringConversionSink startChunkedConversion(Sink<List<int>> sink) { |
+    return new _PercentDecoderSink(sink); |
+  } |
+} |
+ |
+/// A conversion sink for chunked percent-encoded decoding. |
+class _PercentDecoderSink extends StringConversionSinkBase { |
+  /// The underlying sink to which decoded byte arrays will be passed. |
+  final Sink<List<int>> _sink; |
+ |
+  /// The trailing digit from the previous string. |
+  /// |
+  /// This is `null` if the previous string ended with a complete |
+  /// percent-encoded byte or a literal character. It's [_lastPercent] if the |
+  /// most recent string ended with `%`. Otherwise, the most recent string ended |
+  /// with a `%` followed by a hexadecimal digit, and this is that digit. Since |
+  /// it's the most significant digit, it's always a multiple of 16. |
+  int _lastDigit; |
+ |
+  _PercentDecoderSink(this._sink); |
+ |
+  /// Decodes [string] between [start] and [end] and forwards the bytes. |
+  /// |
+  /// An escape left unfinished by the previous call is completed from the |
+  /// first code units of this slice. If [isLast] is `true` the sink is closed |
+  /// afterwards, which throws a [FormatException] pointing at [end] in |
+  /// [string] if an escape is still incomplete. |
+  void addSlice(String string, int start, int end, bool isLast) { |
+    RangeError.checkValidRange(start, end, string.length); |
+ |
+    if (start == end) { |
+      if (isLast) _close(string, end); |
+      return; |
+    } |
+ |
+    var buffer = new Uint8Buffer(); |
+    var codeUnits = string.codeUnits; |
+    if (_lastDigit == _lastPercent) { |
+      // The previous slice ended right after `%`, so this slice starts with |
+      // the most significant digit of the escape. |
+      _lastDigit = 16 * digitForCodeUnit(codeUnits, start); |
+      start++; |
+ |
+      if (start == end) { |
+        if (isLast) _close(string, end); |
+        return; |
+      } |
+    } |
+ |
+    if (_lastDigit != null) { |
+      // Only the least significant digit of the pending escape is missing. |
+      buffer.add(_lastDigit + digitForCodeUnit(codeUnits, start)); |
+      start++; |
+    } |
+ |
+    _lastDigit = _decode(codeUnits, start, end, buffer); |
+ |
+    _sink.add(buffer.buffer.asUint8List(0, buffer.length)); |
+    if (isLast) _close(string, end); |
+  } |
+ |
+  /// Returns a byte sink that decodes into the same underlying sink. |
+  /// |
+  /// [allowMalformed] is ignored. Note that an escape left unfinished in this |
+  /// sink is not handed over to the returned sink. |
+  ByteConversionSink asUtf8Sink(bool allowMalformed) => |
+      new _PercentDecoderByteSink(_sink); |
+ |
+  /// Closes the underlying sink. |
+  /// |
+  /// Throws a [FormatException] if the input ended with an incomplete |
+  /// percent-encoded byte. |
+  void close() => _close(); |
+ |
+  /// Like [close], but includes [string] and [index] in the [FormatException] |
+  /// if one is thrown. |
+  void _close([String string, int index]) { |
+    if (_lastDigit != null) { |
+      throw new FormatException( |
+          "Input ended with incomplete encoded byte.", string, index); |
Lasse Reichstein Nielsen
2015/10/08 10:30:36
Consider having a version of this that is passed t
nweiz
2015/10/08 20:44:51
Done. Made the same change in the hex decoder as w
|
+    } |
+ |
+    _sink.close(); |
+  } |
+} |
+ |
+/// A conversion sink for chunked percent-encoded decoding from UTF-8 bytes. |
Lasse Reichstein Nielsen
2015/10/08 10:30:36
percentadecimal :)
nweiz
2015/10/08 20:44:51
Done.
|
+class _PercentDecoderByteSink extends ByteConversionSinkBase { |
+  /// The underlying sink to which decoded byte arrays will be passed. |
+  final Sink<List<int>> _sink; |
+ |
+  /// The trailing digit from the previous chunk. |
+  /// |
+  /// This is `null` if the previous chunk ended with a complete |
+  /// percent-encoded byte or a literal character. It's [_lastPercent] if the |
+  /// most recent chunk ended with `%`. Otherwise, the most recent chunk ended |
+  /// with a `%` followed by a hexadecimal digit, and this is that digit. Since |
+  /// it's the most significant digit, it's always a multiple of 16. |
+  int _lastDigit; |
+ |
+  _PercentDecoderByteSink(this._sink); |
+ |
+  /// Decodes all of [chunk] without closing the sink. |
+  void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false); |
+ |
+  /// Decodes [chunk] between [start] and [end] and forwards the bytes. |
+  /// |
+  /// An escape left unfinished by the previous call is completed from the |
+  /// first elements of this slice. If [isLast] is `true` the sink is closed |
+  /// afterwards, which throws a [FormatException] pointing at [end] in |
+  /// [chunk] if an escape is still incomplete. |
+  void addSlice(List<int> chunk, int start, int end, bool isLast) { |
+    RangeError.checkValidRange(start, end, chunk.length); |
+ |
+    if (start == end) { |
+      if (isLast) _close(chunk, end); |
+      return; |
+    } |
+ |
+    var buffer = new Uint8Buffer(); |
+    if (_lastDigit == _lastPercent) { |
+      // The previous chunk ended right after `%`, so this chunk starts with |
+      // the most significant digit of the escape. |
+      _lastDigit = 16 * digitForCodeUnit(chunk, start); |
+      start++; |
+ |
+      if (start == end) { |
+        if (isLast) _close(chunk, end); |
+        return; |
+      } |
+    } |
+ |
+    if (_lastDigit != null) { |
+      // Only the least significant digit of the pending escape is missing. |
+      buffer.add(_lastDigit + digitForCodeUnit(chunk, start)); |
+      start++; |
+    } |
+ |
+    _lastDigit = _decode(chunk, start, end, buffer); |
+ |
+    _sink.add(buffer.buffer.asUint8List(0, buffer.length)); |
+    if (isLast) _close(chunk, end); |
+  } |
+ |
+  /// Closes the underlying sink. |
+  /// |
+  /// Throws a [FormatException] if the input ended with an incomplete |
+  /// percent-encoded byte. |
+  void close() => _close(); |
+ |
+  /// Like [close], but includes [chunk] and [index] in the [FormatException] |
+  /// if one is thrown. |
+  void _close([List<int> chunk, int index]) { |
+    if (_lastDigit != null) { |
+      throw new FormatException( |
+          "Input ended with incomplete encoded byte.", chunk, index); |
+    } |
+ |
+    _sink.close(); |
+  } |
+} |
+ |
+/// Decodes the percent-encoded [codeUnits] and appends the bytes to [buffer]. |
+/// |
+/// This reads from [codeUnits] between [start] (inclusive) and [end] |
+/// (exclusive). Code units other than `%` are copied through unchanged; a `%` |
+/// followed by two digits becomes the single byte `16 * first + second`. |
+/// |
+/// If the range ends partway through an escape, this returns the state to |
+/// carry over: [_lastPercent] if it ended right after `%`, or 16 times the |
+/// first digit if it ended after that digit. Otherwise it returns `null`. |
+/// |
+/// Throws a [FormatException] if a literal code unit is outside the ASCII |
+/// range. |
+int _decode(List<int> codeUnits, int start, int end, Uint8Buffer buffer) { |
+  for (var i = start; i < end; i++) { |
+    var codeUnit = codeUnits[i]; |
+    if (codeUnit != $percent) { |
+      // ASCII is 0x00-0x7f. The lower bound matters because byte chunks are |
+      // plain `List<int>`s and may hold negative values. |
+      if (codeUnit < 0 || codeUnit > 0x7f) { |
Lasse Reichstein Nielsen
2015/10/08 10:30:36
This should also be checking for codeUnit < 0.
Fo
nweiz
2015/10/08 20:44:52
Done.
|
+        throw new FormatException( |
+            "Non-ASCII code unit " |
+            "U+${codeUnit.toRadixString(16).padLeft(4, '0')}", |
+            codeUnits, i); |
+      } |
+ |
+      buffer.add(codeUnit); |
+      continue; |
+    } |
+ |
+    // Past the `%`: if the range ends here, neither digit has been seen. |
+    i++; |
+    if (i >= end) return _lastPercent; |
+ |
+    var firstDigit = digitForCodeUnit(codeUnits, i); |
+    i++; |
+    // Only the most significant digit was available; hand it back pre-scaled. |
+    if (i >= end) return 16 * firstDigit; |
+ |
+    var secondDigit = digitForCodeUnit(codeUnits, i); |
+    buffer.add(16 * firstDigit + secondDigit); |
+  } |
+ |
+  return null; |
+} |