Chromium Code Reviews| Index: lib/src/percent/decoder.dart |
| diff --git a/lib/src/percent/decoder.dart b/lib/src/percent/decoder.dart |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..bab945c1c84b9da0abebabce01b1182c92a039e6 |
| --- /dev/null |
| +++ b/lib/src/percent/decoder.dart |
| @@ -0,0 +1,198 @@ |
| +// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
| +// for details. All rights reserved. Use of this source code is governed by a |
| +// BSD-style license that can be found in the LICENSE file. |
| + |
| +library convert.percent.decoder; |
| + |
| +import 'dart:convert'; |
| + |
| +import 'package:charcode/ascii.dart'; |
| +import 'package:typed_data/typed_data.dart'; |
| + |
| +import '../utils.dart'; |
| + |
| +/// The canonical instance of [PercentDecoder], whose constructor is private. |
| +const percentDecoder = const PercentDecoder._(); |
|
Lasse Reichstein Nielsen
2015/10/08 10:30:36
Consider typing the constant.
nweiz
2015/10/08 20:44:51
I really don't like doing this for final/const fie
Lasse Reichstein Nielsen
2015/10/09 09:07:13
Will the dartdoc show the correct type for the con
nweiz
2015/10/09 19:52:55
It should; if not, that's an issue with dartdoc.
|
| + |
| +/// Sentinel stored in place of a digit when the input so far ends with `%`. |
| +const _lastPercent = -1; |
| + |
| +/// A converter that decodes percent-encoded strings into byte arrays. |
|
Lasse Reichstein Nielsen
2015/10/08 10:30:36
percentadecimal? I'm guessing "percent encoded".
R
nweiz
2015/10/08 20:44:52
I started this file as a search-replace of the hex
|
| +/// |
| +/// To be maximally flexible, this will decode any percent-encoded byte and |
| +/// will allow any non-percent-encoded byte other than `%`. By default, it |
| +/// interprets `+` as `0x2B` rather than `0x20` as emitted by |
| +/// [Uri.encodeQueryComponent]. |
| +/// |
| +/// This will throw a [FormatException] if the input string has an incomplete |
| +/// percent-encoding, or if it contains non-ASCII code units. |
| +class PercentDecoder extends Converter<String, List<int>> { |
| +  const PercentDecoder._(); |
| + |
| +  /// Decodes all of [string] in one pass and returns the resulting bytes. |
| +  /// |
| +  /// Throws a [FormatException] if [string] ends partway through a |
| +  /// percent-encoded byte. |
| +  List<int> convert(String string) { |
| +    var bytes = new Uint8Buffer(); |
| +    var pending = _decode(string.codeUnits, 0, string.length, bytes); |
| + |
| +    // A non-null result means the input stopped after `%` or `%` plus one |
| +    // digit. |
| +    if (pending == null) return bytes.buffer.asUint8List(0, bytes.length); |
| + |
| +    throw new FormatException( |
| +        "Input ended with incomplete encoded byte.", |
| +        string, string.length); |
| +  } |
| + |
| +  /// Returns a sink that decodes string chunks and forwards the decoded bytes |
| +  /// to [sink]. |
| +  StringConversionSink startChunkedConversion(Sink<List<int>> sink) { |
| +    return new _PercentDecoderSink(sink); |
| +  } |
| +} |
| + |
| +/// A conversion sink for chunked percent-encoded decoding. |
| +class _PercentDecoderSink extends StringConversionSinkBase { |
| +  /// The underlying sink to which decoded byte arrays will be passed. |
| +  final Sink<List<int>> _sink; |
| + |
| +  /// The trailing digit from the previous string. |
| +  /// |
| +  /// This is `null` if the previous string ended with a complete |
| +  /// percent-encoded byte or a literal character. It's [_lastPercent] if the |
| +  /// most recent string ended with `%`. Otherwise, the most recent string ended |
| +  /// with a `%` followed by a hexadecimal digit, and this is that digit. Since |
| +  /// it's the most significant digit, it's always a multiple of 16. |
| +  int _lastDigit; |
| + |
| +  _PercentDecoderSink(this._sink); |
| + |
| +  /// Decodes the code units of [string] between [start] and [end] and adds the |
| +  /// decoded bytes to the underlying sink. |
| +  /// |
| +  /// A percent-encoded byte left unfinished by the previous call is completed |
| +  /// using the first code units of this slice. If [isLast] is `true`, the sink |
| +  /// is closed afterwards, which throws a [FormatException] pointing at [end] |
| +  /// in [string] if a percent-encoded byte is still incomplete. |
| +  void addSlice(String string, int start, int end, bool isLast) { |
| +    RangeError.checkValidRange(start, end, string.length); |
| + |
| +    if (start == end) { |
| +      if (isLast) _close(string, end); |
| +      return; |
| +    } |
| + |
| +    var buffer = new Uint8Buffer(); |
| +    var codeUnits = string.codeUnits; |
| +    if (_lastDigit == _lastPercent) { |
| +      // The previous string ended with a bare `%`, so the first code unit here |
| +      // is the most significant digit of that byte. |
| +      _lastDigit = 16 * digitForCodeUnit(codeUnits, start); |
| +      start++; |
| + |
| +      if (start == end) { |
| +        if (isLast) _close(string, end); |
| +        return; |
| +      } |
| +    } |
| + |
| +    if (_lastDigit != null) { |
| +      // Complete the byte whose most significant digit is already stored. |
| +      buffer.add(_lastDigit + digitForCodeUnit(codeUnits, start)); |
| +      start++; |
| +    } |
| + |
| +    _lastDigit = _decode(codeUnits, start, end, buffer); |
| + |
| +    _sink.add(buffer.buffer.asUint8List(0, buffer.length)); |
| +    if (isLast) _close(string, end); |
| +  } |
| + |
| +  /// Returns a byte sink that decodes into the same underlying sink. |
| +  /// |
| +  /// NOTE(review): only [_sink] is handed over; a pending [_lastDigit] from |
| +  /// earlier input is not carried into the returned sink. |
| +  ByteConversionSink asUtf8Sink(bool allowMalformed) => |
| +      new _PercentDecoderByteSink(_sink); |
| + |
| +  /// Closes the underlying sink. |
| +  /// |
| +  /// Throws a [FormatException] if the input ended partway through a |
| +  /// percent-encoded byte. |
| +  void close() => _close(); |
| + |
| +  /// Like [close], but includes [string] and [index] in the [FormatException] |
| +  /// if one is thrown. |
| +  void _close([String string, int index]) { |
| +    if (_lastDigit != null) { |
| +      throw new FormatException( |
| +          "Input ended with incomplete encoded byte.", string, index); |

Lasse Reichstein Nielsen
2015/10/08 10:30:36
Consider having a version of this that is passed t
nweiz
2015/10/08 20:44:51
Done. Made the same change in the hex decoder as w
|
| +    } |
| + |
| +    _sink.close(); |
| +  } |
| +} |
| + |
| +/// A conversion sink for chunked percent-encoded decoding from UTF-8 bytes. |
|
Lasse Reichstein Nielsen
2015/10/08 10:30:36
percentadecimal :)
nweiz
2015/10/08 20:44:51
Done.
|
| +class _PercentDecoderByteSink extends ByteConversionSinkBase { |
| +  /// The underlying sink to which decoded byte arrays will be passed. |
| +  final Sink<List<int>> _sink; |
| + |
| +  /// The trailing digit from the previous chunk. |
| +  /// |
| +  /// This is `null` if the previous chunk ended with a complete |
| +  /// percent-encoded byte or a literal character. It's [_lastPercent] if the |
| +  /// most recent chunk ended with `%`. Otherwise, the most recent chunk ended |
| +  /// with a `%` followed by a hexadecimal digit, and this is that digit. Since |
| +  /// it's the most significant digit, it's always a multiple of 16. |
| +  int _lastDigit; |
| + |
| +  _PercentDecoderByteSink(this._sink); |
| + |
| +  /// Decodes all of [chunk] without closing the sink. |
| +  void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false); |
| + |
| +  /// Decodes the code units of [chunk] between [start] and [end] and adds the |
| +  /// decoded bytes to the underlying sink. |
| +  /// |
| +  /// A percent-encoded byte left unfinished by the previous call is completed |
| +  /// using the first code units of this slice. If [isLast] is `true`, the sink |
| +  /// is closed afterwards, which throws a [FormatException] pointing at [end] |
| +  /// in [chunk] if a percent-encoded byte is still incomplete. |
| +  void addSlice(List<int> chunk, int start, int end, bool isLast) { |
| +    RangeError.checkValidRange(start, end, chunk.length); |
| + |
| +    if (start == end) { |
| +      if (isLast) _close(chunk, end); |
| +      return; |
| +    } |
| + |
| +    var buffer = new Uint8Buffer(); |
| +    if (_lastDigit == _lastPercent) { |
| +      // The previous chunk ended with a bare `%`, so the first code unit here |
| +      // is the most significant digit of that byte. |
| +      _lastDigit = 16 * digitForCodeUnit(chunk, start); |
| +      start++; |
| + |
| +      if (start == end) { |
| +        if (isLast) _close(chunk, end); |
| +        return; |
| +      } |
| +    } |
| + |
| +    if (_lastDigit != null) { |
| +      // Complete the byte whose most significant digit is already stored. |
| +      buffer.add(_lastDigit + digitForCodeUnit(chunk, start)); |
| +      start++; |
| +    } |
| + |
| +    _lastDigit = _decode(chunk, start, end, buffer); |
| + |
| +    _sink.add(buffer.buffer.asUint8List(0, buffer.length)); |
| +    if (isLast) _close(chunk, end); |
| +  } |
| + |
| +  /// Closes the underlying sink. |
| +  /// |
| +  /// Throws a [FormatException] if the input ended partway through a |
| +  /// percent-encoded byte. |
| +  void close() => _close(); |
| + |
| +  /// Like [close], but includes [chunk] and [index] in the [FormatException] |
| +  /// if one is thrown. |
| +  void _close([List<int> chunk, int index]) { |
| +    if (_lastDigit != null) { |
| +      throw new FormatException( |
| +          "Input ended with incomplete encoded byte.", chunk, index); |
| +    } |
| + |
| +    _sink.close(); |
| +  } |
| +} |
| + |
| +/// Decodes [codeUnits] and appends the resulting bytes to [buffer]. |
| +/// |
| +/// This reads from [codeUnits] between [start] (inclusive) and [end] |
| +/// (exclusive). |
| +/// |
| +/// If the range ends partway through a percent-encoded byte, this returns the |
| +/// pending state: [_lastPercent] if it ends right after `%`, or sixteen times |
| +/// the first hexadecimal digit if it ends after `%` and one digit. Otherwise |
| +/// it returns `null`. |
| +/// |
| +/// Throws a [FormatException] if a literal (non-percent-encoded) code unit is |
| +/// outside the ASCII range `0x00`-`0x7f`. |
| +int _decode(List<int> codeUnits, int start, int end, Uint8Buffer buffer) { |
| +  for (var i = start; i < end; i++) { |
| +    var codeUnit = codeUnits[i]; |
| +    if (codeUnit != $percent) { |
| +      // ASCII ends at 0x7f. Negative values are rejected too, because the byte |
| +      // sink passes caller-supplied ints through unchecked. |
| +      if (codeUnit < 0 || codeUnit > 0x7f) { |

Lasse Reichstein Nielsen
2015/10/08 10:30:36
This should also be checking for codeUnit < 0.
Fo
nweiz
2015/10/08 20:44:52
Done.
|
| +        throw new FormatException( |
| +            "Non-ASCII code unit " |
| +            "U+${codeUnit.toRadixString(16).padLeft(4, '0')}", |
| +            codeUnits, i); |
| +      } |
| + |
| +      buffer.add(codeUnit); |
| +      continue; |
| +    } |
| + |
| +    // Found `%`: the next two code units are the hexadecimal digits of one |
| +    // byte. If the range runs out first, report how far we got. |
| +    i++; |
| +    if (i >= end) return _lastPercent; |
| + |
| +    var firstDigit = digitForCodeUnit(codeUnits, i); |
| +    i++; |
| +    if (i >= end) return 16 * firstDigit; |
| + |
| +    var secondDigit = digitForCodeUnit(codeUnits, i); |
| +    buffer.add(16 * firstDigit + secondDigit); |
| +  } |
| + |
| +  return null; |
| +} |