lib/src/percent/decoder.dart - Issue 1393003003: Add a percent-encoding converter.

Unified Diff: lib/src/percent/decoder.dart

Issue 1393003003: Add a percent-encoding converter. (Closed) Base URL: git@github.com:dart-lang/convert.git@master

Patch Set: Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: lib/src/percent/decoder.dart

diff --git a/lib/src/percent/decoder.dart b/lib/src/percent/decoder.dart

new file mode 100644

index 0000000000000000000000000000000000000000..bab945c1c84b9da0abebabce01b1182c92a039e6

--- /dev/null

+++ b/lib/src/percent/decoder.dart

@@ -0,0 +1,198 @@

+// BSD-style license that can be found in the LICENSE file.

+library convert.percent.decoder;

+import 'dart:convert';

+import 'package:charcode/ascii.dart';

+import 'package:typed_data/typed_data.dart';

+import '../utils.dart';

+/// The canonical instance of [PercentDecoder].

+const percentDecoder = const PercentDecoder._();

Lasse Reichstein Nielsen 2015/10/08 10:30:36 Consider typing the constant.

nweiz 2015/10/08 20:44:51 I really don't like doing this for final/const fie

Lasse Reichstein Nielsen 2015/10/09 09:07:13 Will the dartdoc show the correct type for the con

nweiz 2015/10/09 19:52:55 It should; if not, that's an issue with dartdoc.

+const _lastPercent = -1;

+/// A converter that decodes percent-encoded strings into byte arrays.

Lasse Reichstein Nielsen 2015/10/08 10:30:36 percentadecimal? I'm guessing "percent encoded". R

nweiz 2015/10/08 20:44:52 I started this file as a search-replace of the hex

+///

+/// To be maximally flexible, this will decode any percent-encoded byte and

+/// will allow any non-percent-encoded byte other than `%`. By default, it

+/// interprets `+` as `0x2B` rather than `0x20` as emitted by

+/// [Uri.encodeQueryComponent].

+///

+/// This will throw a [FormatException] if the input string has an incomplete

+/// percent-encoding, or if it contains non-ASCII code units.

+class PercentDecoder extends Converter<String, List<int>> {
+  const PercentDecoder._();
+
+  /// Decodes the percent-encoded [string] into a list of bytes.
+  ///
+  /// Throws a [FormatException] if [string] ends in the middle of a
+  /// percent-encoded byte, that is, with a bare `%` or with `%` followed by
+  /// only one digit.
+  List<int> convert(String string) {
+    var buffer = new Uint8Buffer();
+    var lastDigit = _decode(string.codeUnits, 0, string.length, buffer);
+
+    // A non-null result means the input stopped partway through a `%XX`
+    // sequence; a one-shot conversion has no later chunk to complete it.
+    if (lastDigit != null) {
+      throw new FormatException(
+          "Input ended with incomplete encoded byte.",
+          string, string.length);
+    }
+
+    // Return a view of only the first `buffer.length` bytes of the backing
+    // storage.
+    return buffer.buffer.asUint8List(0, buffer.length);
+  }
+
+  /// Returns a sink that decodes chunks of percent-encoded text and passes
+  /// the decoded bytes to [sink].
+  StringConversionSink startChunkedConversion(Sink<List<int>> sink) =>
+      new _PercentDecoderSink(sink);
+}

+/// A conversion sink for chunked percent-encoded decoding.

+class _PercentDecoderSink extends StringConversionSinkBase {
+  /// The underlying sink to which decoded byte arrays will be passed.
+  final Sink<List<int>> _sink;
+
+  /// The trailing digit from the previous string.
+  ///
+  /// This is `null` if the previous string ended with a complete
+  /// percent-encoded byte or a literal character. It's [_lastPercent] if the
+  /// most recent string ended with `%`. Otherwise, the most recent string ended
+  /// with a `%` followed by a hexadecimal digit, and this is that digit. Since
+  /// it's the most significant digit, it's always a multiple of 16.
+  int _lastDigit;
+
+  _PercentDecoderSink(this._sink);
+
+  /// Decodes the code units of [string] between [start] and [end] and passes
+  /// the resulting bytes to the underlying sink.
+  ///
+  /// A percent-encoded byte left incomplete by an earlier slice is completed
+  /// with the first code units of this one. If [isLast] is `true`, the sink is
+  /// closed afterwards.
+  void addSlice(String string, int start, int end, bool isLast) {
+    RangeError.checkValidRange(start, end, string.length);
+
+    // An empty slice contributes nothing; only honor a close request.
+    if (start == end) {
+      if (isLast) close();
+      return;
+    }
+
+    var buffer = new Uint8Buffer();
+    var codeUnits = string.codeUnits;
+    if (_lastDigit == _lastPercent) {
+      // The previous slice ended with a bare `%`, so the first code unit here
+      // is the most significant digit of that encoded byte.
+      _lastDigit = 16 * digitForCodeUnit(codeUnits, start);
+      start++;
+
+      // The slice held only that one digit; keep it pending for the next
+      // slice.
+      if (start == end) {
+        if (isLast) close();
+        return;
+      }
+    }
+
+    // This check must sit outside the branch above so that a high digit left
+    // pending by an earlier slice is combined with its low digit as well.
+    if (_lastDigit != null) {
+      buffer.add(_lastDigit + digitForCodeUnit(codeUnits, start));
+      start++;
+    }
+
+    _lastDigit = _decode(codeUnits, start, end, buffer);
+
+    _sink.add(buffer.buffer.asUint8List(0, buffer.length));
+    if (isLast) close();
+  }
+
+  /// Returns a sink that decodes percent-encoded bytes into the same
+  /// underlying sink.
+  ///
+  /// [allowMalformed] is not used.
+  // NOTE(review): only [_sink] is handed to the new sink, so a pending
+  // [_lastDigit] on this sink is not carried over.
+  ByteConversionSink asUtf8Sink(bool allowMalformed) =>
+      new _PercentDecoderByteSink(_sink);
+
+  /// Closes the underlying sink.
+  ///
+  /// Throws a [FormatException] if the input so far ended partway through a
+  /// percent-encoded byte.
+  void close() {
+    if (_lastDigit != null) {
+      throw new FormatException("Input ended with incomplete encoded byte.");

Lasse Reichstein Nielsen 2015/10/08 10:30:36 Consider having a version of this that is passed t

nweiz 2015/10/08 20:44:51 Done. Made the same change in the hex decoder as w

+    }
+
+    _sink.close();
+  }
+}

+/// A conversion sink for chunked percent-encoded decoding from UTF-8 bytes.

Lasse Reichstein Nielsen 2015/10/08 10:30:36 percentadecimal :)

nweiz 2015/10/08 20:44:51 Done.

+class _PercentDecoderByteSink extends ByteConversionSinkBase {
+  /// The underlying sink to which decoded byte arrays will be passed.
+  final Sink<List<int>> _sink;
+
+  /// The trailing digit from the previous chunk.
+  ///
+  /// This is `null` if the previous chunk ended with a complete
+  /// percent-encoded byte or a literal character. It's [_lastPercent] if the
+  /// most recent chunk ended with `%`. Otherwise, the most recent chunk ended
+  /// with a `%` followed by a hexadecimal digit, and this is that digit. Since
+  /// it's the most significant digit, it's always a multiple of 16.
+  int _lastDigit;
+
+  _PercentDecoderByteSink(this._sink);
+
+  /// Decodes all of [chunk] without closing the sink.
+  void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);
+
+  /// Decodes the elements of [chunk] between [start] and [end] and passes the
+  /// resulting bytes to the underlying sink.
+  ///
+  /// A percent-encoded byte left incomplete by an earlier chunk is completed
+  /// with the first elements of this one. If [isLast] is `true`, the sink is
+  /// closed afterwards.
+  void addSlice(List<int> chunk, int start, int end, bool isLast) {
+    RangeError.checkValidRange(start, end, chunk.length);
+
+    // An empty slice contributes nothing; only honor a close request.
+    if (start == end) {
+      if (isLast) close();
+      return;
+    }
+
+    var buffer = new Uint8Buffer();
+    if (_lastDigit == _lastPercent) {
+      // The previous chunk ended with a bare `%`, so the first element here is
+      // the most significant digit of that encoded byte.
+      _lastDigit = 16 * digitForCodeUnit(chunk, start);
+      start++;
+
+      // The slice held only that one digit; keep it pending for the next
+      // chunk.
+      if (start == end) {
+        if (isLast) close();
+        return;
+      }
+    }
+
+    // This check must sit outside the branch above so that a high digit left
+    // pending by an earlier chunk is combined with its low digit as well.
+    if (_lastDigit != null) {
+      buffer.add(_lastDigit + digitForCodeUnit(chunk, start));
+      start++;
+    }
+
+    _lastDigit = _decode(chunk, start, end, buffer);
+
+    _sink.add(buffer.buffer.asUint8List(0, buffer.length));
+    if (isLast) close();
+  }
+
+  /// Closes the underlying sink.
+  ///
+  /// Throws a [FormatException] if the input so far ended partway through a
+  /// percent-encoded byte.
+  void close() {
+    if (_lastDigit != null) {
+      throw new FormatException("Input ended with incomplete encoded byte.");
+    }
+
+    _sink.close();
+  }
+}

+/// Decodes [codeUnits] and appends the result to [buffer].
+///
+/// This reads from [codeUnits] between [start] (inclusive) and [end]
+/// (exclusive). Code units other than `%` are copied to [buffer] unchanged; a
+/// `%` and the two digits after it are combined into a single byte.
+///
+/// If the range ends partway through a percent-encoded byte, this returns the
+/// pending state: [_lastPercent] if it ends with a bare `%`, or sixteen times
+/// the first digit if it ends with `%` followed by one digit. Otherwise it
+/// returns `null`.
+///
+/// Throws a [FormatException] if a literal code unit lies outside the ASCII
+/// range.
+int _decode(List<int> codeUnits, int start, int end, Uint8Buffer buffer) {
+  for (var i = start; i < end; i++) {
+    var codeUnit = codeUnits[i];
+    if (codeUnit != $percent) {
+      // ASCII ends at 0x7f. Negative values can arrive through the byte sink,
+      // whose input is an arbitrary list of ints, so reject those as well.
+      if (codeUnit < 0 || codeUnit > 0x7f) {

Lasse Reichstein Nielsen 2015/10/08 10:30:36 This should also be checking for codeUnit < 0. Fo

nweiz 2015/10/08 20:44:52 Done.

+        throw new FormatException(
+            "Non-ASCII code unit "
+            "U+${codeUnit.toRadixString(16).padLeft(4, '0')}",
+            codeUnits, i);
+      }
+      buffer.add(codeUnit);
+      continue;
+    }
+
+    // Skip past the `%`. If the input stops here, report that a bare `%` is
+    // pending.
+    i++;
+    if (i >= end) return _lastPercent;
+
+    var firstDigit = digitForCodeUnit(codeUnits, i);
+
+    // If the input stops after one digit, return it pre-multiplied so the
+    // caller only has to add the second digit.
+    i++;
+    if (i >= end) return 16 * firstDigit;
+
+    var secondDigit = digitForCodeUnit(codeUnits, i);
+    buffer.add(16 * firstDigit + secondDigit);
+  }
+
+  return null;
+}

« lib/src/percent.dart ('K') | « lib/src/percent.dart ('k') | lib/src/percent/encoder.dart » ('j') | lib/src/percent/encoder.dart » ('J')