Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(228)

Unified Diff: lib/src/percent/decoder.dart

Issue 1393003003: Add a percent-encoding converter. (Closed) Base URL: git@github.com:dart-lang/convert.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: lib/src/percent/decoder.dart
diff --git a/lib/src/percent/decoder.dart b/lib/src/percent/decoder.dart
new file mode 100644
index 0000000000000000000000000000000000000000..bab945c1c84b9da0abebabce01b1182c92a039e6
--- /dev/null
+++ b/lib/src/percent/decoder.dart
@@ -0,0 +1,198 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library convert.percent.decoder;
+
+import 'dart:convert';
+
+import 'package:charcode/ascii.dart';
+import 'package:typed_data/typed_data.dart';
+
+import '../utils.dart';
+
+/// The canonical instance of [PercentDecoder].
+const percentDecoder = const PercentDecoder._();
Lasse Reichstein Nielsen 2015/10/08 10:30:36 Consider typing the constant.
nweiz 2015/10/08 20:44:51 I really don't like doing this for final/const fie
Lasse Reichstein Nielsen 2015/10/09 09:07:13 Will the dartdoc show the correct type for the con
nweiz 2015/10/09 19:52:55 It should; if not, that's an issue with dartdoc.
+
+/// The pending-digit value used when the input so far ended with a bare `%`.
+const _lastPercent = -1;
+
+/// A converter that decodes percent-encoded strings into byte arrays.
Lasse Reichstein Nielsen 2015/10/08 10:30:36 percentadecimal? I'm guessing "percent encoded". R
nweiz 2015/10/08 20:44:52 I started this file as a search-replace of the hex
+///
+/// To be maximally flexible, this will decode any percent-encoded byte and
+/// will allow any non-percent-encoded byte other than `%`. By default, it
+/// interprets `+` as `0x2B` rather than `0x20` as emitted by
+/// [Uri.encodeQueryComponent].
+///
+/// This will throw a [FormatException] if the input string has an incomplete
+/// percent-encoding, or if it contains non-ASCII code units.
+class PercentDecoder extends Converter<String, List<int>> {
+  /// Private constructor; [percentDecoder] is the canonical instance.
+  const PercentDecoder._();
+
+  /// Decodes all of [string] at once and returns the decoded bytes.
+  ///
+  /// Throws a [FormatException] if [string] ends partway through a
+  /// percent-encoded byte.
+  List<int> convert(String string) {
+    var decoded = new Uint8Buffer();
+    var length = string.length;
+    var pending = _decode(string.codeUnits, 0, length, decoded);
+
+    if (pending == null) {
+      // View only the first `decoded.length` bytes of the backing buffer.
+      return decoded.buffer.asUint8List(0, decoded.length);
+    }
+
+    throw new FormatException(
+        "Input ended with incomplete encoded byte.", string, length);
+  }
+
+  /// Returns a sink that decodes string chunks and passes the bytes to [sink].
+  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
+    return new _PercentDecoderSink(sink);
+  }
+}
+
+/// A conversion sink for chunked percent-encoded decoding.
+class _PercentDecoderSink extends StringConversionSinkBase {
+  /// Receives each decoded chunk of bytes.
+  final Sink<List<int>> _sink;
+
+  /// Decoding state carried over from the end of the previous string.
+  ///
+  /// `null` means the previous string ended on a complete percent-encoded byte
+  /// or a literal character. [_lastPercent] means it ended with a bare `%`.
+  /// Any other value means it ended with `%` plus one hexadecimal digit; the
+  /// value is that digit already multiplied by 16, because it is the most
+  /// significant digit of the byte.
+  int _lastDigit;
+
+  _PercentDecoderSink(this._sink);
+
+  /// Decodes the part of [string] from [start] to [end] and passes the
+  /// resulting bytes to the underlying sink.
+  ///
+  /// Closes this sink afterwards if [isLast] is true.
+  void addSlice(String string, int start, int end, bool isLast) {
+    RangeError.checkValidRange(start, end, string.length);
+
+    var index = start;
+    if (index == end) {
+      if (isLast) close();
+      return;
+    }
+
+    var units = string.codeUnits;
+    var bytes = new Uint8Buffer();
+
+    // The previous string ended with `%`, so the first code unit here is the
+    // high digit of that byte.
+    if (_lastDigit == _lastPercent) {
+      _lastDigit = 16 * digitForCodeUnit(units, index++);
+
+      if (index == end) {
+        if (isLast) close();
+        return;
+      }
+    }
+
+    // A high digit is pending, so the next code unit is the low digit.
+    if (_lastDigit != null) {
+      bytes.add(_lastDigit + digitForCodeUnit(units, index++));
+    }
+
+    _lastDigit = _decode(units, index, end, bytes);
+
+    _sink.add(bytes.buffer.asUint8List(0, bytes.length));
+    if (isLast) close();
+  }
+
+  /// Returns a byte sink that decodes into the same underlying sink.
+  ///
+  /// [allowMalformed] is ignored. The returned sink starts without any pending
+  /// state from this one.
+  ByteConversionSink asUtf8Sink(bool allowMalformed) {
+    return new _PercentDecoderByteSink(_sink);
+  }
+
+  /// Closes the underlying sink.
+  ///
+  /// Throws a [FormatException] instead if a percent-encoded byte is still
+  /// incomplete.
+  void close() {
+    if (_lastDigit == null) {
+      _sink.close();
+      return;
+    }
+
+    throw new FormatException("Input ended with incomplete encoded byte.");
Lasse Reichstein Nielsen 2015/10/08 10:30:36 Consider having a version of this that is passed t
nweiz 2015/10/08 20:44:51 Done. Made the same change in the hex decoder as w
+  }
+}
+
+/// A conversion sink for chunked percent-encoded decoding from UTF-8 bytes.
Lasse Reichstein Nielsen 2015/10/08 10:30:36 percentadecimal :)
nweiz 2015/10/08 20:44:51 Done.
+class _PercentDecoderByteSink extends ByteConversionSinkBase {
+  /// Receives each decoded chunk of bytes.
+  final Sink<List<int>> _sink;
+
+  /// Decoding state carried over from the end of the previous chunk.
+  ///
+  /// `null` means the previous chunk ended on a complete percent-encoded byte
+  /// or a literal character. [_lastPercent] means it ended with a bare `%`.
+  /// Any other value means it ended with `%` plus one hexadecimal digit; the
+  /// value is that digit already multiplied by 16, because it is the most
+  /// significant digit of the byte.
+  int _lastDigit;
+
+  _PercentDecoderByteSink(this._sink);
+
+  /// Decodes all of [chunk] without closing the sink.
+  void add(List<int> chunk) {
+    addSlice(chunk, 0, chunk.length, false);
+  }
+
+  /// Decodes the part of [chunk] from [start] to [end] and passes the
+  /// resulting bytes to the underlying sink.
+  ///
+  /// Closes this sink afterwards if [isLast] is true.
+  void addSlice(List<int> chunk, int start, int end, bool isLast) {
+    RangeError.checkValidRange(start, end, chunk.length);
+
+    var index = start;
+    if (index == end) {
+      if (isLast) close();
+      return;
+    }
+
+    var bytes = new Uint8Buffer();
+
+    // The previous chunk ended with `%`, so the first element here is the high
+    // digit of that byte.
+    if (_lastDigit == _lastPercent) {
+      _lastDigit = 16 * digitForCodeUnit(chunk, index++);
+
+      if (index == end) {
+        if (isLast) close();
+        return;
+      }
+    }
+
+    // A high digit is pending, so the next element is the low digit.
+    if (_lastDigit != null) {
+      bytes.add(_lastDigit + digitForCodeUnit(chunk, index++));
+    }
+
+    _lastDigit = _decode(chunk, index, end, bytes);
+
+    _sink.add(bytes.buffer.asUint8List(0, bytes.length));
+    if (isLast) close();
+  }
+
+  /// Closes the underlying sink.
+  ///
+  /// Throws a [FormatException] instead if a percent-encoded byte is still
+  /// incomplete.
+  void close() {
+    if (_lastDigit == null) {
+      _sink.close();
+      return;
+    }
+
+    throw new FormatException("Input ended with incomplete encoded byte.");
+  }
+}
+
+/// Decodes [codeUnits] and appends the resulting bytes to [buffer].
+///
+/// This reads from [codeUnits] between [start] (inclusive) and [end]
+/// (exclusive). Code units other than `%` are appended as-is; a `%` followed by
+/// two digits is appended as the single byte `16 * first + second`.
+///
+/// If the range ends partway through a percent-encoded byte, this returns the
+/// state needed to resume: [_lastPercent] if it ended just after `%`, or the
+/// first digit multiplied by 16 if it ended after `%` and one digit. Otherwise
+/// it returns `null`.
+///
+/// Throws a [FormatException] if a code unit outside a percent-encoding is not
+/// ASCII, i.e. is negative or greater than `0x7f`.
+int _decode(List<int> codeUnits, int start, int end, Uint8Buffer buffer) {
+  for (var i = start; i < end; i++) {
+    var codeUnit = codeUnits[i];
+    if (codeUnit != $percent) {
+      // ASCII is 0x00-0x7f. Negative values are possible because [codeUnits]
+      // may be an arbitrary list of ints rather than a string's code units.
+      if (codeUnit < 0 || codeUnit > 0x7f) {
Lasse Reichstein Nielsen 2015/10/08 10:30:36 This should also be checking for codeUnit < 0. Fo
nweiz 2015/10/08 20:44:52 Done.
+        throw new FormatException(
+            "Non-ASCII code unit "
+            "U+${codeUnit.toRadixString(16).padLeft(4, '0')}",
+            codeUnits, i);
+      }
+
+      buffer.add(codeUnit);
+      continue;
+    }
+
+    // Step past the `%`; if the range ends here, report that so the caller can
+    // resume with the next chunk.
+    i++;
+    if (i >= end) return _lastPercent;
+
+    var firstDigit = digitForCodeUnit(codeUnits, i);
+    i++;
+    // Only the high digit was available; hand it back pre-multiplied by 16.
+    if (i >= end) return 16 * firstDigit;
+
+    var secondDigit = digitForCodeUnit(codeUnits, i);
+    buffer.add(16 * firstDigit + secondDigit);
+  }
+
+  return null;
+}

Powered by Google App Engine
This is Rietveld 408576698