Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(515)

Unified Diff: lib/src/percent/decoder.dart

Issue 1393003003: Add a percent-encoding converter. (Closed) Base URL: git@github.com:dart-lang/convert.git@master
Patch Set: Code review changes Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « lib/src/percent.dart ('k') | lib/src/percent/encoder.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/src/percent/decoder.dart
diff --git a/lib/src/percent/decoder.dart b/lib/src/percent/decoder.dart
new file mode 100644
index 0000000000000000000000000000000000000000..07406361680363ff1a7f197f2831883dc1bc73cf
--- /dev/null
+++ b/lib/src/percent/decoder.dart
@@ -0,0 +1,244 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library convert.percent.decoder;
+
+import 'dart:convert';
+
+import 'package:charcode/ascii.dart';
+import 'package:typed_data/typed_data.dart';
+
+import '../utils.dart';
+
+/// The canonical instance of [PercentDecoder].
+const percentDecoder = const PercentDecoder._();
+
+/// Sentinel "last digit" value meaning that the input seen so far ended with a
+/// bare `%`, before either hexadecimal digit of the encoded byte was read.
+const _lastPercent = -1;
+
+/// Decodes percent-encoded strings into lists of bytes.
+///
+/// The decoder is deliberately permissive: every percent-encoded byte is
+/// decoded, and any other byte except a bare `%` is accepted as-is. By default
+/// `+` is read as `0x2B`, not as the `0x20` that [Uri.encodeQueryComponent]
+/// emits it for.
+///
+/// A [FormatException] is thrown if the input stops in the middle of a
+/// percent-encoded byte, or if it contains non-ASCII code units.
+class PercentDecoder extends Converter<String, List<int>> {
+  const PercentDecoder._();
+
+  /// Decodes the whole of [string] in one step and returns the decoded bytes.
+  ///
+  /// Throws a [FormatException] pointing at the end of [string] if it ends
+  /// with an incomplete percent-encoded byte.
+  List<int> convert(String string) {
+    var bytes = new Uint8Buffer();
+    var trailing = _decode(string.codeUnits, 0, string.length, bytes);
+
+    // `null` means nothing was left half-decoded, so the result is complete.
+    if (trailing == null) return bytes.buffer.asUint8List(0, bytes.length);
+
+    throw new FormatException(
+        "Input ended with incomplete encoded byte.",
+        string, string.length);
+  }
+
+  /// Returns a sink that decodes string chunks and forwards the resulting
+  /// bytes to [sink].
+  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
+    return new _PercentDecoderSink(sink);
+  }
+}
+
+/// The sink used for chunked decoding of percent-encoded strings.
+class _PercentDecoderSink extends StringConversionSinkBase {
+  /// Where decoded byte arrays are sent.
+  final Sink<List<int>> _sink;
+
+  /// Decoding state carried over from the previous string.
+  ///
+  /// `null` means the previous string ended cleanly, on a literal character or
+  /// a complete percent-encoded byte. [_lastPercent] means it ended with a
+  /// `%`. Any other value means it ended with a `%` and one hexadecimal digit;
+  /// the value is that digit already shifted into the high nibble, so it is
+  /// always a multiple of 16.
+  int _lastDigit;
+
+  _PercentDecoderSink(this._sink);
+
+  /// Decodes the part of [string] between [start] and [end] and passes the
+  /// decoded bytes to the underlying sink, closing it when [isLast] is true.
+  void addSlice(String string, int start, int end, bool isLast) {
+    RangeError.checkValidRange(start, end, string.length);
+
+    var units = string.codeUnits;
+    var position = start;
+
+    // The previous string stopped right after a `%`: the first code unit of
+    // this slice is the high digit of that byte.
+    if (position < end && _lastDigit == _lastPercent) {
+      _lastDigit = 16 * digitForCodeUnit(units, position);
+      position++;
+    }
+
+    // Nothing (more) to decode in this slice, so nothing is emitted.
+    if (position == end) {
+      if (isLast) _close(string, end);
+      return;
+    }
+
+    var decoded = new Uint8Buffer();
+    var highDigit = _lastDigit;
+    if (highDigit != null) {
+      // Finish the byte whose high digit is already known.
+      decoded.add(highDigit + digitForCodeUnit(units, position));
+      position++;
+    }
+
+    _lastDigit = _decode(units, position, end, decoded);
+    _sink.add(decoded.buffer.asUint8List(0, decoded.length));
+
+    if (isLast) _close(string, end);
+  }
+
+  /// Returns a byte sink that decodes percent-encoded UTF-8 input into the
+  /// same underlying sink.
+  ByteConversionSink asUtf8Sink(bool allowMalformed) {
+    return new _PercentDecoderByteSink(_sink);
+  }
+
+  void close() {
+    _close();
+  }
+
+  /// Does the work of [close]; [string] and [index] are attached to the
+  /// [FormatException] thrown when a percent-encoded byte is left unfinished.
+  void _close([String string, int index]) {
+    if (_lastDigit == null) {
+      _sink.close();
+      return;
+    }
+
+    throw new FormatException(
+        "Input ended with incomplete encoded byte.", string, index);
+  }
+}
+
+/// The sink used for chunked decoding of percent-encoded UTF-8 bytes.
+class _PercentDecoderByteSink extends ByteConversionSinkBase {
+  /// Where decoded byte arrays are sent.
+  final Sink<List<int>> _sink;
+
+  /// Decoding state carried over from the previous chunk.
+  ///
+  /// `null` means the previous chunk ended cleanly, on a literal character or
+  /// a complete percent-encoded byte. [_lastPercent] means it ended with a
+  /// `%`. Any other value means it ended with a `%` and one hexadecimal digit;
+  /// the value is that digit already shifted into the high nibble, so it is
+  /// always a multiple of 16.
+  int _lastDigit;
+
+  _PercentDecoderByteSink(this._sink);
+
+  /// Decodes all of [chunk] as a non-final slice.
+  void add(List<int> chunk) {
+    addSlice(chunk, 0, chunk.length, false);
+  }
+
+  /// Decodes the part of [chunk] between [start] and [end] and passes the
+  /// decoded bytes to the underlying sink, closing it when [isLast] is true.
+  void addSlice(List<int> chunk, int start, int end, bool isLast) {
+    RangeError.checkValidRange(start, end, chunk.length);
+
+    var position = start;
+
+    // The previous chunk stopped right after a `%`: the first byte of this
+    // slice is the high digit of that encoded byte.
+    if (position < end && _lastDigit == _lastPercent) {
+      _lastDigit = 16 * digitForCodeUnit(chunk, position);
+      position++;
+    }
+
+    // Nothing (more) to decode in this slice, so nothing is emitted.
+    if (position == end) {
+      if (isLast) _close(chunk, end);
+      return;
+    }
+
+    var decoded = new Uint8Buffer();
+    var highDigit = _lastDigit;
+    if (highDigit != null) {
+      // Finish the byte whose high digit is already known.
+      decoded.add(highDigit + digitForCodeUnit(chunk, position));
+      position++;
+    }
+
+    _lastDigit = _decode(chunk, position, end, decoded);
+    _sink.add(decoded.buffer.asUint8List(0, decoded.length));
+
+    if (isLast) _close(chunk, end);
+  }
+
+  void close() {
+    _close();
+  }
+
+  /// Does the work of [close]; [chunk] and [index] are attached to the
+  /// [FormatException] thrown when a percent-encoded byte is left unfinished.
+  void _close([List<int> chunk, int index]) {
+    if (_lastDigit == null) {
+      _sink.close();
+      return;
+    }
+
+    throw new FormatException(
+        "Input ended with incomplete encoded byte.", chunk, index);
+  }
+}
+
+/// Decodes the percent-encoded [codeUnits] between [start] (inclusive) and
+/// [end] (exclusive), appending the decoded bytes to [buffer].
+///
+/// Code units other than `%` are copied through unchanged. Each `%` is
+/// followed by two hexadecimal digits, read with [digitForCodeUnit], which are
+/// combined into a single byte.
+///
+/// If the range ends in the middle of a percent-encoded byte, this returns the
+/// state needed to resume decoding: [_lastPercent] if the range ended directly
+/// after a `%`, or the first digit multiplied by 16 if it ended after a `%`
+/// and one digit. Otherwise it returns `null`.
+///
+/// Throws a [FormatException] if a copied code unit is outside the ASCII
+/// range.
+int _decode(List<int> codeUnits, int start, int end, Uint8Buffer buffer) {
+  // A bitwise OR of every literal (non-%) code unit seen so far. This allows us
+  // to check for out-of-range code units without adding more branches than
+  // necessary to the core loop.
+  var codeUnitOr = 0;
+
+  // The beginning of the current slice of adjacent non-% characters. We can add
+  // all of these to the buffer at once.
+  var sliceStart = start;
+  for (var i = start; i < end; i++) {
+    // First, loop through non-% characters.
+    var codeUnit = codeUnits[i];
+    if (codeUnit != $percent) {
+      codeUnitOr |= codeUnit;
+      continue;
+    }
+
+    // We found a %. The slice from `sliceStart` to `i` represents characters
+    // that can be copied to the buffer as-is.
+    if (i > sliceStart) {
+      _checkForInvalidCodeUnit(codeUnitOr, codeUnits, sliceStart, i);
+      buffer.addAll(codeUnits.getRange(sliceStart, i));
+    }
+
+    // Now decode the percent-encoded byte and add it as well.
+    i++;
+    if (i >= end) return _lastPercent;
+
+    var firstDigit = digitForCodeUnit(codeUnits, i);
+    i++;
+    if (i >= end) return 16 * firstDigit;
+
+    var secondDigit = digitForCodeUnit(codeUnits, i);
+    buffer.add(16 * firstDigit + secondDigit);
+
+    // The next iteration will look for non-% characters again.
+    sliceStart = i + 1;
+  }
+
+  if (end > sliceStart) {
+    _checkForInvalidCodeUnit(codeUnitOr, codeUnits, sliceStart, end);
+    if (sliceStart == 0 && end == codeUnits.length) {
+      // The remaining slice is the entire list, so it can be added directly
+      // without creating a range view. This shortcut must not be taken for a
+      // partial range, or code units outside [start, end) would be emitted.
+      buffer.addAll(codeUnits);
+    } else {
+      buffer.addAll(codeUnits.getRange(sliceStart, end));
+    }
+  }
+
+  return null;
+}
+
+/// Throws a [FormatException] for the first non-ASCII code unit in
+/// [codeUnits] between [start] (inclusive) and [end] (exclusive).
+///
+/// [codeUnitOr] is a bitwise OR of code units computed by the caller. When it
+/// lies within the ASCII range the scan is skipped entirely and nothing is
+/// thrown.
+void _checkForInvalidCodeUnit(int codeUnitOr, List<int> codeUnits, int start,
+    int end) {
+  var orIsAscii = codeUnitOr >= 0 && codeUnitOr <= 0x7f;
+  if (orIsAscii) return;
+
+  // Something was out of range: walk the slice to locate the offender so the
+  // exception can report its exact position.
+  for (var index = start; index < end; index++) {
+    var unit = codeUnits[index];
+    if (unit < 0 || unit > 0x7f) {
+      throw new FormatException(
+          "Non-ASCII code unit "
+          "U+${unit.toRadixString(16).padLeft(4, '0')}",
+          codeUnits, index);
+    }
+  }
+}
« no previous file with comments | « lib/src/percent.dart ('k') | lib/src/percent/encoder.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698