lib/src/percent/decoder.dart - Issue 1393003003: Add a percent-encoding converter.

Side by Side Diff: lib/src/percent/decoder.dart

Issue 1393003003: Add a percent-encoding converter. (Closed) Base URL: git@github.com:dart-lang/convert.git@master

Patch Set: Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file

	2 // for details. All rights reserved. Use of this source code is governed by a

	3 // BSD-style license that can be found in the LICENSE file.

	4

	5 library convert.percent.decoder;

	6

	7 import 'dart:convert';

	8

	9 import 'package:charcode/ascii.dart';

	10 import 'package:typed_data/typed_data.dart';

	11

	12 import '../utils.dart';

	13

	14 /// The canonical instance of [PercentDecoder].

	15 const percentDecoder = const PercentDecoder._();
	Lasse Reichstein Nielsen 2015/10/08 10:30:36 Consider typing the constant. Consider typing the constant. nweiz 2015/10/08 20:44:51 I really don't like doing this for final/const fie Show quoted text On 2015/10/08 10:30:36, Lasse Reichstein Nielsen wrote: > Consider typing the constant. I really don't like doing this for final/const fields where the RHS is exactly the correct type already. Lasse Reichstein Nielsen 2015/10/09 09:07:13 Will the dartdoc show the correct type for the con Will the dartdoc show the correct type for the constant if you don't? nweiz 2015/10/09 19:52:55 It should; if not, that's an issue with dartdoc. Show quoted text On 2015/10/09 09:07:13, Lasse Reichstein Nielsen wrote: > Will the dartdoc show the correct type for the constant if you don't? It should; if not, that's an issue with dartdoc. In this case in particular, this isn't exposed publicly, so that's a non-issue.
	16

	17 const _lastPercent = -1; // Sentinel "pending digit" value: the input ended immediately after a `%`.

	18

	19 /// A converter that decodes percent-encoded strings into byte arrays.
	Lasse Reichstein Nielsen 2015/10/08 10:30:36 percentadecimal? I'm guessing "percent encoded". R percentadecimal? I'm guessing "percent encoded". Replace everywhere it occurs (most other places just with "percent"). nweiz 2015/10/08 20:44:52 I started this file as a search-replace of the hex Show quoted text On 2015/10/08 10:30:36, Lasse Reichstein Nielsen wrote: > percentadecimal? I'm guessing "percent encoded". > Replace everywhere it occurs (most other places just with "percent"). I started this file as a search-replace of the hex decoder, but I thought I caught all these :p.
	20 ///

	21 /// To be maximally flexible, this will decode any percent-encoded byte and

	22 /// will allow any non-percent-encoded byte other than `%`. By default, it

	23 /// interprets `+` as `0x2B` rather than `0x20` as emitted by

	24 /// [Uri.encodeQueryComponent].

	25 ///

	26 /// This will throw a [FormatException] if the input string has an incomplete

	27 /// percent-encoding, or if it contains non-ASCII code units.

	28 class PercentDecoder extends Converter<String, List<int>> {

	29 const PercentDecoder._(); // Private: instances are only created inside this library.

	30

	31 List<int> convert(String string) { // Decodes all of [string] in a single call.

	32 var buffer = new Uint8Buffer();

	33 var lastDigit = _decode(string.codeUnits, 0, string.length, buffer);

	34

	35 if (lastDigit != null) { // The input stopped partway through a `%XX` escape.

	36 throw new FormatException(

	37 "Input ended with incomplete encoded byte.",

	38 string, string.length); // The error offset is the end of the input.

	39 }

	40

	41 return buffer.buffer.asUint8List(0, buffer.length); // View of the first buffer.length bytes of the backing ByteBuffer.

	42 }

	43

	44 StringConversionSink startChunkedConversion(Sink<List<int>> sink) => // Decoded chunks are forwarded to [sink].

	45 new _PercentDecoderSink(sink);

	46 }

	47

	48 /// A conversion sink for chunked percent decoding.

	49 class _PercentDecoderSink extends StringConversionSinkBase {

	50 /// The underlying sink to which decoded byte arrays will be passed.

	51 final Sink<List<int>> _sink;

	52

	53 /// The trailing digit from the previous string.

	54 ///

	55 /// This is `null` if the previous string ended with a complete

	56 /// percent-encoded byte or a literal character. It's [_lastPercent] if the

	57 /// most recent string ended with `%`. Otherwise, the most recent string ended

	58 /// with a `%` followed by a hexadecimal digit, and this is that digit. Since

	59 /// it's the most significant digit, it's always a multiple of 16.

	60 int _lastDigit;

	61

	62 _PercentDecoderSink(this._sink);

	63

	64 void addSlice(String string, int start, int end, bool isLast) { // Decodes string[start..end) and forwards the bytes to [_sink].

	65 RangeError.checkValidRange(start, end, string.length);

	66

	67 if (start == end) { // Empty slice: nothing to decode.

	68 if (isLast) close();

	69 return;

	70 }

	71

	72 var buffer = new Uint8Buffer();

	73 var codeUnits = string.codeUnits;

	74 if (_lastDigit == _lastPercent) { // The previous slice ended right after a `%`.

	75 _lastDigit = 16 * digitForCodeUnit(codeUnits, start); // First digit of the pending byte, pre-multiplied by 16.

	76 start++;

	77

	78 if (start == end) { // Slice exhausted; the second digit must come from a later slice.

	79 if (isLast) close();

	80 return;

	81 }

	82 }

	83

	84 if (_lastDigit != null) { // Complete the byte whose first digit is already stored.

	85 buffer.add(_lastDigit + digitForCodeUnit(codeUnits, start));

	86 start++;

	87 }

	88

	89 _lastDigit = _decode(codeUnits, start, end, buffer); // Remember any escape left unfinished by this slice.

	90

	91 _sink.add(buffer.buffer.asUint8List(0, buffer.length)); // Forward only the first buffer.length bytes.

	92 if (isLast) close();

	93 }

	94

	95 ByteConversionSink asUtf8Sink(bool allowMalformed) => // [allowMalformed] is not used here.

	96 new _PercentDecoderByteSink(_sink); // NOTE(review): a pending [_lastDigit] is not handed over to the new sink.

	97

	98 void close() { // Throws if a `%` escape is still incomplete; otherwise closes [_sink].

	99 if (_lastDigit != null) {

	100 throw new FormatException("Input ended with incomplete encoded byte.");
	Lasse Reichstein Nielsen 2015/10/08 10:30:36 Consider having a version of this that is passed t Consider having a version of this that is passed the input and index. So: void close() { _close(null, null); } void _close(input, index) { if (_lastDigit != null) { throw new FormatException("...", input. index); } _sink.close(); } nweiz 2015/10/08 20:44:51 Done. Made the same change in the hex decoder as w Show quoted text On 2015/10/08 10:30:36, Lasse Reichstein Nielsen wrote: > Consider having a version of this that is passed the input and index. > > So: > void close() { _close(null, null); } > void _close(input, index) { > if (_lastDigit != null) { > throw new FormatException("...", input. index); > } > _sink.close(); > } Done. Made the same change in the hex decoder as well.
	101 }

	102

	103 _sink.close();

	104 }

	105 }

	106

	107 /// A conversion sink for chunked percent decoding from UTF-8 bytes.
	Lasse Reichstein Nielsen 2015/10/08 10:30:36 percentadecimal :) percentadecimal :) nweiz 2015/10/08 20:44:51 Done. Show quoted text On 2015/10/08 10:30:36, Lasse Reichstein Nielsen wrote: > percentadecimal :) Done.
	108 class _PercentDecoderByteSink extends ByteConversionSinkBase {

	109 /// The underlying sink to which decoded byte arrays will be passed.

	110 final Sink<List<int>> _sink;

	111

	112 /// The trailing digit from the previous chunk.

	113 ///

	114 /// This is `null` if the previous chunk ended with a complete

	115 /// percent-encoded byte or a literal character. It's [_lastPercent] if the

	116 /// most recent chunk ended with `%`. Otherwise, the most recent chunk ended

	117 /// with a `%` followed by a hexadecimal digit, and this is that digit. Since

	118 /// it's the most significant digit, it's always a multiple of 16.

	119 int _lastDigit;

	120

	121 _PercentDecoderByteSink(this._sink);

	122

	123 void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false); // Whole chunk, never treated as the last one.

	124

	125 void addSlice(List<int> chunk, int start, int end, bool isLast) { // Decodes chunk[start..end) and forwards the bytes to [_sink].

	126 RangeError.checkValidRange(start, end, chunk.length);

	127

	128 if (start == end) { // Empty slice: nothing to decode.

	129 if (isLast) close();

	130 return;

	131 }

	132

	133 var buffer = new Uint8Buffer();

	134 if (_lastDigit == _lastPercent) { // The previous chunk ended right after a `%`.

	135 _lastDigit = 16 * digitForCodeUnit(chunk, start); // First digit of the pending byte, pre-multiplied by 16.

	136 start++;

	137

	138 if (start == end) { // Slice exhausted; the second digit must come from a later chunk.

	139 if (isLast) close();

	140 return;

	141 }

	142 }

	143

	144 if (_lastDigit != null) { // Complete the byte whose first digit is already stored.

	145 buffer.add(_lastDigit + digitForCodeUnit(chunk, start));

	146 start++;

	147 }

	148

	149 _lastDigit = _decode(chunk, start, end, buffer); // Remember any escape left unfinished by this chunk.

	150

	151 _sink.add(buffer.buffer.asUint8List(0, buffer.length)); // Forward only the first buffer.length bytes.

	152 if (isLast) close();

	153 }

	154

	155 void close() { // Throws if a `%` escape is still incomplete; otherwise closes [_sink].

	156 if (_lastDigit != null) {

	157 throw new FormatException("Input ended with incomplete encoded byte.");

	158 }

	159

	160 _sink.close();

	161 }

	162 }

	163

	164 /// Decodes [codeUnits] and appends the resulting bytes to [buffer].

	165 ///

	166 /// This reads from [codeUnits] between [start] (inclusive) and [end]

	167 /// (exclusive). Throws a [FormatException] on a code unit outside ASCII.

	168 ///

	169 /// Returns [_lastPercent] if the range ends right after a `%`, the first digit

	170 /// times 16 if it ends after one digit of an escape, and `null` otherwise.

	171 int _decode(List<int> codeUnits, int start, int end, Uint8Buffer buffer) {

	172 for (var i = start; i < end; i++) {

	173 var codeUnit = codeUnits[i];

	174 if (codeUnit != $percent) {

	175 if (codeUnit < 0 || codeUnit > 0x7f) { // Only 7-bit ASCII (0x00-0x7f) may appear unescaped.
	Lasse Reichstein Nielsen 2015/10/08 10:30:36 This should also be checking for codeUnit < 0. Fo This should also be checking for codeUnit < 0. For optimizing, I'd just collect the bitwise-or of all the codeUnits and then check afterwards if any of them were bad. Heck, even wait with adding the code units to the buffer, just loop searching for the first '%', and don't do anything until it's found. If there are none, you can just add the entire input to the buffer. var codeUnitOr = 0; int sliceStart = start; for (var i = start; i < end; i++) { var codeUnit = codeUnits[i]; if (codeUnit != $percent) { codeUnitOr \|= codeUnit; continue; } if (codeUnitOr < 0 \|\| codeUnitOr > 127) { // find error and throw. } if (i > sliceStart) { buffer.addAll(codeUnits.getRange(sliceStart, i)); } // handle percent like now. 
> > sliceStart = i + 1; > } > > if (codeUnitOr < 0 \|\| codeUnitOr > 127) { > // find error and throw. > } > if (end > sliceStart) { > if (start == sliceStart) { > buffer.addAll(codeUnits); > } else { > buffer.addAll(codeUnits.getRange(sliceStart, end)); > } > } Done.
	176 throw new FormatException(

	177 "Non-ASCII code unit "

	178 "U+${codeUnit.toRadixString(16).padLeft(4, '0')}",

	179 codeUnits, i);

	180 }

	181

	182 buffer.add(codeUnit); // Literal ASCII byte: copy it through unchanged.

	183 continue;

	184 }

	185

	186 i++; // Step past the `%` to the first hex digit.

	187 if (i >= end) return _lastPercent; // Range ended right after the `%`.

	188

	189 var firstDigit = digitForCodeUnit(codeUnits, i);

	190 i++;

	191 if (i >= end) return 16 * firstDigit; // Range ended after one digit; hand it back pre-multiplied.

	192

	193 var secondDigit = digitForCodeUnit(codeUnits, i);

	194 buffer.add(16 * firstDigit + secondDigit);

	195 }

	196

	197 return null; // No escape was left unfinished.

	198 }

OLD	NEW

« lib/src/percent.dart ('K') | « lib/src/percent.dart ('k') | lib/src/percent/encoder.dart » ('j') | lib/src/percent/encoder.dart » ('J')