Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(842)

Side by Side Diff: lib/src/percent/decoder.dart

Issue 1393003003: Add a percent-encoding converter. (Closed) Base URL: git@github.com:dart-lang/convert.git@master
Patch Set: Code review changes Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/src/percent.dart ('k') | lib/src/percent/encoder.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 library convert.percent.decoder;
6
7 import 'dart:convert';
8
9 import 'package:charcode/ascii.dart';
10 import 'package:typed_data/typed_data.dart';
11
12 import '../utils.dart';
13
/// The shared, canonical [PercentDecoder] instance.
const PercentDecoder percentDecoder = const PercentDecoder._();

/// The pending-digit marker recorded when decoded input stops immediately
/// after a `%`, before either hexadecimal digit has been read.
const int _lastPercent = -1;
18
/// Converts percent-encoded strings into the bytes they represent.
///
/// Each `%` escape is decoded to a single byte, and every other ASCII
/// character is passed through as its own code unit. In particular, `+` is
/// kept as `0x2B`; it is not turned into the space (`0x20`) that output from
/// [Uri.encodeQueryComponent] would call for.
///
/// A [FormatException] is thrown when the input stops in the middle of an
/// escape, or when it contains a code unit outside the ASCII range.
class PercentDecoder extends Converter<String, List<int>> {
  const PercentDecoder._();

  /// Decodes all of [string] in one step and returns the resulting bytes.
  List<int> convert(String string) {
    var decoded = new Uint8Buffer();
    var pending = _decode(string.codeUnits, 0, string.length, decoded);

    // `null` means the decoder consumed the whole string; expose only the
    // bytes that were actually written.
    if (pending == null) {
      return decoded.buffer.asUint8List(0, decoded.length);
    }

    // Anything else means the string stopped partway through an escape.
    throw new FormatException(
        "Input ended with incomplete encoded byte.",
        string, string.length);
  }

  /// Returns a sink that decodes strings incrementally and forwards the
  /// decoded bytes to [sink].
  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
    return new _PercentDecoderSink(sink);
  }
}
47
/// The sink used for chunked decoding of percent-encoded strings.
class _PercentDecoderSink extends StringConversionSinkBase {
  /// Where the decoded bytes are sent.
  final Sink<List<int>> _sink;

  /// State carried over from the end of the previous string.
  ///
  /// `null` when the previous string ended on a literal character or on a
  /// complete escape. [_lastPercent] when it ended right after a `%`. In
  /// every other case it ended after a `%` and one hexadecimal digit, and
  /// this holds that digit already multiplied by 16, since it is the most
  /// significant half of the byte.
  int _lastDigit;

  _PercentDecoderSink(this._sink);

  /// Decodes [string] between [start] and [end] and forwards the bytes.
  ///
  /// When [isLast] is true the sink is closed afterwards, which throws a
  /// [FormatException] if an escape is still incomplete.
  void addSlice(String string, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, string.length);

    if (start != end) {
      var output = new Uint8Buffer();
      var units = string.codeUnits;
      var cursor = start;

      // The previous string stopped right after `%`, so the first code unit
      // here is the high digit of that escape.
      if (_lastDigit == _lastPercent) {
        _lastDigit = 16 * digitForCodeUnit(units, cursor);
        cursor++;
      }

      // If that digit used up the whole slice there is nothing to emit yet.
      if (cursor != end) {
        // Finish an escape whose high digit is already known.
        if (_lastDigit != null) {
          output.add(_lastDigit + digitForCodeUnit(units, cursor));
          cursor++;
        }

        _lastDigit = _decode(units, cursor, end, output);
        _sink.add(output.buffer.asUint8List(0, output.length));
      }
    }

    if (isLast) _close(string, end);
  }

  /// Returns a sink that accepts the same input as byte chunks and decodes
  /// it into the same underlying sink.
  ByteConversionSink asUtf8Sink(bool allowMalformed) {
    return new _PercentDecoderByteSink(_sink);
  }

  void close() {
    _close();
  }

  /// Closes the underlying sink, as [close] does, but attaches [string] and
  /// [index] to the [FormatException] thrown for an unfinished escape.
  void _close([String string, int index]) {
    if (_lastDigit == null) {
      _sink.close();
      return;
    }

    throw new FormatException(
        "Input ended with incomplete encoded byte.", string, index);
  }
}
111
/// The sink used for chunked decoding of percent-encoded input that arrives
/// as lists of bytes rather than as strings.
class _PercentDecoderByteSink extends ByteConversionSinkBase {
  /// Where the decoded bytes are sent.
  final Sink<List<int>> _sink;

  /// State carried over from the end of the previous chunk.
  ///
  /// `null` when the previous chunk ended on a literal character or on a
  /// complete escape. [_lastPercent] when it ended right after a `%`. In
  /// every other case it ended after a `%` and one hexadecimal digit, and
  /// this holds that digit already multiplied by 16, since it is the most
  /// significant half of the byte.
  int _lastDigit;

  _PercentDecoderByteSink(this._sink);

  /// Decodes the whole of [chunk] without closing the sink.
  void add(List<int> chunk) {
    addSlice(chunk, 0, chunk.length, false);
  }

  /// Decodes [chunk] between [start] and [end] and forwards the bytes.
  ///
  /// When [isLast] is true the sink is closed afterwards, which throws a
  /// [FormatException] if an escape is still incomplete.
  void addSlice(List<int> chunk, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, chunk.length);

    if (start != end) {
      var output = new Uint8Buffer();
      var cursor = start;

      // The previous chunk stopped right after `%`, so the first byte here
      // is the high digit of that escape.
      if (_lastDigit == _lastPercent) {
        _lastDigit = 16 * digitForCodeUnit(chunk, cursor);
        cursor++;
      }

      // If that digit used up the whole slice there is nothing to emit yet.
      if (cursor != end) {
        // Finish an escape whose high digit is already known.
        if (_lastDigit != null) {
          output.add(_lastDigit + digitForCodeUnit(chunk, cursor));
          cursor++;
        }

        _lastDigit = _decode(chunk, cursor, end, output);
        _sink.add(output.buffer.asUint8List(0, output.length));
      }
    }

    if (isLast) _close(chunk, end);
  }

  void close() {
    _close();
  }

  /// Closes the underlying sink, as [close] does, but attaches [chunk] and
  /// [index] to the [FormatException] thrown for an unfinished escape.
  void _close([List<int> chunk, int index]) {
    if (_lastDigit == null) {
      _sink.close();
      return;
    }

    throw new FormatException(
        "Input ended with incomplete encoded byte.", chunk, index);
  }
}
173
/// Decodes the percent-encoded [codeUnits] between [start] (inclusive) and
/// [end] (exclusive), appending the decoded bytes to [buffer].
///
/// Code units other than `%` are copied to [buffer] unchanged, one run at a
/// time; a [FormatException] is thrown if a copied run contains a code unit
/// outside the ASCII range. Each `%` together with the two digits that follow
/// it is replaced by a single byte.
///
/// If the range ends partway through an escape, nothing is written for that
/// escape. Instead this returns [_lastPercent] when the range ends right
/// after a `%`, or sixteen times the first digit's value when it ends after a
/// `%` and one digit. Otherwise it returns `null`.
int _decode(List<int> codeUnits, int start, int end, Uint8Buffer buffer) {
  // A bitwise OR of the literal code units seen so far. This lets us detect
  // out-of-range code units without adding a range check to every iteration
  // of the core loop.
  var codeUnitOr = 0;

  // The beginning of the current run of adjacent non-% characters. The whole
  // run is added to the buffer at once.
  var sliceStart = start;
  for (var i = start; i < end; i++) {
    // First, loop through non-% characters.
    var codeUnit = codeUnits[i];
    if (codeUnit != $percent) {
      codeUnitOr |= codeUnit;
      continue;
    }

    // We found a %. The slice from `sliceStart` to `i` holds characters that
    // can be copied to the buffer as-is.
    if (i > sliceStart) {
      _checkForInvalidCodeUnit(codeUnitOr, codeUnits, sliceStart, i);
      buffer.addAll(codeUnits.getRange(sliceStart, i));
    }

    // Now decode the percent-encoded byte and add it as well. If the input
    // runs out first, report how far into the escape we got.
    i++;
    if (i >= end) return _lastPercent;

    var firstDigit = digitForCodeUnit(codeUnits, i);
    i++;
    if (i >= end) return 16 * firstDigit;

    var secondDigit = digitForCodeUnit(codeUnits, i);
    buffer.add(16 * firstDigit + secondDigit);

    // The next iteration will look for non-% characters again.
    sliceStart = i + 1;
  }

  if (end > sliceStart) {
    _checkForInvalidCodeUnit(codeUnitOr, codeUnits, sliceStart, end);

    // Only take the copy-free shortcut when the remaining run really is the
    // entire list. Callers may pass a sub-range (a slice, or a chunk whose
    // leading digits completed an escape from the previous chunk), and in
    // that case adding all of [codeUnits] would emit bytes outside the range.
    if (sliceStart == 0 && end == codeUnits.length) {
      buffer.addAll(codeUnits);
    } else {
      buffer.addAll(codeUnits.getRange(sliceStart, end));
    }
  }

  return null;
}
231
/// Throws a [FormatException] for the first code unit in [codeUnits] between
/// [start] (inclusive) and [end] (exclusive) that is not in the ASCII range.
///
/// [codeUnitOr] is a bitwise OR of code units computed by the caller. When it
/// is itself within the ASCII range, the scan is skipped entirely.
void _checkForInvalidCodeUnit(int codeUnitOr, List<int> codeUnits, int start,
    int end) {
  var orIsAscii = codeUnitOr >= 0 && codeUnitOr <= 0x7f;
  if (orIsAscii) return;

  // Something out of range was folded into the OR; find where it is so the
  // exception can point at it.
  var index = start;
  while (index < end) {
    var unit = codeUnits[index];
    if (unit < 0 || unit > 0x7f) {
      var hex = unit.toRadixString(16).padLeft(4, '0');
      throw new FormatException(
          "Non-ASCII code unit "
          "U+${hex}",
          codeUnits, index);
    }
    index++;
  }
}
OLDNEW
« no previous file with comments | « lib/src/percent.dart ('k') | lib/src/percent/encoder.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698