Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(198)

Side by Side Diff: lib/src/hex/decoder.dart

Issue 1393003003: Add a percent-encoding converter. (Closed) Base URL: git@github.com:dart-lang/convert.git@master
Patch Set: Code review changes Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « lib/convert.dart ('k') | lib/src/hex/encoder.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 library convert.hex.decoder; 5 library convert.hex.decoder;
6 6
7 import 'dart:convert'; 7 import 'dart:convert';
8 import 'dart:typed_data'; 8 import 'dart:typed_data';
9 9
10 import 'package:charcode/ascii.dart'; 10 import '../utils.dart';
11 11
12 /// The canonical instance of [HexDecoder]. 12 /// The canonical instance of [HexDecoder].
13 const hexDecoder = const HexDecoder._(); 13 const hexDecoder = const HexDecoder._();
14 14
15 /// A converter that decodes hexadecimal strings into byte arrays. 15 /// A converter that decodes hexadecimal strings into byte arrays.
16 /// 16 ///
17 /// Because two hexadecimal digits correspond to a single byte, this will throw 17 /// Because two hexadecimal digits correspond to a single byte, this will throw
18 /// a [FormatException] if given an odd-length string. It will also throw a 18 /// a [FormatException] if given an odd-length string. It will also throw a
19 /// [FormatException] if given a string containing non-hexadecimal code units. 19 /// [FormatException] if given a string containing non-hexadecimal code units.
20 class HexDecoder extends Converter<String, List<int>> { 20 class HexDecoder extends Converter<String, List<int>> {
(...skipping 25 matching lines...) Expand all
46 /// hexadecimal digits. Since it's the most significant digit, it's always a 46 /// hexadecimal digits. Since it's the most significant digit, it's always a
47 /// multiple of 16. 47 /// multiple of 16.
48 int _lastDigit; 48 int _lastDigit;
49 49
50 _HexDecoderSink(this._sink); 50 _HexDecoderSink(this._sink);
51 51
52 void addSlice(String string, int start, int end, bool isLast) { 52 void addSlice(String string, int start, int end, bool isLast) {
53 RangeError.checkValidRange(start, end, string.length); 53 RangeError.checkValidRange(start, end, string.length);
54 54
55 if (start == end) { 55 if (start == end) {
56 if (isLast) close(); 56 if (isLast) _close(string, end);
57 return; 57 return;
58 } 58 }
59 59
60 var codeUnits = string.codeUnits; 60 var codeUnits = string.codeUnits;
61 var bytes; 61 var bytes;
62 var bytesStart; 62 var bytesStart;
63 if (_lastDigit == null) { 63 if (_lastDigit == null) {
64 bytes = new Uint8List((end - start) ~/ 2); 64 bytes = new Uint8List((end - start) ~/ 2);
65 bytesStart = 0; 65 bytesStart = 0;
66 } else { 66 } else {
67 var hexPairs = (end - start - 1) ~/ 2; 67 var hexPairs = (end - start - 1) ~/ 2;
68 bytes = new Uint8List(1 + hexPairs); 68 bytes = new Uint8List(1 + hexPairs);
69 bytes[0] = _lastDigit + _digitForCodeUnit(codeUnits, start); 69 bytes[0] = _lastDigit + digitForCodeUnit(codeUnits, start);
70 start++; 70 start++;
71 bytesStart = 1; 71 bytesStart = 1;
72 } 72 }
73 73
74 _lastDigit = _decode(codeUnits, start, end, bytes, bytesStart); 74 _lastDigit = _decode(codeUnits, start, end, bytes, bytesStart);
75 75
76 _sink.add(bytes); 76 _sink.add(bytes);
77 if (isLast) close(); 77 if (isLast) _close(string, end);
78 } 78 }
79 79
80 ByteConversionSink asUtf8Sink(bool allowMalformed) => 80 ByteConversionSink asUtf8Sink(bool allowMalformed) =>
81 new _HexDecoderByteSink(_sink); 81 new _HexDecoderByteSink(_sink);
82 82
83 void close() { 83 void close() => _close();
84
85 /// Like [close], but includes [string] and [index] in the [FormatException]
86 /// if one is thrown.
87 void _close([String string, int index]) {
84 if (_lastDigit != null) { 88 if (_lastDigit != null) {
85 throw new FormatException("Invalid input length, must be even."); 89 throw new FormatException(
90 "Input ended with incomplete encoded byte.", string, index);
86 } 91 }
87 92
88 _sink.close(); 93 _sink.close();
89 } 94 }
90 } 95 }
91 96
92 /// A conversion sink for chunked hexadecimal decoding from UTF-8 bytes. 97 /// A conversion sink for chunked hexadecimal decoding from UTF-8 bytes.
93 class _HexDecoderByteSink extends ByteConversionSinkBase { 98 class _HexDecoderByteSink extends ByteConversionSinkBase {
94 /// The underlying sink to which decoded byte arrays will be passed. 99 /// The underlying sink to which decoded byte arrays will be passed.
95 final Sink<List<int>> _sink; 100 final Sink<List<int>> _sink;
96 101
97 /// The trailing digit from the previous string. 102 /// The trailing digit from the previous string.
98 /// 103 ///
99 /// This will be non-`null` if the most recent string had an odd number of 104 /// This will be non-`null` if the most recent string had an odd number of
100 /// hexadecimal digits. Since it's the most significant digit, it's always a 105 /// hexadecimal digits. Since it's the most significant digit, it's always a
101 /// multiple of 16. 106 /// multiple of 16.
102 int _lastDigit; 107 int _lastDigit;
103 108
104 _HexDecoderByteSink(this._sink); 109 _HexDecoderByteSink(this._sink);
105 110
106 void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false); 111 void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);
107 112
108 void addSlice(List<int> chunk, int start, int end, bool isLast) { 113 void addSlice(List<int> chunk, int start, int end, bool isLast) {
109 RangeError.checkValidRange(start, end, chunk.length); 114 RangeError.checkValidRange(start, end, chunk.length);
110 115
111 if (start == end) { 116 if (start == end) {
112 if (isLast) close(); 117 if (isLast) _close(chunk, end);
113 return; 118 return;
114 } 119 }
115 120
116 var bytes; 121 var bytes;
117 var bytesStart; 122 var bytesStart;
118 if (_lastDigit == null) { 123 if (_lastDigit == null) {
119 bytes = new Uint8List((end - start) ~/ 2); 124 bytes = new Uint8List((end - start) ~/ 2);
120 bytesStart = 0; 125 bytesStart = 0;
121 } else { 126 } else {
122 var hexPairs = (end - start - 1) ~/ 2; 127 var hexPairs = (end - start - 1) ~/ 2;
123 bytes = new Uint8List(1 + hexPairs); 128 bytes = new Uint8List(1 + hexPairs);
124 bytes[0] = _lastDigit + _digitForCodeUnit(chunk, start); 129 bytes[0] = _lastDigit + digitForCodeUnit(chunk, start);
125 start++; 130 start++;
126 bytesStart = 1; 131 bytesStart = 1;
127 } 132 }
128 133
129 _lastDigit = _decode(chunk, start, end, bytes, bytesStart); 134 _lastDigit = _decode(chunk, start, end, bytes, bytesStart);
130 135
131 _sink.add(bytes); 136 _sink.add(bytes);
132 if (isLast) close(); 137 if (isLast) _close(chunk, end);
133 } 138 }
134 139
135 void close() { 140 void close() => _close();
141
142 /// Like [close], but includes [chunk] and [index] in the [FormatException]
143 /// if one is thrown.
144 void _close([List<int> chunk, int index]) {
136 if (_lastDigit != null) { 145 if (_lastDigit != null) {
137 throw new FormatException("Invalid input length, must be even."); 146 throw new FormatException(
147 "Input ended with incomplete encoded byte.", chunk, index);
138 } 148 }
139 149
140 _sink.close(); 150 _sink.close();
141 } 151 }
142 } 152 }
143 153
144 /// Decodes [codeUnits] and writes the result into [destination]. 154 /// Decodes [codeUnits] and writes the result into [destination].
145 /// 155 ///
146 /// This reads from [codeUnits] between [sourceStart] and [sourceEnd]. It writes 156 /// This reads from [codeUnits] between [sourceStart] and [sourceEnd]. It writes
147 /// the result into [destination] starting at [destinationStart]. 157 /// the result into [destination] starting at [destinationStart].
148 /// 158 ///
149 /// If there's a leftover digit at the end of the decoding, this returns that 159 /// If there's a leftover digit at the end of the decoding, this returns that
150 /// digit. Otherwise it returns `null`. 160 /// digit. Otherwise it returns `null`.
151 int _decode(List<int> codeUnits, int sourceStart, int sourceEnd, 161 int _decode(List<int> codeUnits, int sourceStart, int sourceEnd,
152 List<int> destination, int destinationStart) { 162 List<int> destination, int destinationStart) {
153 var destinationIndex = destinationStart; 163 var destinationIndex = destinationStart;
154 for (var i = sourceStart; i < sourceEnd - 1; i += 2) { 164 for (var i = sourceStart; i < sourceEnd - 1; i += 2) {
155 var firstDigit = _digitForCodeUnit(codeUnits, i); 165 var firstDigit = digitForCodeUnit(codeUnits, i);
156 var secondDigit = _digitForCodeUnit(codeUnits, i + 1); 166 var secondDigit = digitForCodeUnit(codeUnits, i + 1);
157 destination[destinationIndex++] = 16 * firstDigit + secondDigit; 167 destination[destinationIndex++] = 16 * firstDigit + secondDigit;
158 } 168 }
159 169
160 if ((sourceEnd - sourceStart).isEven) return null; 170 if ((sourceEnd - sourceStart).isEven) return null;
161 return 16 * _digitForCodeUnit(codeUnits, sourceEnd - 1); 171 return 16 * digitForCodeUnit(codeUnits, sourceEnd - 1);
162 } 172 }
163
164 /// Returns the digit (0 through 15) corresponding to the hexadecimal code unit
165 /// at index [index] in [codeUnits].
166 ///
167 /// If the given code unit isn't valid hexadecimal, throws a [FormatException].
168 int _digitForCodeUnit(List<int> codeUnits, int index) {
169 // If the code unit is a numeral, get its value. XOR works because 0 in ASCII
170 // is `0b110000` and the other numerals come after it in ascending order and
171 // take up at most four bits.
172 //
173 // We check for digits first because it ensures there's only a single branch
174 // for 10 out of 16 of the expected cases. We don't count the `digit >= 0`
175 // check because branch prediction will always work on it for valid data.
176 var codeUnit = codeUnits[index];
177 var digit = $0 ^ codeUnit;
178 if (digit <= 9) {
179 if (digit >= 0) return digit;
180 } else {
181 // If the code unit is an uppercase letter, convert it to lowercase. This
182 // works because uppercase letters in ASCII are exactly `0b100000 = 0x20`
183 // less than lowercase letters, so if we ensure that that bit is 1 we ensure
184 // that the letter is lowercase.
185 var letter = 0x20 | codeUnit;
186 if ($a <= letter && letter <= $f) return letter - $a + 10;
187 }
188
189 throw new FormatException(
190 "Invalid hexadecimal code unit "
191 "U+${codeUnit.toRadixString(16).padLeft(4, '0')}.",
192 codeUnits, index);
193 }
OLD | NEW
« no previous file with comments | « lib/convert.dart ('k') | lib/src/hex/encoder.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698