lib/src/hex/decoder.dart - Issue 1393003003: Add a percent-encoding converter.

Side by Side Diff: lib/src/hex/decoder.dart

Issue 1393003003: Add a percent-encoding converter. (Closed) Base URL: git@github.com:dart-lang/convert.git@master

Patch Set: Code review changes Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 library convert.hex.decoder;	5 library convert.hex.decoder;

6	6

7 import 'dart:convert';	7 import 'dart:convert';

8 import 'dart:typed_data';	8 import 'dart:typed_data';

9	9

10 import 'package:charcode/ascii.dart';	10 import '../utils.dart';

11	11

12 /// The canonical instance of [HexDecoder].	12 /// The canonical instance of [HexDecoder].

13 const hexDecoder = const HexDecoder._();	13 const hexDecoder = const HexDecoder._();

14	14

15 /// A converter that decodes hexadecimal strings into byte arrays.	15 /// A converter that decodes hexadecimal strings into byte arrays.

16 ///	16 ///

17 /// Because two hexadecimal digits correspond to a single byte, this will throw	17 /// Because two hexadecimal digits correspond to a single byte, this will throw

18 /// a [FormatException] if given an odd-length string. It will also throw a	18 /// a [FormatException] if given an odd-length string. It will also throw a

19 /// [FormatException] if given a string containing non-hexadecimal code units.	19 /// [FormatException] if given a string containing non-hexadecimal code units.

20 class HexDecoder extends Converter<String, List<int>> {	20 class HexDecoder extends Converter<String, List<int>> {

(...skipping 25 matching lines...) Expand all Loading...
46 /// hexadecimal digits. Since it's the most significant digit, it's always a	46 /// hexadecimal digits. Since it's the most significant digit, it's always a

47 /// multiple of 16.	47 /// multiple of 16.

48 int _lastDigit;	48 int _lastDigit;

49	49

50 _HexDecoderSink(this._sink);	50 _HexDecoderSink(this._sink);

51	51

52 void addSlice(String string, int start, int end, bool isLast) {	52 void addSlice(String string, int start, int end, bool isLast) {

53 RangeError.checkValidRange(start, end, string.length);	53 RangeError.checkValidRange(start, end, string.length);

54	54

55 if (start == end) {	55 if (start == end) {

56 if (isLast) close();	56 if (isLast) _close(string, end);

57 return;	57 return;

58 }	58 }

59	59

60 var codeUnits = string.codeUnits;	60 var codeUnits = string.codeUnits;

61 var bytes;	61 var bytes;

62 var bytesStart;	62 var bytesStart;

63 if (_lastDigit == null) {	63 if (_lastDigit == null) {

64 bytes = new Uint8List((end - start) ~/ 2);	64 bytes = new Uint8List((end - start) ~/ 2);

65 bytesStart = 0;	65 bytesStart = 0;

66 } else {	66 } else {

67 var hexPairs = (end - start - 1) ~/ 2;	67 var hexPairs = (end - start - 1) ~/ 2;

68 bytes = new Uint8List(1 + hexPairs);	68 bytes = new Uint8List(1 + hexPairs);

69 bytes[0] = _lastDigit + _digitForCodeUnit(codeUnits, start);	69 bytes[0] = _lastDigit + digitForCodeUnit(codeUnits, start);

70 start++;	70 start++;

71 bytesStart = 1;	71 bytesStart = 1;

72 }	72 }

73	73

74 _lastDigit = _decode(codeUnits, start, end, bytes, bytesStart);	74 _lastDigit = _decode(codeUnits, start, end, bytes, bytesStart);

75	75

76 _sink.add(bytes);	76 _sink.add(bytes);

77 if (isLast) close();	77 if (isLast) _close(string, end);

78 }	78 }

79	79

80 ByteConversionSink asUtf8Sink(bool allowMalformed) =>	80 ByteConversionSink asUtf8Sink(bool allowMalformed) =>

81 new _HexDecoderByteSink(_sink);	81 new _HexDecoderByteSink(_sink);

82	82

83 void close() {	83 void close() => _close();

	84

	85 /// Like [close], but includes [string] and [index] in the [FormatException]

	86 /// if one is thrown.

	87 void _close([String string, int index]) {

84 if (_lastDigit != null) {	88 if (_lastDigit != null) {

85 throw new FormatException("Invalid input length, must be even.");	89 throw new FormatException(

	90 "Input ended with incomplete encoded byte.", string, index);

86 }	91 }

87	92

88 _sink.close();	93 _sink.close();

89 }	94 }

90 }	95 }

91	96

92 /// A conversion sink for chunked hexadecimal decoding from UTF-8 bytes.	97 /// A conversion sink for chunked hexadecimal decoding from UTF-8 bytes.

93 class _HexDecoderByteSink extends ByteConversionSinkBase {	98 class _HexDecoderByteSink extends ByteConversionSinkBase {

94 /// The underlying sink to which decoded byte arrays will be passed.	99 /// The underlying sink to which decoded byte arrays will be passed.

95 final Sink<List<int>> _sink;	100 final Sink<List<int>> _sink;

96	101

97 /// The trailing digit from the previous string.	102 /// The trailing digit from the previous string.

98 ///	103 ///

99 /// This will be non-`null` if the most recent string had an odd number of	104 /// This will be non-`null` if the most recent string had an odd number of

100 /// hexadecimal digits. Since it's the most significant digit, it's always a	105 /// hexadecimal digits. Since it's the most significant digit, it's always a

101 /// multiple of 16.	106 /// multiple of 16.

102 int _lastDigit;	107 int _lastDigit;

103	108

104 _HexDecoderByteSink(this._sink);	109 _HexDecoderByteSink(this._sink);

105	110

106 void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);	111 void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);

107	112

108 void addSlice(List<int> chunk, int start, int end, bool isLast) {	113 void addSlice(List<int> chunk, int start, int end, bool isLast) {

109 RangeError.checkValidRange(start, end, chunk.length);	114 RangeError.checkValidRange(start, end, chunk.length);

110	115

111 if (start == end) {	116 if (start == end) {

112 if (isLast) close();	117 if (isLast) _close(chunk, end);

113 return;	118 return;

114 }	119 }

115	120

116 var bytes;	121 var bytes;

117 var bytesStart;	122 var bytesStart;

118 if (_lastDigit == null) {	123 if (_lastDigit == null) {

119 bytes = new Uint8List((end - start) ~/ 2);	124 bytes = new Uint8List((end - start) ~/ 2);

120 bytesStart = 0;	125 bytesStart = 0;

121 } else {	126 } else {

122 var hexPairs = (end - start - 1) ~/ 2;	127 var hexPairs = (end - start - 1) ~/ 2;

123 bytes = new Uint8List(1 + hexPairs);	128 bytes = new Uint8List(1 + hexPairs);

124 bytes[0] = _lastDigit + _digitForCodeUnit(chunk, start);	129 bytes[0] = _lastDigit + digitForCodeUnit(chunk, start);

125 start++;	130 start++;

126 bytesStart = 1;	131 bytesStart = 1;

127 }	132 }

128	133

129 _lastDigit = _decode(chunk, start, end, bytes, bytesStart);	134 _lastDigit = _decode(chunk, start, end, bytes, bytesStart);

130	135

131 _sink.add(bytes);	136 _sink.add(bytes);

132 if (isLast) close();	137 if (isLast) _close(chunk, end);

133 }	138 }

134	139

135 void close() {	140 void close() => _close();

	141

	142 /// Like [close], but includes [chunk] and [index] in the [FormatException]

	143 /// if one is thrown.

	144 void _close([List<int> chunk, int index]) {

136 if (_lastDigit != null) {	145 if (_lastDigit != null) {

137 throw new FormatException("Invalid input length, must be even.");	146 throw new FormatException(

	147 "Input ended with incomplete encoded byte.", chunk, index);

138 }	148 }

139	149

140 _sink.close();	150 _sink.close();

141 }	151 }

142 }	152 }

143	153

144 /// Decodes [codeUnits] and writes the result into [destination].	154 /// Decodes [codeUnits] and writes the result into [destination].

145 ///	155 ///

146 /// This reads from [codeUnits] between [sourceStart] and [sourceEnd]. It writes	156 /// This reads from [codeUnits] between [sourceStart] and [sourceEnd]. It writes

147 /// the result into [destination] starting at [destinationStart].	157 /// the result into [destination] starting at [destinationStart].

148 ///	158 ///

149 /// If there's a leftover digit at the end of the decoding, this returns that	159 /// If there's a leftover digit at the end of the decoding, this returns that

150 /// digit. Otherwise it returns `null`.	160 /// digit. Otherwise it returns `null`.

151 int _decode(List<int> codeUnits, int sourceStart, int sourceEnd,	161 int _decode(List<int> codeUnits, int sourceStart, int sourceEnd,

152 List<int> destination, int destinationStart) {	162 List<int> destination, int destinationStart) {

153 var destinationIndex = destinationStart;	163 var destinationIndex = destinationStart;

154 for (var i = sourceStart; i < sourceEnd - 1; i += 2) {	164 for (var i = sourceStart; i < sourceEnd - 1; i += 2) {

155 var firstDigit = _digitForCodeUnit(codeUnits, i);	165 var firstDigit = digitForCodeUnit(codeUnits, i);

156 var secondDigit = _digitForCodeUnit(codeUnits, i + 1);	166 var secondDigit = digitForCodeUnit(codeUnits, i + 1);

157 destination[destinationIndex++] = 16 * firstDigit + secondDigit;	167 destination[destinationIndex++] = 16 * firstDigit + secondDigit;

158 }	168 }

159	169

160 if ((sourceEnd - sourceStart).isEven) return null;	170 if ((sourceEnd - sourceStart).isEven) return null;

161 return 16 * _digitForCodeUnit(codeUnits, sourceEnd - 1);	171 return 16 * digitForCodeUnit(codeUnits, sourceEnd - 1);

162 }	172 }

163

164 /// Returns the digit (0 through 15) corresponding to the hexadecimal code unit

165 /// at index [index] in [codeUnits].

166 ///

167 /// If the given code unit isn't valid hexadecimal, throws a [FormatException].

168 int _digitForCodeUnit(List<int> codeUnits, int index) {

169 // If the code unit is a numeral, get its value. XOR works because 0 in ASCII

170 // is `0b110000` and the other numerals come after it in ascending order and

171 // take up at most four bits.

172 //

173 // We check for digits first because it ensures there's only a single branch

174 // for 10 out of 16 of the expected cases. We don't count the `digit >= 0`

175 // check because branch prediction will always work on it for valid data.

176 var codeUnit = codeUnits[index];

177 var digit = $0 ^ codeUnit;

178 if (digit <= 9) {

179 if (digit >= 0) return digit;

180 } else {

181 // If the code unit is an uppercase letter, convert it to lowercase. This

182 // works because uppercase letters in ASCII are exactly `0b100000 = 0x20`

183 // less than lowercase letters, so if we ensure that that bit is 1 we ensure

184 // that the letter is lowercase.

185 var letter = 0x20 | codeUnit;

186 if ($a <= letter && letter <= $f) return letter - $a + 10;

187 }

188

189 throw new FormatException(

190 "Invalid hexadecimal code unit "

191 "U+${codeUnit.toRadixString(16).padLeft(4, '0')}.",

192 codeUnits, index);

193 }

OLD	NEW

« no previous file with comments | « lib/convert.dart ('k') | lib/src/hex/encoder.dart » ('j') | no next file with comments »