OLD | NEW |
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 library convert.hex.decoder; | 5 library convert.hex.decoder; |
6 | 6 |
7 import 'dart:convert'; | 7 import 'dart:convert'; |
8 import 'dart:typed_data'; | 8 import 'dart:typed_data'; |
9 | 9 |
10 import 'package:charcode/ascii.dart'; | 10 import '../utils.dart'; |
11 | 11 |
12 /// The canonical instance of [HexDecoder]. | 12 /// The canonical instance of [HexDecoder]. |
13 const hexDecoder = const HexDecoder._(); | 13 const hexDecoder = const HexDecoder._(); |
14 | 14 |
15 /// A converter that decodes hexadecimal strings into byte arrays. | 15 /// A converter that decodes hexadecimal strings into byte arrays. |
16 /// | 16 /// |
17 /// Because two hexadecimal digits correspond to a single byte, this will throw | 17 /// Because two hexadecimal digits correspond to a single byte, this will throw |
18 /// a [FormatException] if given an odd-length string. It will also throw a | 18 /// a [FormatException] if given an odd-length string. It will also throw a |
19 /// [FormatException] if given a string containing non-hexadecimal code units. | 19 /// [FormatException] if given a string containing non-hexadecimal code units. |
20 class HexDecoder extends Converter<String, List<int>> { | 20 class HexDecoder extends Converter<String, List<int>> { |
(...skipping 25 matching lines...) Expand all Loading... |
46 /// hexadecimal digits. Since it's the most significant digit, it's always a | 46 /// hexadecimal digits. Since it's the most significant digit, it's always a |
47 /// multiple of 16. | 47 /// multiple of 16. |
48 int _lastDigit; | 48 int _lastDigit; |
49 | 49 |
50 _HexDecoderSink(this._sink); | 50 _HexDecoderSink(this._sink); |
51 | 51 |
52 void addSlice(String string, int start, int end, bool isLast) { | 52 void addSlice(String string, int start, int end, bool isLast) { |
53 RangeError.checkValidRange(start, end, string.length); | 53 RangeError.checkValidRange(start, end, string.length); |
54 | 54 |
55 if (start == end) { | 55 if (start == end) { |
56 if (isLast) close(); | 56 if (isLast) _close(string, end); |
57 return; | 57 return; |
58 } | 58 } |
59 | 59 |
60 var codeUnits = string.codeUnits; | 60 var codeUnits = string.codeUnits; |
61 var bytes; | 61 var bytes; |
62 var bytesStart; | 62 var bytesStart; |
63 if (_lastDigit == null) { | 63 if (_lastDigit == null) { |
64 bytes = new Uint8List((end - start) ~/ 2); | 64 bytes = new Uint8List((end - start) ~/ 2); |
65 bytesStart = 0; | 65 bytesStart = 0; |
66 } else { | 66 } else { |
67 var hexPairs = (end - start - 1) ~/ 2; | 67 var hexPairs = (end - start - 1) ~/ 2; |
68 bytes = new Uint8List(1 + hexPairs); | 68 bytes = new Uint8List(1 + hexPairs); |
69 bytes[0] = _lastDigit + _digitForCodeUnit(codeUnits, start); | 69 bytes[0] = _lastDigit + digitForCodeUnit(codeUnits, start); |
70 start++; | 70 start++; |
71 bytesStart = 1; | 71 bytesStart = 1; |
72 } | 72 } |
73 | 73 |
74 _lastDigit = _decode(codeUnits, start, end, bytes, bytesStart); | 74 _lastDigit = _decode(codeUnits, start, end, bytes, bytesStart); |
75 | 75 |
76 _sink.add(bytes); | 76 _sink.add(bytes); |
77 if (isLast) close(); | 77 if (isLast) _close(string, end); |
78 } | 78 } |
79 | 79 |
80 ByteConversionSink asUtf8Sink(bool allowMalformed) => | 80 ByteConversionSink asUtf8Sink(bool allowMalformed) => |
81 new _HexDecoderByteSink(_sink); | 81 new _HexDecoderByteSink(_sink); |
82 | 82 |
83 void close() { | 83 void close() => _close(); |
| 84 |
| 85 /// Like [close], but includes [string] and [index] in the [FormatException] |
| 86 /// if one is thrown. |
| 87 void _close([String string, int index]) { |
84 if (_lastDigit != null) { | 88 if (_lastDigit != null) { |
85 throw new FormatException("Invalid input length, must be even."); | 89 throw new FormatException( |
| 90 "Input ended with incomplete encoded byte.", string, index); |
86 } | 91 } |
87 | 92 |
88 _sink.close(); | 93 _sink.close(); |
89 } | 94 } |
90 } | 95 } |
91 | 96 |
92 /// A conversion sink for chunked hexadecimal decoding from UTF-8 bytes. | 97 /// A conversion sink for chunked hexadecimal decoding from UTF-8 bytes. |
93 class _HexDecoderByteSink extends ByteConversionSinkBase { | 98 class _HexDecoderByteSink extends ByteConversionSinkBase { |
94 /// The underlying sink to which decoded byte arrays will be passed. | 99 /// The underlying sink to which decoded byte arrays will be passed. |
95 final Sink<List<int>> _sink; | 100 final Sink<List<int>> _sink; |
96 | 101 |
97 /// The trailing digit from the previous string. | 102 /// The trailing digit from the previous string. |
98 /// | 103 /// |
99 /// This will be non-`null` if the most recent string had an odd number of | 104 /// This will be non-`null` if the most recent string had an odd number of |
100 /// hexadecimal digits. Since it's the most significant digit, it's always a | 105 /// hexadecimal digits. Since it's the most significant digit, it's always a |
101 /// multiple of 16. | 106 /// multiple of 16. |
102 int _lastDigit; | 107 int _lastDigit; |
103 | 108 |
104 _HexDecoderByteSink(this._sink); | 109 _HexDecoderByteSink(this._sink); |
105 | 110 |
106 void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false); | 111 void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false); |
107 | 112 |
108 void addSlice(List<int> chunk, int start, int end, bool isLast) { | 113 void addSlice(List<int> chunk, int start, int end, bool isLast) { |
109 RangeError.checkValidRange(start, end, chunk.length); | 114 RangeError.checkValidRange(start, end, chunk.length); |
110 | 115 |
111 if (start == end) { | 116 if (start == end) { |
112 if (isLast) close(); | 117 if (isLast) _close(chunk, end); |
113 return; | 118 return; |
114 } | 119 } |
115 | 120 |
116 var bytes; | 121 var bytes; |
117 var bytesStart; | 122 var bytesStart; |
118 if (_lastDigit == null) { | 123 if (_lastDigit == null) { |
119 bytes = new Uint8List((end - start) ~/ 2); | 124 bytes = new Uint8List((end - start) ~/ 2); |
120 bytesStart = 0; | 125 bytesStart = 0; |
121 } else { | 126 } else { |
122 var hexPairs = (end - start - 1) ~/ 2; | 127 var hexPairs = (end - start - 1) ~/ 2; |
123 bytes = new Uint8List(1 + hexPairs); | 128 bytes = new Uint8List(1 + hexPairs); |
124 bytes[0] = _lastDigit + _digitForCodeUnit(chunk, start); | 129 bytes[0] = _lastDigit + digitForCodeUnit(chunk, start); |
125 start++; | 130 start++; |
126 bytesStart = 1; | 131 bytesStart = 1; |
127 } | 132 } |
128 | 133 |
129 _lastDigit = _decode(chunk, start, end, bytes, bytesStart); | 134 _lastDigit = _decode(chunk, start, end, bytes, bytesStart); |
130 | 135 |
131 _sink.add(bytes); | 136 _sink.add(bytes); |
132 if (isLast) close(); | 137 if (isLast) _close(chunk, end); |
133 } | 138 } |
134 | 139 |
135 void close() { | 140 void close() => _close(); |
| 141 |
| 142 /// Like [close], but includes [chunk] and [index] in the [FormatException] |
| 143 /// if one is thrown. |
| 144 void _close([List<int> chunk, int index]) { |
136 if (_lastDigit != null) { | 145 if (_lastDigit != null) { |
137 throw new FormatException("Invalid input length, must be even."); | 146 throw new FormatException( |
| 147 "Input ended with incomplete encoded byte.", chunk, index); |
138 } | 148 } |
139 | 149 |
140 _sink.close(); | 150 _sink.close(); |
141 } | 151 } |
142 } | 152 } |
143 | 153 |
144 /// Decodes [codeUnits] and writes the result into [destination]. | 154 /// Decodes [codeUnits] and writes the result into [destination]. |
145 /// | 155 /// |
146 /// This reads from [codeUnits] between [sourceStart] and [sourceEnd]. It writes | 156 /// This reads from [codeUnits] between [sourceStart] and [sourceEnd]. It writes |
147 /// the result into [destination] starting at [destinationStart]. | 157 /// the result into [destination] starting at [destinationStart]. |
148 /// | 158 /// |
149 /// If there's a leftover digit at the end of the decoding, this returns that | 159 /// If there's a leftover digit at the end of the decoding, this returns that |
150 /// digit. Otherwise it returns `null`. | 160 /// digit. Otherwise it returns `null`. |
151 int _decode(List<int> codeUnits, int sourceStart, int sourceEnd, | 161 int _decode(List<int> codeUnits, int sourceStart, int sourceEnd, |
152 List<int> destination, int destinationStart) { | 162 List<int> destination, int destinationStart) { |
153 var destinationIndex = destinationStart; | 163 var destinationIndex = destinationStart; |
154 for (var i = sourceStart; i < sourceEnd - 1; i += 2) { | 164 for (var i = sourceStart; i < sourceEnd - 1; i += 2) { |
155 var firstDigit = _digitForCodeUnit(codeUnits, i); | 165 var firstDigit = digitForCodeUnit(codeUnits, i); |
156 var secondDigit = _digitForCodeUnit(codeUnits, i + 1); | 166 var secondDigit = digitForCodeUnit(codeUnits, i + 1); |
157 destination[destinationIndex++] = 16 * firstDigit + secondDigit; | 167 destination[destinationIndex++] = 16 * firstDigit + secondDigit; |
158 } | 168 } |
159 | 169 |
160 if ((sourceEnd - sourceStart).isEven) return null; | 170 if ((sourceEnd - sourceStart).isEven) return null; |
161 return 16 * _digitForCodeUnit(codeUnits, sourceEnd - 1); | 171 return 16 * digitForCodeUnit(codeUnits, sourceEnd - 1); |
162 } | 172 } |
163 | |
164 /// Returns the digit (0 through 15) corresponding to the hexadecimal code unit | |
165 /// at index [i] in [codeUnits]. | |
166 /// | |
167 /// If the given code unit isn't valid hexadecimal, throws a [FormatException]. | |
168 int _digitForCodeUnit(List<int> codeUnits, int index) { | |
169 // If the code unit is a numeral, get its value. XOR works because 0 in ASCII | |
170 // is `0b110000` and the other numerals come after it in ascending order and | |
171 // take up at most four bits. | |
172 // | |
173 // We check for digits first because it ensures there's only a single branch | |
174 // for 10 out of 16 of the expected cases. We don't count the `digit >= 0` | |
175 // check because branch prediction will always work on it for valid data. | |
176 var codeUnit = codeUnits[index]; | |
177 var digit = $0 ^ codeUnit; | |
178 if (digit <= 9) { | |
179 if (digit >= 0) return digit; | |
180 } else { | |
181 // If the code unit is an uppercase letter, convert it to lowercase. This | |
182 // works because uppercase letters in ASCII are exactly `0b100000 = 0x20` | |
183 // less than lowercase letters, so if we ensure that that bit is 1 we ensure | |
184 // that the letter is lowercase. | |
185 var letter = 0x20 | codeUnit; | |
186 if ($a <= letter && letter <= $f) return letter - $a + 10; | |
187 } | |
188 | |
189 throw new FormatException( | |
190 "Invalid hexadecimal code unit " | |
191 "U+${codeUnit.toRadixString(16).padLeft(4, '0')}.", | |
192 codeUnits, index); | |
193 } | |
OLD | NEW |