| OLD | NEW |
| 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 library convert.hex.decoder; | 5 library convert.hex.decoder; |
| 6 | 6 |
| 7 import 'dart:convert'; | 7 import 'dart:convert'; |
| 8 import 'dart:typed_data'; | 8 import 'dart:typed_data'; |
| 9 | 9 |
| 10 import 'package:charcode/ascii.dart'; | 10 import '../utils.dart'; |
| 11 | 11 |
| 12 /// The canonical instance of [HexDecoder]. | 12 /// The canonical instance of [HexDecoder]. |
| 13 const hexDecoder = const HexDecoder._(); | 13 const hexDecoder = const HexDecoder._(); |
| 14 | 14 |
| 15 /// A converter that decodes hexadecimal strings into byte arrays. | 15 /// A converter that decodes hexadecimal strings into byte arrays. |
| 16 /// | 16 /// |
| 17 /// Because two hexadecimal digits correspond to a single byte, this will throw | 17 /// Because two hexadecimal digits correspond to a single byte, this will throw |
| 18 /// a [FormatException] if given an odd-length string. It will also throw a | 18 /// a [FormatException] if given an odd-length string. It will also throw a |
| 19 /// [FormatException] if given a string containing non-hexadecimal code units. | 19 /// [FormatException] if given a string containing non-hexadecimal code units. |
| 20 class HexDecoder extends Converter<String, List<int>> { | 20 class HexDecoder extends Converter<String, List<int>> { |
| (...skipping 25 matching lines...) Expand all Loading... |
| 46 /// hexadecimal digits. Since it's the most significant digit, it's always a | 46 /// hexadecimal digits. Since it's the most significant digit, it's always a |
| 47 /// multiple of 16. | 47 /// multiple of 16. |
| 48 int _lastDigit; | 48 int _lastDigit; |
| 49 | 49 |
| 50 _HexDecoderSink(this._sink); | 50 _HexDecoderSink(this._sink); |
| 51 | 51 |
| 52 void addSlice(String string, int start, int end, bool isLast) { | 52 void addSlice(String string, int start, int end, bool isLast) { |
| 53 RangeError.checkValidRange(start, end, string.length); | 53 RangeError.checkValidRange(start, end, string.length); |
| 54 | 54 |
| 55 if (start == end) { | 55 if (start == end) { |
| 56 if (isLast) close(); | 56 if (isLast) _close(string, end); |
| 57 return; | 57 return; |
| 58 } | 58 } |
| 59 | 59 |
| 60 var codeUnits = string.codeUnits; | 60 var codeUnits = string.codeUnits; |
| 61 var bytes; | 61 var bytes; |
| 62 var bytesStart; | 62 var bytesStart; |
| 63 if (_lastDigit == null) { | 63 if (_lastDigit == null) { |
| 64 bytes = new Uint8List((end - start) ~/ 2); | 64 bytes = new Uint8List((end - start) ~/ 2); |
| 65 bytesStart = 0; | 65 bytesStart = 0; |
| 66 } else { | 66 } else { |
| 67 var hexPairs = (end - start - 1) ~/ 2; | 67 var hexPairs = (end - start - 1) ~/ 2; |
| 68 bytes = new Uint8List(1 + hexPairs); | 68 bytes = new Uint8List(1 + hexPairs); |
| 69 bytes[0] = _lastDigit + _digitForCodeUnit(codeUnits, start); | 69 bytes[0] = _lastDigit + digitForCodeUnit(codeUnits, start); |
| 70 start++; | 70 start++; |
| 71 bytesStart = 1; | 71 bytesStart = 1; |
| 72 } | 72 } |
| 73 | 73 |
| 74 _lastDigit = _decode(codeUnits, start, end, bytes, bytesStart); | 74 _lastDigit = _decode(codeUnits, start, end, bytes, bytesStart); |
| 75 | 75 |
| 76 _sink.add(bytes); | 76 _sink.add(bytes); |
| 77 if (isLast) close(); | 77 if (isLast) _close(string, end); |
| 78 } | 78 } |
| 79 | 79 |
| 80 ByteConversionSink asUtf8Sink(bool allowMalformed) => | 80 ByteConversionSink asUtf8Sink(bool allowMalformed) => |
| 81 new _HexDecoderByteSink(_sink); | 81 new _HexDecoderByteSink(_sink); |
| 82 | 82 |
| 83 void close() { | 83 void close() => _close(); |
| 84 |
| 85 /// Like [close], but includes [string] and [index] in the [FormatException] |
| 86 /// if one is thrown. |
| 87 void _close([String string, int index]) { |
| 84 if (_lastDigit != null) { | 88 if (_lastDigit != null) { |
| 85 throw new FormatException("Invalid input length, must be even."); | 89 throw new FormatException( |
| 90 "Input ended with incomplete encoded byte.", string, index); |
| 86 } | 91 } |
| 87 | 92 |
| 88 _sink.close(); | 93 _sink.close(); |
| 89 } | 94 } |
| 90 } | 95 } |
| 91 | 96 |
| 92 /// A conversion sink for chunked hexadecimal decoding from UTF-8 bytes. | 97 /// A conversion sink for chunked hexadecimal decoding from UTF-8 bytes. |
| 93 class _HexDecoderByteSink extends ByteConversionSinkBase { | 98 class _HexDecoderByteSink extends ByteConversionSinkBase { |
| 94 /// The underlying sink to which decoded byte arrays will be passed. | 99 /// The underlying sink to which decoded byte arrays will be passed. |
| 95 final Sink<List<int>> _sink; | 100 final Sink<List<int>> _sink; |
| 96 | 101 |
| 97 /// The trailing digit from the previous string. | 102 /// The trailing digit from the previous string. |
| 98 /// | 103 /// |
| 99 /// This will be non-`null` if the most recent string had an odd number of | 104 /// This will be non-`null` if the most recent string had an odd number of |
| 100 /// hexadecimal digits. Since it's the most significant digit, it's always a | 105 /// hexadecimal digits. Since it's the most significant digit, it's always a |
| 101 /// multiple of 16. | 106 /// multiple of 16. |
| 102 int _lastDigit; | 107 int _lastDigit; |
| 103 | 108 |
| 104 _HexDecoderByteSink(this._sink); | 109 _HexDecoderByteSink(this._sink); |
| 105 | 110 |
| 106 void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false); | 111 void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false); |
| 107 | 112 |
| 108 void addSlice(List<int> chunk, int start, int end, bool isLast) { | 113 void addSlice(List<int> chunk, int start, int end, bool isLast) { |
| 109 RangeError.checkValidRange(start, end, chunk.length); | 114 RangeError.checkValidRange(start, end, chunk.length); |
| 110 | 115 |
| 111 if (start == end) { | 116 if (start == end) { |
| 112 if (isLast) close(); | 117 if (isLast) _close(chunk, end); |
| 113 return; | 118 return; |
| 114 } | 119 } |
| 115 | 120 |
| 116 var bytes; | 121 var bytes; |
| 117 var bytesStart; | 122 var bytesStart; |
| 118 if (_lastDigit == null) { | 123 if (_lastDigit == null) { |
| 119 bytes = new Uint8List((end - start) ~/ 2); | 124 bytes = new Uint8List((end - start) ~/ 2); |
| 120 bytesStart = 0; | 125 bytesStart = 0; |
| 121 } else { | 126 } else { |
| 122 var hexPairs = (end - start - 1) ~/ 2; | 127 var hexPairs = (end - start - 1) ~/ 2; |
| 123 bytes = new Uint8List(1 + hexPairs); | 128 bytes = new Uint8List(1 + hexPairs); |
| 124 bytes[0] = _lastDigit + _digitForCodeUnit(chunk, start); | 129 bytes[0] = _lastDigit + digitForCodeUnit(chunk, start); |
| 125 start++; | 130 start++; |
| 126 bytesStart = 1; | 131 bytesStart = 1; |
| 127 } | 132 } |
| 128 | 133 |
| 129 _lastDigit = _decode(chunk, start, end, bytes, bytesStart); | 134 _lastDigit = _decode(chunk, start, end, bytes, bytesStart); |
| 130 | 135 |
| 131 _sink.add(bytes); | 136 _sink.add(bytes); |
| 132 if (isLast) close(); | 137 if (isLast) _close(chunk, end); |
| 133 } | 138 } |
| 134 | 139 |
| 135 void close() { | 140 void close() => _close(); |
| 141 |
| 142 /// Like [close], but includes [chunk] and [index] in the [FormatException] |
| 143 /// if one is thrown. |
| 144 void _close([List<int> chunk, int index]) { |
| 136 if (_lastDigit != null) { | 145 if (_lastDigit != null) { |
| 137 throw new FormatException("Invalid input length, must be even."); | 146 throw new FormatException( |
| 147 "Input ended with incomplete encoded byte.", chunk, index); |
| 138 } | 148 } |
| 139 | 149 |
| 140 _sink.close(); | 150 _sink.close(); |
| 141 } | 151 } |
| 142 } | 152 } |
| 143 | 153 |
| 144 /// Decodes [codeUnits] and writes the result into [destination]. | 154 /// Decodes [codeUnits] and writes the result into [destination]. |
| 145 /// | 155 /// |
| 146 /// This reads from [codeUnits] between [sourceStart] and [sourceEnd]. It writes | 156 /// This reads from [codeUnits] between [sourceStart] and [sourceEnd]. It writes |
| 147 /// the result into [destination] starting at [destinationStart]. | 157 /// the result into [destination] starting at [destinationStart]. |
| 148 /// | 158 /// |
| 149 /// If there's a leftover digit at the end of the decoding, this returns that | 159 /// If there's a leftover digit at the end of the decoding, this returns that |
| 150 /// digit. Otherwise it returns `null`. | 160 /// digit. Otherwise it returns `null`. |
| 151 int _decode(List<int> codeUnits, int sourceStart, int sourceEnd, | 161 int _decode(List<int> codeUnits, int sourceStart, int sourceEnd, |
| 152 List<int> destination, int destinationStart) { | 162 List<int> destination, int destinationStart) { |
| 153 var destinationIndex = destinationStart; | 163 var destinationIndex = destinationStart; |
| 154 for (var i = sourceStart; i < sourceEnd - 1; i += 2) { | 164 for (var i = sourceStart; i < sourceEnd - 1; i += 2) { |
| 155 var firstDigit = _digitForCodeUnit(codeUnits, i); | 165 var firstDigit = digitForCodeUnit(codeUnits, i); |
| 156 var secondDigit = _digitForCodeUnit(codeUnits, i + 1); | 166 var secondDigit = digitForCodeUnit(codeUnits, i + 1); |
| 157 destination[destinationIndex++] = 16 * firstDigit + secondDigit; | 167 destination[destinationIndex++] = 16 * firstDigit + secondDigit; |
| 158 } | 168 } |
| 159 | 169 |
| 160 if ((sourceEnd - sourceStart).isEven) return null; | 170 if ((sourceEnd - sourceStart).isEven) return null; |
| 161 return 16 * _digitForCodeUnit(codeUnits, sourceEnd - 1); | 171 return 16 * digitForCodeUnit(codeUnits, sourceEnd - 1); |
| 162 } | 172 } |
| 163 | |
| 164 /// Returns the digit (0 through 15) corresponding to the hexadecimal code unit | |
| 165 /// at index [i] in [codeUnits]. | |
| 166 /// | |
| 167 /// If the given code unit isn't valid hexadecimal, throws a [FormatException]. | |
| 168 int _digitForCodeUnit(List<int> codeUnits, int index) { | |
| 169 // If the code unit is a numeral, get its value. XOR works because 0 in ASCII | |
| 170 // is `0b110000` and the other numerals come after it in ascending order and | |
| 171 // take up at most four bits. | |
| 172 // | |
| 173 // We check for digits first because it ensures there's only a single branch | |
| 174 // for 10 out of 16 of the expected cases. We don't count the `digit >= 0` | |
| 175 // check because branch prediction will always work on it for valid data. | |
| 176 var codeUnit = codeUnits[index]; | |
| 177 var digit = $0 ^ codeUnit; | |
| 178 if (digit <= 9) { | |
| 179 if (digit >= 0) return digit; | |
| 180 } else { | |
| 181 // If the code unit is an uppercase letter, convert it to lowercase. This | |
| 182 // works because uppercase letters in ASCII are exactly `0b100000 = 0x20` | |
| 183 // less than lowercase letters, so if we ensure that that bit is 1 we ensure | |
| 184 // that the letter is lowercase. | |
| 185 var letter = 0x20 | codeUnit; | |
| 186 if ($a <= letter && letter <= $f) return letter - $a + 10; | |
| 187 } | |
| 188 | |
| 189 throw new FormatException( | |
| 190 "Invalid hexadecimal code unit " | |
| 191 "U+${codeUnit.toRadixString(16).padLeft(4, '0')}.", | |
| 192 codeUnits, index); | |
| 193 } | |
| OLD | NEW |