Chromium Code Reviews| Index: sdk/lib/convert/base64.dart |
| diff --git a/sdk/lib/convert/base64.dart b/sdk/lib/convert/base64.dart |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..2b5d595a897b8d0dbdccefdca3794b2b5a4b3816 |
| --- /dev/null |
| +++ b/sdk/lib/convert/base64.dart |
| @@ -0,0 +1,503 @@ |
| +// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
| +// for details. All rights reserved. Use of this source code is governed by a |
| +// BSD-style license that can be found in the LICENSE file. |
| + |
| +part of dart.convert; |
| + |
| +/** |
| + * An instance of [Base64Codec]. |
| + * |
| + * This instance provides convenient access to the most common
| + * [BASE64](https://tools.ietf.org/html/rfc4648) use cases. |
| + * |
| + * It encodes and decodes using the default alphabet and does not allow |
| + * any invalid characters in the input to decoding. |
|
sra1
2015/09/28 17:29:42
Is this encoding/decoding compatible with btoa()/a
Lasse Reichstein Nielsen
2015/09/29 10:31:04
Compatible, but not identical.
The atob conversio
|
| + * |
| + * Examples: |
| + * |
| + * var encoded = BASE64.encode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, |
| + * 0x72, 0x67, 0x72, 0xc3, 0xb8, 0x64]); |
| + * var decoded = BASE64.decode("YmzDpWLDpnJncsO4ZAo="); |
| + */ |
| +const Base64Codec BASE64 = const Base64Codec(); |
|
Lasse Reichstein Nielsen
2015/09/29 10:31:04
This naming is *only* for consistency. I'd prefer
|
| + |
| +/** |
| + * The default encoding alphabet. |
|
Søren Gjesse
2015/09/28 17:18:52
This is not just "the default encoding alphabet" i
Lasse Reichstein Nielsen
2015/09/29 10:31:05
Rewording.
It is the default alphabet for base64 e
|
| + */ |
| +const String _base64Alphabet = |
| + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; |
| + |
| +const int _paddingChar = 0x3d; // '='. |
| +const int _sixBitMask = 0x3F; |
| +const int _eightBitMask = 0xFF; |
| + |
| + |
| +/**
| + * A [Base64Codec] allows encoding bytes as BASE64 strings
| + * and decoding BASE64 strings to bytes.
| + *
| + * The two directions are available through [encoder] and [decoder].
| + */
| +class Base64Codec extends Codec<List<int>, String> {
| + const Base64Codec();
| + 
| + /** The name of this codec, always `"base64"`. */
| + String get name => "base64";

Søren Gjesse
2015/09/28 17:18:52
Uppercase B?
Lasse Reichstein Nielsen
2015/09/29 10:31:05
I should remove it. This is not an "Encoding", jus
|
| + 
| + /** A constant [Base64Encoder] converting bytes to a BASE64 string. */
| + Base64Encoder get encoder => const Base64Encoder();
| + 
| + /** A constant [Base64Decoder] converting a BASE64 string to bytes. */
| + Base64Decoder get decoder => const Base64Decoder();
| +}
| + |
| +// ------------------------------------------------------------------------ |
| +// Encoder |
| +// ------------------------------------------------------------------------ |
| + |
| +/**
| + * Converts a list of bytes to its BASE64 string representation.
| + *
| + * Supports both one-shot conversion through [convert] and chunked
| + * conversion through [startChunkedConversion].
| + */
| +class Base64Encoder extends Converter<List<int>, String> {
| + const Base64Encoder();
| + 
| + /**
| + * Encodes all of [input] as a single, padded BASE64 string.
| + *
| + * Returns the empty string for empty input. A value in [input] outside
| + * the byte range makes the underlying helper throw an [ArgumentError].
| + */
| + String convert(List<int> input) {
| + if (input.isEmpty) return "";
| + var encoder = new _Base64Encoder();

floitsch
2015/09/28 16:08:28
Up to you if you want to add a type here.
Lasse Reichstein Nielsen
2015/09/29 10:31:04
Not really worth it.
|
| + // The whole input is one chunk, so it is also the last one.
| + Uint8List buffer = encoder._encode(input, 0, input.length, true);
| + return new String.fromCharCodes(buffer);
| + }
| + 
| + /**
| + * Starts a chunked conversion.
| + *
| + * If [sink] is a [StringConversionSink], the encoded characters are
| + * written as bytes to its UTF-8 sink; otherwise each encoded chunk is
| + * added to [sink] as a string.
| + */
| + ByteConversionSink startChunkedConversion(Sink<String> sink) {
| + if (sink is StringConversionSink) {
| + // asUtf8Sink requires the allowMalformed flag. The encoder only emits
| + // ASCII characters, so malformed input never needs to be tolerated.
| + return new _Utf8Base64EncoderSink(sink.asUtf8Sink(false));
| + }
| + return new _AsciiBase64EncoderSink(sink);
| + }
| +}
| + |
| +/** |
| + * Helper class for encoding bytes to BASE-64. |
|
floitsch
2015/09/28 16:08:28
usage is not consistent. Sometimes BASE64. Here BA
Lasse Reichstein Nielsen
2015/09/29 10:31:04
I think the name should be "base64" or "base 64 en
|
| + */ |
| +class _Base64Encoder { |
|
floitsch
2015/09/28 16:08:28
I'm not a fan of reusing a class as a mixin.
I wo
Lasse Reichstein Nielsen
2015/09/29 10:31:05
That would be pretty redundant for a private helpe
|
| + /** Intermediate state shift of the bits stored in the state. */
|
floitsch
2015/09/28 16:08:28
Intermediate
/// Shift-count to extract the value
Lasse Reichstein Nielsen
2015/09/29 10:31:05
Done.
|
| + static const int _valueShift = 2; |
| + /** Intermediate state encoding of the number of bytes stored in the state. */ |
|
floitsch
2015/09/28 16:08:28
/// Mask to extract the XYZ count from the state.
Lasse Reichstein Nielsen
2015/09/29 10:31:05
Done.
|
| + static const int _countMask = 3; |
| + |
| + int _state = 0; |
|
floitsch
2015/09/28 16:08:28
Might be nice to just write getters and setters.
Lasse Reichstein Nielsen
2015/09/29 10:31:05
I use the extraction in static methods, so I added
|
| + |
| + Uint8List _getBuffer(int bufferLength) => new Uint8List(bufferLength); |
|
floitsch
2015/09/28 16:08:28
_createBuffer
I do realize that "_buffer()" in my
Lasse Reichstein Nielsen
2015/09/29 10:31:05
Yes, "buffer" as a verb is not going to work, it's
|
| + |
| + /**
| + * Encodes the bytes of [bytes] from [start] to [end].
| + *
| + * Bytes carried over from earlier calls (held in [_state]) are combined
| + * with the new bytes. Only complete three-byte groups produce output,
| + * unless [isLast] is true, in which case a trailing partial group is
| + * written as a padded four-character chunk.
| + *
| + * [bytes] may be `null` when the range is empty.
| + *
| + * Returns the buffer holding the encoded characters, or `null` when this
| + * call produced no output.
| + */
| + Uint8List _encode(List<int> bytes, int start, int end, bool isLast) {
| + assert(0 <= start);
| + assert(start <= end);
| + assert(bytes == null || end <= bytes.length);
| + int length = end - start;
| + 
| + // Number of bytes carried over from previous calls.
| + int count = _state & _countMask;
| + int byteCount = (count + length);
| + int fullChunks = byteCount ~/ 3;
| + int partialChunkLength = byteCount - fullChunks * 3;
| + // Every full three-byte chunk becomes four output characters.
| + int bufferLength = fullChunks * 4;
| + if (isLast && partialChunkLength > 0) {
| + bufferLength += 4; // Room for padding.
| + }
| + var output = _getBuffer(bufferLength);

floitsch
2015/09/28 16:08:28
type if you want to.
|
| + _state = _encodeChunk(bytes, start, end, isLast, output, 0, _state);
| + if (bufferLength > 0) return output;
| + // If the input plus the data in _state is still less than three bytes,

floitsch
2015/09/28 16:08:28
-still-
Lasse Reichstein Nielsen
2015/09/29 10:31:05
Done.
|
| + // there may not be any output.

floitsch
2015/09/28 16:08:28
s/may not be any/is no
Lasse Reichstein Nielsen
2015/09/29 10:31:04
Unless isLast is true, in which case there is outp
|
| + return null;
| + }
| + |
| + /**
| + * Encodes [bytes] from [start] to [end] into [output] at [outputIndex].
| + *
| + * [state] holds the bits carried over from a previous call, shifted left
| + * by [_valueShift], with the number of carried-over bytes in the bits
| + * selected by [_countMask].
| + *
| + * If [isLast] is true and a partial chunk remains, it is written with
| + * padding and the returned state is zero. Otherwise the returned state
| + * holds the remaining bits and byte count in the same format as [state].
| + *
| + * Throws an [ArgumentError] if any value in the range is less than 0 or
| + * greater than 255.
| + */
| + static int _encodeChunk(List<int> bytes, int start, int end, bool isLast,
| + Uint8List output, int outputIndex, int state) {
| + int bits = state >> _valueShift;
| + // Count number of missing bytes in three-byte chunk.
| + int count = 3 - (state & _countMask);
| + 
| + // Bitwise OR of every value read; checked once after the loop so that
| + // the loop itself does not validate each value separately.
| + int byteOr = 0;

floitsch
2015/09/28 16:08:28
// The input must be a list of bytes.
// All input
Lasse Reichstein Nielsen
2015/09/29 10:31:04
Done.
|
| + for (int i = start; i < end; i++) {
| + int byte = bytes[i];
| + byteOr |= byte;
| + bits = (bits << 8) | byte;
| + count--;

floitsch
2015/09/28 16:08:28
I find "count" to be misleading (but it's not too
Lasse Reichstein Nielsen
2015/09/29 10:31:05
Renamed to "expectedChars".
|
| + if (count == 0) {
| + // Three bytes (24 bits) collected: emit four six-bit characters.
| + output[outputIndex++] =
| + _base64Alphabet.codeUnitAt((bits >> 18) & _sixBitMask);
| + output[outputIndex++] =
| + _base64Alphabet.codeUnitAt((bits >> 12) & _sixBitMask);
| + output[outputIndex++] =
| + _base64Alphabet.codeUnitAt((bits >> 6) & _sixBitMask);
| + output[outputIndex++] =
| + _base64Alphabet.codeUnitAt(bits & _sixBitMask);
| + count = 3;
| + bits = 0;
| + }
| + }
| + // A negative value or a value above 255 shows up in the combined OR.
| + if (byteOr >= 0 && byteOr <= 255) {

floitsch
2015/09/28 16:08:28
I almost prefer to have the error-case guarded:
i
Lasse Reichstein Nielsen
2015/09/29 10:31:04
I like to keep the non-error flow connected if pos
|
| + if (isLast && count < 3) {
| + // `3 - count` is the number of bytes (1 or 2) in the partial chunk.
| + _writeFinalChunk(output, outputIndex, 3 - count, bits);
| + return 0;
| + }
| + return (bits << _valueShift) | (3 - count);
| + }
| + 
| + // There was an invalid byte value somewhere in the input - find it!
| + int i = start;
| + while (i < end) {
| + int byte = bytes[i];
| + if (byte < 0 || byte > 255) break;
| + i++;
| + }
| + throw new ArgumentError.value(bytes,
| + "Not a byte value at index $i: 0x${bytes[i].toRadixString(16)}");
| + }
| + |
| + /**
| + * Writes a final encoded four-character chunk.
| + *
| + * Only used when a partial (1 or 2 byte) input remains.
| + *
| + * [count] is the number of bytes held in [bits]. The chunk is written to
| + * [output] starting at [outputIndex] and is filled up to four characters
| + * with the padding character.
| + */
| + static void _writeFinalChunk(Uint8List output, int outputIndex,
| + int count, int bits) {
| + assert(count > 0);
| + if (count == 1) {

Søren Gjesse
2015/09/28 17:18:52
Maybe this could be
if (count == 2) {
outpu
Lasse Reichstein Nielsen
2015/09/29 10:31:04
It's >> 2, << 4 for one of the branches, so that d
|
| + // Eight bits: the top six, then the low two moved to the top of a
| + // six-bit group, followed by two padding characters.
| + output[outputIndex++] =
| + _base64Alphabet.codeUnitAt((bits >> 2) & _sixBitMask);
| + output[outputIndex++] =
| + _base64Alphabet.codeUnitAt((bits << 4) & _sixBitMask);
| + output[outputIndex++] = _paddingChar;
| + output[outputIndex++] = _paddingChar;
| + } else {
| + assert(count == 2);
| + // Sixteen bits: two full six-bit groups, then the low four bits moved
| + // to the top of a six-bit group, followed by one padding character.
| + output[outputIndex++] =
| + _base64Alphabet.codeUnitAt((bits >> 10) & _sixBitMask);
| + output[outputIndex++] =
| + _base64Alphabet.codeUnitAt((bits >> 4) & _sixBitMask);
| + output[outputIndex++] =
| + _base64Alphabet.codeUnitAt((bits << 2) & _sixBitMask);
| + output[outputIndex++] = _paddingChar;
| + }
| + }
| +} |
| + |
| +/**
| + * Common base of the chunked encoder sinks.
| + *
| + * Maps the public sink operations onto a single [_add] operation that
| + * subclasses implement.
| + */
| +abstract class _Base64EncoderSink extends ByteConversionSinkBase
| +    with _Base64Encoder {
| +  /**
| +   * Handles the range of [source] from [start] to [end].
| +   *
| +   * [isLast] is true when no further input will follow.
| +   */
| +  void _add(List<int> source, int start, int end, bool isLast);
| +
| +  /** Adds all of [source] as a chunk that is not the last one. */
| +  void add(List<int> source) => _add(source, 0, source.length, false);
| +
| +  /** Signals the end of input by adding an empty last chunk. */
| +  void close() => _add(null, 0, 0, true);
| +
| +  /** Adds the validated range of [source] from [start] to [end]. */
| +  void addSlice(List<int> source, int start, int end, bool isLast) {
| +    if (end == null) {
| +      throw new ArgumentError.notNull("end");
| +    }
| +    RangeError.checkValidRange(start, end, source.length);
| +    _add(source, start, end, isLast);
| +  }
| +}
| + |
| +/**
| + * Encoder sink that passes each encoded chunk on as a [String].
| + */
| +class _AsciiBase64EncoderSink extends _Base64EncoderSink {
| + // Typed as a plain Sink: only add and close are used, and the sink given
| + // to Base64Encoder.startChunkedConversion is only known to be a
| + // Sink<String>.
| + final Sink<String> _sink;
| + /**

Søren Gjesse
2015/09/28 17:18:52
Shouldn't be dartdoc.
Lasse Reichstein Nielsen
2015/09/29 10:31:04
Why not?
An editor should still show it to you, ev
|
| + * Reused buffer.
| + *
| + * Since the buffer isn't released to the sink, only the string created
| + * from it, the buffer can be reused between chunks.
| + */
| + Uint8List _bufferCache;
| + 
| + _AsciiBase64EncoderSink(this._sink);
| + 
| + /**
| + * Returns a view of exactly [bufferLength] bytes on the cached buffer.
| + *
| + * The cache is replaced by a new list when it is missing or too short.
| + */
| + Uint8List _getBuffer(int bufferLength) {
| + if (_bufferCache == null || _bufferCache.length < bufferLength) {
| + _bufferCache = new Uint8List(bufferLength);
| + }
| + return new Uint8List.view(_bufferCache.buffer, 0, bufferLength);
| + }
| + 
| + /**
| + * Encodes the range and adds any resulting string to the target sink.
| + *
| + * Closes the target sink when [isLast] is true.
| + */
| + void _add(List<int> source, int start, int end, bool isLast) {
| + Uint8List buffer = _encode(source, start, end, isLast);
| + if (buffer != null) {
| + String string = new String.fromCharCodes(buffer);
| + _sink.add(string);
| + }
| + if (isLast) {
| + _sink.close();
| + }
| + }
| +}
| + |
| +/**
| + * Encoder sink that passes each encoded chunk on as bytes.
| + *
| + * The encoded characters are handed to a [ByteConversionSink] without
| + * first being turned into a string.
| + */
| +class _Utf8Base64EncoderSink extends _Base64EncoderSink {
| +  final ByteConversionSink _sink;
| +
| +  // [bufferSize] is kept so existing two-argument calls still work, but it
| +  // is ignored: the superclass has no constructor that accepts it, so the
| +  // former `super(bufferSize)` call could not be resolved.
| +  _Utf8Base64EncoderSink(this._sink, [int bufferSize]);
| +
| +  /**
| +   * Encodes the range and forwards any output to the target sink.
| +   *
| +   * When [isLast] is true the target sink is always closed: through
| +   * `addSlice` when there is output (presumably it closes the sink when
| +   * given a true `isLast`), and directly otherwise.
| +   */
| +  void _add(List<int> source, int start, int end, bool isLast) {
| +    Uint8List buffer = _encode(source, start, end, isLast);
| +    if (buffer != null) {
| +      _sink.addSlice(buffer, 0, buffer.length, isLast);
| +    } else if (isLast) {
| +      // Nothing was left to flush, so addSlice was not called and the
| +      // target sink would otherwise stay open.
| +      _sink.close();
| +    }
| +  }
| +}
| + |
| +// ------------------------------------------------------------------------ |
| +// Decoder |
| +// ------------------------------------------------------------------------ |
| + |
| +/**
| + * Converts a BASE64 string to the list of bytes it encodes.
| + *
| + * Supports both one-shot conversion through [convert] and chunked
| + * conversion through [startChunkedConversion].
| + */
| +class Base64Decoder extends Converter<String, List<int>> {
| + const Base64Decoder();
| + 
| + /**
| + * Decodes all of [input].
| + *
| + * Returns an empty list for empty input. Throws a [FormatException] if
| + * the length of [input] is not a multiple of four, or if decoding or the
| + * final state check rejects the input.
| + */
| + List<int> convert(String input) {
| + if (input.isEmpty) return new Uint8List(0);
| + int length = input.length;
| + if (length % 4 != 0) {
| + throw new FormatException("Invalid length, must be multiple of four",
| + input, length);
| + }
| + var decoder = new _Base64Decoder();

floitsch
2015/09/28 16:08:28
type if you want.
|
| + Uint8List buffer = decoder._decode(input, 0, input.length);
| + // Verifies that decoding ended in a complete state.
| + decoder._close(input, input.length);
| + return buffer;

sra1
2015/09/28 17:29:42
If you are OK with an unmodifiable result, the a b
Lasse Reichstein Nielsen
2015/09/29 10:31:05
I probably want it to be a Uint8List.
Still, it sh
|
| + }
| + 
| + /** Starts a chunked conversion that adds decoded bytes to [sink]. */
| + StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
| + return new _Base64DecoderSink(sink);
| + }
| +}
| + |
| +/** |
| + * Helper class implementing BASE64 decoding with intermediate state. |
| + */ |
| +class _Base64Decoder { |
| + static const int _valueShift = 2; |
|
floitsch
2015/09/28 16:08:28
Same as for the encoder.
Lasse Reichstein Nielsen
2015/09/29 10:31:04
Done.
|
| + static const int _countMask = 3; |
| + |
| + /** Invalid character in decoding table. */ |
| + static const int _invalid = -2; |
| + /** Padding character in decoding table. */ |
| + static const int _padding = -1; |
| + |
| + // Shorthand to make the table more readable. |
| + static const int __ = _invalid; |
| + static const int _p = _padding; |
| + |
| + /** |
| + * Mapping from ASCII characters to their index in [_base64Alphabet].
| + *
| + * Uses [_invalid] (-2) for characters that are not in the alphabet and
| + * [_padding] (-1) for the padding character.
| + */ |
| + static final List<int> _inverseAlphabet = new Int8List.fromList([ |
| + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, |
| + __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, |
| + __, __, __, __, __, __, __, __, __, __, __, 62, __, __, __, 63, |
| + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, __, __, __, _p, __, __, |
| + __, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, |
| + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, __, __, __, __, __, |
| + __, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, |
| + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, __, __, __, __, __, |
| + ]); |
| + |
| + /** |
| + * Maintains the intermediate state of a partly-decoded input. |
| + * |
| + * BASE-64 is decoded in chunks of four characters. If a chunk does not |
| + * contain a full block, the decoded bits (six per character) of the |
| + * available characters are stored in [_state] until the next call to |
| + * [_decode] or [_close]. |
| + * |
| + * If no padding has been seen, the value is |
| + * `numberOfCharactersSeen | (decodedBits << 2)` |
| + * where `numberOfCharactersSeen` is between 0 and 3 and decoded bits |
| + * contains six bits per seen character.
|
floitsch
2015/09/28 16:08:28
seen
Lasse Reichstein Nielsen
2015/09/29 10:31:04
Done.
|
| + * |
| + * If padding has been seen the value is negative. It's the bitwise negation |
| + * of the number of remaining allowed padding characters (always ~0 or ~1).
| + * |
| + * The states `0` and `~0` are valid places to end decoding, all other values
| + * mean that a four-character block has not been completed.
|
floitsch
2015/09/28 16:08:28
mean
Lasse Reichstein Nielsen
2015/09/29 10:31:04
Done.
|
| + */ |
| + int _state = 0; |
| + |
| +  /**
| +   * Decodes the characters of [input] from [start] to [end].
| +   *
| +   * Bits left over from an incomplete four-character block of an earlier
| +   * call are included in the decoding.
| +   *
| +   * Returns the decoded bytes. Returns `null` if padding had already been
| +   * seen, in which case the range is only checked to be valid padding, or
| +   * if no buffer was needed for the range.
| +   */
| +  Uint8List _decode(String input, int start, int end) {
| +    assert(0 <= start);
| +    assert(start <= end);
| +    assert(end <= input.length);
| +    if (_state >= 0) {
| +      if (start == end) return new Uint8List(0);
| +      Uint8List decoded = _allocateBuffer(input, start, end, _state);
| +      _state = _decodeChunk(input, start, end, decoded, 0, _state);
| +      return decoded;
| +    }
| +    // Padding was seen earlier: only more padding may follow.
| +    _state = _checkPadding(input, start, end, _state);
| +    return null;
| +  }
| + |
| +  /**
| +   * Checks that [_state] is a valid place to end decoding.
| +   *
| +   * Throws a [FormatException], reported at [end] in [input], if a
| +   * four-character block is incomplete or a padding character is still
| +   * expected. Afterwards the state is set to `~0`.
| +   */
| +  void _close(String input, int end) {
| +    if (_state > 0) {
| +      // Characters of an unfinished block are still pending.
| +      throw new FormatException("Invalid length, must be multiple of four",
| +          input, end);
| +    }
| +    if (_state < ~0) {
| +      // Padding has started but one more padding character is expected.
| +      throw new FormatException("Missing padding character", input, end);
| +    }
| +    _state = ~0;
| +  }
| + |
| + /**
| + * Decodes [input] from [start] to [end].
| + *
| + * Includes the state returned by a previous call in the decoding.
| + * Writes the decoding to [output] at [outIndex], and there must
| + * be room in the output.
| + *
| + * Returns the new state: the pending bits shifted left by [_valueShift]
| + * combined with the number of pending characters, or the negative padding
| + * state from [_checkPadding] once a padding character has been decoded.
| + *
| + * Throws a [FormatException] on a character outside the alphabet, on a
| + * non-ASCII character, on misplaced padding, or when the bits before the
| + * padding are not zero.
| + */
| + static int _decodeChunk(String input, int start, int end,
| + Uint8List output, int outIndex,
| + int state) {
| + const int asciiMask = 127;
| + const int asciiMax = 127;
| + int bits = state >> _valueShift;
| + int count = state & _countMask;
| + // Bitwise OR of every character read. Characters are masked to seven
| + // bits for the table lookup, so a non-ASCII character can look valid;
| + // this value reveals whether any character was outside the ASCII range.
| + int charOr = 0;

floitsch
2015/09/28 16:08:28
Add comment, what the charOr is for.
Lasse Reichstein Nielsen
2015/09/29 10:31:04
Done.
|
| + for (int i = start; i < end; i++) {
| + var char = input.codeUnitAt(i);

floitsch
2015/09/28 16:08:28
type if you want.
|
| + charOr |= char;
| + int code = _inverseAlphabet[char & asciiMask];
| + if (code >= 0) {
| + bits = ((bits << 6) | code);

floitsch
2015/09/28 16:08:28
Add & to make it easier for the VM to optimize.
floitsch
2015/09/28 16:08:28
magic "6".
Lasse Reichstein Nielsen
2015/09/29 10:31:04
Done.
Lasse Reichstein Nielsen
2015/09/29 10:31:04
named it "bitsPerCharacter".
|
| + count = (count + 1) & 3;
| + if (count == 0) {
| + // Four characters (24 bits) collected: emit three bytes.
| + assert(outIndex + 3 <= output.length);
| + output[outIndex++] = (bits >> 16) & _eightBitMask;
| + output[outIndex++] = (bits >> 8) & _eightBitMask;
| + output[outIndex++] = bits & _eightBitMask;
| + bits = 0;
| + }
| + continue;
| + } else if (code == _padding && count > 1) {
| + // A non-ASCII character masked into the alphabet may have been
| + // accepted above. Leave the loop so that the check after it reports
| + // that character, instead of returning a successful state here.
| + if (charOr < 0 || charOr > asciiMax) break;
| + if (count == 3) {
| + // Three characters hold 18 bits: two bytes plus two zero bits.
| + if ((bits & 0x03) != 0) {
| + throw new FormatException(
| + "Invalid encoding before padding", input, i);
| + }
| + output[outIndex++] = bits >> 10;
| + output[outIndex++] = bits >> 2;
| + } else {
| + // Two characters hold 12 bits: one byte plus four zero bits.
| + if ((bits & 0x0F) != 0) {
| + throw new FormatException(
| + "Invalid encoding before padding", input, i);
| + }
| + output[outIndex++] = bits >> 4;
| + }
| + int expectedPadding = 3 - count;
| + state = _checkPadding(input, i + 1, end, ~expectedPadding);
| + return state;
| + }
| + throw new FormatException("Invalid character", input, i);
| + }
| + if (charOr >= 0 && charOr <= asciiMax) {

floitsch
2015/09/28 16:08:28
Same as for the encoder.
Lasse Reichstein Nielsen
2015/09/29 10:31:04
Same answer.
|
| + return (bits << _valueShift) | count;
| + }
| + // There is an invalid (non-ASCII) character in the input.
| + int i;
| + for (i = start; i < end; i++) {
| + int char = input.codeUnitAt(i);
| + if (char < 0 || char > asciiMax) break;
| + }
| + throw new FormatException("Invalid character", input, i);
| + }
| + |
| +  /**
| +   * Creates the output buffer for decoding [input] from [start] to [end].
| +   *
| +   * The size accounts for the characters already held in [state] and for
| +   * up to two padding characters at the end of the range.
| +   *
| +   * Returns `null` when the computed size is not positive.
| +   */
| +  static Uint8List _allocateBuffer(String input, int start, int end,
| +                                   int state) {
| +    assert(state >= 0);
| +    int charCount = (state & _countMask) + (end - start);
| +    // Count the padding characters (at most two) that end the range.
| +    int trailingPadding = 0;
| +    for (int i = end - 1; i >= start && trailingPadding < 2; i--) {
| +      if (input.codeUnitAt(i) != _paddingChar) break;
| +      trailingPadding++;
| +    }
| +    // Three bytes for every full group of four characters.
| +    int byteCount = (charCount ~/ 4) * 3;
| +    // If padding was seen, then remove the padding if it was counted, or
| +    // add the last partial chunk if it wasn't counted.
| +    int leftover = charCount % 4;
| +    if (leftover == 0) {
| +      byteCount -= trailingPadding;
| +    } else if (trailingPadding != 0 && leftover - trailingPadding > 1) {
| +      byteCount += leftover - 1 - trailingPadding;
| +    }
| +    // If the input plus state is still less than four characters, no buffer
| +    // is needed.
| +    return byteCount > 0 ? new Uint8List(byteCount) : null;
| +  }
| + |
| +  /**
| +   * Checks that [input] from [start] to [end] is valid padding.
| +   *
| +   * That means zero or one padding character (depending on [state])
| +   * and nothing else. [state] must be negative: it is the bitwise negation
| +   * of the number of padding characters still expected.
| +   *
| +   * Returns the updated state. An empty range leaves [state] unchanged.
| +   *
| +   * Throws a [FormatException] if an expected padding character is missing
| +   * or if any character follows the padding.
| +   */
| +  static int _checkPadding(String input, int start, int end, int state) {
| +    assert(state < 0);
| +    if (start == end) return state;
| +    int expectedPadding = ~state;
| +    if (expectedPadding > 0) {
| +      int firstChar = input.codeUnitAt(start);
| +      if (firstChar != _paddingChar) {
| +        // Report against `input`; no variable named `string` exists here.
| +        throw new FormatException("Missing padding character", input, start);
| +      }
| +      state = ~0;
| +      start++;
| +    }
| +    if (start != end) {
| +      throw new FormatException("Invalid character after padding",
| +                                input, start);
| +    }
| +    return state;
| +  }
| +} |
| + |
| +/**
| + * Chunked decoder sink that adds the decoded bytes of each string chunk
| + * to a target sink.
| + */
| +class _Base64DecoderSink extends StringConversionSinkBase with _Base64Decoder {
| +  /**
| +   * Output sink.
| +   *
| +   * Typed as a plain [Sink]: only `add` and `close` are used, and the sink
| +   * given to `Base64Decoder.startChunkedConversion` is only known to be a
| +   * `Sink<List<int>>`.
| +   */
| +  final Sink<List<int>> _sink;
| +
| +  _Base64DecoderSink(this._sink);
| +
| +  /** Decodes [string] and adds any decoded bytes to the output sink. */
| +  void add(String string) {
| +    if (string.isEmpty) return;
| +    Uint8List buffer = _decode(string, 0, string.length);
| +    if (buffer != null) _sink.add(buffer);
| +  }
| +
| +  /** Checks that decoding ended in a valid state, then closes the output. */
| +  void close() {
| +    _close(null, null);
| +    _sink.close();
| +  }
| +
| +  /**
| +   * Decodes [string] from [start] to [end].
| +   *
| +   * When [isLast] is true the final state is checked and the output sink
| +   * is closed, even if the slice itself is empty.
| +   */
| +  void addSlice(String string, int start, int end, bool isLast) {
| +    end = RangeError.checkValidRange(start, end, string.length);
| +    if (start != end) {
| +      Uint8List buffer = _decode(string, start, end);
| +      if (buffer != null) _sink.add(buffer);
| +    }
| +    // Runs for empty slices too: returning early on an empty slice used to
| +    // skip the final check and leave the output sink open.
| +    if (isLast) {
| +      _close(string, end);
| +      _sink.close();
| +    }
| +  }
| +}