sdk/lib/convert/base64.dart - Issue 1370073002: Add Base64 codec to dart:convert.

Unified Diff: sdk/lib/convert/base64.dart

Issue 1370073002: Add Base64 codec to dart:convert. (Closed) Base URL: https://github.com/dart-lang/sdk.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: sdk/lib/convert/base64.dart

diff --git a/sdk/lib/convert/base64.dart b/sdk/lib/convert/base64.dart

new file mode 100644

index 0000000000000000000000000000000000000000..2b5d595a897b8d0dbdccefdca3794b2b5a4b3816

--- /dev/null

+++ b/sdk/lib/convert/base64.dart

@@ -0,0 +1,503 @@

+// BSD-style license that can be found in the LICENSE file.

+part of dart.convert;

+/**

+ * An instance of [Base64Codec].

+ *

+ * This instance provides convenient access to the most common

+ * [BASE64](https://tools.ietf.org/html/rfc4648) use cases.

+ *

+ * It encodes and decodes using the default alphabet and does not allow

+ * any invalid characters in the input to decoding.

sra1 2015/09/28 17:29:42 Is this encoding/decoding compatible with btoa()/a

Lasse Reichstein Nielsen 2015/09/29 10:31:04 Compatible, but not identical. The atob conversio

+ *

+ * Examples:

+ *

+ * var encoded = BASE64.encode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6,

+ * 0x72, 0x67, 0x72, 0xc3, 0xb8, 0x64]);

+ * var decoded = BASE64.decode("YmzDpWLDpnJncsO4ZAo=");

+ */

+const Base64Codec BASE64 = const Base64Codec();

Lasse Reichstein Nielsen 2015/09/29 10:31:04 This naming is *only* for consistency. I'd prefer

+/**

+ * The default encoding alphabet.

Søren Gjesse 2015/09/28 17:18:52 This is not just "the default encoding alphabet" i

Lasse Reichstein Nielsen 2015/09/29 10:31:05 Rewording. It is the default alphabet for base64 e

+ */

+const String _base64Alphabet =

+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

+const int _paddingChar = 0x3d; // '='.

+const int _sixBitMask = 0x3F;

+const int _eightBitMask = 0xFF;

+/**

+ * A [Base64Codec] allows encoding bytes as BASE64 strings

+ * and decoding BASE64 strings to bytes.

+ */

+class Base64Codec extends Codec<List<int>, String> {

+ const Base64Codec();

+ String get name => "base64";

Søren Gjesse 2015/09/28 17:18:52 Uppercase B?

Lasse Reichstein Nielsen 2015/09/29 10:31:05 I should remove it. This is not an "Encoding", jus

+ Base64Encoder get encoder => const Base64Encoder();

+ Base64Decoder get decoder => const Base64Decoder();

+// ------------------------------------------------------------------------

+// Encoder

+// ------------------------------------------------------------------------

+class Base64Encoder extends Converter<List<int>, String> {

+ const Base64Encoder();

+ String convert(List<int> input) {

+ if (input.isEmpty) return "";

+ var encoder = new _Base64Encoder();

floitsch 2015/09/28 16:08:28 Up to you if you want to add a type here.

Lasse Reichstein Nielsen 2015/09/29 10:31:04 Not really worth it.

+ Uint8List buffer = encoder._encode(input, 0, input.length, true);

+ return new String.fromCharCodes(buffer);

+ }

+ /**

+ * Starts a chunked conversion.

+ */

+ ByteConversionSink startChunkedConversion(Sink<String> sink) {

+ if (sink is StringConversionSink) {

+ return new _Utf8Base64EncoderSink(sink.asUtf8Sink());

+ }

+ return new _AsciiBase64EncoderSink(sink);

+ }

+/**

+ * Helper class for encoding bytes to BASE64.

floitsch 2015/09/28 16:08:28 usage is not consistent. Sometimes BASE64. Here BA

Lasse Reichstein Nielsen 2015/09/29 10:31:04 I think the name should be "base64" or "base 64 en

+ */

+class _Base64Encoder {

floitsch 2015/09/28 16:08:28 I'm not a fan of reusing a class as a mixin. I wo

Lasse Reichstein Nielsen 2015/09/29 10:31:05 That would be pretty redundant for a private helpe

+ /** Intermediate state shift of the bits stored in the state. */

floitsch 2015/09/28 16:08:28 Intermediate /// Shift-count to extract the value

Lasse Reichstein Nielsen 2015/09/29 10:31:05 Done.

+ static const int _valueShift = 2;

+ /** Intermediate state encoding of the number of bytes stored in the state. */

floitsch 2015/09/28 16:08:28 /// Mask to extract the XYZ count from the state.

Lasse Reichstein Nielsen 2015/09/29 10:31:05 Done.

+ static const int _countMask = 3;

+ int _state = 0;

floitsch 2015/09/28 16:08:28 Might be nice to just write getters and setters.

Lasse Reichstein Nielsen 2015/09/29 10:31:05 I use the extraction in static methods, so I added

+ Uint8List _getBuffer(int bufferLength) => new Uint8List(bufferLength);

floitsch 2015/09/28 16:08:28 _createBuffer I do realize that "_buffer()" in my

Lasse Reichstein Nielsen 2015/09/29 10:31:05 Yes, "buffer" as a verb is not going to work, it's

+ Uint8List _encode(List<int> bytes, int start, int end, bool isLast) {

+ assert(0 <= start);

+ assert(start <= end);

+ assert(bytes == null || end <= bytes.length);

+ int length = end - start;

+ int count = _state & _countMask;

+ int byteCount = (count + length);

+ int fullChunks = byteCount ~/ 3;

+ int partialChunkLength = byteCount - fullChunks * 3;

+ int bufferLength = fullChunks * 4;

+ if (isLast && partialChunkLength > 0) {

+ bufferLength += 4; // Room for padding.

+ }

+ var output = _getBuffer(bufferLength);

floitsch 2015/09/28 16:08:28 type if you want to.

+ _state = _encodeChunk(bytes, start, end, isLast, output, 0, _state);

+ if (bufferLength > 0) return output;

+ // If the input plus the data in _state is still less than three bytes,

floitsch 2015/09/28 16:08:28 -still-

Lasse Reichstein Nielsen 2015/09/29 10:31:05 Done.

+ // there may not be any output.

floitsch 2015/09/28 16:08:28 s/may not be any/is no

Lasse Reichstein Nielsen 2015/09/29 10:31:04 Unless isLast is true, in which case there is outp

+ return null;

+ }

+ static int _encodeChunk(List<int> bytes, int start, int end, bool isLast,

+ Uint8List output, int outputIndex, int state) {

+ int bits = state >> _valueShift;

+ // Count number of missing bytes in three-byte chunk.

+ int count = 3 - (state & _countMask);

+ int byteOr = 0;

floitsch 2015/09/28 16:08:28 // The input must be a list of bytes. // All input

Lasse Reichstein Nielsen 2015/09/29 10:31:04 Done.

+ for (int i = start; i < end; i++) {

+ int byte = bytes[i];

+ byteOr |= byte;

+ bits = (bits << 8) | byte;

+ count--;

floitsch 2015/09/28 16:08:28 I find "count" to be misleading (but it's not too

Lasse Reichstein Nielsen 2015/09/29 10:31:05 Renamed to "expectedChars".

+ if (count == 0) {

+ output[outputIndex++] =

+ _base64Alphabet.codeUnitAt((bits >> 18) & _sixBitMask);

+ output[outputIndex++] =

+ _base64Alphabet.codeUnitAt((bits >> 12) & _sixBitMask);

+ output[outputIndex++] =

+ _base64Alphabet.codeUnitAt((bits >> 6) & _sixBitMask);

+ output[outputIndex++] =

+ _base64Alphabet.codeUnitAt(bits & _sixBitMask);

+ count = 3;

+ bits = 0;

+ }

+ if (byteOr >= 0 && byteOr <= 255) {

floitsch 2015/09/28 16:08:28 I almost prefer to have the error-case guarded: i

Lasse Reichstein Nielsen 2015/09/29 10:31:04 I like to keep the non-error flow connected if pos

+ if (isLast && count < 3) {

+ _writeFinalChunk(output, outputIndex, 3 - count, bits);

+ return 0;

+ }

+ return (bits << _valueShift) | (3 - count);

+ }

+ // There was an invalid byte value somewhere in the input - find it!

+ int i = start;

+ while (i < end) {

+ int byte = bytes[i];

+ if (byte < 0 || byte > 255) break;

+ i++;

+ }

+ throw new ArgumentError.value(bytes,

+ "Not a byte value at index $i: 0x${bytes[i].toRadixString(16)}");

+ }

+ /**

+ * Writes a final encoded four-character chunk.

+ *

+ * Only used when the [state] contains a partial (1 or 2 byte)

+ * input.

+ */

+ static void _writeFinalChunk(Uint8List output, int outputIndex,

+ int count, int bits) {

+ assert(count > 0);

+ if (count == 1) {

Søren Gjesse 2015/09/28 17:18:52 Maybe this could be if (count == 2) { outpu

Lasse Reichstein Nielsen 2015/09/29 10:31:04 It's >> 2, << 4 for one of the branches, so that d

+ output[outputIndex++] =

+ _base64Alphabet.codeUnitAt((bits >> 2) & _sixBitMask);

+ output[outputIndex++] =

+ _base64Alphabet.codeUnitAt((bits << 4) & _sixBitMask);

+ output[outputIndex++] = _paddingChar;

+ } else {

+ assert(count == 2);

+ output[outputIndex++] =

+ _base64Alphabet.codeUnitAt((bits >> 10) & _sixBitMask);

+ output[outputIndex++] =

+ _base64Alphabet.codeUnitAt((bits >> 4) & _sixBitMask);

+ output[outputIndex++] =

+ _base64Alphabet.codeUnitAt((bits << 2) & _sixBitMask);

+ output[outputIndex++] = _paddingChar;

+ }

+abstract class _Base64EncoderSink extends ByteConversionSinkBase

+ with _Base64Encoder {

+ void add(List<int> source) {

+ _add(source, 0, source.length, false);

+ }

+ void close() {

+ _add(null, 0, 0, true);

+ }

+ void addSlice(List<int> source, int start, int end, bool isLast) {

+ if (end == null) throw new ArgumentError.notNull("end");

+ RangeError.checkValidRange(start, end, source.length);

+ _add(source, start, end, isLast);

+ }

+ void _add(List<int> source, int start, int end, bool isLast);

+class _AsciiBase64EncoderSink extends _Base64EncoderSink {

+ final ChunkedConversionSink<String> _sink;

+ /**

Søren Gjesse 2015/09/28 17:18:52 Shouldn't be dartdoc.

Lasse Reichstein Nielsen 2015/09/29 10:31:04 Why not? An editor should still show it to you, ev

+ * Reused buffer.

+ *

+ * Since the buffer isn't released to the sink, only the string created

+ * from it, the buffer can be reused between chunks.

+ */

+ Uint8List _bufferCache;

+ _AsciiBase64EncoderSink(this._sink);

+ Uint8List _getBuffer(int bufferLength) {

+ if (_bufferCache == null || _bufferCache.length < bufferLength) {

+ _bufferCache = new Uint8List(bufferLength);

+ }

+ return new Uint8List.view(_bufferCache.buffer, 0, bufferLength);

+ }

+ void _add(List<int> source, int start, int end, bool isLast) {

+ Uint8List buffer = _encode(source, start, end, isLast);

+ if (buffer != null) {

+ String string = new String.fromCharCodes(buffer);

+ _sink.add(string);

+ }

+ if (isLast) {

+ _sink.close();

+ }

+class _Utf8Base64EncoderSink extends _Base64EncoderSink {

+ final ByteConversionSink _sink;

+ // Creates a sink that forwards the encoded bytes to [_sink].
+ // [bufferSize] is still accepted so existing call sites keep working, but
+ // it is unused: _Base64EncoderSink declares no constructor that takes an
+ // argument, so forwarding it with `super(bufferSize)` could not resolve.
+ _Utf8Base64EncoderSink(this._sink, [int bufferSize]);

+ void _add(List<int> source, int start, int end, bool isLast) {

+ Uint8List buffer = _encode(source, start, end, isLast);

+ if (buffer != null) {

+ _sink.addSlice(buffer, 0, buffer.length, isLast);

+ }

+// ------------------------------------------------------------------------

+// Decoder

+// ------------------------------------------------------------------------

+class Base64Decoder extends Converter<String, List<int>> {

+ const Base64Decoder();

+ List<int> convert(String input) {

+ if (input.isEmpty) return new Uint8List(0);

+ int length = input.length;

+ if (length % 4 != 0) {

+ throw new FormatException("Invalid length, must be multiple of four",

+ input, length);

+ }

+ var decoder = new _Base64Decoder();

floitsch 2015/09/28 16:08:28 type if you want.

+ Uint8List buffer = decoder._decode(input, 0, input.length);

+ decoder._close(input, input.length);

+ return buffer;

sra1 2015/09/28 17:29:42 If you are OK with an unmodifiable result, the a b

Lasse Reichstein Nielsen 2015/09/29 10:31:05 I probably want it to be a Uint8List. Still, it sh

+ }

+ StringConversionSink startChunkedConversion(Sink<List<int>> sink) {

+ return new _Base64DecoderSink(sink);

+ }

+/**

+ * Helper class implementing BASE64 decoding with intermediate state.

+ */

+class _Base64Decoder {

+ static const int _valueShift = 2;

floitsch 2015/09/28 16:08:28 Same as for the encoder.

Lasse Reichstein Nielsen 2015/09/29 10:31:04 Done.

+ static const int _countMask = 3;

+ /** Invalid character in decoding table. */

+ static const int _invalid = -2;

+ /** Padding character in decoding table. */

+ static const int _padding = -1;

+ // Shorthand to make the table more readable.

+ static const int __ = _invalid;

+ static const int _p = _padding;

+ /**

+ * Mapping from ASCII characters to their index in [_base64Alphabet].

+ *

+ * Uses [_invalid] (-2) for invalid characters and [_padding] (-1) for the padding character.

+ */

+ static final List<int> _inverseAlphabet = new Int8List.fromList([

+ __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,

+ __, __, __, __, __, __, __, __, __, __, __, 62, __, __, __, 63,

+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, __, __, __, _p, __, __,

+ __, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,

+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, __, __, __, __, __,

+ __, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,

+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, __, __, __, __, __,

+ ]);

+ /**

+ * Maintains the intermediate state of a partly-decoded input.

+ *

+ * BASE64 is decoded in chunks of four characters. If a chunk does not

+ * contain a full block, the decoded bits (six per character) of the

+ * available characters are stored in [_state] until the next call to

+ * [_decode] or [_close].

+ *

+ * If no padding has been seen, the value is

+ * `numberOfCharactersSeen | (decodedBits << 2)`

+ * where `numberOfCharactersSeen` is between 0 and 3 and decoded bits

+ * contains six bits per seen character.

floitsch 2015/09/28 16:08:28 seen

Lasse Reichstein Nielsen 2015/09/29 10:31:04 Done.

+ *

+ * If padding has been seen the value is negative. It's the bitwise negation

+ * of the number of remaining allowed padding characters (always ~0 or ~1).

+ *

+ * A state of `0` or `~0` is a valid place to end decoding; all other values

+ * mean that a four-character block has not been completed.

floitsch 2015/09/28 16:08:28 mean

Lasse Reichstein Nielsen 2015/09/29 10:31:04 Done.

+ */

+ int _state = 0;

+ /**

+ * Decodes [input] from [start] to [end].

+ *

+ * Returns a [Uint8List] with the decoded bytes.

+ * If a previous call had an incomplete four-character block, the bits from

+ * those are included in decoding.

+ */

+ Uint8List _decode(String input, int start, int end) {

+ assert(0 <= start);

+ assert(start <= end);

+ assert(end <= input.length);

+ if (_state < 0) {

+ _state = _checkPadding(input, start, end, _state);

+ return null;

+ }

+ if (start == end) return new Uint8List(0);

+ Uint8List buffer = _allocateBuffer(input, start, end, _state);

+ _state = _decodeChunk(input, start, end, buffer, 0, _state);

+ return buffer;

+ }

+ /** Checks that [_state] represents a valid place to end decoding. */

+ void _close(String input, int end) {

+ if (_state < ~0) {

+ throw new FormatException("Missing padding character", input, end);

+ }

+ if (_state > 0) {

+ throw new FormatException("Invalid length, must be multiple of four",

+ input, end);

+ }

+ _state = ~0;

+ }

+ /**

+ * Decodes [input] from [start] to [end].

+ *

+ * Includes the state returned by a previous call in the decoding.

+ * Writes the decoding to [output] at [outIndex], and there must

+ * be room in the output.

+ */

+ static int _decodeChunk(String input, int start, int end,

+ Uint8List output, int outIndex,

+ int state) {

+ const int asciiMask = 127;

+ const int asciiMax = 127;

+ int bits = state >> _valueShift;

+ int count = state & _countMask;

+ int charOr = 0;

floitsch 2015/09/28 16:08:28 Add comment, what the charOr is for.

Lasse Reichstein Nielsen 2015/09/29 10:31:04 Done.

+ for (int i = start; i < end; i++) {

+ var char = input.codeUnitAt(i);

floitsch 2015/09/28 16:08:28 type if you want.

+ charOr |= char;

+ int code = _inverseAlphabet[char & asciiMask];

+ if (code >= 0) {

+ bits = ((bits << 6) | code);

floitsch 2015/09/28 16:08:28 Add & to make it easier for the VM to optimize.

floitsch 2015/09/28 16:08:28 magic "6".

Lasse Reichstein Nielsen 2015/09/29 10:31:04 Done.

Lasse Reichstein Nielsen 2015/09/29 10:31:04 named it "bitsPerCharacter".

+ count = (count + 1) & 3;

+ if (count == 0) {

+ assert(outIndex + 3 <= output.length);

+ output[outIndex++] = (bits >> 16) & _eightBitMask;

+ output[outIndex++] = (bits >> 8) & _eightBitMask;

+ output[outIndex++] = bits & _eightBitMask;

+ bits = 0;

+ }

+ continue;

+ } else if (code == _padding && count > 1) {

+ if (count == 3) {

+ if ((bits & 0x03) != 0) {

+ throw new FormatException(

+ "Invalid encoding before padding", input, i);

+ }

+ output[outIndex++] = bits >> 10;

+ output[outIndex++] = bits >> 2;

+ } else {

+ if ((bits & 0x0F) != 0) {

+ throw new FormatException(

+ "Invalid encoding before padding", input, i);

+ }

+ output[outIndex++] = bits >> 4;

+ }

+ int expectedPadding = 3 - count;

+ state = _checkPadding(input, i + 1, end, ~expectedPadding);

+ return state;

+ }

+ throw new FormatException("Invalid character", input, i);

+ }

+ if (charOr >= 0 && charOr <= asciiMax) {

floitsch 2015/09/28 16:08:28 Same as for the encoder.

Lasse Reichstein Nielsen 2015/09/29 10:31:04 Same answer.

+ return (bits << _valueShift) | count;

+ }

+ // There is an invalid (non-ASCII) character in the input.

+ int i;

+ for (i = start; i < end; i++) {

+ int char = input.codeUnitAt(i);

+ if (char < 0 || char > asciiMax) break;

+ }

+ throw new FormatException("Invalid character", input, i);

+ }

+ /**

+ * Allocates a buffer with room for the decoding of a substring of [input].

+ *

+ * Includes room for the characters in [state], and handles padding correctly.

+ */

+ static Uint8List _allocateBuffer(String input, int start, int end,

+ int state) {

+ assert(state >= 0);

+ int padding = 0;

+ int length = (state & _countMask) + (end - start);

+ if (end > start && input.codeUnitAt(end - 1) == _paddingChar) {

+ padding++;

+ if (end - 1 > start && input.codeUnitAt(end - 2) == _paddingChar) {

+ padding++;

+ }

+ // Three bytes per full four bytes in the input.

+ int bufferLength = (length >> 2) * 3;

+ // If padding was seen, then remove the padding if it was counted, or

+ // add the last partial chunk if it wasn't counted.

+ int remainderLength = length & 3;

+ if (remainderLength == 0) {

+ bufferLength -= padding;

+ } else if (padding != 0 && remainderLength - padding > 1) {

+ bufferLength += remainderLength - 1 - padding;

+ }

+ if (bufferLength > 0) return new Uint8List(bufferLength);

+ // If the input plus state is still less than four characters, no buffer

+ // is needed.

+ return null;

+ }

+ /**

+ * Checks that [input] from [start] to [end] is valid trailing padding.

+ *

+ * The [state] must be negative; `~state` is the number of padding

+ * characters still expected. That means zero or one padding character

+ * (depending on [state]) and nothing else.

+ *

+ * Returns [state] unchanged if the range is empty, and `~0` once the

+ * remaining padding has been consumed.

+ *

+ * Throws a [FormatException] if an expected padding character is missing

+ * or if any character follows the padding.

+ */

+ static int _checkPadding(String input, int start, int end, int state) {

+ assert(state < 0);

+ // Nothing to check yet; keep waiting for the rest of the padding.

+ if (start == end) return state;

+ int expectedPadding = ~state;

+ if (expectedPadding > 0) {

+ int firstChar = input.codeUnitAt(start);

+ if (firstChar != _paddingChar) {

+ throw new FormatException("Missing padding character", input, start);

+ }

+ // The one outstanding padding character has now been seen.

+ state = ~0;

+ start++;

+ }

+ // Anything left after the padding is an error.

+ if (start != end) {

+ throw new FormatException("Invalid character after padding",

+ input, start);

+ }

+ return state;

+ }

+class _Base64DecoderSink extends StringConversionSinkBase with _Base64Decoder {

+ /** Output sink */

+ final ChunkedConversionSink<List<int>> _sink;

+ _Base64DecoderSink(this._sink);

+ void add(String string) {

+ if (string.isEmpty) return;

+ Uint8List buffer = _decode(string, 0, string.length);

+ if (buffer != null) _sink.add(buffer);

+ }

+ void close() {

+ _close(null, null);

+ _sink.close();

+ }

+ void addSlice(String string, int start, int end, bool isLast) {

+ end = RangeError.checkValidRange(start, end, string.length);

+ if (start == end) return;

+ Uint8List buffer = _decode(string, start, end);

+ if (buffer != null) _sink.add(buffer);

+ if (isLast) {

+ _close(string, end);

+ _sink.close();

+ }

« no previous file with comments | « pkg/expect/lib/expect.dart ('k') | sdk/lib/convert/convert.dart » ('j') | no next file with comments »