Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(168)

Side by Side Diff: sdk/lib/convert/base64.dart

Issue 1370073002: Add Base64 codec to dart:convert. (Closed) Base URL: https://github.com/dart-lang/sdk.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « pkg/expect/lib/expect.dart ('k') | sdk/lib/convert/convert.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 part of dart.convert;
6
7 /**
8 * An instance of [Base64Codec].
9 *
10 * This instance provides convenient access to the most common
11 * [BASE64](https://tools.ietf.org/html/rfc4648) use cases.
12 *
13 * It encodes and decodes using the default alphabet and does not allow
14 * any invalid characters in the input to decoding.
sra1 2015/09/28 17:29:42 Is this encoding/decoding compatible with btoa()/a
Lasse Reichstein Nielsen 2015/09/29 10:31:04 Compatible, but not identical. The atob conversio
15 *
16 * Examples:
17 *
18 * var encoded = BASE64.encode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6,
19 * 0x72, 0x67, 0x72, 0xc3, 0xb8, 0x64]);
20 * var decoded = BASE64.decode("YmzDpWLDpnJncsO4ZAo=");
21 */
22 const Base64Codec BASE64 = const Base64Codec();
Lasse Reichstein Nielsen 2015/09/29 10:31:04 This naming is *only* for consistency. I'd prefer
23
24 /**
25 * The default encoding alphabet.
Søren Gjesse 2015/09/28 17:18:52 This is not just "the default encoding alphabet" i
Lasse Reichstein Nielsen 2015/09/29 10:31:05 Rewording. It is the default alphabet for base64 e
26 */
27 const String _base64Alphabet =
28 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
29
30 const int _paddingChar = 0x3d; // '='.
31 const int _sixBitMask = 0x3F;
32 const int _eightBitMask = 0xFF;
33
34
35 /**
36 * A [Base64Codec] allows encoding bytes as BASE64 strings
37 * and decoding BASE64 strings to bytes.
38 */
39 class Base64Codec extends Codec<List<int>, String> {
40 const Base64Codec();
41
42 String get name => "base64";
Søren Gjesse 2015/09/28 17:18:52 Uppercase B?
Lasse Reichstein Nielsen 2015/09/29 10:31:05 I should remove it. This is not an "Encoding", jus
43
44 Base64Encoder get encoder => const Base64Encoder();
45
46 Base64Decoder get decoder => const Base64Decoder();
47 }
48
49 // ------------------------------------------------------------------------
50 // Encoder
51 // ------------------------------------------------------------------------
52
53 class Base64Encoder extends Converter<List<int>, String> {
54 const Base64Encoder();
55
56 String convert(List<int> input) {
57 if (input.isEmpty) return "";
58 var encoder = new _Base64Encoder();
floitsch 2015/09/28 16:08:28 Up to you if you want to add a type here.
Lasse Reichstein Nielsen 2015/09/29 10:31:04 Not really worth it.
59 Uint8List buffer = encoder._encode(input, 0, input.length, true);
60 return new String.fromCharCodes(buffer);
61 }
62
63 /**
64 * Starts a chunked conversion.
65 */
66 ByteConversionSink startChunkedConversion(Sink<String> sink) {
67 if (sink is StringConversionSink) {
68 return new _Utf8Base64EncoderSink(sink.asUtf8Sink());
69 }
70 return new _AsciiBase64EncoderSink(sink);
71 }
72 }
73
74 /**
75 * Helper class for encoding bytes to BASE64.
floitsch 2015/09/28 16:08:28 usage is not consistent. Sometimes BASE64. Here BA
Lasse Reichstein Nielsen 2015/09/29 10:31:04 I think the name should be "base64" or "base 64 en
76 */
77 class _Base64Encoder {
floitsch 2015/09/28 16:08:28 I'm not a fan of reusing a class as a mixin. I wo
Lasse Reichstein Nielsen 2015/09/29 10:31:05 That would be pretty redundant for a private helpe
78 /** Intermediate state shift of the bits stored in the state. */
floitsch 2015/09/28 16:08:28 Intermediate /// Shift-count to extract the value
Lasse Reichstein Nielsen 2015/09/29 10:31:05 Done.
79 static const int _valueShift = 2;
80 /** Intermediate state encoding of the number of bytes stored in the state. */
floitsch 2015/09/28 16:08:28 /// Mask to extract the XYZ count from the state.
Lasse Reichstein Nielsen 2015/09/29 10:31:05 Done.
81 static const int _countMask = 3;
82
83 int _state = 0;
floitsch 2015/09/28 16:08:28 Might be nice to just write getters and setters.
Lasse Reichstein Nielsen 2015/09/29 10:31:05 I use the extraction in static methods, so I added
84
85 Uint8List _getBuffer(int bufferLength) => new Uint8List(bufferLength);
floitsch 2015/09/28 16:08:28 _createBuffer I do realize that "_buffer()" in my
Lasse Reichstein Nielsen 2015/09/29 10:31:05 Yes, "buffer" as a verb is not going to work, it's
86
87 Uint8List _encode(List<int> bytes, int start, int end, bool isLast) {
88 assert(0 <= start);
89 assert(start <= end);
90 assert(bytes == null || end <= bytes.length);
91 int length = end - start;
92
93 int count = _state & _countMask;
94 int byteCount = (count + length);
95 int fullChunks = byteCount ~/ 3;
96 int partialChunkLength = byteCount - fullChunks * 3;
97 int bufferLength = fullChunks * 4;
98 if (isLast && partialChunkLength > 0) {
99 bufferLength += 4; // Room for padding.
100 }
101 var output = _getBuffer(bufferLength);
floitsch 2015/09/28 16:08:28 type if you want to.
102 _state = _encodeChunk(bytes, start, end, isLast, output, 0, _state);
103 if (bufferLength > 0) return output;
104 // If the input plus the data in _state is still less than three bytes,
floitsch 2015/09/28 16:08:28 -still-
Lasse Reichstein Nielsen 2015/09/29 10:31:05 Done.
105 // there may not be any output.
floitsch 2015/09/28 16:08:28 s/may not be any/is no
Lasse Reichstein Nielsen 2015/09/29 10:31:04 Unless isLast is true, in which case there is outp
106 return null;
107 }
108
109 static int _encodeChunk(List<int> bytes, int start, int end, bool isLast,
110 Uint8List output, int outputIndex, int state) {
111 int bits = state >> _valueShift;
112 // Count number of missing bytes in three-byte chunk.
113 int count = 3 - (state & _countMask);
114
115 int byteOr = 0;
floitsch 2015/09/28 16:08:28 // The input must be a list of bytes. // All input
Lasse Reichstein Nielsen 2015/09/29 10:31:04 Done.
116 for (int i = start; i < end; i++) {
117 int byte = bytes[i];
118 byteOr |= byte;
119 bits = (bits << 8) | byte;
120 count--;
floitsch 2015/09/28 16:08:28 I find "count" to be misleading (but it's not too
Lasse Reichstein Nielsen 2015/09/29 10:31:05 Renamed to "expectedChars".
121 if (count == 0) {
122 output[outputIndex++] =
123 _base64Alphabet.codeUnitAt((bits >> 18) & _sixBitMask);
124 output[outputIndex++] =
125 _base64Alphabet.codeUnitAt((bits >> 12) & _sixBitMask);
126 output[outputIndex++] =
127 _base64Alphabet.codeUnitAt((bits >> 6) & _sixBitMask);
128 output[outputIndex++] =
129 _base64Alphabet.codeUnitAt(bits & _sixBitMask);
130 count = 3;
131 bits = 0;
132 }
133 }
134 if (byteOr >= 0 && byteOr <= 255) {
floitsch 2015/09/28 16:08:28 I almost prefer to have the error-case guarded: i
Lasse Reichstein Nielsen 2015/09/29 10:31:04 I like to keep the non-error flow connected if pos
135 if (isLast && count < 3) {
136 _writeFinalChunk(output, outputIndex, 3 - count, bits);
137 return 0;
138 }
139 return (bits << _valueShift) | (3 - count);
140 }
141
142 // There was an invalid byte value somewhere in the input - find it!
143 int i = start;
144 while (i < end) {
145 int byte = bytes[i];
146 if (byte < 0 || byte > 255) break;
147 i++;
148 }
149 throw new ArgumentError.value(bytes,
150 "Not a byte value at index $i: 0x${bytes[i].toRadixString(16)}");
151 }
152
153 /**
154 * Writes a final encoded four-character chunk.
155 *
156 * Only used when the [state] contains a partial (1 or 2 byte)
157 * input.
158 */
159 static void _writeFinalChunk(Uint8List output, int outputIndex,
160 int count, int bits) {
161 assert(count > 0);
162 if (count == 1) {
Søren Gjesse 2015/09/28 17:18:52 Maybe this could be if (count == 2) { outpu
Lasse Reichstein Nielsen 2015/09/29 10:31:04 It's >> 2, << 4 for one of the branches, so that d
163 output[outputIndex++] =
164 _base64Alphabet.codeUnitAt((bits >> 2) & _sixBitMask);
165 output[outputIndex++] =
166 _base64Alphabet.codeUnitAt((bits << 4) & _sixBitMask);
167 output[outputIndex++] = _paddingChar;
168 output[outputIndex++] = _paddingChar;
169 } else {
170 assert(count == 2);
171 output[outputIndex++] =
172 _base64Alphabet.codeUnitAt((bits >> 10) & _sixBitMask);
173 output[outputIndex++] =
174 _base64Alphabet.codeUnitAt((bits >> 4) & _sixBitMask);
175 output[outputIndex++] =
176 _base64Alphabet.codeUnitAt((bits << 2) & _sixBitMask);
177 output[outputIndex++] = _paddingChar;
178 }
179 }
180 }
181
/**
 * Common base of the chunked encoder sinks.
 *
 * Maps the [ByteConversionSink] entry points ([add], [addSlice], [close])
 * onto a single [_add] call that subclasses implement.
 */
abstract class _Base64EncoderSink extends ByteConversionSinkBase
    with _Base64Encoder {
  /**
   * Handles the bytes of [source] from [start] to [end].
   *
   * [isLast] is true when no further input will follow.
   */
  void _add(List<int> source, int start, int end, bool isLast);

  /** Forwards all of [source] as a non-final chunk. */
  void add(List<int> source) => _add(source, 0, source.length, false);

  /**
   * Forwards the range [start]..[end] of [source].
   *
   * Throws an [ArgumentError] if [end] is null; the range is validated with
   * [RangeError.checkValidRange] before it is forwarded.
   */
  void addSlice(List<int> source, int start, int end, bool isLast) {
    if (end == null) {
      throw new ArgumentError.notNull("end");
    }
    RangeError.checkValidRange(start, end, source.length);
    _add(source, start, end, isLast);
  }

  /** Signals the end of input by forwarding an empty, final chunk. */
  void close() => _add(null, 0, 0, true);
}
200
201 class _AsciiBase64EncoderSink extends _Base64EncoderSink {
202 final ChunkedConversionSink<String> _sink;
203 /**
Søren Gjesse 2015/09/28 17:18:52 Shouldn't be dartdoc.
Lasse Reichstein Nielsen 2015/09/29 10:31:04 Why not? An editor should still show it to you, ev
204 * Reused buffer.
205 *
206 * Since the buffer isn't released to the sink, only the string created
207 * from it, the buffer can be reused between chunks.
208 */
209 Uint8List _bufferCache;
210
211 _AsciiBase64EncoderSink(this._sink);
212
213 Uint8List _getBuffer(int bufferLength) {
214 if (_bufferCache == null || _bufferCache.length < bufferLength) {
215 _bufferCache = new Uint8List(bufferLength);
216 }
217 return new Uint8List.view(_bufferCache.buffer, 0, bufferLength);
218 }
219
220 void _add(List<int> source, int start, int end, bool isLast) {
221 Uint8List buffer = _encode(source, start, end, isLast);
222 if (buffer != null) {
223 String string = new String.fromCharCodes(buffer);
224 _sink.add(string);
225 }
226 if (isLast) {
227 _sink.close();
228 }
229 }
230 }
231
/**
 * Chunked encoder sink that passes the encoded characters on as bytes.
 *
 * Every encoded chunk is handed to the wrapped [ByteConversionSink] through
 * `addSlice`, so no intermediate string is created.
 */
class _Utf8Base64EncoderSink extends _Base64EncoderSink {
  /** Receives the encoded output. */
  final ByteConversionSink _sink;

  // The superclass has no constructor that takes an argument, so nothing is
  // forwarded to it.
  _Utf8Base64EncoderSink(this._sink);

  /**
   * Encodes [source] from [start] to [end] and forwards the result.
   *
   * When [isLast] is true the wrapped sink is always finished: through
   * the `isLast` flag of `addSlice` when there is output, and through an
   * explicit `close` when the encoder produced nothing for this chunk.
   */
  void _add(List<int> source, int start, int end, bool isLast) {
    Uint8List buffer = _encode(source, start, end, isLast);
    if (buffer != null) {
      _sink.addSlice(buffer, 0, buffer.length, isLast);
    } else if (isLast) {
      // No output for the final chunk; the sink must still be closed.
      _sink.close();
    }
  }
}
243
244 // ------------------------------------------------------------------------
245 // Decoder
246 // ------------------------------------------------------------------------
247
248 class Base64Decoder extends Converter<String, List<int>> {
249 const Base64Decoder();
250
251 List<int> convert(String input) {
252 if (input.isEmpty) return new Uint8List(0);
253 int length = input.length;
254 if (length % 4 != 0) {
255 throw new FormatException("Invalid length, must be multiple of four",
256 input, length);
257 }
258 var decoder = new _Base64Decoder();
floitsch 2015/09/28 16:08:28 type if you want.
259 Uint8List buffer = decoder._decode(input, 0, input.length);
260 decoder._close(input, input.length);
261 return buffer;
sra1 2015/09/28 17:29:42 If you are OK with an unmodifiable result, the a b
Lasse Reichstein Nielsen 2015/09/29 10:31:05 I probably want it to be a Uint8List. Still, it sh
262 }
263
264 StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
265 return new _Base64DecoderSink(sink);
266 }
267 }
268
269 /**
270 * Helper class implementing BASE64 decoding with intermediate state.
271 */
272 class _Base64Decoder {
273 static const int _valueShift = 2;
floitsch 2015/09/28 16:08:28 Same as for the encoder.
Lasse Reichstein Nielsen 2015/09/29 10:31:04 Done.
274 static const int _countMask = 3;
275
276 /** Invalid character in decoding table. */
277 static const int _invalid = -2;
278 /** Padding character in decoding table. */
279 static const int _padding = -1;
280
281 // Shorthand to make the table more readable.
282 static const int __ = _invalid;
283 static const int _p = _padding;
284
285 /**
286 * Mapping from ASCII characters to their index in [_base64Alphabet].
287 *
288 * Uses -2 for invalid characters and -1 for the padding character.
289 */
290 static final List<int> _inverseAlphabet = new Int8List.fromList([
291 __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
292 __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
293 __, __, __, __, __, __, __, __, __, __, __, 62, __, __, __, 63,
294 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, __, __, __, _p, __, __,
295 __, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
296 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, __, __, __, __, __,
297 __, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
298 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, __, __, __, __, __,
299 ]);
300
301 /**
302 * Maintains the intermediate state of a partly-decoded input.
303 *
304 * BASE64 is decoded in chunks of four characters. If a chunk does not
305 * contain a full block, the decoded bits (six per character) of the
306 * available characters are stored in [_state] until the next call to
307 * [_decode] or [_close].
308 *
309 * If no padding has been seen, the value is
310 * `numberOfCharactersSeen | (decodedBits << 2)`
311 * where `numberOfCharactersSeen` is between 0 and 3 and decoded bits
312 * contains six bits per seen character.
floitsch 2015/09/28 16:08:28 seen
Lasse Reichstein Nielsen 2015/09/29 10:31:04 Done.
313 *
314 * If padding has been seen the value is negative. It's the bitwise negation
315 * of the number of remaining allowed padding characters (always ~0 or ~1).
316 *
317 * States `0` and `~0` are valid places to end decoding; all other values
318 * mean that a four-character block has not been completed.
floitsch 2015/09/28 16:08:28 mean
Lasse Reichstein Nielsen 2015/09/29 10:31:04 Done.
319 */
320 int _state = 0;
321
/**
 * Decodes the characters of [input] from [start] to [end].
 *
 * Bits left over from an incomplete four-character block in an earlier
 * call are included in the decoding, and [_state] is updated for the
 * next call.
 *
 * Returns the buffer holding the decoded bytes. The result is `null` when
 * padding has already been seen (the range is then only checked as
 * padding) or when no buffer was allocated for this range; it is an empty
 * list when the range itself is empty.
 */
Uint8List _decode(String input, int start, int end) {
  assert(start >= 0);
  assert(end >= start);
  assert(input.length >= end);
  bool paddingSeen = _state < 0;
  if (!paddingSeen) {
    if (end == start) return new Uint8List(0);
    Uint8List decoded = _allocateBuffer(input, start, end, _state);
    _state = _decodeChunk(input, start, end, decoded, 0, _state);
    return decoded;
  }
  // Only padding may follow padding; nothing is decoded from this range.
  _state = _checkPadding(input, start, end, _state);
  return null;
}
342
/**
 * Verifies that decoding may end in the current [_state].
 *
 * Throws a [FormatException] located at [end] in [input] when a
 * four-character block is incomplete or a padding character is still
 * expected. Afterwards [_state] is set to `~0`.
 */
void _close(String input, int end) {
  const int finished = ~0;
  if (_state > 0) {
    // Characters of an unfinished block are still pending.
    throw new FormatException("Invalid length, must be multiple of four",
                              input, end);
  }
  if (_state < finished) {
    // Padding was started but another padding character is still expected.
    throw new FormatException("Missing padding character", input, end);
  }
  _state = finished;
}
354
355 /**
356 * Decodes [input] from [start] to [end].
357 *
358 * Includes the state returned by a previous call in the decoding.
359 * Writes the decoding to [output] at [outIndex], and there must
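360 * be room in the output. NOTE(review): when a padding character is reached, this method returns before the non-ASCII check that follows the loop runs, so a non-ASCII character earlier in the same call is not reported; confirm whether that is intended.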
361 */
362 static int _decodeChunk(String input, int start, int end,
363 Uint8List output, int outIndex,
364 int state) {
365 const int asciiMask = 127;
366 const int asciiMax = 127;
367 int bits = state >> _valueShift;
368 int count = state & _countMask;
369 int charOr = 0;
floitsch 2015/09/28 16:08:28 Add comment, what the charOr is for.
Lasse Reichstein Nielsen 2015/09/29 10:31:04 Done.
370 for (int i = start; i < end; i++) {
371 var char = input.codeUnitAt(i);
floitsch 2015/09/28 16:08:28 type if you want.
372 charOr |= char;
373 int code = _inverseAlphabet[char & asciiMask];
374 if (code >= 0) {
375 bits = ((bits << 6) | code);
floitsch 2015/09/28 16:08:28 Add & to make it easier for the VM to optimize.
floitsch 2015/09/28 16:08:28 magic "6".
Lasse Reichstein Nielsen 2015/09/29 10:31:04 Done.
Lasse Reichstein Nielsen 2015/09/29 10:31:04 named it "bitsPerCharacter".
376 count = (count + 1) & 3;
377 if (count == 0) {
378 assert(outIndex + 3 <= output.length);
379 output[outIndex++] = (bits >> 16) & _eightBitMask;
380 output[outIndex++] = (bits >> 8) & _eightBitMask;
381 output[outIndex++] = bits & _eightBitMask;
382 bits = 0;
383 }
384 continue;
385 } else if (code == _padding && count > 1) {
386 if (count == 3) {
387 if ((bits & 0x03) != 0) {
388 throw new FormatException(
389 "Invalid encoding before padding", input, i);
390 }
391 output[outIndex++] = bits >> 10;
392 output[outIndex++] = bits >> 2;
393 } else {
394 if ((bits & 0x0F) != 0) {
395 throw new FormatException(
396 "Invalid encoding before padding", input, i);
397 }
398 output[outIndex++] = bits >> 4;
399 }
400 int expectedPadding = 3 - count;
401 state = _checkPadding(input, i + 1, end, ~expectedPadding);
402 return state;
403 }
404 throw new FormatException("Invalid character", input, i);
405 }
406 if (charOr >= 0 && charOr <= asciiMax) {
floitsch 2015/09/28 16:08:28 Same as for the encoder.
Lasse Reichstein Nielsen 2015/09/29 10:31:04 Same answer.
407 return (bits << _valueShift) | count;
408 }
409 // There is an invalid (non-ASCII) character in the input.
410 int i;
411 for (i = start; i < end; i++) {
412 int char = input.codeUnitAt(i);
413 if (char < 0 || char > asciiMax) break;
414 }
415 throw new FormatException("Invalid character", input, i);
416 }
417
/**
 * Creates the output buffer for decoding [input] from [start] to [end].
 *
 * The size accounts for the characters already recorded in [state] and
 * for up to two padding characters at the end of the range.
 *
 * Returns `null` when the computed size is zero.
 */
static Uint8List _allocateBuffer(String input, int start, int end,
                                 int state) {
  assert(state >= 0);
  // Count at most two padding characters at the very end of the range.
  int trailingPadding = 0;
  for (int i = end - 1; i >= start && trailingPadding < 2; i--) {
    if (input.codeUnitAt(i) != _paddingChar) break;
    trailingPadding++;
  }
  int totalChars = (end - start) + (state & _countMask);
  // Three bytes for every complete group of four characters.
  int byteCount = (totalChars ~/ 4) * 3;
  int leftover = totalChars % 4;
  if (leftover == 0) {
    // The padding characters were counted as part of a full group.
    byteCount -= trailingPadding;
  } else if (trailingPadding > 0 && leftover - trailingPadding > 1) {
    // A padded partial group that was not counted above.
    byteCount += leftover - trailingPadding - 1;
  }
  return byteCount > 0 ? new Uint8List(byteCount) : null;
}
449
/**
 * Checks that [input] from [start] to [end] is valid remaining padding.
 *
 * [state] must be negative; `~state` is the number of padding characters
 * that may still follow. The range must consist of that one expected
 * padding character, if any, and nothing else.
 *
 * Returns the new state: [state] unchanged when the range is empty or no
 * padding was expected, and `~0` once the expected padding character has
 * been consumed.
 *
 * Throws a [FormatException] if the expected padding character is missing
 * or if any character follows the padding.
 */
static int _checkPadding(String input, int start, int end, int state) {
  assert(state < 0);
  if (start == end) return state;
  int expectedPadding = ~state;
  if (expectedPadding > 0) {
    int firstChar = input.codeUnitAt(start);
    if (firstChar != _paddingChar) {
      // Report against [input]; there is no variable named `string` here.
      throw new FormatException("Missing padding character", input, start);
    }
    state = ~0;
    start++;
  }
  if (start != end) {
    throw new FormatException("Invalid character after padding",
                              input, start);
  }
  return state;
}
474 }
475
/**
 * Chunked decoder sink.
 *
 * Decodes each added string or slice and adds the resulting bytes, if any,
 * to the wrapped sink.
 */
class _Base64DecoderSink extends StringConversionSinkBase with _Base64Decoder {
  /**
   * Output sink.
   *
   * Typed as a plain [Sink] because that is what
   * `Base64Decoder.startChunkedConversion` receives and passes in; only
   * `add` and `close` are used.
   */
  final Sink<List<int>> _sink;

  _Base64DecoderSink(this._sink);

  /** Decodes all of [string]; an empty string is ignored. */
  void add(String string) {
    if (string.isEmpty) return;
    Uint8List buffer = _decode(string, 0, string.length);
    if (buffer != null) _sink.add(buffer);
  }

  /**
   * Checks that the input ended at a valid position and closes the output.
   *
   * No source string is available here, so the check is made with a null
   * source and offset.
   */
  void close() {
    _close(null, null);
    _sink.close();
  }

  /**
   * Decodes [string] from [start] to [end].
   *
   * When [isLast] is true the end-of-input check is made and the output
   * sink is closed, even if the slice itself is empty.
   */
  void addSlice(String string, int start, int end, bool isLast) {
    end = RangeError.checkValidRange(start, end, string.length);
    if (start != end) {
      Uint8List buffer = _decode(string, start, end);
      if (buffer != null) _sink.add(buffer);
    }
    // An empty final slice must still finish the conversion.
    if (isLast) {
      _close(string, end);
      _sink.close();
    }
  }
}
OLDNEW
« no previous file with comments | « pkg/expect/lib/expect.dart ('k') | sdk/lib/convert/convert.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698