Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: sdk/lib/convert/base64.dart

Issue 1370073002: Add Base64 codec to dart:convert. (Closed) Base URL: https://github.com/dart-lang/sdk.git@master
Patch Set: Fix for dart2js Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « pkg/expect/lib/expect.dart ('k') | sdk/lib/convert/convert.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 part of dart.convert;
6
/**
 * The shared, default [Base64Codec] instance.
 *
 * Gives convenient access to
 * [base64](https://tools.ietf.org/html/rfc4648) encoding and decoding
 * without having to create a codec first.
 *
 * The codec works with the default base64 alphabet, rejects every invalid
 * character while decoding, and requires padding.
 *
 * Examples:
 *
 *     var encoded = BASE64.encode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6,
 *                                  0x72, 0x67, 0x72, 0xc3, 0xb8, 0x64]);
 *     var decoded = BASE64.decode("YmzDpWLDpnJncsO4ZAo=");
 */
const Base64Codec BASE64 = const Base64Codec();

// Shared by the encoder and the decoder helper classes below.
const int _paddingChar = 0x3d;  // Code unit of '=', the padding character.
26
/**
 * A codec for [base64](https://tools.ietf.org/html/rfc4648) data.
 *
 * A [Base64Codec] turns lists of bytes into ASCII strings and turns valid
 * base64 strings back into the bytes they represent.
 *
 * Only the simplest RFC 4648 base-64 encoding is handled.
 * Decoding rejects invalid characters, and padding is both required and
 * generated, so that encoded text is always a multiple of four characters
 * long.
 */
class Base64Codec extends Codec<List<int>, String> {
  const Base64Codec();

  /** The converter from bytes to base64 text. */
  Base64Encoder get encoder {
    return const Base64Encoder();
  }

  /** The converter from base64 text to bytes. */
  Base64Decoder get decoder {
    return const Base64Decoder();
  }
}
45
46 // ------------------------------------------------------------------------
47 // Encoder
48 // ------------------------------------------------------------------------
49
/**
 * Converter that base64 encodes lists of bytes.
 *
 * The output is an ASCII string restricted to the base64 alphabet and the
 * padding character.
 */
class Base64Encoder extends Converter<List<int>, String> {
  const Base64Encoder();

  /**
   * Encodes all of [input] in one step.
   *
   * An empty [input] gives the empty string.
   */
  String convert(List<int> input) {
    if (input.isEmpty) return "";
    // A fresh helper has no pending state, and `isLast` is true, so any
    // trailing partial group is written with padding.
    Uint8List charCodes =
        new _Base64Encoder().encode(input, 0, input.length, true);
    return new String.fromCharCodes(charCodes);
  }

  /**
   * Starts a chunked encoding that writes its output to [sink].
   *
   * If [sink] is a [StringConversionSink], the encoded characters are passed
   * on as bytes through its UTF-8 sink instead of being turned into strings.
   */
  ByteConversionSink startChunkedConversion(Sink<String> sink) {
    return sink is StringConversionSink
        ? new _Utf8Base64EncoderSink(sink.asUtf8Sink())
        : new _AsciiBase64EncoderSink(sink);
  }

  /** Encodes the byte chunks of [stream] as they arrive. */
  Stream<String> bind(Stream<List<int>> stream) {
    return new Stream<String>.eventTransformed(
        stream,
        (EventSink sink) =>
            new _ConverterStreamEventSink<List<int>, String>(this, sink));
  }
}
80
/**
 * Helper class for encoding bytes to BASE-64.
 *
 * The bytes of an incomplete three-byte group are kept in [_state] between
 * calls to [encode], so the input can be supplied in arbitrary chunks.
 */
class _Base64Encoder {
  /** The RFC 4648 base64 encoding alphabet. */
  static const String _base64Alphabet =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  /** Shift-count to extract the values stored in [_state]. */
  static const int _valueShift = 2;

  /** Mask to extract the count value stored in [_state]. */
  static const int _countMask = 3;

  /** Mask selecting six bits, the index of one character in the alphabet. */
  static const int _sixBitMask = 0x3F;

  /**
   * Intermediate state between chunks.
   *
   * Encoding handles three bytes at a time.
   * If fewer than three bytes has been seen, this value encodes
   * the number of bytes seen (0, 1 or 2) and their values.
   */
  int _state = 0;

  /** Encode count and bits into a value to be stored in [_state]. */
  static int _encodeState(int count, int bits) {
    assert(count <= _countMask);
    return bits << _valueShift | count;
  }

  /** Extract bits from encoded state. */
  static int _stateBits(int state) => state >> _valueShift;

  /** Extract count from encoded state. */
  static int _stateCount(int state) => state & _countMask;

  /**
   * Create a [Uint8List] with the provided length.
   */
  Uint8List createBuffer(int bufferLength) => new Uint8List(bufferLength);

  /**
   * Encode [bytes] from [start] to [end] and the bits in [_state].
   *
   * Returns a [Uint8List] of the ASCII codes of the encoded data.
   *
   * If the input, including left over [_state] from earlier encodings,
   * are not a multiple of three bytes, then the partial state is stored
   * back into [_state].
   * If [isLast] is true, partial state is encoded in the output instead,
   * with the necessary padding.
   *
   * Returns `null` if there is no output.
   */
  Uint8List encode(List<int> bytes, int start, int end, bool isLast) {
    assert(0 <= start);
    assert(start <= end);
    assert(bytes == null || end <= bytes.length);
    int length = end - start;

    int count = _stateCount(_state);
    int byteCount = (count + length);
    int fullChunks = byteCount ~/ 3;
    int partialChunkLength = byteCount - fullChunks * 3;
    int bufferLength = fullChunks * 4;
    if (isLast && partialChunkLength > 0) {
      bufferLength += 4;  // Room for padding.
    }
    var output = createBuffer(bufferLength);
    _state = encodeChunk(bytes, start, end, isLast, output, 0, _state);
    if (bufferLength > 0) return output;
    // If the input plus the data in state is still less than three bytes,
    // there may not be any output.
    return null;
  }

  /**
   * Encodes [bytes] from [start] to [end] into [output] at [outputIndex].
   *
   * The [state] is a value created by [_encodeState] holding the bytes left
   * over from an earlier call. Each completed three-byte group is written to
   * [output] as four characters of the alphabet.
   *
   * If [isLast] is true, a trailing partial group is written with padding by
   * [writeFinalChunk] and the returned state is `0`. Otherwise the trailing
   * partial group is returned as a new state value.
   *
   * Throws an [ArgumentError] if a value in the range is not in 0..255.
   */
  static int encodeChunk(List<int> bytes, int start, int end, bool isLast,
                         Uint8List output, int outputIndex, int state) {
    int bits = _stateBits(state);
    // Count number of missing bytes in three-byte chunk.
    int expectedChars = 3 - _stateCount(state);

    // The input must be a list of bytes (integers in the range 0..255).
    // The value of `byteOr` will be the bitwise or of all the values in
    // `bytes` and a later check will validate that they were all valid bytes.
    int byteOr = 0;
    for (int i = start; i < end; i++) {
      int byte = bytes[i];
      byteOr |= byte;
      bits = ((bits << 8) | byte) & 0xFFFFFF;  // Never store more than 24 bits.
      expectedChars--;
      if (expectedChars == 0) {
        output[outputIndex++] =
            _base64Alphabet.codeUnitAt((bits >> 18) & _sixBitMask);
        output[outputIndex++] =
            _base64Alphabet.codeUnitAt((bits >> 12) & _sixBitMask);
        output[outputIndex++] =
            _base64Alphabet.codeUnitAt((bits >> 6) & _sixBitMask);
        output[outputIndex++] =
            _base64Alphabet.codeUnitAt(bits & _sixBitMask);
        expectedChars = 3;
        bits = 0;
      }
    }
    if (byteOr >= 0 && byteOr <= 255) {
      if (isLast && expectedChars < 3) {
        writeFinalChunk(output, outputIndex, 3 - expectedChars, bits);
        return 0;
      }
      return _encodeState(3 - expectedChars, bits);
    }

    // There was an invalid byte value somewhere in the input - find it!
    int i = start;
    while (i < end) {
      int byte = bytes[i];
      if (byte < 0 || byte > 255) break;
      i++;
    }
    // The second argument of ArgumentError.value is the argument's name and
    // the third is the message, so the description must go in third place.
    throw new ArgumentError.value(bytes, "bytes",
        "Not a byte value at index $i: 0x${bytes[i].toRadixString(16)}");
  }

  /**
   * Writes a final encoded four-character chunk.
   *
   * Only used when the [_state] contains a partial (1 or 2 byte)
   * input.
   */
  static void writeFinalChunk(Uint8List output, int outputIndex,
                              int count, int bits) {
    assert(count > 0);
    if (count == 1) {
      // Eight bits of input: two characters and two padding characters.
      output[outputIndex++] =
          _base64Alphabet.codeUnitAt((bits >> 2) & _sixBitMask);
      output[outputIndex++] =
          _base64Alphabet.codeUnitAt((bits << 4) & _sixBitMask);
      output[outputIndex++] = _paddingChar;
      output[outputIndex++] = _paddingChar;
    } else {
      assert(count == 2);
      // Sixteen bits of input: three characters and one padding character.
      output[outputIndex++] =
          _base64Alphabet.codeUnitAt((bits >> 10) & _sixBitMask);
      output[outputIndex++] =
          _base64Alphabet.codeUnitAt((bits >> 4) & _sixBitMask);
      output[outputIndex++] =
          _base64Alphabet.codeUnitAt((bits << 2) & _sixBitMask);
      output[outputIndex++] = _paddingChar;
    }
  }
}
233
/**
 * A [_Base64Encoder] that keeps its output buffer around between calls.
 */
class _BufferCachingBase64Encoder extends _Base64Encoder {
  /**
   * Buffer reused between chunks.
   *
   * Reuse is possible when the buffer is not handed over to the sink, but
   * only used to create another value (a string).
   */
  Uint8List bufferCache;

  /**
   * Returns a view of exactly [bufferLength] bytes on the cached buffer.
   *
   * A new cache is allocated first if there is none yet, or if the current
   * one is too short.
   */
  Uint8List createBuffer(int bufferLength) {
    Uint8List cache = bufferCache;
    if (cache == null || cache.length < bufferLength) {
      cache = new Uint8List(bufferLength);
      bufferCache = cache;
    }
    // Return a view of the buffer, so it has the requested length.
    return new Uint8List.view(cache.buffer, 0, bufferLength);
  }
}
251
/**
 * Shared base of the chunked base64 encoder sinks.
 *
 * Every public sink operation is forwarded to [_add], which subclasses
 * implement to encode the bytes and emit the result.
 */
abstract class _Base64EncoderSink extends ByteConversionSinkBase {
  /** Encodes all of [source] as a chunk that is not the last one. */
  void add(List<int> source) => _add(source, 0, source.length, false);

  /** Ends the input by adding an empty final chunk. */
  void close() => _add(null, 0, 0, true);

  /**
   * Encodes [source] from [start] to [end].
   *
   * Throws if [end] is `null` or if the range is not valid for [source].
   */
  void addSlice(List<int> source, int start, int end, bool isLast) {
    if (end == null) throw new ArgumentError.notNull("end");
    RangeError.checkValidRange(start, end, source.length);
    _add(source, start, end, isLast);
  }

  /** Encodes the given range; the range has been checked by the caller. */
  void _add(List<int> source, int start, int end, bool isLast);
}
269
/**
 * Encoder sink that emits the encoded chunks as strings.
 */
class _AsciiBase64EncoderSink extends _Base64EncoderSink {
  /** Receives the encoded strings. */
  final ChunkedConversionSink<String> _sink;

  // The encoded bytes are only used to create a string, so the encoder is
  // one that reuses its buffer.
  final _Base64Encoder _encoder = new _BufferCachingBase64Encoder();

  _AsciiBase64EncoderSink(this._sink);

  void _add(List<int> source, int start, int end, bool isLast) {
    Uint8List charCodes = _encoder.encode(source, start, end, isLast);
    // The encoder returns null when the chunk produced no output.
    if (charCodes != null) {
      _sink.add(new String.fromCharCodes(charCodes));
    }
    if (isLast) _sink.close();
  }
}
288
/**
 * Encoder sink that emits the encoded chunks as bytes.
 *
 * The encoded characters are passed to a [ByteConversionSink] as their
 * ASCII codes, without being converted to strings first.
 */
class _Utf8Base64EncoderSink extends _Base64EncoderSink {
  /** Receives the ASCII codes of the encoded characters. */
  final ByteConversionSink _sink;
  final _Base64Encoder _encoder = new _Base64Encoder();

  _Utf8Base64EncoderSink(this._sink);

  void _add(List<int> source, int start, int end, bool isLast) {
    Uint8List buffer = _encoder.encode(source, start, end, isLast);
    if (buffer != null) {
      // Passing `isLast` on lets the output sink close itself.
      _sink.addSlice(buffer, 0, buffer.length, isLast);
    } else if (isLast) {
      // The final chunk produced no output (the input so far was a multiple
      // of three bytes), so the output sink has to be closed explicitly.
      _sink.close();
    }
  }
}
302
303 // ------------------------------------------------------------------------
304 // Decoder
305 // ------------------------------------------------------------------------
306
/**
 * Converter that decodes base64 text to a list of bytes.
 */
class Base64Decoder extends Converter<String, List<int>> {
  const Base64Decoder();

  /**
   * Decodes all of [input] in one step.
   *
   * An empty [input] gives an empty list.
   * Throws a [FormatException] if the length of [input] is not a multiple
   * of four, or if the decoding helper rejects the contents.
   */
  List<int> convert(String input) {
    if (input.isEmpty) return new Uint8List(0);
    int inputLength = input.length;
    if (inputLength % 4 != 0) {
      throw new FormatException("Invalid length, must be multiple of four",
                                input, inputLength);
    }
    _Base64Decoder helper = new _Base64Decoder();
    Uint8List bytes = helper.decode(input, 0, inputLength);
    // Checks that the input ended on a complete four-character block.
    helper.close(input, inputLength);
    return bytes;
  }

  /** Starts a chunked decoding that writes its output to [sink]. */
  StringConversionSink startChunkedConversion(Sink<List<int>> sink) =>
      new _Base64DecoderSink(sink);
}
327
/**
 * Helper class implementing base64 decoding with intermediate state.
 */
class _Base64Decoder {
  /** Shift-count to extract the values stored in [_state]. */
  static const int _valueShift = 2;

  /** Mask to extract the count value stored in [_state]. */
  static const int _countMask = 3;

  /** Invalid character in decoding table. */
  static const int _invalid = -2;

  /** Padding character in decoding table. */
  static const int _padding = -1;

  // Shorthands to make the table more readable.
  static const int __ = _invalid;
  static const int _p = _padding;

  /**
   * Mapping from ASCII characters to their index in the base64 alphabet.
   *
   * Uses [_invalid] for invalid indices and [_padding] for the padding
   * character.
   */
  static final List<int> _inverseAlphabet = new Int8List.fromList([
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
    __, __, __, __, __, __, __, __, __, __, __, 62, __, __, __, 63,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, __, __, __, _p, __, __,
    __,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, __, __, __, __, __,
    __, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, __, __, __, __, __,
  ]);

  /**
   * Maintains the intermediate state of a partly-decoded input.
   *
   * BASE-64 is decoded in chunks of four characters. If a chunk does not
   * contain a full block, the decoded bits (six per character) of the
   * available characters are stored in [_state] until the next call to
   * [decode] or [close].
   *
   * If no padding has been seen, the value is
   *   `numberOfCharactersSeen | (decodedBits << 2)`
   * where `numberOfCharactersSeen` is between 0 and 3 and decoded bits
   * contains six bits per seen character.
   *
   * If padding has been seen the value is negative. It's the bitwise negation
   * of the number of remaining allowed padding characters (always ~0 or ~1).
   *
   * A state of `0` or `~0` are valid places to end decoding, all other values
   * mean that a four-character block has not been completed.
   */
  int _state = 0;

  /**
   * Encodes [count] and [bits] as a value to be stored in [_state].
   */
  static int _encodeCharacterState(int count, int bits) {
    assert(count == (count & _countMask));
    return (bits << _valueShift | count);
  }

  /**
   * Extracts count from a [_state] value.
   */
  static int _stateCount(int state) {
    assert(state >= 0);
    return state & _countMask;
  }

  /**
   * Extracts value bits from a [_state] value.
   */
  static int _stateBits(int state) {
    assert(state >= 0);
    return state >> _valueShift;
  }

  /**
   * Encodes a number of expected padding characters to be stored in [_state].
   */
  static int _encodePaddingState(int expectedPadding) {
    assert(expectedPadding >= 0);
    assert(expectedPadding <= 1);
    return -expectedPadding - 1;  // ~expectedPadding adapted to dart2js.
  }

  /**
   * Extracts expected padding character count from a [_state] value.
   */
  static int _statePadding(int state) {
    assert(state < 0);
    return -state - 1;  // ~state adapted to dart2js.
  }

  /** Whether [state] records that a padding character has been seen. */
  static bool _hasSeenPadding(int state) => state < 0;

  /**
   * Decodes [input] from [start] to [end].
   *
   * Returns a [Uint8List] with the decoded bytes.
   * If a previous call had an incomplete four-character block, the bits from
   * those are included in decoding.
   *
   * Returns `null` if there is no output, either because padding had already
   * been seen, so the range can only hold the rest of the padding, or
   * because no buffer was needed for the range.
   */
  Uint8List decode(String input, int start, int end) {
    assert(0 <= start);
    assert(start <= end);
    assert(end <= input.length);
    if (_hasSeenPadding(_state)) {
      _state = _checkPadding(input, start, end, _state);
      return null;
    }
    if (start == end) return new Uint8List(0);
    Uint8List buffer = _allocateBuffer(input, start, end, _state);
    _state = decodeChunk(input, start, end, buffer, 0, _state);
    return buffer;
  }

  /**
   * Checks that [_state] represents a valid decoding.
   *
   * Throws a [FormatException], with [input] and [end] as source and offset,
   * if a padding character is still expected or if a four-character block
   * has not been completed.
   */
  void close(String input, int end) {
    if (_state < _encodePaddingState(0)) {
      throw new FormatException("Missing padding character", input, end);
    }
    if (_state > 0) {
      throw new FormatException("Invalid length, must be multiple of four",
                                input, end);
    }
    _state = _encodePaddingState(0);
  }

  /**
   * Decodes [input] from [start] to [end].
   *
   * Includes the state returned by a previous call in the decoding.
   * Writes the decoding to [output] at [outIndex], and there must
   * be room in the output.
   *
   * Returns the state to pass to the next call. Throws a [FormatException]
   * if the range contains a character that is not valid at its position.
   */
  static int decodeChunk(String input, int start, int end,
                         Uint8List output, int outIndex,
                         int state) {
    assert(!_hasSeenPadding(state));
    const int asciiMask = 127;
    const int asciiMax = 127;
    const int eightBitMask = 0xFF;
    const int bitsPerCharacter = 6;

    int bits = _stateBits(state);
    int count = _stateCount(state);
    // String contents should be all ASCII.
    // Instead of checking for each character, we collect the bitwise-or of
    // all the characters in `charOr` and later validate that all characters
    // were ASCII.
    int charOr = 0;
    for (int i = start; i < end; i++) {
      var char = input.codeUnitAt(i);
      charOr |= char;
      int code = _inverseAlphabet[char & asciiMask];
      if (code >= 0) {
        bits = ((bits << bitsPerCharacter) | code) & 0xFFFFFF;
        count = (count + 1) & 3;
        if (count == 0) {
          assert(outIndex + 3 <= output.length);
          output[outIndex++] = (bits >> 16) & eightBitMask;
          output[outIndex++] = (bits >> 8) & eightBitMask;
          output[outIndex++] = bits & eightBitMask;
          bits = 0;
        }
        continue;
      } else if (code == _padding && count > 1) {
        if (count == 3) {
          // Three characters hold 18 bits; the low two must be unused.
          if ((bits & 0x03) != 0) {
            throw new FormatException(
                "Invalid encoding before padding", input, i);
          }
          output[outIndex++] = bits >> 10;
          output[outIndex++] = bits >> 2;
        } else {
          // Two characters hold 12 bits; the low four must be unused.
          if ((bits & 0x0F) != 0) {
            throw new FormatException(
                "Invalid encoding before padding", input, i);
          }
          output[outIndex++] = bits >> 4;
        }
        int expectedPadding = 3 - count;
        state = _encodePaddingState(expectedPadding);
        return _checkPadding(input, i + 1, end, state);
      }
      throw new FormatException("Invalid character", input, i);
    }
    if (charOr >= 0 && charOr <= asciiMax) {
      return _encodeCharacterState(count, bits);
    }
    // There is an invalid (non-ASCII) character in the input.
    int i;
    for (i = start; i < end; i++) {
      int char = input.codeUnitAt(i);
      if (char < 0 || char > asciiMax) break;
    }
    throw new FormatException("Invalid character", input, i);
  }

  /**
   * Allocates a buffer with room for the decoding of a substring of [input].
   *
   * Includes room for the characters in [state], and handles padding correctly.
   *
   * Returns `null` if no buffer is needed.
   */
  static Uint8List _allocateBuffer(String input, int start, int end,
                                   int state) {
    assert(state >= 0);
    int padding = 0;
    int length = _stateCount(state) + (end - start);
    if (end > start && input.codeUnitAt(end - 1) == _paddingChar) {
      padding++;
      if (end - 1 > start && input.codeUnitAt(end - 2) == _paddingChar) {
        padding++;
      }
    }
    // Three bytes per full four bytes in the input.
    int bufferLength = (length >> 2) * 3;
    // If padding was seen, then remove the padding if it was counted, or
    // add the last partial chunk if it wasn't counted.
    int remainderLength = length & 3;
    if (remainderLength == 0) {
      bufferLength -= padding;
    } else if (padding != 0 && remainderLength - padding > 1) {
      bufferLength += remainderLength - 1 - padding;
    }
    if (bufferLength > 0) return new Uint8List(bufferLength);
    // If the input plus state is less than four characters, no buffer
    // is needed.
    return null;
  }

  /**
   * Check that the remainder of the string is valid padding.
   *
   * That means zero or one padding character (depending on [state])
   * and nothing else.
   *
   * Returns the state after the checked characters.
   */
  static int _checkPadding(String input, int start, int end, int state) {
    assert(_hasSeenPadding(state));
    if (start == end) return state;
    int expectedPadding = _statePadding(state);
    if (expectedPadding > 0) {
      int firstChar = input.codeUnitAt(start);
      if (firstChar != _paddingChar) {
        // The source of the error is the `input` parameter.
        throw new FormatException("Missing padding character", input, start);
      }
      state = _encodePaddingState(0);
      start++;
    }
    if (start != end) {
      throw new FormatException("Invalid character after padding",
                                input, start);
    }
    return state;
  }
}
590
/**
 * Sink that decodes chunks of base64 text and emits the decoded bytes.
 */
class _Base64DecoderSink extends StringConversionSinkBase {
  /** Output sink */
  final ChunkedConversionSink<List<int>> _sink;
  final _Base64Decoder _decoder = new _Base64Decoder();

  _Base64DecoderSink(this._sink);

  /** Decodes all of [string] and emits the bytes it produced, if any. */
  void add(String string) {
    if (string.isEmpty) return;
    Uint8List buffer = _decoder.decode(string, 0, string.length);
    if (buffer != null) _sink.add(buffer);
  }

  /**
   * Checks that the input ended on a complete block, then closes the output.
   */
  void close() {
    _decoder.close(null, null);
    _sink.close();
  }

  /**
   * Decodes [string] from [start] to [end].
   *
   * If [isLast] is true, the decoder state is validated and the output sink
   * is closed afterwards. This also happens when the slice is empty.
   */
  void addSlice(String string, int start, int end, bool isLast) {
    end = RangeError.checkValidRange(start, end, string.length);
    if (start != end) {
      Uint8List buffer = _decoder.decode(string, start, end);
      if (buffer != null) _sink.add(buffer);
    }
    // An empty final slice must still end the conversion, so this check is
    // not skipped for empty ranges.
    if (isLast) {
      _decoder.close(string, end);
      _sink.close();
    }
  }
}
OLDNEW
« no previous file with comments | « pkg/expect/lib/expect.dart ('k') | sdk/lib/convert/convert.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698