Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(11)

Side by Side Diff: sdk/lib/utf/utf8.dart

Issue 11368138: Add some support for the code-point code-unit distinction. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Implemented feedback from patch set 3 Created 8 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 const int _UTF8_ONE_BYTE_MAX = 0x7f; 5 const int _UTF8_ONE_BYTE_MAX = 0x7f;
6 const int _UTF8_TWO_BYTE_MAX = 0x7ff; 6 const int _UTF8_TWO_BYTE_MAX = 0x7ff;
7 const int _UTF8_THREE_BYTE_MAX = 0xffff; 7 const int _UTF8_THREE_BYTE_MAX = 0xffff;
8 8
9 const int _UTF8_LO_SIX_BIT_MASK = 0x3f; 9 const int _UTF8_LO_SIX_BIT_MASK = 0x3f;
10 10
(...skipping 23 matching lines...) Expand all
34 34
35 /** 35 /**
36 * Produce a String from a List of UTF-8 encoded bytes. The parameters 36 * Produce a String from a List of UTF-8 encoded bytes. The parameters
37 * can set an offset into a list of bytes (as int), limit the length of the 37 * can set an offset into a list of bytes (as int), limit the length of the
38 * values to be decoded, and override the default Unicode replacement character. 38 * values to be decoded, and override the default Unicode replacement character.
39 * Set the replacementCharacter to null to throw an ArgumentError 39 * Set the replacementCharacter to null to throw an ArgumentError
40 * rather than replace the bad value. 40 * rather than replace the bad value.
41 */ 41 */
42 String decodeUtf8(List<int> bytes, [int offset = 0, int length, 42 String decodeUtf8(List<int> bytes, [int offset = 0, int length,
43 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 43 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
44 return codepointsToString( 44 return new String.fromCharCodes(
45 (new Utf8Decoder(bytes, offset, length, replacementCodepoint)) 45 (new Utf8Decoder(bytes, offset, length, replacementCodepoint))
46 .decodeRest()); 46 .decodeRest());
47 } 47 }
48 48
49 /** 49 /**
50 * Produce a sequence of UTF-8 encoded bytes from the provided string. 50 * Produce a sequence of UTF-8 encoded bytes from the provided string.
51 */ 51 */
52 List<int> encodeUtf8(String str) => 52 List<int> encodeUtf8(String str) =>
53 codepointsToUtf8(stringToCodepoints(str)); 53 codepointsToUtf8(str.charCodes);
54 54
55 int _addToEncoding(int offset, int bytes, int value, List<int> buffer) { 55 int _addToEncoding(int offset, int bytes, int value, List<int> buffer) {
56 while (bytes > 0) { 56 while (bytes > 0) {
57 buffer[offset + bytes] = _UTF8_SUBSEQUENT_BYTE_BASE | 57 buffer[offset + bytes] = _UTF8_SUBSEQUENT_BYTE_BASE |
58 (value & _UTF8_LO_SIX_BIT_MASK); 58 (value & _UTF8_LO_SIX_BIT_MASK);
59 value = value >> 6; 59 value = value >> 6;
60 bytes--; 60 bytes--;
61 } 61 }
62 return value; 62 return value;
63 } 63 }
(...skipping 187 matching lines...) Expand 10 before | Expand all | Expand 10 after
251 if (validSequence && nonOverlong && inRange) { 251 if (validSequence && nonOverlong && inRange) {
252 return value; 252 return value;
253 } else if (replacementCodepoint != null) { 253 } else if (replacementCodepoint != null) {
254 return replacementCodepoint; 254 return replacementCodepoint;
255 } else { 255 } else {
256 throw new ArgumentError( 256 throw new ArgumentError(
257 "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}"); 257 "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}");
258 } 258 }
259 } 259 }
260 } 260 }
OLDNEW
« runtime/vm/unicode.h ('K') | « sdk/lib/utf/utf32.dart ('k') | sdk/lib/utf/utf_core.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698