Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: dart/frog/leg/scanner/byte_strings.dart

Issue 9185046: Move UTF-8 decoder to utils. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 8 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | dart/frog/leg/scanner/node_scanner_bench.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 /** 5 /**
6 * An abstract string representation. 6 * An abstract string representation.
7 */ 7 */
8 class ByteString implements SourceString { 8 class ByteString implements SourceString {
9 final List<int> bytes; 9 final List<int> bytes;
10 final int offset; 10 final int offset;
11 final int length; 11 final int length;
12 int _hashCode; 12 int _hashCode;
13 13
14 ByteString(List<int> this.bytes, int this.offset, int this.length); 14 ByteString(List<int> this.bytes, int this.offset, int this.length);
15 15
16 abstract String get charset(); 16 abstract String get charset();
17 17
18 String toString() { 18 String toString() => new Utf8Decoder(bytes, offset, length).toString();
19 var list;
20 try {
21 list = bytes.getRange(offset, length);
22 } catch (var ignored) {
23 // An exception may occur when running this on node. This is
24 // because [bytes] really is a buffer (or typed array).
25 list = new List<int>(length);
26 for (int i = 0; i < length; i++) {
27 list[i] = bytes[i + offset];
28 }
29 }
30 return new String.fromCharCodes(decodeUtf8(list));
31 }
32
33 static int decodeTrailing(int byte) {
34 if (byte < 0x80 || 0xBF < byte) {
35 throw new MalformedInputException('Cannot decode UTF-8 $byte');
36 } else {
37 return byte & 0x3F;
38 }
39 }
40
41 static List<int> decodeUtf8(List<int> bytes) {
42 List<int> result = new List<int>();
43 for (int i = 0; i < bytes.length; i++) {
44 if (bytes[i] < 0x80) {
45 result.add(bytes[i]);
46 } else if (bytes[i] < 0xC2) {
47 throw new MalformedInputException('Cannot decode UTF-8 @ $i');
48 } else if (bytes[i] < 0xE0) {
49 int char = (bytes[i++] & 0x1F) << 6;
50 char += decodeTrailing(bytes[i]);
51 if (char < 0x80) {
52 throw new MalformedInputException('Cannot decode UTF-8 @ ${i-1}');
53 } else {
54 result.add(char);
55 }
56 } else if (bytes[i] < 0xF0) {
57 int char = (bytes[i++] & 0x0F) << 6;
58 char += decodeTrailing(bytes[i++]);
59 char <<= 6;
60 char += decodeTrailing(bytes[i]);
61 if (char < 0x800 || (0xD800 <= char && char <= 0xDFFF)) {
62 throw new MalformedInputException('Cannot decode UTF-8 @ ${i-2}');
63 } else {
64 result.add(char);
65 }
66 } else if (bytes[i] < 0xF8) {
67 int char = (bytes[i++] & 0x07) << 6;
68 char += decodeTrailing(bytes[i++]);
69 char <<= 6;
70 char += decodeTrailing(bytes[i++]);
71 char <<= 6;
72 char += decodeTrailing(bytes[i]);
73 if (char < 0x10000) {
74 throw new MalformedInputException('Cannot decode UTF-8 @ ${i-3}');
75 } else {
76 result.add(char);
77 }
78 } else {
79 throw new MalformedInputException('Cannot decode UTF-8 @ $i');
80 }
81 }
82 return result;
83 }
84 19
85 bool operator ==(other) { 20 bool operator ==(other) {
86 throw "should be overridden in subclass"; 21 throw "should be overridden in subclass";
87 } 22 }
88 23
89 int hashCode() { 24 int hashCode() {
90 if (_hashCode === null) { 25 if (_hashCode === null) {
91 _hashCode = computeHashCode(); 26 _hashCode = computeHashCode();
92 } 27 }
93 return _hashCode; 28 return _hashCode;
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
149 * A ByteString-valued token. 84 * A ByteString-valued token.
150 */ 85 */
151 class ByteStringToken extends Token { 86 class ByteStringToken extends Token {
152 final ByteString value; 87 final ByteString value;
153 88
154 ByteStringToken(PrecedenceInfo info, ByteString this.value, int charOffset) 89 ByteStringToken(PrecedenceInfo info, ByteString this.value, int charOffset)
155 : super(info, charOffset); 90 : super(info, charOffset);
156 91
157 String toString() => value.toString(); 92 String toString() => value.toString();
158 } 93 }
OLD | NEW
« no previous file with comments | « no previous file | dart/frog/leg/scanner/node_scanner_bench.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698