| OLD | NEW |
| 1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 /** | 5 #library('utf8'); |
| 6 * An abstract string representation. | 6 |
| 7 */ | 7 class Utf8Decoder { |
| 8 class ByteString implements SourceString { | |
| 9 final List<int> bytes; | 8 final List<int> bytes; |
| 10 final int offset; | 9 final int offset; |
| 11 final int length; | 10 final int length; |
| 12 int _hashCode; | |
| 13 | 11 |
| 14 ByteString(List<int> this.bytes, int this.offset, int this.length); | 12 Utf8Decoder(List<int> this.bytes, int this.offset, int this.length); |
| 15 | |
| 16 abstract String get charset(); | |
| 17 | 13 |
| 18 String toString() { | 14 String toString() { |
| 19 var list; | 15 return new String.fromCharCodes(decodeUtf8(bytes.getRange(offset, length))); |
| 20 try { | |
| 21 list = bytes.getRange(offset, length); | |
| 22 } catch (var ignored) { | |
| 23 // An exception may occur when running this on node. This is | |
| 24 // because [bytes] really is a buffer (or typed array). | |
| 25 list = new List<int>(length); | |
| 26 for (int i = 0; i < length; i++) { | |
| 27 list[i] = bytes[i + offset]; | |
| 28 } | |
| 29 } | |
| 30 return new String.fromCharCodes(decodeUtf8(list)); | |
| 31 } | 16 } |
| 32 | 17 |
| 33 static int decodeTrailing(int byte) { | 18 static int decodeTrailing(int byte) { |
| 34 if (byte < 0x80 || 0xBF < byte) { | 19 if (byte < 0x80 || 0xBF < byte) { |
| 35 throw new MalformedInputException('Cannot decode UTF-8 $byte'); | 20 throw new Exception('Cannot decode UTF-8 $byte'); |
| 36 } else { | 21 } else { |
| 37 return byte & 0x3F; | 22 return byte & 0x3F; |
| 38 } | 23 } |
| 39 } | 24 } |
| 40 | 25 |
| 41 static List<int> decodeUtf8(List<int> bytes) { | 26 static List<int> decodeUtf8(List<int> bytes) { |
| 42 List<int> result = new List<int>(); | 27 List<int> result = new List<int>(); |
| 43 for (int i = 0; i < bytes.length; i++) { | 28 for (int i = 0; i < bytes.length; i++) { |
| 44 if (bytes[i] < 0x80) { | 29 if (bytes[i] < 0x80) { |
| 45 result.add(bytes[i]); | 30 result.add(bytes[i]); |
| 46 } else if (bytes[i] < 0xC2) { | 31 } else if (bytes[i] < 0xC2) { |
| 47 throw new MalformedInputException('Cannot decode UTF-8 @ $i'); | 32 throw new Exception('Cannot decode UTF-8 @ $i'); |
| 48 } else if (bytes[i] < 0xE0) { | 33 } else if (bytes[i] < 0xE0) { |
| 49 int char = (bytes[i++] & 0x1F) << 6; | 34 int char = (bytes[i++] & 0x1F) << 6; |
| 50 char += decodeTrailing(bytes[i]); | 35 char += decodeTrailing(bytes[i]); |
| 51 if (char < 0x80) { | 36 if (char < 0x80) { |
| 52 throw new MalformedInputException('Cannot decode UTF-8 @ ${i-1}'); | 37 throw new Exception('Cannot decode UTF-8 @ ${i-1}'); |
| 53 } else { | 38 } else { |
| 54 result.add(char); | 39 result.add(char); |
| 55 } | 40 } |
| 56 } else if (bytes[i] < 0xF0) { | 41 } else if (bytes[i] < 0xF0) { |
| 57 int char = (bytes[i++] & 0x0F) << 6; | 42 int char = (bytes[i++] & 0x0F) << 6; |
| 58 char += decodeTrailing(bytes[i++]); | 43 char += decodeTrailing(bytes[i++]); |
| 59 char <<= 6; | 44 char <<= 6; |
| 60 char += decodeTrailing(bytes[i]); | 45 char += decodeTrailing(bytes[i]); |
| 61 if (char < 0x800 || (0xD800 <= char && char <= 0xDFFF)) { | 46 if (char < 0x800 || (0xD800 <= char && char <= 0xDFFF)) { |
| 62 throw new MalformedInputException('Cannot decode UTF-8 @ ${i-2}'); | 47 throw new Exception('Cannot decode UTF-8 @ ${i-2}'); |
| 63 } else { | 48 } else { |
| 64 result.add(char); | 49 result.add(char); |
| 65 } | 50 } |
| 66 } else if (bytes[i] < 0xF8) { | 51 } else if (bytes[i] < 0xF8) { |
| 67 int char = (bytes[i++] & 0x07) << 6; | 52 int char = (bytes[i++] & 0x07) << 6; |
| 68 char += decodeTrailing(bytes[i++]); | 53 char += decodeTrailing(bytes[i++]); |
| 69 char <<= 6; | 54 char <<= 6; |
| 70 char += decodeTrailing(bytes[i++]); | 55 char += decodeTrailing(bytes[i++]); |
| 71 char <<= 6; | 56 char <<= 6; |
| 72 char += decodeTrailing(bytes[i]); | 57 char += decodeTrailing(bytes[i]); |
| 73 if (char < 0x10000) { | 58 if (char < 0x10000) { |
| 74 throw new MalformedInputException('Cannot decode UTF-8 @ ${i-3}'); | 59 throw new Exception('Cannot decode UTF-8 @ ${i-3}'); |
| 75 } else { | 60 } else { |
| 76 result.add(char); | 61 result.add(char); |
| 77 } | 62 } |
| 78 } else { | 63 } else { |
| 79 throw new MalformedInputException('Cannot decode UTF-8 @ $i'); | 64 throw new Exception('Cannot decode UTF-8 @ $i'); |
| 80 } | 65 } |
| 81 } | 66 } |
| 82 return result; | 67 return result; |
| 83 } | 68 } |
| 84 | |
| 85 bool operator ==(other) { | |
| 86 throw "should be overridden in subclass"; | |
| 87 } | |
| 88 | |
| 89 int hashCode() { | |
| 90 if (_hashCode === null) { | |
| 91 _hashCode = computeHashCode(); | |
| 92 } | |
| 93 return _hashCode; | |
| 94 } | |
| 95 | |
| 96 int computeHashCode() { | |
| 97 int code = 1; | |
| 98 int end = offset + length; | |
| 99 for (int i = offset; i < end; i++) { | |
| 100 code += 19 * code + bytes[i]; | |
| 101 } | |
| 102 return code; | |
| 103 } | |
| 104 | |
| 105 printOn(StringBuffer sb) { | |
| 106 sb.add(toString()); | |
| 107 } | |
| 108 } | 69 } |
| 109 | |
| 110 /** | |
| 111 * A string that consists purely of 7bit ASCII characters. | |
| 112 */ | |
| 113 class AsciiString extends ByteString { | |
| 114 final String charset = "ASCII"; | |
| 115 | |
| 116 AsciiString(List<int> bytes, int offset, int length) | |
| 117 : super(bytes, offset, length); | |
| 118 | |
| 119 static AsciiString of(List<int> bytes, int offset, int length) { | |
| 120 AsciiString string = new AsciiString(bytes, offset, length); | |
| 121 return string; | |
| 122 } | |
| 123 | |
| 124 static AsciiString fromString(String string) { | |
| 125 List<int> bytes = string.charCodes(); | |
| 126 return AsciiString.of(bytes, 0, bytes.length); | |
| 127 } | |
| 128 } | |
| 129 | |
| 130 /** | |
| 131 * A string that consists of characters that can be encoded as UTF-8. | |
| 132 */ | |
| 133 class Utf8String extends ByteString { | |
| 134 final String charset = "UTF8"; | |
| 135 | |
| 136 Utf8String(List<int> bytes, int offset, int length) | |
| 137 : super(bytes, offset, length); | |
| 138 | |
| 139 static Utf8String of(List<int> bytes, int offset, int length) { | |
| 140 return new Utf8String(bytes, offset, length); | |
| 141 } | |
| 142 | |
| 143 static Utf8String fromString(String string) { | |
| 144 throw "not implemented yet"; | |
| 145 } | |
| 146 } | |
| 147 | |
| 148 /** | |
| 149 * A ByteString-valued token. | |
| 150 */ | |
| 151 class ByteStringToken extends Token { | |
| 152 final ByteString value; | |
| 153 | |
| 154 ByteStringToken(PrecedenceInfo info, ByteString this.value, int charOffset) | |
| 155 : super(info, charOffset); | |
| 156 | |
| 157 String toString() => value.toString(); | |
| 158 } | |
| OLD | NEW |