OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 import "package:expect/expect.dart"; | 5 import "package:expect/expect.dart"; |
6 import 'package:compiler/src/scanner/utf8_bytes_scanner.dart'; | 6 import 'package:compiler/src/scanner/utf8_bytes_scanner.dart'; |
7 import 'package:compiler/src/tokens/precedence_constants.dart'; | 7 import 'package:compiler/src/tokens/precedence_constants.dart'; |
8 import 'package:compiler/src/tokens/token.dart'; | 8 import 'package:compiler/src/tokens/token.dart'; |
9 import 'package:compiler/src/util/characters.dart'; | 9 import 'package:compiler/src/util/characters.dart'; |
10 import 'dart:typed_data'; | 10 import 'dart:typed_data'; |
11 | 11 |
/// Tokenizes [bytes] with a [Utf8BytesScanner] and returns the result of
/// `tokenize()`.
///
/// [bytes] is copied into a buffer one element longer than the input whose
/// final element is 0, because the bytes handed to the scanner must be
/// 0-terminated. The caller's list is not modified.
Token scan(List<int> bytes) {
  final int length = bytes.length;
  final List<int> terminated = new Uint8List(length + 1)
    ..setAll(0, bytes)
    // Explicit terminator in the extra trailing slot.
    ..[length] = 0;
  return new Utf8BytesScanner.fromBytes(terminated).tokenize();
}
18 | 18 |
/// Wraps [bytes] in single quotes and tokenizes the result with a
/// [Utf8BytesScanner].
///
/// The buffer handed to the scanner is laid out as: opening quote (0x27),
/// the contents of [bytes], closing quote (0x27), then `$EOF` as the
/// terminator.
Token scanUTF8(List<int> bytes) {
  const int singleQuote = 0x27;
  final int count = bytes.length;

  // Two quotes plus one terminator slot on top of the payload.
  final List<int> quoted = new Uint8List(count + 3);
  quoted[0] = singleQuote;
  quoted.setRange(1, count + 1, bytes);
  quoted[count + 1] = singleQuote;
  // The bytes given to the scanner must be 0-terminated.
  quoted[count + 2] = $EOF;

  return new Utf8BytesScanner.fromBytes(quoted).tokenize();
}
31 | 31 |
/// Whether the integer `1` and the double `1.0` are identical.
///
/// This test uses the result as its signal for running on JavaScript
/// (presumably because integers and doubles share one representation there).
bool isRunningOnJavaScript() {
  const int one = 1;
  const double oneAsDouble = 1.0;
  return identical(one, oneAsDouble);
}
33 | 33 |
/// Exercises the UTF-8 bytes scanner.
///
/// First, several non-ASCII strings are passed to [scanUTF8] as raw UTF-8
/// bytes, and the value of the returned token is compared with the expected
/// quoted string. Second, input starting with "#!" is passed to [scan] and the
/// returned token is expected to be the EOF token, i.e. the whole input is
/// treated as a comment (regression test for issue 1761).
main() {
  // Google favorite: "Îñţérñåţîöñåļîžåţîờñ".
  Token token = scanUTF8([
    // Raw UTF-8 bytes of the string above.
    0xc3, 0x8e, 0xc3, 0xb1, 0xc5, 0xa3, 0xc3, 0xa9, 0x72,
    0xc3, 0xb1, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xc3,
    0xb6, 0xc3, 0xb1, 0xc3, 0xa5, 0xc4, 0xbc, 0xc3, 0xae,
    0xc5, 0xbe, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xe1,
    0xbb, 0x9d, 0xc3, 0xb1
  ]);
  Expect.stringEquals("'Îñţérñåţîöñåļîžåţîờñ'", token.value);

  // Blueberry porridge in Danish: "blåbærgrød".
  token = scanUTF8([
    // Raw UTF-8 bytes of the string above.
    0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, 0x72, 0x67, 0x72,
    0xc3, 0xb8, 0x64
  ]);
  Expect.stringEquals("'blåbærgrød'", token.value);

  // "சிவா அணாமாைல", that is "Siva Annamalai" in Tamil.
  token = scanUTF8([
    // Raw UTF-8 bytes of the string above.
    0xe0, 0xae, 0x9a, 0xe0, 0xae, 0xbf, 0xe0, 0xae, 0xb5, 0xe0,
    0xae, 0xbe, 0x20, 0xe0, 0xae, 0x85, 0xe0, 0xae, 0xa3, 0xe0,
    0xae, 0xbe, 0xe0, 0xae, 0xae, 0xe0, 0xae, 0xbe, 0xe0, 0xaf,
    0x88, 0xe0, 0xae, 0xb2
  ]);
  Expect.stringEquals("'சிவா அணாமாைல'", token.value);

  // "िसवा अणामालै", that is "Siva Annamalai" in Devanagari.
  token = scanUTF8([
    // Raw UTF-8 bytes of the string above.
    0xe0, 0xa4, 0xbf, 0xe0, 0xa4, 0xb8, 0xe0, 0xa4, 0xb5, 0xe0,
    0xa4, 0xbe, 0x20, 0xe0, 0xa4, 0x85, 0xe0, 0xa4, 0xa3, 0xe0,
    0xa4, 0xbe, 0xe0, 0xa4, 0xae, 0xe0, 0xa4, 0xbe, 0xe0, 0xa4,
    0xb2, 0xe0, 0xa5, 0x88
  ]);
  Expect.stringEquals("'िसवा अणामालै'", token.value);

  // The non-BMP case is only checked when not running on JavaScript.
  if (!isRunningOnJavaScript()) {
    // DESERET CAPITAL LETTER BEE, unicode 0x10412(0xD801+0xDC12)
    // UTF-8: F0 90 90 92
    token = scanUTF8([0xf0, 0x90, 0x90, 0x92]);
    Expect.stringEquals("'𐐒'", token.value);
  } else {
    print('Skipping non-BMP character test');
  }

  // Regression test for issue 1761.
  // "#!"
  token = scan([0x23, 0x21]);
  // Expected value goes first so that a failure message labels the two
  // values correctly.
  Expect.equals(EOF_INFO, token.info); // Treated as a comment.

  // Regression test for issue 1761.
  // "#! Hello, World!"
  token = scan([
    // ASCII bytes of the string above.
    0x23, 0x21, 0x20, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20,
    0x57, 0x6f, 0x72, 0x6c, 0x64, 0x21
  ]);
  Expect.equals(EOF_INFO, token.info); // Treated as a comment.
}
OLD | NEW |