sdk/lib/_internal/compiler/implementation/scanner/scanner.dart - Issue 27510003: Scanner for UTF-8 byte arrays

Side by Side Diff: sdk/lib/_internal/compiler/implementation/scanner/scanner.dart

Issue 27510003: Scanner for UTF-8 byte arrays (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Re-add ArrayBasedScanner, minor fixes. Created 7 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« sdk/lib/_internal/compiler/implementation/mirror_renamer/renamer.dart ('K') | « sdk/lib/_internal/compiler/implementation/scanner/parser.dart ('k') | sdk/lib/_internal/compiler/implementation/scanner/scanner_task.dart » ('j') | sdk/lib/_internal/compiler/implementation/scanner/token.dart » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 part of scanner;	5 part of scanner;

6	6

7 abstract class Scanner {	7 abstract class Scanner {

8 Token tokenize();	8 Token tokenize();

	9

	10 factory Scanner(SourceFile file, {bool includeComments: false}) {

	11 if (file is Utf8BytesSourceFile) {

	12 return new Utf8BytesScanner(file, includeComments: includeComments);

	13 } else {

	14 return new StringScanner(file, includeComments: includeComments);

	15 }

	16 }

9 }	17 }

10	18

11 /**	19 abstract class AbstractScanner implements Scanner {

12 * Common base class for a Dart scanner.	20 final bool includeComments;

13 */

14 abstract class AbstractScanner<T extends SourceString> implements Scanner {

15 int advance();

16 int nextByte();

17	21

18 /**	22 /**

19 * Returns the current character or byte depending on the underlying input	23 * The string offset for the next token that will be created.

20 * kind. For example, [StringScanner] operates on [String] and thus returns	24 *

21 * characters (Unicode codepoints represented as int) whereas	25 * Note that in the [Utf8BytesScanner], [stringOffset] and [scanOffset] values

22 * [ByteArrayScanner] operates on byte arrays and thus returns bytes.	26 * are different. One string character can be encoded using multiple UTF-8

	27 * bytes.

	28 */

	29 int tokenStart = -1;

	30

	31 /**

	32 * A pointer to the token stream created by this scanner. The first token

	33 * is a special token and not part of the source file. This is an

	34 * implementation detail to avoid special cases in the scanner. This token

	35 * is not exposed to clients of the scanner, which are expected to invoke

	36 * [firstToken] to access the token stream.

	37 */

	38 final Token tokens = new SymbolToken(EOF_INFO, -1);

	39

	40 /**

	41 * A pointer to the last scanned token.

	42 */

	43 Token tail;

	44

	45 /**

	46 * The source file that is being scanned. This field can be [:null:].

	47 * If the source file is available, the scanner assigns its [:lineStarts:] and

	48 * [:length:] fields at the end of [tokenize].

	49 */

	50 final SourceFile file;

	51

	52 final List<int> lineStarts = [0];

	53

	54 AbstractScanner(this.file, this.includeComments) {

	55 this.tail = this.tokens;

	56 }

	57

	58

	59 /**

	60 * Advances and returns the next character.

	61 *

	62 * If the next character is non-ASCII, then the returned value depends on the

	63 * scanner implementation. The [Utf8BytesScanner] returns a UTF-8 byte, while

	64 * the [StringScanner] returns a UTF-16 code unit.

	65 *

	66 * The scanner ensures that [advance] is not invoked after it returned [$EOF].

	67 * This allows implementations to omit bound checks if the data structure ends

	68 * with '0'.

	69 */

	70 int advance();

	71

	72 /**

	73 * Returns the current unicode character.

	74 *

	75 * If the current character is ASCII, then it is returned unchanged.

	76 *

	77 * The [Utf8BytesScanner] decodes the next unicode code point starting at the

	78 * current position. Note that every unicode character is returned as a single

	79 * code point, i.e., for '\u{1d11e}' it returns 119070, and the following

	80 * [advance] returns the next character.

	81 *

	82 * The [StringScanner] returns the current character unchanged, which might

	83 * be a surrogate character. In the case of '\u{1d11e}', it returns the first

	84 * code unit 55348, and the following [advance] returns the second code unit

	85 * 56606.

	86 *

	87 * Invoking [currentAsUnicode] multiple times is safe, i.e.,

	88 * [:currentAsUnicode(next) == currentAsUnicode(currentAsUnicode(next)):].

	89 */

	90 int currentAsUnicode(int next);

	91

	92 /**

	93 * Returns the character at the next position. Like in [advance], the

	94 * [Utf8BytesScanner] returns a UTF-8 byte, while the [StringScanner] returns

	95 * a UTF-16 code unit.

23 */	96 */

24 int peek();	97 int peek();

25	98

26 /**	99 /**

27 * Appends a fixed token based on whether the current char is [choice] or not.	100 * Notifies the scanner that unicode characters were detected in either a

28 * If the current char is [choice] a fixed token whose kind and content	101 * comment or a string literal between [startScanOffset] and the current

29 * is determined by [yes] is appended, otherwise a fixed token whose kind	102 * scan offset.

30 * and content is determined by [no] is appended.

31 */	103 */

	104 void handleUnicode(int startScanOffset);

	105

	106 /**

	107 * Returns the current scan offset.

	108 *

	109 * In the [Utf8BytesScanner] this is the offset into the byte list, in the

	110 * [StringScanner] the offset in the source string.

	111 */

	112 int get scanOffset;

	113

	114 /**

	115 * Returns the current string offset.

	116 *

	117 * In the [StringScanner] this is identical to the [scanOffset]. In the

	118 * [Utf8BytesScanner] it is computed based on encountered UTF-8 characters.

	119 */

	120 int get stringOffset;

	121

	122 /**

	123 * Returns the first token scanned by this [Scanner].

	124 */

	125 Token firstToken();

	126

	127 /**

	128 * Returns the last token scanned by this [Scanner].

	129 */

	130 Token previousToken();

	131

	132 /**

	133 * Notifies that a new token starts at current offset.

	134 */

	135 void beginToken() {

	136 tokenStart = stringOffset;

	137 }

	138

	139 /**

	140 * Appends a substring from the scan offset [:start:] to the current

	141 * [:scanOffset:] plus the [:extraOffset:]. For example, if the current

	142 * scanOffset is 10, then [:appendSubstringToken(5, -1):] will append the

	143 * substring string [5,9).

	144 *

	145 * Note that [extraOffset] can only be used if the covered character(s) are

	146 * known to be ASCII.

	147 */

	148 void appendSubstringToken(PrecedenceInfo info, int start,

	149 bool asciiOnly, [int extraOffset]);

	150

	151 /** Documentation in subclass [ArrayBasedScanner] */
	kasperl 2013/10/17 08:50:39: Terminate these comments /** Documentation in subclass [ArrayBasedScanner] */ with '.'. lukas 2013/10/17 17:49:34 (in reply): Done.
	152 void appendStringToken(PrecedenceInfo info, String value);

	153

	154 /** Documentation in subclass [ArrayBasedScanner] */

	155 void appendPrecedenceToken(PrecedenceInfo info);

	156

	157 /** Documentation in subclass [ArrayBasedScanner] */

32 int select(int choice, PrecedenceInfo yes, PrecedenceInfo no);	158 int select(int choice, PrecedenceInfo yes, PrecedenceInfo no);

33	159

34 /**	160 /** Documentation in subclass [ArrayBasedScanner] */

35 * Appends a fixed token whose kind and content is determined by [info].	161 void appendKeywordToken(Keyword keyword);

36 */

37 void appendPrecedenceToken(PrecedenceInfo info);

38	162

39 /**	163 /** Documentation in subclass [ArrayBasedScanner] */

40 * Appends a token whose kind is determined by [info] and content is [value].

41 */

42 void appendStringToken(PrecedenceInfo info, String value);

43

44 /**

45 * Appends a token whose kind is determined by [info] and content is defined

46 * by the SourceString [value].

47 */

48 void appendByteStringToken(PrecedenceInfo info, T value);

49

50 /**

51 * Appends a keyword token whose kind is determined by [keyword].

52 */

53 void appendKeywordToken(Keyword keyword);

54 void appendWhiteSpace(int next);

55 void appendEofToken();	164 void appendEofToken();

56	165

57 /**	166 /** Documentation in subclass [ArrayBasedScanner] */

58 * Creates an ASCII SourceString whose content begins at the source byte	167 void appendWhiteSpace(int next);

59 * offset [start] and ends at [offset] bytes from the current byte offset of

60 * the scanner. For example, if the current byte offset is 10,

61 * [:asciiString(0,-1):] creates an ASCII SourceString whose content is found

62 * at the [0,9[ byte interval of the source text.

63 */

64 T asciiString(int start, int offset);

65 T utf8String(int start, int offset);

66 Token firstToken();

67 Token previousToken();

68 void beginToken();

69 void addToCharOffset(int offset);

70 int get charOffset;

71 int get byteOffset;

72 void appendBeginGroup(PrecedenceInfo info, String value);

73 int appendEndGroup(PrecedenceInfo info, String value, int openKind);

74 void appendGt(PrecedenceInfo info, String value);

75 void appendGtGt(PrecedenceInfo info, String value);

76 void appendGtGtGt(PrecedenceInfo info, String value);

77 void appendComment();

78	168

79 /**	169 /** Documentation in subclass [ArrayBasedScanner] */

80 * We call this method to discard '<' from the "grouping" stack	170 void lineFeedInMultiline();

81 * (maintained by subclasses).	171

82 *	172 /** Documentation in subclass [ArrayBasedScanner] */

83 * [PartialParser.skipExpression] relies on the fact that we do not	173 void appendBeginGroup(PrecedenceInfo info);

84 * create groups for stuff like:	174

85 * [:a = b < c, d = e > f:].	175 /** Documentation in subclass [ArrayBasedScanner] */

86 *	176 int appendEndGroup(PrecedenceInfo info, int openKind);

87 * In other words, this method is called when the scanner recognizes	177

88 * something which cannot possibly be part of a type	178 /** Documentation in subclass [ArrayBasedScanner] */

89 * parameter/argument list.	179 void appendGt(PrecedenceInfo info);

90 */	180

	181 /** Documentation in subclass [ArrayBasedScanner] */

	182 void appendGtGt(PrecedenceInfo info);

	183

	184 /** Documentation in subclass [ArrayBasedScanner] */

	185 void appendComment(start, bool asciiOnly);

	186

	187 /** Documentation in subclass [ArrayBasedScanner] */

91 void discardOpenLt();	188 void discardOpenLt();

92	189

93 // TODO(ahe): Move this class to implementation.	190 // TODO(ahe): Move this class to implementation.

94	191

95 Token tokenize() {	192 Token tokenize() {

96 int next = advance();	193 int next = advance();

97 while (!identical(next, $EOF)) {	194 while (!identical(next, $EOF)) {

98 next = bigSwitch(next);	195 next = bigSwitch(next);

99 }	196 }

100 appendEofToken();	197 appendEofToken();

	198

	199 if (file != null) {

	200 file.length = stringOffset;

	201 // One additional line start at the end, see [SourceFile.lineStarts].

	202 lineStarts.add(stringOffset + 1);

	203 file.lineStarts = lineStarts;

	204 }

	205

101 return firstToken();	206 return firstToken();

102 }	207 }

103	208

104 int bigSwitch(int next) {	209 int bigSwitch(int next) {

105 beginToken();	210 beginToken();

106 if (identical(next, $SPACE) \|\| identical(next, $TAB)	211 if (identical(next, $SPACE) \|\| identical(next, $TAB)

107 \|\| identical(next, $LF) \|\| identical(next, $CR)) {	212 \|\| identical(next, $LF) \|\| identical(next, $CR)) {

108 appendWhiteSpace(next);	213 appendWhiteSpace(next);

109 next = advance();	214 next = advance();

	215 // Sequences of spaces are common, so advance through them fast.

110 while (identical(next, $SPACE)) {	216 while (identical(next, $SPACE)) {

111 appendWhiteSpace(next);	217 // We don't invoke [:appendWhiteSpace(next):] here for efficiency,

	218 // assuming that it does not do anything for space characters.

112 next = advance();	219 next = advance();

113 }	220 }

114 return next;	221 return next;

115 }	222 }

116	223

117 if ($a <= next && next <= $z) {	224 if ($a <= next && next <= $z) {

118 if (identical($r, next)) {	225 if (identical($r, next)) {

119 return tokenizeRawStringKeywordOrIdentifier(next);	226 return tokenizeRawStringKeywordOrIdentifier(next);

120 }	227 }

121 return tokenizeKeywordOrIdentifier(next, true);	228 return tokenizeKeywordOrIdentifier(next, true);

122 }	229 }

123	230

124 if (($A <= next && next <= $Z) \|\| identical(next, $_) \|\| identical(next, $$) ) {	231 if (($A <= next && next <= $Z) \|\|

125 return tokenizeIdentifier(next, byteOffset, true);	232 identical(next, $_) \|\|

	233 identical(next, $$)) {

	234 return tokenizeIdentifier(next, scanOffset, true);

126 }	235 }

127	236

128 if (identical(next, $LT)) {	237 if (identical(next, $LT)) {

129 return tokenizeLessThan(next);	238 return tokenizeLessThan(next);

130 }	239 }

131	240

132 if (identical(next, $GT)) {	241 if (identical(next, $GT)) {

133 return tokenizeGreaterThan(next);	242 return tokenizeGreaterThan(next);

134 }	243 }

135	244

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
180 if (identical(next, $BACKSLASH)) {	289 if (identical(next, $BACKSLASH)) {

181 appendPrecedenceToken(BACKSLASH_INFO);	290 appendPrecedenceToken(BACKSLASH_INFO);

182 return advance();	291 return advance();

183 }	292 }

184	293

185 if (identical(next, $HASH)) {	294 if (identical(next, $HASH)) {

186 return tokenizeTag(next);	295 return tokenizeTag(next);

187 }	296 }

188	297

189 if (identical(next, $OPEN_PAREN)) {	298 if (identical(next, $OPEN_PAREN)) {

190 appendBeginGroup(OPEN_PAREN_INFO, "(");	299 appendBeginGroup(OPEN_PAREN_INFO);

191 return advance();	300 return advance();

192 }	301 }

193	302

194 if (identical(next, $CLOSE_PAREN)) {	303 if (identical(next, $CLOSE_PAREN)) {

195 return appendEndGroup(CLOSE_PAREN_INFO, ")", OPEN_PAREN_TOKEN);	304 return appendEndGroup(CLOSE_PAREN_INFO, OPEN_PAREN_TOKEN);

196 }	305 }

197	306

198 if (identical(next, $COMMA)) {	307 if (identical(next, $COMMA)) {

199 appendPrecedenceToken(COMMA_INFO);	308 appendPrecedenceToken(COMMA_INFO);

200 return advance();	309 return advance();

201 }	310 }

202	311

203 if (identical(next, $COLON)) {	312 if (identical(next, $COLON)) {

204 appendPrecedenceToken(COLON_INFO);	313 appendPrecedenceToken(COLON_INFO);

205 return advance();	314 return advance();

206 }	315 }

207	316

208 if (identical(next, $SEMICOLON)) {	317 if (identical(next, $SEMICOLON)) {

209 appendPrecedenceToken(SEMICOLON_INFO);	318 appendPrecedenceToken(SEMICOLON_INFO);

210 // Type parameters and arguments cannot contain semicolon.	319 // Type parameters and arguments cannot contain semicolon.

211 discardOpenLt();	320 discardOpenLt();

212 return advance();	321 return advance();

213 }	322 }

214	323

215 if (identical(next, $QUESTION)) {	324 if (identical(next, $QUESTION)) {

216 appendPrecedenceToken(QUESTION_INFO);	325 appendPrecedenceToken(QUESTION_INFO);

217 return advance();	326 return advance();

218 }	327 }

219	328

220 if (identical(next, $CLOSE_SQUARE_BRACKET)) {	329 if (identical(next, $CLOSE_SQUARE_BRACKET)) {

221 return appendEndGroup(CLOSE_SQUARE_BRACKET_INFO, "]",	330 return appendEndGroup(CLOSE_SQUARE_BRACKET_INFO,

222 OPEN_SQUARE_BRACKET_TOKEN);	331 OPEN_SQUARE_BRACKET_TOKEN);

223 }	332 }

224	333

225 if (identical(next, $BACKPING)) {	334 if (identical(next, $BACKPING)) {

226 appendPrecedenceToken(BACKPING_INFO);	335 appendPrecedenceToken(BACKPING_INFO);

227 return advance();	336 return advance();

228 }	337 }

229	338

230 if (identical(next, $OPEN_CURLY_BRACKET)) {	339 if (identical(next, $OPEN_CURLY_BRACKET)) {

231 appendBeginGroup(OPEN_CURLY_BRACKET_INFO, "{");	340 appendBeginGroup(OPEN_CURLY_BRACKET_INFO);

232 return advance();	341 return advance();

233 }	342 }

234	343

235 if (identical(next, $CLOSE_CURLY_BRACKET)) {	344 if (identical(next, $CLOSE_CURLY_BRACKET)) {

236 return appendEndGroup(CLOSE_CURLY_BRACKET_INFO, "}",	345 return appendEndGroup(CLOSE_CURLY_BRACKET_INFO,

237 OPEN_CURLY_BRACKET_TOKEN);	346 OPEN_CURLY_BRACKET_TOKEN);

238 }	347 }

239	348

240 if (identical(next, $SLASH)) {	349 if (identical(next, $SLASH)) {

241 return tokenizeSlashOrComment(next);	350 return tokenizeSlashOrComment(next);

242 }	351 }

243	352

244 if (identical(next, $AT)) {	353 if (identical(next, $AT)) {

245 return tokenizeAt(next);	354 return tokenizeAt(next);

246 }	355 }

247	356

248 if (identical(next, $DQ) \|\| identical(next, $SQ)) {	357 if (identical(next, $DQ) \|\| identical(next, $SQ)) {

249 return tokenizeString(next, byteOffset, false);	358 return tokenizeString(next, scanOffset, false);

250 }	359 }

251	360

252 if (identical(next, $PERIOD)) {	361 if (identical(next, $PERIOD)) {

253 return tokenizeDotsOrNumber(next);	362 return tokenizeDotsOrNumber(next);

254 }	363 }

255	364

256 if (identical(next, $0)) {	365 if (identical(next, $0)) {

257 return tokenizeHexOrNumber(next);	366 return tokenizeHexOrNumber(next);

258 }	367 }

259	368

260 // TODO(ahe): Would a range check be faster?	369 // TODO(ahe): Would a range check be faster?

261 if (identical(next, $1) \|\| identical(next, $2) \|\| identical(next, $3)	370 if (identical(next, $1) \|\| identical(next, $2) \|\| identical(next, $3)

262 \|\| identical(next, $4) \|\| identical(next, $5) \|\| identical(next, $6)	371 \|\| identical(next, $4) \|\| identical(next, $5) \|\| identical(next, $6)

263 \|\| identical(next, $7) \|\| identical(next, $8) \|\| identical(next, $9)) {	372 \|\| identical(next, $7) \|\| identical(next, $8) \|\| identical(next, $9)) {

264 return tokenizeNumber(next);	373 return tokenizeNumber(next);

265 }	374 }

266	375

267 if (identical(next, $EOF)) {	376 if (identical(next, $EOF)) {

268 return $EOF;	377 return $EOF;

269 }	378 }

270 if (next < 0x1f) {	379 if (next < 0x1f) {

271 return error(new SourceString("unexpected character $next"));	380 return error("unexpected character $next");

	381 }

	382

	383 if (next >= 128) {

	384 next = currentAsUnicode(next);

272 }	385 }

273	386

274 // The following are non-ASCII characters.	387 // The following are non-ASCII characters.
	kasperl 2013/10/17 08:50:39: Can the check for $NBSP be guarded by the next >= 128 condition? Not that it matters for performance. lukas 2013/10/17 17:49:34 (in reply): Actually we can just remove the check for >= 128.
275	388

276 if (identical(next, $NBSP)) {	389 if (identical(next, $NBSP)) {

277 appendWhiteSpace(next);	390 appendWhiteSpace(next);

278 return advance();	391 return advance();

279 }	392 }

280	393

281 return tokenizeIdentifier(next, byteOffset, true);	394 return error("unexpected unicode character $next");

282 }	395 }

283	396

284 int tokenizeTag(int next) {	397 int tokenizeTag(int next) {

285 // # or #!.*[\n\r]	398 // # or #!.*[\n\r]

286 if (byteOffset == 0) {	399 if (scanOffset == 0) {

287 if (identical(peek(), $BANG)) {	400 if (identical(peek(), $BANG)) {

	401 int start = scanOffset + 1;

	402 bool asciiOnly = true;

288 do {	403 do {

289 next = advance();	404 next = advance();

290 } while (!identical(next, $LF) && !identical(next, $CR) && !identical(next, $EOF));	405 if (next > 127) asciiOnly = false;

	406 } while (!identical(next, $LF) &&

	407 !identical(next, $CR) &&

	408 !identical(next, $EOF));

	409 if (!asciiOnly) handleUnicode(start);

291 return next;	410 return next;

292 }	411 }

293 }	412 }

294 appendPrecedenceToken(HASH_INFO);	413 appendPrecedenceToken(HASH_INFO);

295 return advance();	414 return advance();

296 }	415 }

297	416

298 int tokenizeTilde(int next) {	417 int tokenizeTilde(int next) {

299 // ~ ~/ ~/=	418 // ~ ~/ ~/=

300 next = advance();	419 next = advance();

301 if (identical(next, $SLASH)) {	420 if (identical(next, $SLASH)) {

302 return select($EQ, TILDE_SLASH_EQ_INFO, TILDE_SLASH_INFO);	421 return select($EQ, TILDE_SLASH_EQ_INFO, TILDE_SLASH_INFO);

303 } else {	422 } else {

304 appendPrecedenceToken(TILDE_INFO);	423 appendPrecedenceToken(TILDE_INFO);

305 return next;	424 return next;

306 }	425 }

307 }	426 }

308	427

309 int tokenizeOpenSquareBracket(int next) {	428 int tokenizeOpenSquareBracket(int next) {

310 // [ [] []=	429 // [ [] []=

311 next = advance();	430 next = advance();

312 if (identical(next, $CLOSE_SQUARE_BRACKET)) {	431 if (identical(next, $CLOSE_SQUARE_BRACKET)) {

313 Token token = previousToken();	432 Token token = previousToken();

314 if (token is KeywordToken && identical(token.value.stringValue, 'operator' )) {	433 if (token is KeywordToken &&

	434 identical((token as KeywordToken).keyword.syntax, 'operator')) {

315 return select($EQ, INDEX_EQ_INFO, INDEX_INFO);	435 return select($EQ, INDEX_EQ_INFO, INDEX_INFO);

316 }	436 }

317 }	437 }

318 appendBeginGroup(OPEN_SQUARE_BRACKET_INFO, "[");	438 appendBeginGroup(OPEN_SQUARE_BRACKET_INFO);

319 return next;	439 return next;

320 }	440 }

321	441

322 int tokenizeCaret(int next) {	442 int tokenizeCaret(int next) {

323 // ^ ^=	443 // ^ ^=

324 return select($EQ, CARET_EQ_INFO, CARET_INFO);	444 return select($EQ, CARET_EQ_INFO, CARET_INFO);

325 }	445 }

326	446

327 int tokenizeBar(int next) {	447 int tokenizeBar(int next) {

328 // \| \|\| \|=	448 // \| \|\| \|=

(...skipping 43 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
372 return advance();	492 return advance();

373 } else if (identical(next, $EQ)) {	493 } else if (identical(next, $EQ)) {

374 appendPrecedenceToken(MINUS_EQ_INFO);	494 appendPrecedenceToken(MINUS_EQ_INFO);

375 return advance();	495 return advance();

376 } else {	496 } else {

377 appendPrecedenceToken(MINUS_INFO);	497 appendPrecedenceToken(MINUS_INFO);

378 return next;	498 return next;

379 }	499 }

380 }	500 }

381	501

382

383 int tokenizePlus(int next) {	502 int tokenizePlus(int next) {

384 // + ++ +=	503 // + ++ +=

385 next = advance();	504 next = advance();

386 if (identical($PLUS, next)) {	505 if (identical($PLUS, next)) {

387 appendPrecedenceToken(PLUS_PLUS_INFO);	506 appendPrecedenceToken(PLUS_PLUS_INFO);

388 return advance();	507 return advance();

389 } else if (identical($EQ, next)) {	508 } else if (identical($EQ, next)) {

390 appendPrecedenceToken(PLUS_EQ_INFO);	509 appendPrecedenceToken(PLUS_EQ_INFO);

391 return advance();	510 return advance();

392 } else {	511 } else {

393 appendPrecedenceToken(PLUS_INFO);	512 appendPrecedenceToken(PLUS_INFO);

394 return next;	513 return next;

395 }	514 }

396 }	515 }

397	516

398 int tokenizeExclamation(int next) {	517 int tokenizeExclamation(int next) {

399 // ! != !==	518 // ! !=

	519 // !== is kept for user-friendly error reporting
	kasperl 2013/10/17 08:50:39: Nit: I'd terminate the "... is kept ..." comments with '.'. lukas 2013/10/17 17:49:34 (in reply): Done.
	520

400 next = advance();	521 next = advance();

401 if (identical(next, $EQ)) {	522 if (identical(next, $EQ)) {

402 return select($EQ, BANG_EQ_EQ_INFO, BANG_EQ_INFO);	523 return select($EQ, BANG_EQ_EQ_INFO, BANG_EQ_INFO);

403 }	524 }

404 appendPrecedenceToken(BANG_INFO);	525 appendPrecedenceToken(BANG_INFO);

405 return next;	526 return next;

406 }	527 }

407	528

408 int tokenizeEquals(int next) {	529 int tokenizeEquals(int next) {

409 // = == ===	530 // = == =>

	531 // === is kept for user-friendly error reporting

410	532

411 // Type parameters and arguments cannot contain any token that	533 // Type parameters and arguments cannot contain any token that

412 // starts with '='.	534 // starts with '='.

413 discardOpenLt();	535 discardOpenLt();

414	536

415 next = advance();	537 next = advance();

416 if (identical(next, $EQ)) {	538 if (identical(next, $EQ)) {

417 return select($EQ, EQ_EQ_EQ_INFO, EQ_EQ_INFO);	539 return select($EQ, EQ_EQ_EQ_INFO, EQ_EQ_INFO);

418 } else if (identical(next, $GT)) {	540 } else if (identical(next, $GT)) {

419 appendPrecedenceToken(FUNCTION_INFO);	541 appendPrecedenceToken(FUNCTION_INFO);

420 return advance();	542 return advance();

421 }	543 }

422 appendPrecedenceToken(EQ_INFO);	544 appendPrecedenceToken(EQ_INFO);

423 return next;	545 return next;

424 }	546 }

425	547

426 int tokenizeGreaterThan(int next) {	548 int tokenizeGreaterThan(int next) {

427 // > >= >> >>= >>> >>>=	549 // > >= >> >>=

428 next = advance();	550 next = advance();

429 if (identical($EQ, next)) {	551 if (identical($EQ, next)) {

430 appendPrecedenceToken(GT_EQ_INFO);	552 appendPrecedenceToken(GT_EQ_INFO);

431 return advance();	553 return advance();

432 } else if (identical($GT, next)) {	554 } else if (identical($GT, next)) {

433 next = advance();	555 next = advance();

434 if (identical($EQ, next)) {	556 if (identical($EQ, next)) {

435 appendPrecedenceToken(GT_GT_EQ_INFO);	557 appendPrecedenceToken(GT_GT_EQ_INFO);

436 return advance();	558 return advance();

437 } else {	559 } else {

438 appendGtGt(GT_GT_INFO, ">>");	560 appendGtGt(GT_GT_INFO);

439 return next;	561 return next;

440 }	562 }

441 } else {	563 } else {

442 appendGt(GT_INFO, ">");	564 appendGt(GT_INFO);

443 return next;	565 return next;

444 }	566 }

445 }	567 }

446	568

447 int tokenizeLessThan(int next) {	569 int tokenizeLessThan(int next) {

448 // < <= << <<=	570 // < <= << <<=

449 next = advance();	571 next = advance();

450 if (identical($EQ, next)) {	572 if (identical($EQ, next)) {

451 appendPrecedenceToken(LT_EQ_INFO);	573 appendPrecedenceToken(LT_EQ_INFO);

452 return advance();	574 return advance();

453 } else if (identical($LT, next)) {	575 } else if (identical($LT, next)) {

454 return select($EQ, LT_LT_EQ_INFO, LT_LT_INFO);	576 return select($EQ, LT_LT_EQ_INFO, LT_LT_INFO);

455 } else {	577 } else {

456 appendBeginGroup(LT_INFO, "<");	578 appendBeginGroup(LT_INFO);

457 return next;	579 return next;

458 }	580 }

459 }	581 }

460	582

461 int tokenizeNumber(int next) {	583 int tokenizeNumber(int next) {

462 int start = byteOffset;	584 int start = scanOffset;

463 while (true) {	585 while (true) {

464 next = advance();	586 next = advance();

465 if ($0 <= next && next <= $9) {	587 if ($0 <= next && next <= $9) {

466 continue;	588 continue;

467 } else if (identical(next, $e) \|\| identical(next, $E)) {	589 } else if (identical(next, $e) \|\| identical(next, $E)) {

468 return tokenizeFractionPart(next, start);	590 return tokenizeFractionPart(next, start);

469 } else {	591 } else {

470 if (identical(next, $PERIOD)) {	592 if (identical(next, $PERIOD)) {

471 int nextnext = peek();	593 int nextnext = peek();

472 if ($0 <= nextnext && nextnext <= $9) {	594 if ($0 <= nextnext && nextnext <= $9) {

473 return tokenizeFractionPart(advance(), start);	595 return tokenizeFractionPart(advance(), start);

474 }	596 }

475 }	597 }

476 appendByteStringToken(INT_INFO, asciiString(start, 0));	598 appendSubstringToken(INT_INFO, start, true);

477 return next;	599 return next;

478 }	600 }

479 }	601 }

480 }	602 }

481	603

482 int tokenizeHexOrNumber(int next) {	604 int tokenizeHexOrNumber(int next) {

483 int x = peek();	605 int x = peek();

484 if (identical(x, $x) \|\| identical(x, $X)) {	606 if (identical(x, $x) \|\| identical(x, $X)) {

485 advance();	607 return tokenizeHex(next);

486 return tokenizeHex(x);

487 }	608 }

488 return tokenizeNumber(next);	609 return tokenizeNumber(next);

489 }	610 }

490	611

491 int tokenizeHex(int next) {	612 int tokenizeHex(int next) {

492 int start = byteOffset - 1;	613 int start = scanOffset;

	614 next = advance(); // Advance past the $x or $X.

493 bool hasDigits = false;	615 bool hasDigits = false;

494 while (true) {	616 while (true) {

495 next = advance();	617 next = advance();

496 if (($0 <= next && next <= $9)	618 if (($0 <= next && next <= $9)

497 \|\| ($A <= next && next <= $F)	619 \|\| ($A <= next && next <= $F)

498 \|\| ($a <= next && next <= $f)) {	620 \|\| ($a <= next && next <= $f)) {

499 hasDigits = true;	621 hasDigits = true;

500 } else {	622 } else {

501 if (!hasDigits) {	623 if (!hasDigits) {

502 return error(const SourceString("hex digit expected"));	624 return error("hex digit expected");

503 }	625 }

504 appendByteStringToken(HEXADECIMAL_INFO, asciiString(start, 0));	626 appendSubstringToken(HEXADECIMAL_INFO, start, true);

505 return next;	627 return next;

506 }	628 }

507 }	629 }

508 }	630 }

509	631

510 int tokenizeDotsOrNumber(int next) {	632 int tokenizeDotsOrNumber(int next) {

511 int start = byteOffset;	633 int start = scanOffset;

512 next = advance();	634 next = advance();

513 if (($0 <= next && next <= $9)) {	635 if (($0 <= next && next <= $9)) {

514 return tokenizeFractionPart(next, start);	636 return tokenizeFractionPart(next, start);

515 } else if (identical($PERIOD, next)) {	637 } else if (identical($PERIOD, next)) {

516 return select($PERIOD, PERIOD_PERIOD_PERIOD_INFO, PERIOD_PERIOD_INFO);	638 return select($PERIOD, PERIOD_PERIOD_PERIOD_INFO, PERIOD_PERIOD_INFO);

517 } else {	639 } else {

518 appendPrecedenceToken(PERIOD_INFO);	640 appendPrecedenceToken(PERIOD_INFO);

519 return next;	641 return next;

520 }	642 }

521 }	643 }

522	644

523 int tokenizeFractionPart(int next, int start) {	645 int tokenizeFractionPart(int next, int start) {

524 bool done = false;	646 bool done = false;

525 bool hasDigit = false;	647 bool hasDigit = false;

526 LOOP: while (!done) {	648 LOOP: while (!done) {

527 if ($0 <= next && next <= $9) {	649 if ($0 <= next && next <= $9) {

528 hasDigit = true;	650 hasDigit = true;

529 } else if (identical($e, next) \|\| identical($E, next)) {	651 } else if (identical($e, next) \|\| identical($E, next)) {

530 hasDigit = true;	652 hasDigit = true;

531 next = tokenizeExponent(advance());	653 next = tokenizeExponent(advance());

532 done = true;	654 done = true;

533 continue LOOP;	655 continue LOOP;

534 } else {	656 } else {

535 done = true;	657 done = true;

536 continue LOOP;	658 continue LOOP;

537 }	659 }

538 next = advance();	660 next = advance();

539 }	661 }

540 if (!hasDigit) {	662 if (!hasDigit) {

541 appendByteStringToken(INT_INFO, asciiString(start, -1));	663 // Reduce offset, we already advanced to the token past the period.

	664 appendSubstringToken(INT_INFO, start, true, -1);

	665

	666 // TODO(ahe): Wrong offset for the period. Cannot call beginToken because

	667 // the scanner already advanced past the period.

542 if (identical($PERIOD, next)) {	668 if (identical($PERIOD, next)) {

543 return select($PERIOD, PERIOD_PERIOD_PERIOD_INFO, PERIOD_PERIOD_INFO);	669 return select($PERIOD, PERIOD_PERIOD_PERIOD_INFO, PERIOD_PERIOD_INFO);

544 }	670 }

545 // TODO(ahe): Wrong offset for the period.

546 appendPrecedenceToken(PERIOD_INFO);	671 appendPrecedenceToken(PERIOD_INFO);

547 return bigSwitch(next);	672 return next;

548 }	673 }

549 appendByteStringToken(DOUBLE_INFO, asciiString(start, 0));	674 appendSubstringToken(DOUBLE_INFO, start, true);

550 return next;	675 return next;

551 }	676 }

552	677

553 int tokenizeExponent(int next) {	678 int tokenizeExponent(int next) {

554 if (identical(next, $PLUS) \|\| identical(next, $MINUS)) {	679 if (identical(next, $PLUS) \|\| identical(next, $MINUS)) {

555 next = advance();	680 next = advance();

556 }	681 }

557 bool hasDigits = false;	682 bool hasDigits = false;

558 while (true) {	683 while (true) {

559 if ($0 <= next && next <= $9) {	684 if ($0 <= next && next <= $9) {

560 hasDigits = true;	685 hasDigits = true;

561 } else {	686 } else {

562 if (!hasDigits) {	687 if (!hasDigits) {

563 return error(const SourceString("digit expected"));	688 return error("digit expected");

564 }	689 }

565 return next;	690 return next;

566 }	691 }

567 next = advance();	692 next = advance();

568 }	693 }

569 }	694 }

570	695

571 int tokenizeSlashOrComment(int next) {	696 int tokenizeSlashOrComment(int next) {

	697 int start = scanOffset;

572 next = advance();	698 next = advance();

573 if (identical($STAR, next)) {	699 if (identical($STAR, next)) {

574 return tokenizeMultiLineComment(next);	700 return tokenizeMultiLineComment(next, start);

575 } else if (identical($SLASH, next)) {	701 } else if (identical($SLASH, next)) {

576 return tokenizeSingleLineComment(next);	702 return tokenizeSingleLineComment(next, start);

577 } else if (identical($EQ, next)) {	703 } else if (identical($EQ, next)) {

578 appendPrecedenceToken(SLASH_EQ_INFO);	704 appendPrecedenceToken(SLASH_EQ_INFO);

579 return advance();	705 return advance();

580 } else {	706 } else {

581 appendPrecedenceToken(SLASH_INFO);	707 appendPrecedenceToken(SLASH_INFO);

582 return next;	708 return next;

583 }	709 }

584 }	710 }

585	711

586 int tokenizeSingleLineComment(int next) {	712 int tokenizeSingleLineComment(int next, int start) {

	713 bool asciiOnly = true;

587 while (true) {	714 while (true) {

588 next = advance();	715 next = advance();

589 if (identical($LF, next) \|\| identical($CR, next) \|\| identical($EOF, next)) {	716 if (next > 127) asciiOnly = false;

590 appendComment();	717 if (identical($LF, next) \|\|

	718 identical($CR, next) \|\|

	719 identical($EOF, next)) {

	720 if (!asciiOnly) handleUnicode(start);

	721 appendComment(start, asciiOnly);

591 return next;	722 return next;

592 }	723 }

593 }	724 }

594 }	725 }

595	726

596 int tokenizeMultiLineComment(int next) {	727

	728 int tokenizeMultiLineComment(int next, int start) {

	729 bool asciiOnlyComment = true; // Track if the entire comment is ASCII.

	730 bool asciiOnlyLines = true; // Track ASCII since the last handleUnicode.

	731 int unicodeStart = start;

597 int nesting = 1;	732 int nesting = 1;

598 next = advance();	733 next = advance();

599 while (true) {	734 while (true) {

600 if (identical($EOF, next)) {	735 if (identical($EOF, next)) {

601 // TODO(ahe): Report error.	736 if (!asciiOnlyLines) handleUnicode(unicodeStart);

	737 appendStringToken(BAD_INPUT_INFO, "unterminated multi-line comment");

602 return next;	738 return next;

603 } else if (identical($STAR, next)) {	739 } else if (identical($STAR, next)) {

604 next = advance();	740 next = advance();

605 if (identical($SLASH, next)) {	741 if (identical($SLASH, next)) {

606 --nesting;	742 --nesting;

607 if (0 == nesting) {	743 if (0 == nesting) {

	744 if (!asciiOnlyLines) handleUnicode(unicodeStart);

608 next = advance();	745 next = advance();

609 appendComment();	746 appendComment(start, asciiOnlyComment);

610 return next;	747 return next;

611 } else {	748 } else {

612 next = advance();	749 next = advance();

613 }	750 }

614 }	751 }

615 } else if (identical($SLASH, next)) {	752 } else if (identical($SLASH, next)) {

616 next = advance();	753 next = advance();

617 if (identical($STAR, next)) {	754 if (identical($STAR, next)) {

618 next = advance();	755 next = advance();

619 ++nesting;	756 ++nesting;

620 }	757 }

	758 } else if (identical(next, $LF)) {

	759 if (!asciiOnlyLines) {

	760 // Synchronize the string offset in the utf8 scanner.

	761 handleUnicode(unicodeStart);

	762 asciiOnlyLines = true;

	763 unicodeStart = scanOffset;

	764 }

	765 lineFeedInMultiline();

	766 next = advance();

621 } else {	767 } else {

	768 if (next > 127) {

	769 asciiOnlyLines = false;

	770 asciiOnlyComment = false;

	771 }

622 next = advance();	772 next = advance();

623 }	773 }

624 }	774 }

625 }	775 }

626	776

627 int tokenizeRawStringKeywordOrIdentifier(int next) {	777 int tokenizeRawStringKeywordOrIdentifier(int next) {

	778 // [next] is $r.

628 int nextnext = peek();	779 int nextnext = peek();

629 if (identical(nextnext, $DQ) \|\| identical(nextnext, $SQ)) {	780 if (identical(nextnext, $DQ) \|\| identical(nextnext, $SQ)) {

630 int start = byteOffset;	781 int start = scanOffset;

631 next = advance();	782 next = advance();

632 return tokenizeString(next, start, true);	783 return tokenizeString(next, start, true);

633 }	784 }

634 return tokenizeKeywordOrIdentifier(next, true);	785 return tokenizeKeywordOrIdentifier(next, true);

635 }	786 }

636	787

637 int tokenizeKeywordOrIdentifier(int next, bool allowDollar) {	788 int tokenizeKeywordOrIdentifier(int next, bool allowDollar) {

638 KeywordState state = KeywordState.KEYWORD_STATE;	789 KeywordState state = KeywordState.KEYWORD_STATE;

639 int start = byteOffset;	790 int start = scanOffset;

640 while (state != null && $a <= next && next <= $z) {	791 while (state != null && $a <= next && next <= $z) {

641 state = state.next(next);	792 state = state.next(next);

642 next = advance();	793 next = advance();

643 }	794 }

644 if (state == null \|\| state.keyword == null) {	795 if (state == null \|\| state.keyword == null) {

645 return tokenizeIdentifier(next, start, allowDollar);	796 return tokenizeIdentifier(next, start, allowDollar);

646 }	797 }

647 if (($A <= next && next <= $Z) \|\|	798 if (($A <= next && next <= $Z) \|\|

648 ($0 <= next && next <= $9) \|\|	799 ($0 <= next && next <= $9) \|\|

649 identical(next, $_) \|\|	800 identical(next, $_) \|\|

650 identical(next, $$)) {	801 identical(next, $$)) {

651 return tokenizeIdentifier(next, start, allowDollar);	802 return tokenizeIdentifier(next, start, allowDollar);

652 } else if (next < 128) {	803 } else {

653 appendKeywordToken(state.keyword);	804 appendKeywordToken(state.keyword);

654 return next;	805 return next;

655 } else {

656 return tokenizeIdentifier(next, start, allowDollar);

657 }	806 }

658 }	807 }

659	808

	809 /**

	810 * [allowDollar] can exclude '$', which is not allowed as part of a string

	811 * interpolation identifier.

	812 */

660 int tokenizeIdentifier(int next, int start, bool allowDollar) {	813 int tokenizeIdentifier(int next, int start, bool allowDollar) {

661 bool isAscii = true;

662

663 while (true) {	814 while (true) {

664 if (($a <= next && next <= $z) \|\|	815 if (($a <= next && next <= $z) \|\|

665 ($A <= next && next <= $Z) \|\|	816 ($A <= next && next <= $Z) \|\|

666 ($0 <= next && next <= $9) \|\|	817 ($0 <= next && next <= $9) \|\|

667 identical(next, $_) \|\|	818 identical(next, $_) \|\|

668 (identical(next, $$) && allowDollar)) {	819 (identical(next, $$) && allowDollar)) {

669 next = advance();	820 next = advance();

670 } else if ((next < 128) \|\| (identical(next, $NBSP))) {	821 } else {

671 // Identifier ends here.	822 // Identifier ends here.

672 if (start == byteOffset) {	823 if (start == scanOffset) {

673 return error(const SourceString("expected identifier"));	824 return error("expected identifier");

674 } else if (isAscii) {

675 appendByteStringToken(IDENTIFIER_INFO, asciiString(start, 0));

676 } else {	825 } else {

677 appendByteStringToken(BAD_INPUT_INFO, utf8String(start, -1));	826 appendSubstringToken(IDENTIFIER_INFO, start, true);

678 }	827 }

679 return next;	828 return next;

680 } else {

681 int nonAsciiStart = byteOffset;

682 do {

683 next = nextByte();

684 if (identical(next, $NBSP)) break;

685 } while (next > 127);

686 String string = utf8String(nonAsciiStart, -1).slowToString();

687 isAscii = false;

688 int byteLength = nonAsciiStart - byteOffset;

689 addToCharOffset(string.length - byteLength);

690 }	829 }

691 }	830 }

692 }	831 }

693	832

694 int tokenizeAt(int next) {	833 int tokenizeAt(int next) {

695 int start = byteOffset;

696 next = advance();

697 appendPrecedenceToken(AT_INFO);	834 appendPrecedenceToken(AT_INFO);

698 return next;	835 return advance();

699 }	836 }

700	837

701 int tokenizeString(int next, int start, bool raw) {	838 int tokenizeString(int next, int start, bool raw) {

702 int quoteChar = next;	839 int quoteChar = next;

703 next = advance();	840 next = advance();

704 if (identical(quoteChar, next)) {	841 if (identical(quoteChar, next)) {

705 next = advance();	842 next = advance();

706 if (identical(quoteChar, next)) {	843 if (identical(quoteChar, next)) {

707 // Multiline string.	844 // Multiline string.

708 return tokenizeMultiLineString(quoteChar, start, raw);	845 return tokenizeMultiLineString(quoteChar, start, raw);

709 } else {	846 } else {

710 // Empty string.	847 // Empty string.

711 appendByteStringToken(STRING_INFO, utf8String(start, -1));	848 appendSubstringToken(STRING_INFO, start, true);

712 return next;	849 return next;

713 }	850 }

714 }	851 }

715 if (raw) {	852 if (raw) {

716 return tokenizeSingleLineRawString(next, quoteChar, start);	853 return tokenizeSingleLineRawString(next, quoteChar, start);

717 } else {	854 } else {

718 return tokenizeSingleLineString(next, quoteChar, start);	855 return tokenizeSingleLineString(next, quoteChar, start);

719 }	856 }

720 }	857 }

721	858

722 static bool isHexDigit(int character) {	859 /**

723 if ($0 <= character && character <= $9) return true;	860 * [next] is the first character after the quote.

724 character \|= 0x20;	861 * [start] is the scanOffset of the quote.

725 return ($a <= character && character <= $f);	862 *

726 }	863 * The token contains a substring of the source file, including the

727	864 * string quotes, backslashes for escaping. For interpolated strings,

	865 * the parts before and after are separate tokens.

	866 *

	867 * "a $b c"

	868 *

	869 * gives StringToken("a $), StringToken(b) and StringToken( c").

	870 */

728 int tokenizeSingleLineString(int next, int quoteChar, int start) {	871 int tokenizeSingleLineString(int next, int quoteChar, int start) {

	872 bool asciiOnly = true;

729 while (!identical(next, quoteChar)) {	873 while (!identical(next, quoteChar)) {

730 if (identical(next, $BACKSLASH)) {	874 if (identical(next, $BACKSLASH)) {

731 next = advance();	875 next = advance();

732 } else if (identical(next, $$)) {	876 } else if (identical(next, $$)) {

733 next = tokenizeStringInterpolation(start);	877 if (!asciiOnly) handleUnicode(start);

734 start = byteOffset;	878 next = tokenizeStringInterpolation(start, asciiOnly);

	879 start = scanOffset;

	880 asciiOnly = true;

735 continue;	881 continue;

736 }	882 }

737 if (next <= $CR	883 if (next <= $CR

738 && (identical(next, $LF) \|\| identical(next, $CR) \|\| identical(next, $EOF))) {	884 && (identical(next, $LF) \|\|

739 return error(const SourceString("unterminated string literal"));	885 identical(next, $CR) \|\|

	886 identical(next, $EOF))) {

	887 if (!asciiOnly) handleUnicode(start);

	888 return error("unterminated string literal");

740 }	889 }

	890 if (next > 127) asciiOnly = false;

741 next = advance();	891 next = advance();

742 }	892 }

743 appendByteStringToken(STRING_INFO, utf8String(start, 0));	893 if (!asciiOnly) handleUnicode(start);

744 return advance();	894 // Advance past the quote character.

	895 next = advance();

	896 appendSubstringToken(STRING_INFO, start, asciiOnly);

	897 return next;

745 }	898 }

746	899

747 int tokenizeStringInterpolation(int start) {	900 int tokenizeStringInterpolation(int start, bool asciiOnly) {

748 appendByteStringToken(STRING_INFO, utf8String(start, -1));	901 appendSubstringToken(STRING_INFO, start, asciiOnly);

749 beginToken(); // $ starts here.	902 beginToken(); // $ starts here.

750 int next = advance();	903 int next = advance();

751 if (identical(next, $OPEN_CURLY_BRACKET)) {	904 if (identical(next, $OPEN_CURLY_BRACKET)) {

752 return tokenizeInterpolatedExpression(next, start);	905 return tokenizeInterpolatedExpression(next);

753 } else {	906 } else {

754 return tokenizeInterpolatedIdentifier(next, start);	907 return tokenizeInterpolatedIdentifier(next);

755 }	908 }

756 }	909 }

757	910

758 int tokenizeInterpolatedExpression(int next, int start) {	911 int tokenizeInterpolatedExpression(int next) {

759 appendBeginGroup(STRING_INTERPOLATION_INFO, "\${");	912 appendBeginGroup(STRING_INTERPOLATION_INFO);

760 beginToken(); // The expression starts here.	913 beginToken(); // The expression starts here.

761 next = advance();	914 next = advance(); // Move past the curly bracket.

762 while (!identical(next, $EOF) && !identical(next, $STX)) {	915 while (!identical(next, $EOF) && !identical(next, $STX)) {

763 next = bigSwitch(next);	916 next = bigSwitch(next);

764 }	917 }

765 if (identical(next, $EOF)) return next;	918 if (identical(next, $EOF)) return next;

766 next = advance();	919 next = advance(); // Move past the $STX.

767 beginToken(); // The string interpolation suffix starts here.	920 beginToken(); // The string interpolation suffix starts here.

768 return next;	921 return next;

769 }	922 }

770	923

771 int tokenizeInterpolatedIdentifier(int next, int start) {	924 int tokenizeInterpolatedIdentifier(int next) {

772 appendPrecedenceToken(STRING_INTERPOLATION_IDENTIFIER_INFO);	925 appendPrecedenceToken(STRING_INTERPOLATION_IDENTIFIER_INFO);

773 beginToken(); // The identifier starts here.	926 beginToken(); // The identifier starts here.

774 next = tokenizeKeywordOrIdentifier(next, false);	927 next = tokenizeKeywordOrIdentifier(next, false);

775 beginToken(); // The string interpolation suffix starts here.	928 beginToken(); // The string interpolation suffix starts here.

776 return next;	929 return next;

777 }	930 }

778	931

779 int tokenizeSingleLineRawString(int next, int quoteChar, int start) {	932 int tokenizeSingleLineRawString(int next, int quoteChar, int start) {

780 next = advance();	933 bool asciiOnly = true;

	934 next = advance(); // Advance past the quote

781 while (next != $EOF) {	935 while (next != $EOF) {

782 if (identical(next, quoteChar)) {	936 if (identical(next, quoteChar)) {

783 appendByteStringToken(STRING_INFO, utf8String(start, 0));	937 if (!asciiOnly) handleUnicode(start);

784 return advance();	938 next = advance();

	939 appendSubstringToken(STRING_INFO, start, asciiOnly);

	940 return next;

785 } else if (identical(next, $LF) \|\| identical(next, $CR)) {	941 } else if (identical(next, $LF) \|\| identical(next, $CR)) {

786 return error(const SourceString("unterminated string literal"));	942 if (!asciiOnly) handleUnicode(start);

	943 return error("unterminated string literal");

	944 } else if (next > 127) {

	945 asciiOnly = false;

787 }	946 }

788 next = advance();	947 next = advance();

789 }	948 }

790 return error(const SourceString("unterminated string literal"));	949 if (!asciiOnly) handleUnicode(start);

	950 return error("unterminated string literal");

791 }	951 }

792	952

793 int tokenizeMultiLineRawString(int quoteChar, int start) {	953 int tokenizeMultiLineRawString(int quoteChar, int start) {

794 int next = advance();	954 bool asciiOnlyString = true;

	955 bool asciiOnlyLine = true;

	956 int unicodeStart = start;

	957 int next = advance(); // Advance past the (last) quote (of three)

795 outer: while (!identical(next, $EOF)) {	958 outer: while (!identical(next, $EOF)) {

796 while (!identical(next, quoteChar)) {	959 while (!identical(next, quoteChar)) {

	960 if (identical(next, $LF)) {

	961 if (!asciiOnlyLine) {

	962 // Synchronize the string offset in the utf8 scanner.

	963 handleUnicode(unicodeStart);

	964 asciiOnlyLine = true;

	965 unicodeStart = scanOffset;

	966 }

	967 lineFeedInMultiline();

	968 } else if (next > 127) {

	969 asciiOnlyLine = false;

	970 asciiOnlyString = false;

	971 }

797 next = advance();	972 next = advance();

798 if (identical(next, $EOF)) break outer;	973 if (identical(next, $EOF)) break outer;

799 }	974 }

800 next = advance();	975 next = advance();

801 if (identical(next, quoteChar)) {	976 if (identical(next, quoteChar)) {

802 next = advance();	977 next = advance();

803 if (identical(next, quoteChar)) {	978 if (identical(next, quoteChar)) {

804 appendByteStringToken(STRING_INFO, utf8String(start, 0));	979 if (!asciiOnlyLine) handleUnicode(unicodeStart);

805 return advance();	980 next = advance();

	981 appendSubstringToken(STRING_INFO, start, asciiOnlyString);

	982 return next;

806 }	983 }

807 }	984 }

808 }	985 }

809 return error(const SourceString("unterminated string literal"));	986 if (!asciiOnlyLine) handleUnicode(unicodeStart);

	987 return error("unterminated string literal");

810 }	988 }

811	989

812 int tokenizeMultiLineString(int quoteChar, int start, bool raw) {	990 int tokenizeMultiLineString(int quoteChar, int start, bool raw) {

813 if (raw) return tokenizeMultiLineRawString(quoteChar, start);	991 if (raw) return tokenizeMultiLineRawString(quoteChar, start);

814 int next = advance();	992 bool asciiOnlyString = true;

	993 bool asciiOnlyLine = true;

	994 int unicodeStart = start;

	995 int next = advance(); // Advance past the (last) quote (of three).

815 while (!identical(next, $EOF)) {	996 while (!identical(next, $EOF)) {

816 if (identical(next, $$)) {	997 if (identical(next, $$)) {

817 next = tokenizeStringInterpolation(start);	998 if (!asciiOnlyLine) handleUnicode(unicodeStart);

818 start = byteOffset;	999 next = tokenizeStringInterpolation(start, asciiOnlyString);

	1000 start = scanOffset;

	1001 unicodeStart = start;

	1002 asciiOnlyString = true; // A new string token is created for the rest.

	1003 asciiOnlyLine = true;

819 continue;	1004 continue;

820 }	1005 }

821 if (identical(next, quoteChar)) {	1006 if (identical(next, quoteChar)) {

822 next = advance();	1007 next = advance();

823 if (identical(next, quoteChar)) {	1008 if (identical(next, quoteChar)) {

824 next = advance();	1009 next = advance();

825 if (identical(next, quoteChar)) {	1010 if (identical(next, quoteChar)) {

826 appendByteStringToken(STRING_INFO, utf8String(start, 0));	1011 if (!asciiOnlyLine) handleUnicode(unicodeStart);

827 return advance();	1012 next = advance();

	1013 appendSubstringToken(STRING_INFO, start, asciiOnlyString);

	1014 return next;

828 }	1015 }

829 }	1016 }

830 continue;	1017 continue;

831 }	1018 }

832 if (identical(next, $BACKSLASH)) {	1019 if (identical(next, $BACKSLASH)) {

833 next = advance();	1020 next = advance();

834 if (identical(next, $EOF)) break;	1021 if (identical(next, $EOF)) break;

835 }	1022 }

	1023 if (identical(next, $LF)) {

	1024 if (!asciiOnlyLine) {

	1025 // Synchronize the string offset in the utf8 scanner.

	1026 handleUnicode(unicodeStart);

	1027 asciiOnlyLine = true;

	1028 unicodeStart = scanOffset;

	1029 }

	1030 lineFeedInMultiline();

	1031 } else if (next > 127) {

	1032 asciiOnlyString = false;

	1033 asciiOnlyLine = false;

	1034 }

836 next = advance();	1035 next = advance();

837 }	1036 }

838 return error(const SourceString("unterminated string literal"));	1037 if (!asciiOnlyLine) handleUnicode(unicodeStart);

	1038 return error("unterminated string literal");

839 }	1039 }

840	1040

841 int error(SourceString message) {	1041 int error(String message) {

842 appendByteStringToken(BAD_INPUT_INFO, message);	1042 appendStringToken(BAD_INPUT_INFO, message);

843 return advance(); // Ensure progress.	1043 return advance(); // Ensure progress.

844 }	1044 }

	1045

	1046 void unmatchedBeginGroup(BeginGroupToken begin) {

	1047 String error = 'unmatched "${begin.stringValue}"';

	1048 Token close =

	1049 new StringToken.fromString(

	1050 BAD_INPUT_INFO, error, begin.charOffset, true);

	1051

	1052 // We want to ensure that unmatched BeginGroupTokens are reported

	1053 // as errors. However, the rest of the parser assumes the groups

	1054 // are well-balanced and will never look at the endGroup

	1055 // token. This is a nice property that allows us to skip quickly

	1056 // over correct code. By inserting an additional error token in

	1057 // the stream, we can keep ignoring endGroup tokens.

	1058 //

	1059 // [begin] --next--> [tail]

	1060 // [begin] --endG--> [close] --next--> [next] --next--> [tail]

	1061 //

	1062 // This allows the parser to skip from [begin] via endGroup to [close] and

	1063 // ignore the [close] token (assuming it's correct), then the error will be

	1064 // reported when parsing the [next] token.

	1065

	1066 Token next = new StringToken.fromString(

	1067 BAD_INPUT_INFO, error, begin.charOffset, true);

	1068 begin.endGroup = close;

	1069 close.next = next;

	1070 next.next = begin.next;

	1071 }

845 }	1072 }

OLD	NEW