sdk/lib/_internal/compiler/implementation/scanner/scanner.dart - Issue 27510003: Scanner for UTF-8 byte arrays

Side by Side Diff: sdk/lib/_internal/compiler/implementation/scanner/scanner.dart

Issue 27510003: Scanner for UTF-8 byte arrays (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: fixes compiler tests Created 7 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart ('K') | « sdk/lib/_internal/compiler/implementation/scanner/parser.dart ('k') | sdk/lib/_internal/compiler/implementation/scanner/scanner_task.dart » ('j') | sdk/lib/_internal/compiler/implementation/scanner/scanner_task.dart » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 part of scanner;	5 part of scanner;

6	6

7 abstract class Scanner {	7 abstract class Scanner {

8 Token tokenize();	8 Token tokenize();

	9

	10 factory Scanner(SourceFile file, {bool includeComments: false}) {

	11 if (file is Utf8BytesSourceFile) {

	12 return new Utf8BytesScanner(file, includeComments: includeComments);

	13 } else {

	14 return new StringScanner(file, includeComments: includeComments);

	15 }

	16 }

9 }	17 }

10	18

11 /**	19 abstract class AbstractScanner implements Scanner {

12 * Common base class for a Dart scanner.	20 final bool includeComments;

13 */

14 abstract class AbstractScanner<T extends SourceString> implements Scanner {

15 int advance();

16 int nextByte();

17	21

18 /**	22 /**

19 * Returns the current character or byte depending on the underlying input	23 * The string offset for the next token that will be created.

20 * kind. For example, [StringScanner] operates on [String] and thus returns	24 *

21 * characters (Unicode codepoints represented as int) whereas	25 * Note that in the [Utf8BytesScanner], [stringOffset] and [scanOffset] values

22 * [ByteArrayScanner] operates on byte arrays and thus returns bytes.	26 * are different. One string character can be encoded using multiple UTF-8

	27 * bytes.

	28 */

	29 int tokenStart = -1;

	30

	31 /**

	32 * A pointer to the token stream created by this scanner. The first token

	33 * is a special token and not part of the source file. This is an

	34 * implementation detail to avoid special cases in the scanner. This token

	35 * is not exposed to clients of the scanner, which are expected to invoke

	36 * [firstToken] to access the token stream.

	37 */

	38 final Token tokens = new SymbolToken(EOF_INFO, -1);

	39

	40 /**

	41 * A pointer to the last scanned token.

	42 */

	43 Token tail;

	44

	45 /**

	46 * The source file that is being scanned. This field can be [:null:].

	47 * If the source file is available, the scanner assigns its [:lineStarts:] and

	48 * [:length:] fields at the end of [tokenize].

	49 */

	50 final SourceFile file;

	51

	52 final List<int> lineStarts = [0];
	ngeoffray 2013/10/18 10:19:37 <int>[0] lukas 2013/10/24 16:48:36 On 2013/10/18 10:19:37, ngeoffray wrote: > <int>[0] Done.
	53

	54 AbstractScanner(this.file, this.includeComments) {

	55 this.tail = this.tokens;

	56 }

	57

	58
	ngeoffray 2013/10/18 10:19:37 Extra line. lukas 2013/10/24 16:48:36 On 2013/10/18 10:19:37, ngeoffray wrote: > Extra line. Done.
	59 /**

	60 * Advances and returns the next character.

	61 *

	62 * If the next character is non-ASCII, then the returned value depends on the

	63 * scanner implementation. The [Utf8BytesScanner] returns a UTF-8 byte, while

	64 * the [StringScanner] returns a UTF-16 code unit.

	65 *

	66 * The scanner ensures that [advance] is not invoked after it returned [$EOF].

	67 * This allows implementations to omit bound checks if the data structure ends

	68 * with '0'.

	69 */

	70 int advance();

	71

	72 /**

	73 * Returns the current unicode character.

	74 *

	75 * If the current character is ASCII, then it is returned unchanged.

	76 *

	77 * The [Utf8BytesScanner] decodes the next unicode code point starting at the

	78 * current position. Note that every unicode character is returned as a single

	79 * code point, i.e., for '\u{1d11e}' it returns 119070, and the following

	80 * [advance] returns the next character.

	81 *

	82 * The [StringScanner] returns the current character unchanged, which might

	83 * be a surrogate character. In the case of '\u{1d11e}', it returns the first

	84 * code unit 55348, and the following [advance] returns the second code unit

	85 * 56606.

	86 *

	87 * Invoking [currentAsUnicode] multiple times is safe, i.e.,
	ngeoffray 2013/10/18 10:19:37 i.e. -> that is lukas 2013/10/24 16:48:36 On 2013/10/18 10:19:37, ngeoffray wrote: > i.e. -> that is Done.
	88 * [:currentAsUnicode(next) == currentAsUnicode(currentAsUnicode(next)):].

	89 */

	90 int currentAsUnicode(int next);

	91

	92 /**

	93 * Returns the character at the next position. Like in [advance], the

	94 * [Utf8BytesScanner] returns a UTF-8 byte, while the [StringScanner] returns

	95 * a UTF-16 code unit.

23 */	96 */

24 int peek();	97 int peek();

25	98

26 /**	99 /**

27 * Appends a fixed token based on whether the current char is [choice] or not.	100 * Notifies the scanner that unicode characters were detected in either a

28 * If the current char is [choice] a fixed token whose kind and content	101 * comment or a string literal between [startScanOffset] and the current

29 * is determined by [yes] is appended, otherwise a fixed token whose kind	102 * scan offset.

30 * and content is determined by [no] is appended.

31 */	103 */

	104 void handleUnicode(int startScanOffset);

	105

	106 /**

	107 * Returns the current scan offset.

	108 *

	109 * In the [Utf8BytesScanner] this is the offset into the byte list, in the

	110 * [StringScanner] the offset in the source string.

	111 */

	112 int get scanOffset;

	113

	114 /**

	115 * Returns the current string offset.

	116 *

	117 * In the [StringScanner] this is identical to the [scanOffset]. In the

	118 * [Utf8BytesScanner] it is computed based on encountered UTF-8 characters.

	119 */

	120 int get stringOffset;

	121

	122 /**

	123 * Returns the first token scanned by this [Scanner].

	124 */

	125 Token firstToken();

	126

	127 /**

	128 * Returns the last token scanned by this [Scanner].

	129 */

	130 Token previousToken();

	131

	132 /**

	133 * Notifies that a new token starts at current offset.

	134 */

	135 void beginToken() {

	136 tokenStart = stringOffset;

	137 }

	138

	139 /**

	140 * Appends a substring from the scan offset [:start:] to the current

	141 * [:scanOffset:] plus the [:extraOffset:]. For example, if the current

	142 * scanOffset is 10, then [:appendSubstringToken(5, -1):] will append the

	143 * substring [5,9).

	144 *

	145 * Note that [extraOffset] can only be used if the covered character(s) are

	146 * known to be ASCII.

	147 */

	148 void appendSubstringToken(PrecedenceInfo info, int start,

	149 bool asciiOnly, [int extraOffset]);

	150

	151 /** Documentation in subclass [ArrayBasedScanner]. */

	152 void appendStringToken(PrecedenceInfo info, String value);

	153

	154 /** Documentation in subclass [ArrayBasedScanner]. */

	155 void appendPrecedenceToken(PrecedenceInfo info);

	156

	157 /** Documentation in subclass [ArrayBasedScanner]. */

32 int select(int choice, PrecedenceInfo yes, PrecedenceInfo no);	158 int select(int choice, PrecedenceInfo yes, PrecedenceInfo no);

33	159

34 /**	160 /** Documentation in subclass [ArrayBasedScanner]. */

35 * Appends a fixed token whose kind and content is determined by [info].	161 void appendKeywordToken(Keyword keyword);

36 */

37 void appendPrecedenceToken(PrecedenceInfo info);

38	162

39 /**	163 /** Documentation in subclass [ArrayBasedScanner]. */

40 * Appends a token whose kind is determined by [info] and content is [value].

41 */

42 void appendStringToken(PrecedenceInfo info, String value);

43

44 /**

45 * Appends a token whose kind is determined by [info] and content is defined

46 * by the SourceString [value].

47 */

48 void appendByteStringToken(PrecedenceInfo info, T value);

49

50 /**

51 * Appends a keyword token whose kind is determined by [keyword].

52 */

53 void appendKeywordToken(Keyword keyword);

54 void appendWhiteSpace(int next);

55 void appendEofToken();	164 void appendEofToken();

56	165

57 /**	166 /** Documentation in subclass [ArrayBasedScanner]. */
	ngeoffray 2013/10/18 10:19:37 So do the following methods only apply to the ArrayBasedScanner? lukas 2013/10/24 16:48:36 On 2013/10/18 10:19:37, ngeoffray wrote: > So do the following methods only apply to the ArrayBasedScanner? I just decided to put the documentation together with the actual implementation. Technically the implementations could go here, but Peter preferred to separate scanning code from token building code.
58 * Creates an ASCII SourceString whose content begins at the source byte	167 void appendWhiteSpace(int next);

59 * offset [start] and ends at [offset] bytes from the current byte offset of

60 * the scanner. For example, if the current byte offset is 10,

61 * [:asciiString(0,-1):] creates an ASCII SourceString whose content is found

62 * at the [0,9[ byte interval of the source text.

63 */

64 T asciiString(int start, int offset);

65 T utf8String(int start, int offset);

66 Token firstToken();

67 Token previousToken();

68 void beginToken();

69 void addToCharOffset(int offset);

70 int get charOffset;

71 int get byteOffset;

72 void appendBeginGroup(PrecedenceInfo info, String value);

73 int appendEndGroup(PrecedenceInfo info, String value, int openKind);

74 void appendGt(PrecedenceInfo info, String value);

75 void appendGtGt(PrecedenceInfo info, String value);

76 void appendGtGtGt(PrecedenceInfo info, String value);

77 void appendComment();

78	168

79 /**	169 /** Documentation in subclass [ArrayBasedScanner]. */

80 * We call this method to discard '<' from the "grouping" stack	170 void lineFeedInMultiline();

81 * (maintained by subclasses).	171

82 *	172 /** Documentation in subclass [ArrayBasedScanner]. */

83 * [PartialParser.skipExpression] relies on the fact that we do not	173 void appendBeginGroup(PrecedenceInfo info);

84 * create groups for stuff like:	174

85 * [:a = b < c, d = e > f:].	175 /** Documentation in subclass [ArrayBasedScanner]. */

86 *	176 int appendEndGroup(PrecedenceInfo info, int openKind);

87 * In other words, this method is called when the scanner recognizes	177

88 * something which cannot possibly be part of a type	178 /** Documentation in subclass [ArrayBasedScanner]. */

89 * parameter/argument list.	179 void appendGt(PrecedenceInfo info);

90 */	180

	181 /** Documentation in subclass [ArrayBasedScanner]. */

	182 void appendGtGt(PrecedenceInfo info);

	183

	184 /** Documentation in subclass [ArrayBasedScanner]. */

	185 void appendComment(start, bool asciiOnly);

	186

	187 /** Documentation in subclass [ArrayBasedScanner]. */

91 void discardOpenLt();	188 void discardOpenLt();

92	189

93 // TODO(ahe): Move this class to implementation.	190 // TODO(ahe): Move this class to implementation.

94	191

95 Token tokenize() {	192 Token tokenize() {

96 int next = advance();	193 int next = advance();

97 while (!identical(next, $EOF)) {	194 while (!identical(next, $EOF)) {

98 next = bigSwitch(next);	195 next = bigSwitch(next);

99 }	196 }

100 appendEofToken();	197 appendEofToken();

	198

	199 if (file != null) {

	200 file.length = stringOffset;

	201 // One additional line start at the end, see [SourceFile.lineStarts].

	202 lineStarts.add(stringOffset + 1);

	203 file.lineStarts = lineStarts;

	204 }

	205

101 return firstToken();	206 return firstToken();

102 }	207 }

103	208

104 int bigSwitch(int next) {	209 int bigSwitch(int next) {

105 beginToken();	210 beginToken();

106 if (identical(next, $SPACE) \|\| identical(next, $TAB)	211 if (identical(next, $SPACE) \|\| identical(next, $TAB)

107 \|\| identical(next, $LF) \|\| identical(next, $CR)) {	212 \|\| identical(next, $LF) \|\| identical(next, $CR)) {

108 appendWhiteSpace(next);	213 appendWhiteSpace(next);

109 next = advance();	214 next = advance();

	215 // Sequences of spaces are common, so advance through them fast.

110 while (identical(next, $SPACE)) {	216 while (identical(next, $SPACE)) {

111 appendWhiteSpace(next);	217 // We don't invoke [:appendWhiteSpace(next):] here for efficiency,

	218 // assuming that it does not do anything for space characters.

112 next = advance();	219 next = advance();

113 }	220 }

114 return next;	221 return next;

115 }	222 }

116	223

117 if ($a <= next && next <= $z) {	224 if ($a <= next && next <= $z) {

118 if (identical($r, next)) {	225 if (identical($r, next)) {

119 return tokenizeRawStringKeywordOrIdentifier(next);	226 return tokenizeRawStringKeywordOrIdentifier(next);

120 }	227 }

121 return tokenizeKeywordOrIdentifier(next, true);	228 return tokenizeKeywordOrIdentifier(next, true);

122 }	229 }

123	230

124 if (($A <= next && next <= $Z) \|\| identical(next, $_) \|\| identical(next, $$) ) {	231 if (($A <= next && next <= $Z) \|\|

125 return tokenizeIdentifier(next, byteOffset, true);	232 identical(next, $_) \|\|

	233 identical(next, $$)) {

	234 return tokenizeIdentifier(next, scanOffset, true);

126 }	235 }

127	236

128 if (identical(next, $LT)) {	237 if (identical(next, $LT)) {

129 return tokenizeLessThan(next);	238 return tokenizeLessThan(next);

130 }	239 }

131	240

132 if (identical(next, $GT)) {	241 if (identical(next, $GT)) {

133 return tokenizeGreaterThan(next);	242 return tokenizeGreaterThan(next);

134 }	243 }

135	244

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
180 if (identical(next, $BACKSLASH)) {	289 if (identical(next, $BACKSLASH)) {

181 appendPrecedenceToken(BACKSLASH_INFO);	290 appendPrecedenceToken(BACKSLASH_INFO);

182 return advance();	291 return advance();

183 }	292 }

184	293

185 if (identical(next, $HASH)) {	294 if (identical(next, $HASH)) {

186 return tokenizeTag(next);	295 return tokenizeTag(next);

187 }	296 }

188	297

189 if (identical(next, $OPEN_PAREN)) {	298 if (identical(next, $OPEN_PAREN)) {

190 appendBeginGroup(OPEN_PAREN_INFO, "(");	299 appendBeginGroup(OPEN_PAREN_INFO);

191 return advance();	300 return advance();

192 }	301 }

193	302

194 if (identical(next, $CLOSE_PAREN)) {	303 if (identical(next, $CLOSE_PAREN)) {

195 return appendEndGroup(CLOSE_PAREN_INFO, ")", OPEN_PAREN_TOKEN);	304 return appendEndGroup(CLOSE_PAREN_INFO, OPEN_PAREN_TOKEN);

196 }	305 }

197	306

198 if (identical(next, $COMMA)) {	307 if (identical(next, $COMMA)) {

199 appendPrecedenceToken(COMMA_INFO);	308 appendPrecedenceToken(COMMA_INFO);

200 return advance();	309 return advance();

201 }	310 }

202	311

203 if (identical(next, $COLON)) {	312 if (identical(next, $COLON)) {

204 appendPrecedenceToken(COLON_INFO);	313 appendPrecedenceToken(COLON_INFO);

205 return advance();	314 return advance();

206 }	315 }

207	316

208 if (identical(next, $SEMICOLON)) {	317 if (identical(next, $SEMICOLON)) {

209 appendPrecedenceToken(SEMICOLON_INFO);	318 appendPrecedenceToken(SEMICOLON_INFO);

210 // Type parameters and arguments cannot contain semicolon.	319 // Type parameters and arguments cannot contain semicolon.

211 discardOpenLt();	320 discardOpenLt();

212 return advance();	321 return advance();

213 }	322 }

214	323

215 if (identical(next, $QUESTION)) {	324 if (identical(next, $QUESTION)) {

216 appendPrecedenceToken(QUESTION_INFO);	325 appendPrecedenceToken(QUESTION_INFO);

217 return advance();	326 return advance();

218 }	327 }

219	328

220 if (identical(next, $CLOSE_SQUARE_BRACKET)) {	329 if (identical(next, $CLOSE_SQUARE_BRACKET)) {

221 return appendEndGroup(CLOSE_SQUARE_BRACKET_INFO, "]",	330 return appendEndGroup(CLOSE_SQUARE_BRACKET_INFO,

222 OPEN_SQUARE_BRACKET_TOKEN);	331 OPEN_SQUARE_BRACKET_TOKEN);

223 }	332 }

224	333

225 if (identical(next, $BACKPING)) {	334 if (identical(next, $BACKPING)) {

226 appendPrecedenceToken(BACKPING_INFO);	335 appendPrecedenceToken(BACKPING_INFO);

227 return advance();	336 return advance();

228 }	337 }

229	338

230 if (identical(next, $OPEN_CURLY_BRACKET)) {	339 if (identical(next, $OPEN_CURLY_BRACKET)) {

231 appendBeginGroup(OPEN_CURLY_BRACKET_INFO, "{");	340 appendBeginGroup(OPEN_CURLY_BRACKET_INFO);

232 return advance();	341 return advance();

233 }	342 }

234	343

235 if (identical(next, $CLOSE_CURLY_BRACKET)) {	344 if (identical(next, $CLOSE_CURLY_BRACKET)) {

236 return appendEndGroup(CLOSE_CURLY_BRACKET_INFO, "}",	345 return appendEndGroup(CLOSE_CURLY_BRACKET_INFO,

237 OPEN_CURLY_BRACKET_TOKEN);	346 OPEN_CURLY_BRACKET_TOKEN);

238 }	347 }

239	348

240 if (identical(next, $SLASH)) {	349 if (identical(next, $SLASH)) {

241 return tokenizeSlashOrComment(next);	350 return tokenizeSlashOrComment(next);

242 }	351 }

243	352

244 if (identical(next, $AT)) {	353 if (identical(next, $AT)) {

245 return tokenizeAt(next);	354 return tokenizeAt(next);

246 }	355 }

247	356

248 if (identical(next, $DQ) \|\| identical(next, $SQ)) {	357 if (identical(next, $DQ) \|\| identical(next, $SQ)) {

249 return tokenizeString(next, byteOffset, false);	358 return tokenizeString(next, scanOffset, false);

250 }	359 }

251	360

252 if (identical(next, $PERIOD)) {	361 if (identical(next, $PERIOD)) {

253 return tokenizeDotsOrNumber(next);	362 return tokenizeDotsOrNumber(next);

254 }	363 }

255	364

256 if (identical(next, $0)) {	365 if (identical(next, $0)) {

257 return tokenizeHexOrNumber(next);	366 return tokenizeHexOrNumber(next);

258 }	367 }

259	368

260 // TODO(ahe): Would a range check be faster?	369 // TODO(ahe): Would a range check be faster?

261 if (identical(next, $1) \|\| identical(next, $2) \|\| identical(next, $3)	370 if (identical(next, $1) \|\| identical(next, $2) \|\| identical(next, $3)

262 \|\| identical(next, $4) \|\| identical(next, $5) \|\| identical(next, $6)	371 \|\| identical(next, $4) \|\| identical(next, $5) \|\| identical(next, $6)

263 \|\| identical(next, $7) \|\| identical(next, $8) \|\| identical(next, $9)) {	372 \|\| identical(next, $7) \|\| identical(next, $8) \|\| identical(next, $9)) {

264 return tokenizeNumber(next);	373 return tokenizeNumber(next);

265 }	374 }

266	375

267 if (identical(next, $EOF)) {	376 if (identical(next, $EOF)) {

268 return $EOF;	377 return $EOF;

269 }	378 }

270 if (next < 0x1f) {	379 if (next < 0x1f) {

271 return error(new SourceString("unexpected character $next"));	380 return error("unexpected character $next");

272 }	381 }

273	382

	383 next = currentAsUnicode(next);

	384

274 // The following are non-ASCII characters.	385 // The following are non-ASCII characters.

275	386

276 if (identical(next, $NBSP)) {	387 if (identical(next, $NBSP)) {

277 appendWhiteSpace(next);	388 appendWhiteSpace(next);

278 return advance();	389 return advance();

279 }	390 }

280	391

281 return tokenizeIdentifier(next, byteOffset, true);	392 return error("unexpected unicode character $next");

282 }	393 }

283	394

284 int tokenizeTag(int next) {	395 int tokenizeTag(int next) {

285 // # or #!.*[\n\r]	396 // # or #!.*[\n\r]

286 if (byteOffset == 0) {	397 if (scanOffset == 0) {

287 if (identical(peek(), $BANG)) {	398 if (identical(peek(), $BANG)) {

	399 int start = scanOffset + 1;

	400 bool asciiOnly = true;

288 do {	401 do {

289 next = advance();	402 next = advance();

290 } while (!identical(next, $LF) && !identical(next, $CR) && !identical(next, $EOF));	403 if (next > 127) asciiOnly = false;

	404 } while (!identical(next, $LF) &&

	405 !identical(next, $CR) &&

	406 !identical(next, $EOF));

	407 if (!asciiOnly) handleUnicode(start);

291 return next;	408 return next;

292 }	409 }

293 }	410 }

294 appendPrecedenceToken(HASH_INFO);	411 appendPrecedenceToken(HASH_INFO);

295 return advance();	412 return advance();

296 }	413 }

297	414

298 int tokenizeTilde(int next) {	415 int tokenizeTilde(int next) {

299 // ~ ~/ ~/=	416 // ~ ~/ ~/=

300 next = advance();	417 next = advance();

301 if (identical(next, $SLASH)) {	418 if (identical(next, $SLASH)) {

302 return select($EQ, TILDE_SLASH_EQ_INFO, TILDE_SLASH_INFO);	419 return select($EQ, TILDE_SLASH_EQ_INFO, TILDE_SLASH_INFO);

303 } else {	420 } else {

304 appendPrecedenceToken(TILDE_INFO);	421 appendPrecedenceToken(TILDE_INFO);

305 return next;	422 return next;

306 }	423 }

307 }	424 }

308	425

309 int tokenizeOpenSquareBracket(int next) {	426 int tokenizeOpenSquareBracket(int next) {

310 // [ [] []=	427 // [ [] []=

311 next = advance();	428 next = advance();

312 if (identical(next, $CLOSE_SQUARE_BRACKET)) {	429 if (identical(next, $CLOSE_SQUARE_BRACKET)) {

313 Token token = previousToken();	430 Token token = previousToken();

314 if (token is KeywordToken && identical(token.value.stringValue, 'operator')) {	431 if (token is KeywordToken &&

	432 identical((token as KeywordToken).keyword.syntax, 'operator')) {

315 return select($EQ, INDEX_EQ_INFO, INDEX_INFO);	433 return select($EQ, INDEX_EQ_INFO, INDEX_INFO);

316 }	434 }

317 }	435 }

318 appendBeginGroup(OPEN_SQUARE_BRACKET_INFO, "[");	436 appendBeginGroup(OPEN_SQUARE_BRACKET_INFO);

319 return next;	437 return next;

320 }	438 }

321	439

322 int tokenizeCaret(int next) {	440 int tokenizeCaret(int next) {

323 // ^ ^=	441 // ^ ^=

324 return select($EQ, CARET_EQ_INFO, CARET_INFO);	442 return select($EQ, CARET_EQ_INFO, CARET_INFO);

325 }	443 }

326	444

327 int tokenizeBar(int next) {	445 int tokenizeBar(int next) {

328 // \| \|\| \|=	446 // \| \|\| \|=

(...skipping 43 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
372 return advance();	490 return advance();

373 } else if (identical(next, $EQ)) {	491 } else if (identical(next, $EQ)) {

374 appendPrecedenceToken(MINUS_EQ_INFO);	492 appendPrecedenceToken(MINUS_EQ_INFO);

375 return advance();	493 return advance();

376 } else {	494 } else {

377 appendPrecedenceToken(MINUS_INFO);	495 appendPrecedenceToken(MINUS_INFO);

378 return next;	496 return next;

379 }	497 }

380 }	498 }

381	499

382

383 int tokenizePlus(int next) {	500 int tokenizePlus(int next) {

384 // + ++ +=	501 // + ++ +=

385 next = advance();	502 next = advance();

386 if (identical($PLUS, next)) {	503 if (identical($PLUS, next)) {

387 appendPrecedenceToken(PLUS_PLUS_INFO);	504 appendPrecedenceToken(PLUS_PLUS_INFO);

388 return advance();	505 return advance();

389 } else if (identical($EQ, next)) {	506 } else if (identical($EQ, next)) {

390 appendPrecedenceToken(PLUS_EQ_INFO);	507 appendPrecedenceToken(PLUS_EQ_INFO);

391 return advance();	508 return advance();

392 } else {	509 } else {

393 appendPrecedenceToken(PLUS_INFO);	510 appendPrecedenceToken(PLUS_INFO);

394 return next;	511 return next;

395 }	512 }

396 }	513 }

397	514

398 int tokenizeExclamation(int next) {	515 int tokenizeExclamation(int next) {

399 // ! != !==	516 // ! !=

	517 // !== is kept for user-friendly error reporting.

	518

400 next = advance();	519 next = advance();

401 if (identical(next, $EQ)) {	520 if (identical(next, $EQ)) {

402 return select($EQ, BANG_EQ_EQ_INFO, BANG_EQ_INFO);	521 return select($EQ, BANG_EQ_EQ_INFO, BANG_EQ_INFO);

403 }	522 }

404 appendPrecedenceToken(BANG_INFO);	523 appendPrecedenceToken(BANG_INFO);

405 return next;	524 return next;

406 }	525 }

407	526

408 int tokenizeEquals(int next) {	527 int tokenizeEquals(int next) {

409 // = == ===	528 // = == =>

	529 // === is kept for user-friendly error reporting.

410	530

411 // Type parameters and arguments cannot contain any token that	531 // Type parameters and arguments cannot contain any token that

412 // starts with '='.	532 // starts with '='.

413 discardOpenLt();	533 discardOpenLt();

414	534

415 next = advance();	535 next = advance();

416 if (identical(next, $EQ)) {	536 if (identical(next, $EQ)) {

417 return select($EQ, EQ_EQ_EQ_INFO, EQ_EQ_INFO);	537 return select($EQ, EQ_EQ_EQ_INFO, EQ_EQ_INFO);

418 } else if (identical(next, $GT)) {	538 } else if (identical(next, $GT)) {

419 appendPrecedenceToken(FUNCTION_INFO);	539 appendPrecedenceToken(FUNCTION_INFO);

420 return advance();	540 return advance();

421 }	541 }

422 appendPrecedenceToken(EQ_INFO);	542 appendPrecedenceToken(EQ_INFO);

423 return next;	543 return next;

424 }	544 }

425	545

426 int tokenizeGreaterThan(int next) {	546 int tokenizeGreaterThan(int next) {

427 // > >= >> >>= >>> >>>=	547 // > >= >> >>=

428 next = advance();	548 next = advance();

429 if (identical($EQ, next)) {	549 if (identical($EQ, next)) {

430 appendPrecedenceToken(GT_EQ_INFO);	550 appendPrecedenceToken(GT_EQ_INFO);

431 return advance();	551 return advance();

432 } else if (identical($GT, next)) {	552 } else if (identical($GT, next)) {

433 next = advance();	553 next = advance();

434 if (identical($EQ, next)) {	554 if (identical($EQ, next)) {

435 appendPrecedenceToken(GT_GT_EQ_INFO);	555 appendPrecedenceToken(GT_GT_EQ_INFO);

436 return advance();	556 return advance();

437 } else {	557 } else {

438 appendGtGt(GT_GT_INFO, ">>");	558 appendGtGt(GT_GT_INFO);

439 return next;	559 return next;

440 }	560 }

441 } else {	561 } else {

442 appendGt(GT_INFO, ">");	562 appendGt(GT_INFO);

443 return next;	563 return next;

444 }	564 }

445 }	565 }

446	566

447 int tokenizeLessThan(int next) {	567 int tokenizeLessThan(int next) {

448 // < <= << <<=	568 // < <= << <<=

449 next = advance();	569 next = advance();

450 if (identical($EQ, next)) {	570 if (identical($EQ, next)) {

451 appendPrecedenceToken(LT_EQ_INFO);	571 appendPrecedenceToken(LT_EQ_INFO);

452 return advance();	572 return advance();

453 } else if (identical($LT, next)) {	573 } else if (identical($LT, next)) {

454 return select($EQ, LT_LT_EQ_INFO, LT_LT_INFO);	574 return select($EQ, LT_LT_EQ_INFO, LT_LT_INFO);

455 } else {	575 } else {

456 appendBeginGroup(LT_INFO, "<");	576 appendBeginGroup(LT_INFO);

457 return next;	577 return next;

458 }	578 }

459 }	579 }

460	580

461 int tokenizeNumber(int next) {	581 int tokenizeNumber(int next) {

462 int start = byteOffset;	582 int start = scanOffset;

463 while (true) {	583 while (true) {

464 next = advance();	584 next = advance();

465 if ($0 <= next && next <= $9) {	585 if ($0 <= next && next <= $9) {

466 continue;	586 continue;

467 } else if (identical(next, $e) \|\| identical(next, $E)) {	587 } else if (identical(next, $e) \|\| identical(next, $E)) {

468 return tokenizeFractionPart(next, start);	588 return tokenizeFractionPart(next, start);

469 } else {	589 } else {

470 if (identical(next, $PERIOD)) {	590 if (identical(next, $PERIOD)) {

471 int nextnext = peek();	591 int nextnext = peek();

472 if ($0 <= nextnext && nextnext <= $9) {	592 if ($0 <= nextnext && nextnext <= $9) {

473 return tokenizeFractionPart(advance(), start);	593 return tokenizeFractionPart(advance(), start);

474 }	594 }

475 }	595 }

476 appendByteStringToken(INT_INFO, asciiString(start, 0));	596 appendSubstringToken(INT_INFO, start, true);

477 return next;	597 return next;

478 }	598 }

479 }	599 }

480 }	600 }

481	601

482 int tokenizeHexOrNumber(int next) {	602 int tokenizeHexOrNumber(int next) {

483 int x = peek();	603 int x = peek();

484 if (identical(x, $x) \|\| identical(x, $X)) {	604 if (identical(x, $x) \|\| identical(x, $X)) {

485 advance();	605 return tokenizeHex(next);

486 return tokenizeHex(x);

487 }	606 }

488 return tokenizeNumber(next);	607 return tokenizeNumber(next);

489 }	608 }

490	609

491 int tokenizeHex(int next) {	610 int tokenizeHex(int next) {

492 int start = byteOffset - 1;	611 int start = scanOffset;

	612 next = advance(); // Advance past the $x or $X.

493 bool hasDigits = false;	613 bool hasDigits = false;

494 while (true) {	614 while (true) {

495 next = advance();	615 next = advance();

496 if (($0 <= next && next <= $9)	616 if (($0 <= next && next <= $9)

497 \|\| ($A <= next && next <= $F)	617 \|\| ($A <= next && next <= $F)

498 \|\| ($a <= next && next <= $f)) {	618 \|\| ($a <= next && next <= $f)) {

499 hasDigits = true;	619 hasDigits = true;

500 } else {	620 } else {

501 if (!hasDigits) {	621 if (!hasDigits) {

502 return error(const SourceString("hex digit expected"));	622 return error("hex digit expected");

503 }	623 }

504 appendByteStringToken(HEXADECIMAL_INFO, asciiString(start, 0));	624 appendSubstringToken(HEXADECIMAL_INFO, start, true);

505 return next;	625 return next;

506 }	626 }

507 }	627 }

508 }	628 }

509	629

510 int tokenizeDotsOrNumber(int next) {	630 int tokenizeDotsOrNumber(int next) {

511 int start = byteOffset;	631 int start = scanOffset;

512 next = advance();	632 next = advance();

513 if (($0 <= next && next <= $9)) {	633 if (($0 <= next && next <= $9)) {

514 return tokenizeFractionPart(next, start);	634 return tokenizeFractionPart(next, start);

515 } else if (identical($PERIOD, next)) {	635 } else if (identical($PERIOD, next)) {

516 return select($PERIOD, PERIOD_PERIOD_PERIOD_INFO, PERIOD_PERIOD_INFO);	636 return select($PERIOD, PERIOD_PERIOD_PERIOD_INFO, PERIOD_PERIOD_INFO);

517 } else {	637 } else {

518 appendPrecedenceToken(PERIOD_INFO);	638 appendPrecedenceToken(PERIOD_INFO);

519 return next;	639 return next;

520 }	640 }

521 }	641 }

522	642

523 int tokenizeFractionPart(int next, int start) {	643 int tokenizeFractionPart(int next, int start) {

524 bool done = false;	644 bool done = false;

525 bool hasDigit = false;	645 bool hasDigit = false;

526 LOOP: while (!done) {	646 LOOP: while (!done) {

527 if ($0 <= next && next <= $9) {	647 if ($0 <= next && next <= $9) {

528 hasDigit = true;	648 hasDigit = true;

529 } else if (identical($e, next) \|\| identical($E, next)) {	649 } else if (identical($e, next) \|\| identical($E, next)) {

530 hasDigit = true;	650 hasDigit = true;

531 next = tokenizeExponent(advance());	651 next = tokenizeExponent(advance());

532 done = true;	652 done = true;

533 continue LOOP;	653 continue LOOP;

534 } else {	654 } else {

535 done = true;	655 done = true;

536 continue LOOP;	656 continue LOOP;

537 }	657 }

538 next = advance();	658 next = advance();

539 }	659 }

540 if (!hasDigit) {	660 if (!hasDigit) {

541 appendByteStringToken(INT_INFO, asciiString(start, -1));	661 // Reduce offset, we already advanced to the token past the period.

	662 appendSubstringToken(INT_INFO, start, true, -1);

	663

	664 // TODO(ahe): Wrong offset for the period. Cannot call beginToken because

	665 // the scanner already advanced past the period.

542 if (identical($PERIOD, next)) {	666 if (identical($PERIOD, next)) {

543 return select($PERIOD, PERIOD_PERIOD_PERIOD_INFO, PERIOD_PERIOD_INFO);	667 return select($PERIOD, PERIOD_PERIOD_PERIOD_INFO, PERIOD_PERIOD_INFO);

544 }	668 }

545 // TODO(ahe): Wrong offset for the period.

546 appendPrecedenceToken(PERIOD_INFO);	669 appendPrecedenceToken(PERIOD_INFO);

547 return bigSwitch(next);	670 return next;

548 }	671 }

549 appendByteStringToken(DOUBLE_INFO, asciiString(start, 0));	672 appendSubstringToken(DOUBLE_INFO, start, true);

550 return next;	673 return next;

551 }	674 }

552	675

553 int tokenizeExponent(int next) {	676 int tokenizeExponent(int next) {

554 if (identical(next, $PLUS) \|\| identical(next, $MINUS)) {	677 if (identical(next, $PLUS) \|\| identical(next, $MINUS)) {

555 next = advance();	678 next = advance();

556 }	679 }

557 bool hasDigits = false;	680 bool hasDigits = false;

558 while (true) {	681 while (true) {

559 if ($0 <= next && next <= $9) {	682 if ($0 <= next && next <= $9) {

560 hasDigits = true;	683 hasDigits = true;

561 } else {	684 } else {

562 if (!hasDigits) {	685 if (!hasDigits) {

563 return error(const SourceString("digit expected"));	686 return error("digit expected");

564 }	687 }

565 return next;	688 return next;

566 }	689 }

567 next = advance();	690 next = advance();

568 }	691 }

569 }	692 }

570	693

571 int tokenizeSlashOrComment(int next) {	694 int tokenizeSlashOrComment(int next) {

	695 int start = scanOffset;

572 next = advance();	696 next = advance();

573 if (identical($STAR, next)) {	697 if (identical($STAR, next)) {

574 return tokenizeMultiLineComment(next);	698 return tokenizeMultiLineComment(next, start);

575 } else if (identical($SLASH, next)) {	699 } else if (identical($SLASH, next)) {

576 return tokenizeSingleLineComment(next);	700 return tokenizeSingleLineComment(next, start);

577 } else if (identical($EQ, next)) {	701 } else if (identical($EQ, next)) {

578 appendPrecedenceToken(SLASH_EQ_INFO);	702 appendPrecedenceToken(SLASH_EQ_INFO);

579 return advance();	703 return advance();

580 } else {	704 } else {

581 appendPrecedenceToken(SLASH_INFO);	705 appendPrecedenceToken(SLASH_INFO);

582 return next;	706 return next;

583 }	707 }

584 }	708 }

585	709

586 int tokenizeSingleLineComment(int next) {	710 int tokenizeSingleLineComment(int next, int start) {

	711 bool asciiOnly = true;

587 while (true) {	712 while (true) {

588 next = advance();	713 next = advance();

589 if (identical($LF, next) \|\| identical($CR, next) \|\| identical($EOF, next)) {	714 if (next > 127) asciiOnly = false;

590 appendComment();	715 if (identical($LF, next) \|\|

	716 identical($CR, next) \|\|

	717 identical($EOF, next)) {

	718 if (!asciiOnly) handleUnicode(start);

	719 appendComment(start, asciiOnly);

591 return next;	720 return next;

592 }	721 }

593 }	722 }

594 }	723 }

595	724

596 int tokenizeMultiLineComment(int next) {	725

	726 int tokenizeMultiLineComment(int next, int start) {

	727 bool asciiOnlyComment = true; // Track if the entire comment is ASCII.

	728 bool asciiOnlyLines = true; // Track ASCII since the last handleUnicode.

	729 int unicodeStart = start;

597 int nesting = 1;	730 int nesting = 1;

598 next = advance();	731 next = advance();

599 while (true) {	732 while (true) {

600 if (identical($EOF, next)) {	733 if (identical($EOF, next)) {

601 // TODO(ahe): Report error.	734 if (!asciiOnlyLines) handleUnicode(unicodeStart);

	735 appendStringToken(BAD_INPUT_INFO, "unterminated multi-line comment");

602 return next;	736 return next;

603 } else if (identical($STAR, next)) {	737 } else if (identical($STAR, next)) {

604 next = advance();	738 next = advance();

605 if (identical($SLASH, next)) {	739 if (identical($SLASH, next)) {

606 --nesting;	740 --nesting;

607 if (0 == nesting) {	741 if (0 == nesting) {

	742 if (!asciiOnlyLines) handleUnicode(unicodeStart);

608 next = advance();	743 next = advance();

609 appendComment();	744 appendComment(start, asciiOnlyComment);

610 return next;	745 return next;

611 } else {	746 } else {

612 next = advance();	747 next = advance();

613 }	748 }

614 }	749 }

615 } else if (identical($SLASH, next)) {	750 } else if (identical($SLASH, next)) {

616 next = advance();	751 next = advance();

617 if (identical($STAR, next)) {	752 if (identical($STAR, next)) {

618 next = advance();	753 next = advance();

619 ++nesting;	754 ++nesting;

620 }	755 }

	756 } else if (identical(next, $LF)) {

	757 if (!asciiOnlyLines) {

	758 // Synchronize the string offset in the utf8 scanner.

	759 handleUnicode(unicodeStart);

	760 asciiOnlyLines = true;

	761 unicodeStart = scanOffset;

	762 }

	763 lineFeedInMultiline();

	764 next = advance();

621 } else {	765 } else {

	766 if (next > 127) {

	767 asciiOnlyLines = false;

	768 asciiOnlyComment = false;

	769 }

622 next = advance();	770 next = advance();

623 }	771 }

624 }	772 }

625 }	773 }

626	774

627 int tokenizeRawStringKeywordOrIdentifier(int next) {	775 int tokenizeRawStringKeywordOrIdentifier(int next) {

	776 // [next] is $r.

628 int nextnext = peek();	777 int nextnext = peek();

629 if (identical(nextnext, $DQ) \|\| identical(nextnext, $SQ)) {	778 if (identical(nextnext, $DQ) \|\| identical(nextnext, $SQ)) {

630 int start = byteOffset;	779 int start = scanOffset;

631 next = advance();	780 next = advance();

632 return tokenizeString(next, start, true);	781 return tokenizeString(next, start, true);

633 }	782 }

634 return tokenizeKeywordOrIdentifier(next, true);	783 return tokenizeKeywordOrIdentifier(next, true);

635 }	784 }

636	785

637 int tokenizeKeywordOrIdentifier(int next, bool allowDollar) {	786 int tokenizeKeywordOrIdentifier(int next, bool allowDollar) {

638 KeywordState state = KeywordState.KEYWORD_STATE;	787 KeywordState state = KeywordState.KEYWORD_STATE;

639 int start = byteOffset;	788 int start = scanOffset;

640 while (state != null && $a <= next && next <= $z) {	789 while (state != null && $a <= next && next <= $z) {

641 state = state.next(next);	790 state = state.next(next);

642 next = advance();	791 next = advance();

643 }	792 }

644 if (state == null \|\| state.keyword == null) {	793 if (state == null \|\| state.keyword == null) {

645 return tokenizeIdentifier(next, start, allowDollar);	794 return tokenizeIdentifier(next, start, allowDollar);

646 }	795 }

647 if (($A <= next && next <= $Z) \|\|	796 if (($A <= next && next <= $Z) \|\|

648 ($0 <= next && next <= $9) \|\|	797 ($0 <= next && next <= $9) \|\|

649 identical(next, $_) \|\|	798 identical(next, $_) \|\|

650 identical(next, $$)) {	799 identical(next, $$)) {

651 return tokenizeIdentifier(next, start, allowDollar);	800 return tokenizeIdentifier(next, start, allowDollar);

652 } else if (next < 128) {	801 } else {

653 appendKeywordToken(state.keyword);	802 appendKeywordToken(state.keyword);

654 return next;	803 return next;

655 } else {

656 return tokenizeIdentifier(next, start, allowDollar);

657 }	804 }

658 }	805 }

659	806

	807 /**

	808 * [allowDollar] can exclude '$', which is not allowed as part of a string

	809 * interpolation identifier.

	810 */

660 int tokenizeIdentifier(int next, int start, bool allowDollar) {	811 int tokenizeIdentifier(int next, int start, bool allowDollar) {

661 bool isAscii = true;

662

663 while (true) {	812 while (true) {

664 if (($a <= next && next <= $z) \|\|	813 if (($a <= next && next <= $z) \|\|

665 ($A <= next && next <= $Z) \|\|	814 ($A <= next && next <= $Z) \|\|

666 ($0 <= next && next <= $9) \|\|	815 ($0 <= next && next <= $9) \|\|

667 identical(next, $_) \|\|	816 identical(next, $_) \|\|

668 (identical(next, $$) && allowDollar)) {	817 (identical(next, $$) && allowDollar)) {

669 next = advance();	818 next = advance();

670 } else if ((next < 128) \|\| (identical(next, $NBSP))) {	819 } else {

671 // Identifier ends here.	820 // Identifier ends here.

672 if (start == byteOffset) {	821 if (start == scanOffset) {

673 return error(const SourceString("expected identifier"));	822 return error("expected identifier");

674 } else if (isAscii) {

675 appendByteStringToken(IDENTIFIER_INFO, asciiString(start, 0));

676 } else {	823 } else {

677 appendByteStringToken(BAD_INPUT_INFO, utf8String(start, -1));	824 appendSubstringToken(IDENTIFIER_INFO, start, true);

678 }	825 }

679 return next;	826 return next;

680 } else {

681 int nonAsciiStart = byteOffset;

682 do {

683 next = nextByte();

684 if (identical(next, $NBSP)) break;

685 } while (next > 127);

686 String string = utf8String(nonAsciiStart, -1).slowToString();

687 isAscii = false;

688 int byteLength = nonAsciiStart - byteOffset;

689 addToCharOffset(string.length - byteLength);

690 }	827 }

691 }	828 }

692 }	829 }

693	830

694 int tokenizeAt(int next) {	831 int tokenizeAt(int next) {

695 int start = byteOffset;

696 next = advance();

697 appendPrecedenceToken(AT_INFO);	832 appendPrecedenceToken(AT_INFO);

698 return next;	833 return advance();

699 }	834 }

700	835

701 int tokenizeString(int next, int start, bool raw) {	836 int tokenizeString(int next, int start, bool raw) {

702 int quoteChar = next;	837 int quoteChar = next;

703 next = advance();	838 next = advance();

704 if (identical(quoteChar, next)) {	839 if (identical(quoteChar, next)) {

705 next = advance();	840 next = advance();

706 if (identical(quoteChar, next)) {	841 if (identical(quoteChar, next)) {

707 // Multiline string.	842 // Multiline string.

708 return tokenizeMultiLineString(quoteChar, start, raw);	843 return tokenizeMultiLineString(quoteChar, start, raw);

709 } else {	844 } else {

710 // Empty string.	845 // Empty string.

711 appendByteStringToken(STRING_INFO, utf8String(start, -1));	846 appendSubstringToken(STRING_INFO, start, true);

712 return next;	847 return next;

713 }	848 }

714 }	849 }

715 if (raw) {	850 if (raw) {

716 return tokenizeSingleLineRawString(next, quoteChar, start);	851 return tokenizeSingleLineRawString(next, quoteChar, start);

717 } else {	852 } else {

718 return tokenizeSingleLineString(next, quoteChar, start);	853 return tokenizeSingleLineString(next, quoteChar, start);

719 }	854 }

720 }	855 }

721	856

722 static bool isHexDigit(int character) {	857 /**

723 if ($0 <= character && character <= $9) return true;	858 * [next] is the first character after the qoute.
	ngeoffray 2013/10/18 10:19:37: qoute -> quote | lukas 2013/10/24 16:48:36 (in reply): Done.
724 character \|= 0x20;	859 * [start] is the scanOffset of the quote.

725 return ($a <= character && character <= $f);	860 *

726 }	861 * The token contains a substring of the source file, including the

727	862 * string quotes, backslashes for escaping. For interpolated strings,

	863 * the parts before and after are separate tokens.

	864 *

	865 * "a $b c"

	866 *

	867 * gives StringToken("a $), StringToken(b) and StringToken( c").

	868 */

728 int tokenizeSingleLineString(int next, int quoteChar, int start) {	869 int tokenizeSingleLineString(int next, int quoteChar, int start) {

	870 bool asciiOnly = true;

729 while (!identical(next, quoteChar)) {	871 while (!identical(next, quoteChar)) {

730 if (identical(next, $BACKSLASH)) {	872 if (identical(next, $BACKSLASH)) {

731 next = advance();	873 next = advance();

732 } else if (identical(next, $$)) {	874 } else if (identical(next, $$)) {

733 next = tokenizeStringInterpolation(start);	875 if (!asciiOnly) handleUnicode(start);

734 start = byteOffset;	876 next = tokenizeStringInterpolation(start, asciiOnly);

	877 start = scanOffset;

	878 asciiOnly = true;

735 continue;	879 continue;

736 }	880 }

737 if (next <= $CR	881 if (next <= $CR

738 && (identical(next, $LF) \|\| identical(next, $CR) \|\| identical(next, $EOF))) {	882 && (identical(next, $LF) \|\|

739 return error(const SourceString("unterminated string literal"));	883 identical(next, $CR) \|\|

	884 identical(next, $EOF))) {

	885 if (!asciiOnly) handleUnicode(start);

	886 return error("unterminated string literal");

740 }	887 }

	888 if (next > 127) asciiOnly = false;

741 next = advance();	889 next = advance();

742 }	890 }

743 appendByteStringToken(STRING_INFO, utf8String(start, 0));	891 if (!asciiOnly) handleUnicode(start);

744 return advance();	892 // Advance past the quote character.

	893 next = advance();

	894 appendSubstringToken(STRING_INFO, start, asciiOnly);

	895 return next;

745 }	896 }

746	897

747 int tokenizeStringInterpolation(int start) {	898 int tokenizeStringInterpolation(int start, bool asciiOnly) {

748 appendByteStringToken(STRING_INFO, utf8String(start, -1));	899 appendSubstringToken(STRING_INFO, start, asciiOnly);

749 beginToken(); // $ starts here.	900 beginToken(); // $ starts here.

750 int next = advance();	901 int next = advance();

751 if (identical(next, $OPEN_CURLY_BRACKET)) {	902 if (identical(next, $OPEN_CURLY_BRACKET)) {

752 return tokenizeInterpolatedExpression(next, start);	903 return tokenizeInterpolatedExpression(next);

753 } else {	904 } else {

754 return tokenizeInterpolatedIdentifier(next, start);	905 return tokenizeInterpolatedIdentifier(next);

755 }	906 }

756 }	907 }

757	908

758 int tokenizeInterpolatedExpression(int next, int start) {	909 int tokenizeInterpolatedExpression(int next) {

759 appendBeginGroup(STRING_INTERPOLATION_INFO, "\${");	910 appendBeginGroup(STRING_INTERPOLATION_INFO);

760 beginToken(); // The expression starts here.	911 beginToken(); // The expression starts here.

761 next = advance();	912 next = advance(); // Move past the curly bracket.

762 while (!identical(next, $EOF) && !identical(next, $STX)) {	913 while (!identical(next, $EOF) && !identical(next, $STX)) {

763 next = bigSwitch(next);	914 next = bigSwitch(next);

764 }	915 }

765 if (identical(next, $EOF)) return next;	916 if (identical(next, $EOF)) return next;

766 next = advance();	917 next = advance(); // Move past the $STX.

767 beginToken(); // The string interpolation suffix starts here.	918 beginToken(); // The string interpolation suffix starts here.

768 return next;	919 return next;

769 }	920 }

770	921

771 int tokenizeInterpolatedIdentifier(int next, int start) {	922 int tokenizeInterpolatedIdentifier(int next) {

772 appendPrecedenceToken(STRING_INTERPOLATION_IDENTIFIER_INFO);	923 appendPrecedenceToken(STRING_INTERPOLATION_IDENTIFIER_INFO);

773 beginToken(); // The identifier starts here.	924 beginToken(); // The identifier starts here.

774 next = tokenizeKeywordOrIdentifier(next, false);	925 next = tokenizeKeywordOrIdentifier(next, false);

775 beginToken(); // The string interpolation suffix starts here.	926 beginToken(); // The string interpolation suffix starts here.

776 return next;	927 return next;

777 }	928 }

778	929

779 int tokenizeSingleLineRawString(int next, int quoteChar, int start) {	930 int tokenizeSingleLineRawString(int next, int quoteChar, int start) {

780 next = advance();	931 bool asciiOnly = true;

	932 next = advance(); // Advance past the quote
	ngeoffray 2013/10/18 10:19:37: Missing . | lukas 2013/10/24 16:48:36 (in reply): Done.
781 while (next != $EOF) {	933 while (next != $EOF) {

782 if (identical(next, quoteChar)) {	934 if (identical(next, quoteChar)) {

783 appendByteStringToken(STRING_INFO, utf8String(start, 0));	935 if (!asciiOnly) handleUnicode(start);

784 return advance();	936 next = advance();

	937 appendSubstringToken(STRING_INFO, start, asciiOnly);

	938 return next;

785 } else if (identical(next, $LF) \|\| identical(next, $CR)) {	939 } else if (identical(next, $LF) \|\| identical(next, $CR)) {

786 return error(const SourceString("unterminated string literal"));	940 if (!asciiOnly) handleUnicode(start);

	941 return error("unterminated string literal");

	942 } else if (next > 127) {

	943 asciiOnly = false;

787 }	944 }

788 next = advance();	945 next = advance();

789 }	946 }

790 return error(const SourceString("unterminated string literal"));	947 if (!asciiOnly) handleUnicode(start);

	948 return error("unterminated string literal");

791 }	949 }

792	950

793 int tokenizeMultiLineRawString(int quoteChar, int start) {	951 int tokenizeMultiLineRawString(int quoteChar, int start) {

794 int next = advance();	952 bool asciiOnlyString = true;

	953 bool asciiOnlyLine = true;

	954 int unicodeStart = start;

	955 int next = advance(); // Advance past the (last) quote (of three)
	ngeoffray 2013/10/18 10:19:37: Missing . | lukas 2013/10/24 16:48:36 (in reply): Done.
795 outer: while (!identical(next, $EOF)) {	956 outer: while (!identical(next, $EOF)) {

796 while (!identical(next, quoteChar)) {	957 while (!identical(next, quoteChar)) {

	958 if (identical(next, $LF)) {

	959 if (!asciiOnlyLine) {

	960 // Synchronize the string offset in the utf8 scanner.

	961 handleUnicode(unicodeStart);

	962 asciiOnlyLine = true;

	963 unicodeStart = scanOffset;

	964 }

	965 lineFeedInMultiline();

	966 } else if (next > 127) {

	967 asciiOnlyLine = false;

	968 asciiOnlyString = false;

	969 }

797 next = advance();	970 next = advance();

798 if (identical(next, $EOF)) break outer;	971 if (identical(next, $EOF)) break outer;

799 }	972 }

800 next = advance();	973 next = advance();

801 if (identical(next, quoteChar)) {	974 if (identical(next, quoteChar)) {

802 next = advance();	975 next = advance();

803 if (identical(next, quoteChar)) {	976 if (identical(next, quoteChar)) {

804 appendByteStringToken(STRING_INFO, utf8String(start, 0));	977 if (!asciiOnlyLine) handleUnicode(unicodeStart);

805 return advance();	978 next = advance();

	979 appendSubstringToken(STRING_INFO, start, asciiOnlyString);

	980 return next;

806 }	981 }

807 }	982 }

808 }	983 }

809 return error(const SourceString("unterminated string literal"));	984 if (!asciiOnlyLine) handleUnicode(unicodeStart);

	985 return error("unterminated string literal");

810 }	986 }

811	987

812 int tokenizeMultiLineString(int quoteChar, int start, bool raw) {	988 int tokenizeMultiLineString(int quoteChar, int start, bool raw) {

813 if (raw) return tokenizeMultiLineRawString(quoteChar, start);	989 if (raw) return tokenizeMultiLineRawString(quoteChar, start);

814 int next = advance();	990 bool asciiOnlyString = true;

	991 bool asciiOnlyLine = true;

	992 int unicodeStart = start;

	993 int next = advance(); // Advance past the (last) quote (of three).

815 while (!identical(next, $EOF)) {	994 while (!identical(next, $EOF)) {

816 if (identical(next, $$)) {	995 if (identical(next, $$)) {

817 next = tokenizeStringInterpolation(start);	996 if (!asciiOnlyLine) handleUnicode(unicodeStart);

818 start = byteOffset;	997 next = tokenizeStringInterpolation(start, asciiOnlyString);

	998 start = scanOffset;

	999 unicodeStart = start;

	1000 asciiOnlyString = true; // A new string token is created for the rest.

	1001 asciiOnlyLine = true;

819 continue;	1002 continue;

820 }	1003 }

821 if (identical(next, quoteChar)) {	1004 if (identical(next, quoteChar)) {

822 next = advance();	1005 next = advance();

823 if (identical(next, quoteChar)) {	1006 if (identical(next, quoteChar)) {

824 next = advance();	1007 next = advance();

825 if (identical(next, quoteChar)) {	1008 if (identical(next, quoteChar)) {

826 appendByteStringToken(STRING_INFO, utf8String(start, 0));	1009 if (!asciiOnlyLine) handleUnicode(unicodeStart);

827 return advance();	1010 next = advance();

	1011 appendSubstringToken(STRING_INFO, start, asciiOnlyString);

	1012 return next;

828 }	1013 }

829 }	1014 }

830 continue;	1015 continue;

831 }	1016 }

832 if (identical(next, $BACKSLASH)) {	1017 if (identical(next, $BACKSLASH)) {

833 next = advance();	1018 next = advance();

834 if (identical(next, $EOF)) break;	1019 if (identical(next, $EOF)) break;

835 }	1020 }

	1021 if (identical(next, $LF)) {

	1022 if (!asciiOnlyLine) {

	1023 // Synchronize the string offset in the utf8 scanner.

	1024 handleUnicode(unicodeStart);

	1025 asciiOnlyLine = true;

	1026 unicodeStart = scanOffset;

	1027 }

	1028 lineFeedInMultiline();

	1029 } else if (next > 127) {

	1030 asciiOnlyString = false;

	1031 asciiOnlyLine = false;

	1032 }

836 next = advance();	1033 next = advance();

837 }	1034 }

838 return error(const SourceString("unterminated string literal"));	1035 if (!asciiOnlyLine) handleUnicode(unicodeStart);

	1036 return error("unterminated string literal");

839 }	1037 }

840	1038

841 int error(SourceString message) {	1039 int error(String message) {

842 appendByteStringToken(BAD_INPUT_INFO, message);	1040 appendStringToken(BAD_INPUT_INFO, message);

843 return advance(); // Ensure progress.	1041 return advance(); // Ensure progress.

844 }	1042 }

	1043

	1044 void unmatchedBeginGroup(BeginGroupToken begin) {

	1045 String error = 'unmatched "${begin.stringValue}"';

	1046 Token close =

	1047 new StringToken.fromString(

	1048 BAD_INPUT_INFO, error, begin.charOffset, true);

	1049

	1050 // We want to ensure that unmatched BeginGroupTokens are reported

	1051 // as errors. However, the rest of the parser assume the groups
	ngeoffray 2013/10/18 10:19:37: assume -> assumes | lukas 2013/10/24 16:48:36 (in reply): Done.
	1052 // are well-balanced and will never look at the endGroup

	1053 // token. This is a nice property that allows us to skip quickly

	1054 // over correct code. By inserting an additional error token in

	1055 // the stream, we can keep ignoring endGroup tokens.

	1056 //

	1057 // [begin] --next--> [tail]

	1058 // [begin] --endG--> [close] --next--> [next] --next--> [tail]

	1059 //

	1060 // This allows the parser to skip from [begin] via endGroup to [close] and

	1061 // ignore the [close] token (assuming it's correct), then the error will be

	1062 // reported when parsing the [next] token.

	1063

	1064 Token next = new StringToken.fromString(

	1065 BAD_INPUT_INFO, error, begin.charOffset, true);

	1066 begin.endGroup = close;

	1067 close.next = next;

	1068 next.next = begin.next;

	1069 }

845 }	1070 }

OLD	NEW