src/scanner.cc - Issue 185653004: Experimental parser: merge to r19637

Side by Side Diff: src/scanner.cc

Issue 185653004: Experimental parser: merge to r19637 (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 17 matching lines...) Expand all Loading...
28 // Features shared by parsing and pre-parsing scanners.	28 // Features shared by parsing and pre-parsing scanners.

29	29

30 #include <cmath>	30 #include <cmath>

31	31

32 #include "scanner.h"	32 #include "scanner.h"

33	33

34 #include "../include/v8stdint.h"	34 #include "../include/v8stdint.h"

35 #include "char-predicates-inl.h"	35 #include "char-predicates-inl.h"

36 #include "conversions-inl.h"	36 #include "conversions-inl.h"

37 #include "list-inl.h"	37 #include "list-inl.h"

	38 #include "v8.h"

38	39

39 namespace v8 {	40 namespace v8 {

40 namespace internal {	41 namespace internal {

41	42

	43

	44 #ifndef V8_USE_GENERATED_LEXER

42 // ----------------------------------------------------------------------------	45 // ----------------------------------------------------------------------------

43 // Scanner	46 // Scanner

44	47

45 Scanner::Scanner(UnicodeCache* unicode_cache)	48 Scanner::Scanner(UnicodeCache* unicode_cache)

46 : unicode_cache_(unicode_cache),	49 : unicode_cache_(unicode_cache),

47 octal_pos_(Location::invalid()),	50 octal_pos_(Location::invalid()),

48 harmony_scoping_(false),	51 harmony_scoping_(false),

49 harmony_modules_(false),	52 harmony_modules_(false),

50 harmony_numeric_literals_(false) { }	53 harmony_numeric_literals_(false) { }

51	54

(...skipping 187 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
239 next_.location.end_pos = pos + 1;	242 next_.location.end_pos = pos + 1;

240 Advance();	243 Advance();

241 return current_.token;	244 return current_.token;

242 }	245 }

243 }	246 }

244 Scan();	247 Scan();

245 return current_.token;	248 return current_.token;

246 }	249 }

247	250

248	251

249 static inline bool IsByteOrderMark(uc32 c) {	252 // TODO(yangguo): check whether this is actually necessary.

	253 static inline bool IsLittleEndianByteOrderMark(uc32 c) {

250 // The Unicode value U+FFFE is guaranteed never to be assigned as a	254 // The Unicode value U+FFFE is guaranteed never to be assigned as a

251 // Unicode character; this implies that in a Unicode context the	255 // Unicode character; this implies that in a Unicode context the

252 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	256 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

253 // character expressed in little-endian byte order (since it could	257 // character expressed in little-endian byte order (since it could

254 // not be a U+FFFE character expressed in big-endian byte	258 // not be a U+FFFE character expressed in big-endian byte

255 // order). Nevertheless, we check for it to be compatible with	259 // order). Nevertheless, we check for it to be compatible with

256 // Spidermonkey.	260 // Spidermonkey.

257 return c == 0xFEFF || c == 0xFFFE;	261 return c == 0xFFFE;

258 }	262 }

259	263

260	264

261 bool Scanner::SkipWhiteSpace() {	265 bool Scanner::SkipWhiteSpace() {

262 int start_position = source_pos();	266 int start_position = source_pos();

263	267

264 while (true) {	268 while (true) {

265 // We treat byte-order marks (BOMs) as whitespace for better	269 while (true) {

266 // compatibility with Spidermonkey and other JavaScript engines.	270 // Advance as long as character is a WhiteSpace or LineTerminator.

267 while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) {	271 // Remember if the latter is the case.

268 // IsWhiteSpace() includes line terminators!

269 if (unicode_cache_->IsLineTerminator(c0_)) {	272 if (unicode_cache_->IsLineTerminator(c0_)) {

270 // Ignore line terminators, but remember them. This is necessary

271 // for automatic semicolon insertion.

272 has_line_terminator_before_next_ = true;	273 has_line_terminator_before_next_ = true;

	274 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&

	275 !IsLittleEndianByteOrderMark(c0_)) {

	276 break;

273 }	277 }

274 Advance();	278 Advance();

275 }	279 }

276	280

277 // If there is an HTML comment end '-->' at the beginning of a	281 // If there is an HTML comment end '-->' at the beginning of a

278 // line (with only whitespace in front of it), we treat the rest	282 // line (with only whitespace in front of it), we treat the rest

279 // of the line as a comment. This is in line with the way	283 // of the line as a comment. This is in line with the way

280 // SpiderMonkey handles it.	284 // SpiderMonkey handles it.

281 if (c0_ == '-' && has_line_terminator_before_next_) {	285 if (c0_ == '-' && has_line_terminator_before_next_) {

282 Advance();	286 Advance();

(...skipping 823 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1106 Advance();	1110 Advance();

1107 }	1111 }

1108 }	1112 }

1109 literal.Complete();	1113 literal.Complete();

1110	1114

1111 next_.location.end_pos = source_pos() - 1;	1115 next_.location.end_pos = source_pos() - 1;

1112 return true;	1116 return true;

1113 }	1117 }

1114	1118

1115	1119

	1120 #endif

	1121

	1122

1116 int DuplicateFinder::AddAsciiSymbol(Vector<const char> key, int value) {	1123 int DuplicateFinder::AddAsciiSymbol(Vector<const char> key, int value) {

1117 return AddSymbol(Vector<const byte>::cast(key), true, value);	1124 return AddSymbol(Vector<const byte>::cast(key), true, value);

1118 }	1125 }

1119	1126

1120	1127

1121 int DuplicateFinder::AddUtf16Symbol(Vector<const uint16_t> key, int value) {	1128 int DuplicateFinder::AddUtf16Symbol(Vector<const uint16_t> key, int value) {

1122 return AddSymbol(Vector<const byte>::cast(key), false, value);	1129 return AddSymbol(Vector<const byte>::cast(key), false, value);

1123 }	1130 }

1124	1131

1125	1132

(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1242 }	1249 }

1243 backing_store_.Add(static_cast<byte>((ascii_length >> 7) | 0x80u));	1250 backing_store_.Add(static_cast<byte>((ascii_length >> 7) | 0x80u));

1244 }	1251 }

1245 backing_store_.Add(static_cast<byte>(ascii_length & 0x7f));	1252 backing_store_.Add(static_cast<byte>(ascii_length & 0x7f));

1246	1253

1247 backing_store_.AddBlock(bytes);	1254 backing_store_.AddBlock(bytes);

1248 return backing_store_.EndSequence().start();	1255 return backing_store_.EndSequence().start();

1249 }	1256 }

1250	1257

1251 } } // namespace v8::internal	1258 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner.h ('k') | src/scanner-character-streams.h » ('j') | no next file with comments »