src/lexer/experimental-scanner.h - Issue 88653003: Add literal handling to experimental scanner.

Side by Side Diff: src/lexer/experimental-scanner.h

Issue 88653003: Add literal handling to experimental scanner. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Landing Created 7 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2013 the V8 project authors. All rights reserved.	1 // Copyright 2013 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 18 matching lines...) Expand all Loading...
29 #define V8_LEXER_EXPERIMENTAL_SCANNER_H	29 #define V8_LEXER_EXPERIMENTAL_SCANNER_H

30	30

31 #include <set>	31 #include <set>

32	32

33 #include "compiler.h"	33 #include "compiler.h"

34 #include "isolate.h"	34 #include "isolate.h"

35 #include "scanner.h" // UnicodeCache.	35 #include "scanner.h" // UnicodeCache.

36 #include "token.h"	36 #include "token.h"

37 #include "utils.h"	37 #include "utils.h"

38 #include "v8stdint.h"	38 #include "v8stdint.h"

	39 #include "char-predicates-inl.h"

39	40

40 namespace v8 {	41 namespace v8 {

41 namespace internal {	42 namespace internal {

42	43

43 class UnicodeCache;	44 class UnicodeCache;

44	45

45 // Base class for scanners for different encodings. The meat is the pure virtual	46 // Base class for scanners for different encodings. The meat is the pure virtual

46 // Scan() which each of them specializes.	47 // Scan() which each of them specializes.

47 class ScannerBase {	48 class ScannerBase {

48 public:	49 public:

49 struct Location {	50 struct Location {

50 Location(int b, int e) : beg_pos(b), end_pos(e) { }	51 Location(int b, int e) : beg_pos(b), end_pos(e) { }

51 Location() : beg_pos(0), end_pos(0) { }	52 Location() : beg_pos(0), end_pos(0) { }

52	53

53 bool IsValid() const {	54 bool IsValid() const {

54 return beg_pos >= 0 && end_pos >= beg_pos;	55 return beg_pos >= 0 && end_pos >= beg_pos;

55 }	56 }

56	57

57 static Location invalid() { return Location(-1, -1); }	58 static Location invalid() { return Location(-1, -1); }

58	59

59 int beg_pos;	60 int beg_pos;

60 int end_pos;	61 int end_pos;

61 };	62 };

62	63

63 explicit ScannerBase(Isolate* isolate)	64 explicit ScannerBase(Isolate* isolate)

64 : isolate_(isolate),	65 : isolate_(isolate),

65 unicode_cache_(isolate->unicode_cache()),	66 unicode_cache_(isolate->unicode_cache()),

66 has_line_terminator_before_next_(true),	67 has_line_terminator_before_next_(true),

	68 current_literal_(&literals_[0]),

	69 next_literal_(&literals_[1]),

	70 octal_pos_(Location::invalid()),

67 harmony_numeric_literals_(false),	71 harmony_numeric_literals_(false),

68 harmony_modules_(false),	72 harmony_modules_(false),

69 harmony_scoping_(false) {	73 harmony_scoping_(false) {

70 if (!scanners_) {	74 if (!scanners_) {

71 scanners_ = new std::set<ScannerBase*>();	75 scanners_ = new std::set<ScannerBase*>();

72 isolate->heap()->AddGCEpilogueCallback(&ScannerBase::UpdateBuffersAfterGC,	76 isolate->heap()->AddGCEpilogueCallback(&ScannerBase::UpdateBuffersAfterGC,

73 kGCTypeAll, false);	77 kGCTypeAll, false);

74 }	78 }

75 scanners_->insert(this);	79 scanners_->insert(this);

76 }	80 }

77	81

78 virtual ~ScannerBase() {	82 virtual ~ScannerBase() {

79 scanners_->erase(this);	83 scanners_->erase(this);

80 if (scanners_->empty()) {	84 if (scanners_->empty()) {

81 isolate_->heap()->RemoveGCEpilogueCallback(	85 isolate_->heap()->RemoveGCEpilogueCallback(

82 &ScannerBase::UpdateBuffersAfterGC);	86 &ScannerBase::UpdateBuffersAfterGC);

83 delete scanners_;	87 delete scanners_;

84 scanners_ = NULL;	88 scanners_ = NULL;

85 }	89 }

86 }	90 }

87	91

88 // Returns the next token and advances input.	92 // Returns the next token and advances input.

89 Token::Value Next() {	93 Token::Value Next() {

90 has_line_terminator_before_next_ = false;	94 has_line_terminator_before_next_ = false;

91 current_ = next_;	95 current_ = next_;

	96 std::swap(current_literal_, next_literal_);

92 Scan(); // Virtual! Will fill in next_.	97 Scan(); // Virtual! Will fill in next_.

93 return current_.token;	98 return current_.token;

94 }	99 }

95	100

96 // Returns the current token again.	101 // Returns the current token again.

97 Token::Value current_token() { return current_.token; }	102 Token::Value current_token() { return current_.token; }

98	103

99 // Returns the location information for the current token	104 // Returns the location information for the current token

100 // (the token last returned by Next()).	105 // (the token last returned by Next()).

101 Location location() {	106 Location location() {

(...skipping 29 matching lines...) Expand all Loading...
131 }	136 }

132	137

133 // Returns true if there was a line terminator before the peek'ed token,	138 // Returns true if there was a line terminator before the peek'ed token,

134 // possibly inside a multi-line comment.	139 // possibly inside a multi-line comment.

135 bool HasAnyLineTerminatorBeforeNext() const {	140 bool HasAnyLineTerminatorBeforeNext() const {

136 return has_line_terminator_before_next_;	141 return has_line_terminator_before_next_;

137 // FIXME: do we need to distinguish between newlines inside and outside	142 // FIXME: do we need to distinguish between newlines inside and outside

138 // multiline comments? Atm doesn't look like we need to.	143 // multiline comments? Atm doesn't look like we need to.

139 }	144 }

140	145

141 // FIXME: implement these

142 Vector<const char> literal_ascii_string() {	146 Vector<const char> literal_ascii_string() {

143 return Vector<const char>(); // FIXME	147 if (!current_literal_->Valid(current_.beg_pos)) {

	148 FillLiteral(current_, current_literal_);

	149 }

	150 return current_literal_->ascii_string;

144 }	151 }

	152

145 Vector<const uc16> literal_utf16_string() {	153 Vector<const uc16> literal_utf16_string() {

146 return Vector<const uc16>(); // FIXME	154 if (!current_literal_->Valid(current_.beg_pos)) {

	155 FillLiteral(current_, current_literal_);

	156 }

	157 return current_literal_->utf16_string;

147 }	158 }

	159

	160 int literal_length() {

	161 if (!current_literal_->Valid(current_.beg_pos)) {

	162 FillLiteral(current_, current_literal_);

	163 }

	164 return current_literal_->length;

	165 }

	166

148 bool is_literal_ascii() {	167 bool is_literal_ascii() {

149 return true; // FIXME	168 if (!current_literal_->Valid(current_.beg_pos)) {

	169 FillLiteral(current_, current_literal_);

	170 }

	171 return current_literal_->is_ascii;

150 }	172 }

	173

151 bool is_literal_contextual_keyword(Vector<const char> keyword) {	174 bool is_literal_contextual_keyword(Vector<const char> keyword) {

152 return false; // FIXME	175 if (!is_literal_ascii()) return false;

	176 Vector<const char> literal = literal_ascii_string();

	177 return literal.length() == keyword.length() &&

	178 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);

153 }	179 }

154 int literal_length() const {	180

155 return 0; // FIXME

156 }

157 bool literal_contains_escapes() const {	181 bool literal_contains_escapes() const {

158 return false; // FIXME	182 return current_.has_escapes;

159 }	183 }

160	184

161 Vector<const char> next_literal_ascii_string() {	185 Vector<const char> next_literal_ascii_string() {

162 return Vector<const char>(); // FIXME	186 if (!next_literal_->Valid(next_.beg_pos)) {

163 }	187 FillLiteral(next_, next_literal_);

164 Vector<const uc16> next_literal_utf16_string() {	188 }

165 return Vector<const uc16>(); // FIXME	189 return next_literal_->ascii_string;

166 }

167 bool is_next_literal_ascii() {

168 return true; // FIXME

169 }

170 bool is_next_contextual_keyword(Vector<const char> keyword) {

171 return false; // FIXME

172 }

173 int next_literal_length() const {

174 return 0; // FIXME

175 }	190 }

176	191

177 uc32 ScanOctalEscape(uc32 c, int length) { return 0; } // FIXME	192 Vector<const uc16> next_literal_utf16_string() {

	193 if (!next_literal_->Valid(next_.beg_pos)) {

	194 FillLiteral(next_, next_literal_);

	195 }

	196 return next_literal_->utf16_string;

	197 }

178	198

179 Location octal_position() const {	199 int next_literal_length() {

180 return Location(0, 0); // FIXME	200 if (!next_literal_->Valid(next_.beg_pos)) {

	201 FillLiteral(next_, next_literal_);

	202 }

	203 return next_literal_->length;

181 }	204 }

182 void clear_octal_position() { } // FIXME	205

	206 bool is_next_literal_ascii() {

	207 if (!next_literal_->Valid(next_.beg_pos)) {

	208 FillLiteral(next_, next_literal_);

	209 }

	210 return next_literal_->is_ascii;

	211 }

	212

	213 bool is_next_contextual_keyword(Vector<const char> keyword) {

	214 if (!is_next_literal_ascii()) return false;

	215 Vector<const char> literal = next_literal_ascii_string();

	216 return literal.length() == keyword.length() &&

	217 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);

	218 }

	219

	220 // Returns the location of the last seen octal literal.

	221 Location octal_position() const { return octal_pos_; }

	222 void clear_octal_position() { octal_pos_ = Location::invalid(); }

183	223

184 // Seek forward to the given position. This operation works for simple cases	224 // Seek forward to the given position. This operation works for simple cases

185 // such as seeking forward until simple delimiter tokens, which is what it is	225 // such as seeking forward until simple delimiter tokens, which is what it is

186 // used for. After this call, we will have the token at the given position as	226 // used for. After this call, we will have the token at the given position as

187 // the "next" token. The "current" token will be invalid. FIXME: for utf-8,	227 // the "next" token. The "current" token will be invalid. FIXME: for utf-8,

188 // we need to decide if pos is counted in characters or in bytes.	228 // we need to decide if pos is counted in characters or in bytes.

189 virtual void SeekForward(int pos) = 0;	229 virtual void SeekForward(int pos) = 0;

	230 virtual void SetEnd(int pos) = 0;

190	231

191 // Scans the input as a regular expression pattern, previous character(s) must	232 // Scans the input as a regular expression pattern, previous character(s) must

192 // be /(=). Returns true if a pattern is scanned. FIXME: this won't work for	233 // be /(=). Returns true if a pattern is scanned. FIXME: this won't work for

193 // utf-8 newlines.	234 // utf-8 newlines.

194 virtual bool ScanRegExpPattern(bool seen_equal) = 0;	235 virtual bool ScanRegExpPattern(bool seen_equal) = 0;

195 // Returns true if regexp flags are scanned (always since flags can	236 // Returns true if regexp flags are scanned (always since flags can

196 // be empty).	237 // be empty).

197 virtual bool ScanRegExpFlags() = 0;	238 virtual bool ScanRegExpFlags() = 0;

198	239

199 protected:	240 protected:

200 struct TokenDesc {	241 struct TokenDesc {

201 Token::Value token;	242 Token::Value token;

202 int beg_pos;	243 int beg_pos;

203 int end_pos;	244 int end_pos;

204 bool has_escapes;	245 bool has_escapes;

205 };	246 };

206	247

	248 struct LiteralDesc {

	249 int beg_pos;

	250 bool is_ascii;

	251 int length;

	252 Vector<const char> ascii_string;

	253 Vector<const uc16> utf16_string;

	254 LiteralBuffer buffer;

	255 bool Valid(int pos) { return beg_pos == pos; }

	256 };

	257

207 virtual void Scan() = 0;	258 virtual void Scan() = 0;

208 virtual void SetBufferBasedOnHandle() = 0;	259 virtual void SetBufferBasedOnHandle() = 0;

209	260

210 static void UpdateBuffersAfterGC(v8::Isolate*, GCType, GCCallbackFlags);	261 static void UpdateBuffersAfterGC(v8::Isolate*, GCType, GCCallbackFlags);

	262 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0;

211	263

212 Isolate* isolate_;	264 Isolate* isolate_;

213 UnicodeCache* unicode_cache_;	265 UnicodeCache* unicode_cache_;

214	266

215 bool has_line_terminator_before_next_;	267 bool has_line_terminator_before_next_;

216	268

217 TokenDesc current_; // desc for current token (as returned by Next())	269 TokenDesc current_; // desc for current token (as returned by Next())

218 TokenDesc next_; // desc for next token (one token look-ahead)	270 TokenDesc next_; // desc for next token (one token look-ahead)

219	271

	272 LiteralDesc* current_literal_;

	273 LiteralDesc* next_literal_;

	274 LiteralDesc literals_[2];

	275

	276 Location octal_pos_;

	277

220 bool harmony_numeric_literals_;	278 bool harmony_numeric_literals_;

221 bool harmony_modules_;	279 bool harmony_modules_;

222 bool harmony_scoping_;	280 bool harmony_scoping_;

223	281

224 private:	282 private:

225 static std::set<ScannerBase*>* scanners_;	283 static std::set<ScannerBase*>* scanners_;

226 };	284 };

227	285

228	286

229 template<typename Char>	287 template<typename Char>

230 class ExperimentalScanner : public ScannerBase {	288 class ExperimentalScanner : public ScannerBase {

231 public:	289 public:

232 explicit ExperimentalScanner(	290 explicit ExperimentalScanner(

233 Handle<String> source,	291 Handle<String> source,

234 Isolate* isolate)	292 Isolate* isolate)

235 : ScannerBase(isolate),	293 : ScannerBase(isolate),

236 source_handle_(source),	294 source_handle_(source),

237 buffer_(NULL),	295 buffer_(NULL),

238 buffer_end_(NULL),	296 buffer_end_(NULL),

239 start_(NULL),	297 start_(NULL),

240 cursor_(NULL),	298 cursor_(NULL),

241 marker_(NULL) {	299 marker_(NULL) {

242 ASSERT(source->IsFlat());	300 ASSERT(source->IsFlat());

243 SetBufferBasedOnHandle();	301 SetBufferBasedOnHandle();

244 Scan();	302 Scan();

245 }	303 }

246	304

247 virtual ~ExperimentalScanner() { }	305 virtual ~ExperimentalScanner() { }

248	306

	307 protected:

249 virtual void Scan();	308 virtual void Scan();

250 virtual void SeekForward(int pos);	309 virtual void SeekForward(int pos);

	310 virtual void SetEnd(int pos);

251 virtual bool ScanRegExpPattern(bool seen_equal);	311 virtual bool ScanRegExpPattern(bool seen_equal);

252 virtual bool ScanRegExpFlags();	312 virtual bool ScanRegExpFlags();

253	313

254 virtual void SetBufferBasedOnHandle() {	314 virtual void SetBufferBasedOnHandle() {

255 // We get a raw pointer from the Handle, but we also update it every time	315 // We get a raw pointer from the Handle, but we also update it every time

256 // there is a GC, so it is safe.	316 // there is a GC, so it is safe.

257 DisallowHeapAllocation no_gc;	317 DisallowHeapAllocation no_gc;

258 const Char* new_buffer = GetNewBufferBasedOnHandle();	318 const Char* new_buffer = GetNewBufferBasedOnHandle();

259 if (new_buffer != buffer_) {	319 if (new_buffer != buffer_) {

260 int start_offset = start_ - buffer_;	320 int start_offset = start_ - buffer_;

261 int cursor_offset = cursor_ - buffer_;	321 int cursor_offset = cursor_ - buffer_;

262 int marker_offset = marker_ - buffer_;	322 int marker_offset = marker_ - buffer_;

263 buffer_ = new_buffer;	323 buffer_ = new_buffer;

264 buffer_end_ = buffer_ + source_handle_->length();	324 buffer_end_ = buffer_ + source_handle_->length();

265 start_ = buffer_ + start_offset;	325 start_ = buffer_ + start_offset;

266 cursor_ = buffer_ + cursor_offset;	326 cursor_ = buffer_ + cursor_offset;

267 marker_ = buffer_ + marker_offset;	327 marker_ = buffer_ + marker_offset;

268 }	328 }

269 }	329 }

270	330

271 const Char* GetNewBufferBasedOnHandle() const;	331 const Char* GetNewBufferBasedOnHandle() const;

272	332

	333 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal);

	334

273 private:	335 private:

274 bool ValidIdentifierPart() {	336 bool ValidIdentifierPart() {

275 return unicode_cache_->IsIdentifierPart(ScanHexNumber(4));	337 return unicode_cache_->IsIdentifierPart(ScanHexNumber(4));

276 }	338 }

277	339

278 bool ValidIdentifierStart() {	340 bool ValidIdentifierStart() {

279 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4));	341 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4));

280 }	342 }

281	343

282 uc32 ScanHexNumber(int length);	344 uc32 ScanHexNumber(int length);

283 bool ScanLiteralUnicodeEscape();	345 bool ScanLiteralUnicodeEscape();

284	346

	347 const Char* ScanHexNumber(const Char* start,

	348 const Char* end,

	349 uc32* result);

	350 const Char* ScanOctalEscape(const Char* start,

	351 const Char* end,

	352 uc32* result);

	353 const Char* ScanIdentifierUnicodeEscape(const Char* start,

	354 const Char* end,

	355 uc32* result);

	356 const Char* ScanEscape(const Char* start,

	357 const Char* end,

	358 LiteralBuffer* literal);

	359

285 Handle<String> source_handle_;	360 Handle<String> source_handle_;

286 const Char* buffer_;	361 const Char* buffer_;

287 const Char* buffer_end_;	362 const Char* buffer_end_;

288 const Char* start_;	363 const Char* start_;

289 const Char* cursor_;	364 const Char* cursor_;

290 const Char* marker_;	365 const Char* marker_;

291 };	366 };

292	367

293	368

294 template<typename Char>	369 template<typename Char>

295 void ExperimentalScanner<Char>::SeekForward(int pos) {	370 void ExperimentalScanner<Char>::SeekForward(int pos) {

296 cursor_ = buffer_ + pos;	371 cursor_ = buffer_ + pos;

297 start_ = cursor_;	372 start_ = cursor_;

298 marker_ = cursor_;	373 marker_ = cursor_;

299 has_line_terminator_before_next_ = false;	374 has_line_terminator_before_next_ = false;

300 Scan(); // Fills in next_.	375 Scan(); // Fills in next_.

301 }	376 }

302	377

303	378

304 template<typename Char>	379 template<typename Char>

	380 void ExperimentalScanner<Char>::SetEnd(int pos) {

	381 buffer_end_ = buffer_ + pos;

	382 }

	383

	384

	385 template<typename Char>

305 bool ExperimentalScanner<Char>::ScanRegExpPattern(bool seen_equal) {	386 bool ExperimentalScanner<Char>::ScanRegExpPattern(bool seen_equal) {

306 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags	387 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags

307 bool in_character_class = false;	388 bool in_character_class = false;

308	389

309 // Previous token is either '/' or '/=', in the second case, the	390 // Previous token is either '/' or '/=', in the second case, the

310 // pattern starts at =.	391 // pattern starts at =.

311 next_.beg_pos = (cursor_ - buffer_) - (seen_equal ? 2 : 1);	392 next_.beg_pos = (cursor_ - buffer_) - (seen_equal ? 2 : 1);

312 next_.end_pos = (cursor_ - buffer_) - (seen_equal ? 1 : 0);	393 next_.end_pos = (cursor_ - buffer_) - (seen_equal ? 1 : 0);

313	394

314 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,	395 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
353 if (++cursor_ >= buffer_end_) break;	434 if (++cursor_ >= buffer_end_) break;

354 } else {	435 } else {

355 if (!ScanLiteralUnicodeEscape()) break;	436 if (!ScanLiteralUnicodeEscape()) break;

356 if (++cursor_ >= buffer_end_) break;	437 if (++cursor_ >= buffer_end_) break;

357 }	438 }

358 }	439 }

359 next_.end_pos = cursor_ - buffer_ - 1;	440 next_.end_pos = cursor_ - buffer_ - 1;

360 return true;	441 return true;

361 }	442 }

362	443

	444

363 template<typename Char>	445 template<typename Char>

364 uc32 ExperimentalScanner<Char>::ScanHexNumber(int length) {	446 uc32 ExperimentalScanner<Char>::ScanHexNumber(int length) {

365 // We have seen \uXXXX, let's see what it is.	447 // We have seen \uXXXX, let's see what it is.

366 uc32 x = 0;	448 uc32 x = 0;

367 for (const Char* s = cursor_ - length; s != cursor_; ++s) {	449 for (const Char* s = cursor_ - length; s != cursor_; ++s) {

368 int d = HexValue(*s);	450 int d = HexValue(*s);

369 if (d < 0) {	451 if (d < 0) {

370 return -1;	452 return -1;

371 }	453 }

372 x = x * 16 + d;	454 x = x * 16 + d;

373 }	455 }

374 return x;	456 return x;

375 }	457 }

376	458

	459

	460 template<typename Char>

	461 const Char* ExperimentalScanner<Char>::ScanHexNumber(

	462 const Char* cursor, const Char* end, uc32* result) {

	463 uc32 x = 0;

	464 for ( ; cursor < end; ++cursor) {

	465 int d = HexValue(*cursor);

	466 if (d < 0) {

	467 *result = -1;

	468 return NULL;

	469 }

	470 x = x * 16 + d;

	471 }

	472 *result = x;

	473 return cursor;

	474 }

	475

	476

	477 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of

	478 // ECMA-262. Other JS VMs support them.

	479 template<typename Char>

	480 const Char* ExperimentalScanner<Char>::ScanOctalEscape(

	481 const Char* start, const Char* end, uc32* result) {

	482 uc32 x = *result - '0';

	483 const Char* cursor;

	484 for (cursor = start; cursor < end; cursor++) {

	485 int d = *cursor - '0';

	486 if (d < 0 || d > 7) break;

	487 int nx = x * 8 + d;

	488 if (nx >= 256) break;

	489 x = nx;

	490 }

	491 // Anything except '\0' is an octal escape sequence, illegal in strict mode.

	492 // Remember the position of octal escape sequences so that an error

	493 // can be reported later (in strict mode).

	494 // We don't report the error immediately, because the octal escape can

	495 // occur before the "use strict" directive.

	496 if (*result != '0' || cursor > start) {

	497 octal_pos_ = Location(start - 1 - buffer_, cursor - 1 - buffer_);

	498 }

	499 *result = x;

	500 return cursor;

	501 }

	502

	503

377 template<typename Char>	504 template<typename Char>

378 bool ExperimentalScanner<Char>::ScanLiteralUnicodeEscape() {	505 bool ExperimentalScanner<Char>::ScanLiteralUnicodeEscape() {

379 ASSERT(cursor_ < buffer_end_);	506 ASSERT(cursor_ < buffer_end_);

380 Char primary_char = *(cursor_);	507 Char primary_char = *(cursor_);

381 ASSERT(primary_char == '\\');	508 ASSERT(primary_char == '\\');

382 if (++cursor_ >= buffer_end_) return false;	509 if (++cursor_ >= buffer_end_) return false;

383 primary_char = *(cursor_);	510 primary_char = *(cursor_);

384 int i = 1;	511 int i = 1;

385 if (primary_char == 'u') {	512 if (primary_char == 'u') {

386 i++;	513 i++;

387 while (i < 6) {	514 while (i < 6) {

388 if (++cursor_ >= buffer_end_) return false;	515 if (++cursor_ >= buffer_end_) return false;

389 primary_char = *(cursor_);	516 primary_char = *(cursor_);

390 if (!IsHexDigit(primary_char)) break;	517 if (!IsHexDigit(primary_char)) break;

391 i++;	518 i++;

392 }	519 }

393 }	520 }

394 return i == 6;	521 return i == 6;

395 }	522 }

396	523

397	524

	525 template<typename Char>

	526 const Char* ExperimentalScanner<Char>::ScanIdentifierUnicodeEscape(

	527 const Char* cursor, const Char* end, uc32* result) {

	528 ASSERT(*cursor == '\\');

	529 if (++cursor >= end) return NULL;

	530 if (*cursor != 'u') return NULL;

	531 ++cursor;

	532 if (cursor + 4 > end) return NULL;

	533 cursor = ScanHexNumber(cursor, cursor + 4, result);

	534 return cursor;

	535 }

	536

	537

	538 template<typename Char>

	539 const Char* ExperimentalScanner<Char>::ScanEscape(

	540 const Char* cursor, const Char* end, LiteralBuffer* literal) {

	541 ASSERT(*cursor == '\\');

	542 if (++cursor >= end) return NULL;

	543 uc32 c = *cursor;

	544 if (++cursor > end) return NULL;

	545 // Skip escaped newlines.

	546 if (unicode_cache_->IsLineTerminator(c)) {

	547 uc32 peek = *cursor;

	548 // Allow CR+LF newlines in multiline string literals.

	549 if (IsCarriageReturn(c) && IsLineFeed(peek)) cursor++;

	550 // Allow LF+CR newlines in multiline string literals.

	551 if (IsLineFeed(c) && IsCarriageReturn(peek)) cursor++;

	552 return cursor;

	553 }

	554

	555 switch (c) {

	556 case '\'': // fall through

	557 case '"' : // fall through

	558 case '\\': break;

	559 case 'b' : c = '\b'; break;

	560 case 'f' : c = '\f'; break;

	561 case 'n' : c = '\n'; break;

	562 case 'r' : c = '\r'; break;

	563 case 't' : c = '\t'; break;

	564 case 'u' : {

	565 if (end > cursor + 4) return NULL;

	566 cursor = ScanHexNumber(cursor, cursor + 4, &c);

	567 if (cursor == NULL) return NULL;

	568 break;

	569 }

	570 case 'v' : c = '\v'; break;

	571 case 'x' : {

	572 if (end > cursor + 2) return NULL ;

	573 cursor = ScanHexNumber(cursor, cursor + 2, &c);

	574 if (cursor == NULL) return NULL;

	575 break;

	576 }

	577 case '0' : // fall through

	578 case '1' : // fall through

	579 case '2' : // fall through

	580 case '3' : // fall through

	581 case '4' : // fall through

	582 case '5' : // fall through

	583 case '6' : // fall through

	584 case '7' :

	585 if (end > cursor + 2) end = cursor + 2;

	586 cursor = ScanOctalEscape(cursor, end, &c); break;

	587 }

	588

	589 // According to ECMA-262, section 7.8.4, characters not covered by the

	590 // above cases should be illegal, but they are commonly handled as

	591 // non-escaped characters by JS VMs.

	592 literal->AddChar(c);

	593 return cursor;

	594 }

	595

	596

398 } }	597 } }

399	598

400 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H	599 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H

OLD	NEW

« no previous file with comments | « no previous file | src/lexer/experimental-scanner.cc » ('j') | no next file with comments »