Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/lexer/experimental-scanner.h

Issue 88653003: Add literal handling to experimental scanner. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Landing Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | src/lexer/experimental-scanner.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 the V8 project authors. All rights reserved. 1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 18 matching lines...) Expand all
29 #define V8_LEXER_EXPERIMENTAL_SCANNER_H 29 #define V8_LEXER_EXPERIMENTAL_SCANNER_H
30 30
31 #include <set> 31 #include <set>
32 32
33 #include "compiler.h" 33 #include "compiler.h"
34 #include "isolate.h" 34 #include "isolate.h"
35 #include "scanner.h" // UnicodeCache. 35 #include "scanner.h" // UnicodeCache.
36 #include "token.h" 36 #include "token.h"
37 #include "utils.h" 37 #include "utils.h"
38 #include "v8stdint.h" 38 #include "v8stdint.h"
39 #include "char-predicates-inl.h"
39 40
40 namespace v8 { 41 namespace v8 {
41 namespace internal { 42 namespace internal {
42 43
43 class UnicodeCache; 44 class UnicodeCache;
44 45
45 // Base class for scanners for different encodings. The meat is the pure virtual 46 // Base class for scanners for different encodings. The meat is the pure virtual
46 // Scan() which each of them specializes. 47 // Scan() which each of them specializes.
47 class ScannerBase { 48 class ScannerBase {
48 public: 49 public:
49 struct Location { 50 struct Location {
50 Location(int b, int e) : beg_pos(b), end_pos(e) { } 51 Location(int b, int e) : beg_pos(b), end_pos(e) { }
51 Location() : beg_pos(0), end_pos(0) { } 52 Location() : beg_pos(0), end_pos(0) { }
52 53
53 bool IsValid() const { 54 bool IsValid() const {
54 return beg_pos >= 0 && end_pos >= beg_pos; 55 return beg_pos >= 0 && end_pos >= beg_pos;
55 } 56 }
56 57
57 static Location invalid() { return Location(-1, -1); } 58 static Location invalid() { return Location(-1, -1); }
58 59
59 int beg_pos; 60 int beg_pos;
60 int end_pos; 61 int end_pos;
61 }; 62 };
62 63
63 explicit ScannerBase(Isolate* isolate) 64 explicit ScannerBase(Isolate* isolate)
64 : isolate_(isolate), 65 : isolate_(isolate),
65 unicode_cache_(isolate->unicode_cache()), 66 unicode_cache_(isolate->unicode_cache()),
66 has_line_terminator_before_next_(true), 67 has_line_terminator_before_next_(true),
68 current_literal_(&literals_[0]),
69 next_literal_(&literals_[1]),
70 octal_pos_(Location::invalid()),
67 harmony_numeric_literals_(false), 71 harmony_numeric_literals_(false),
68 harmony_modules_(false), 72 harmony_modules_(false),
69 harmony_scoping_(false) { 73 harmony_scoping_(false) {
70 if (!scanners_) { 74 if (!scanners_) {
71 scanners_ = new std::set<ScannerBase*>(); 75 scanners_ = new std::set<ScannerBase*>();
72 isolate->heap()->AddGCEpilogueCallback(&ScannerBase::UpdateBuffersAfterGC, 76 isolate->heap()->AddGCEpilogueCallback(&ScannerBase::UpdateBuffersAfterGC,
73 kGCTypeAll, false); 77 kGCTypeAll, false);
74 } 78 }
75 scanners_->insert(this); 79 scanners_->insert(this);
76 } 80 }
77 81
78 virtual ~ScannerBase() { 82 virtual ~ScannerBase() {
79 scanners_->erase(this); 83 scanners_->erase(this);
80 if (scanners_->empty()) { 84 if (scanners_->empty()) {
81 isolate_->heap()->RemoveGCEpilogueCallback( 85 isolate_->heap()->RemoveGCEpilogueCallback(
82 &ScannerBase::UpdateBuffersAfterGC); 86 &ScannerBase::UpdateBuffersAfterGC);
83 delete scanners_; 87 delete scanners_;
84 scanners_ = NULL; 88 scanners_ = NULL;
85 } 89 }
86 } 90 }
87 91
88 // Returns the next token and advances input. 92 // Returns the next token and advances input.
89 Token::Value Next() { 93 Token::Value Next() {
90 has_line_terminator_before_next_ = false; 94 has_line_terminator_before_next_ = false;
91 current_ = next_; 95 current_ = next_;
96 std::swap(current_literal_, next_literal_);
92 Scan(); // Virtual! Will fill in next_. 97 Scan(); // Virtual! Will fill in next_.
93 return current_.token; 98 return current_.token;
94 } 99 }
95 100
96 // Returns the current token again. 101 // Returns the current token again.
97 Token::Value current_token() { return current_.token; } 102 Token::Value current_token() { return current_.token; }
98 103
99 // Returns the location information for the current token 104 // Returns the location information for the current token
100 // (the token last returned by Next()). 105 // (the token last returned by Next()).
101 Location location() { 106 Location location() {
(...skipping 29 matching lines...) Expand all
131 } 136 }
132 137
133 // Returns true if there was a line terminator before the peek'ed token, 138 // Returns true if there was a line terminator before the peek'ed token,
134 // possibly inside a multi-line comment. 139 // possibly inside a multi-line comment.
135 bool HasAnyLineTerminatorBeforeNext() const { 140 bool HasAnyLineTerminatorBeforeNext() const {
136 return has_line_terminator_before_next_; 141 return has_line_terminator_before_next_;
137 // FIXME: do we need to distinguish between newlines inside and outside 142 // FIXME: do we need to distinguish between newlines inside and outside
138 // multiline comments? Atm doesn't look like we need to. 143 // multiline comments? Atm doesn't look like we need to.
139 } 144 }
140 145
141 // FIXME: implement these
142 Vector<const char> literal_ascii_string() { 146 Vector<const char> literal_ascii_string() {
143 return Vector<const char>(); // FIXME 147 if (!current_literal_->Valid(current_.beg_pos)) {
148 FillLiteral(current_, current_literal_);
149 }
150 return current_literal_->ascii_string;
144 } 151 }
152
145 Vector<const uc16> literal_utf16_string() { 153 Vector<const uc16> literal_utf16_string() {
146 return Vector<const uc16>(); // FIXME 154 if (!current_literal_->Valid(current_.beg_pos)) {
155 FillLiteral(current_, current_literal_);
156 }
157 return current_literal_->utf16_string;
147 } 158 }
159
160 int literal_length() {
161 if (!current_literal_->Valid(current_.beg_pos)) {
162 FillLiteral(current_, current_literal_);
163 }
164 return current_literal_->length;
165 }
166
148 bool is_literal_ascii() { 167 bool is_literal_ascii() {
149 return true; // FIXME 168 if (!current_literal_->Valid(current_.beg_pos)) {
169 FillLiteral(current_, current_literal_);
170 }
171 return current_literal_->is_ascii;
150 } 172 }
173
151 bool is_literal_contextual_keyword(Vector<const char> keyword) { 174 bool is_literal_contextual_keyword(Vector<const char> keyword) {
152 return false; // FIXME 175 if (!is_literal_ascii()) return false;
176 Vector<const char> literal = literal_ascii_string();
177 return literal.length() == keyword.length() &&
178 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);
153 } 179 }
154 int literal_length() const { 180
155 return 0; // FIXME
156 }
157 bool literal_contains_escapes() const { 181 bool literal_contains_escapes() const {
158 return false; // FIXME 182 return current_.has_escapes;
159 } 183 }
160 184
161 Vector<const char> next_literal_ascii_string() { 185 Vector<const char> next_literal_ascii_string() {
162 return Vector<const char>(); // FIXME 186 if (!next_literal_->Valid(next_.beg_pos)) {
163 } 187 FillLiteral(next_, next_literal_);
164 Vector<const uc16> next_literal_utf16_string() { 188 }
165 return Vector<const uc16>(); // FIXME 189 return next_literal_->ascii_string;
166 }
167 bool is_next_literal_ascii() {
168 return true; // FIXME
169 }
170 bool is_next_contextual_keyword(Vector<const char> keyword) {
171 return false; // FIXME
172 }
173 int next_literal_length() const {
174 return 0; // FIXME
175 } 190 }
176 191
177 uc32 ScanOctalEscape(uc32 c, int length) { return 0; } // FIXME 192 Vector<const uc16> next_literal_utf16_string() {
193 if (!next_literal_->Valid(next_.beg_pos)) {
194 FillLiteral(next_, next_literal_);
195 }
196 return next_literal_->utf16_string;
197 }
178 198
179 Location octal_position() const { 199 int next_literal_length() {
180 return Location(0, 0); // FIXME 200 if (!next_literal_->Valid(next_.beg_pos)) {
201 FillLiteral(next_, next_literal_);
202 }
203 return next_literal_->length;
181 } 204 }
182 void clear_octal_position() { } // FIXME 205
206 bool is_next_literal_ascii() {
207 if (!next_literal_->Valid(next_.beg_pos)) {
208 FillLiteral(next_, next_literal_);
209 }
210 return next_literal_->is_ascii;
211 }
212
213 bool is_next_contextual_keyword(Vector<const char> keyword) {
214 if (!is_next_literal_ascii()) return false;
215 Vector<const char> literal = next_literal_ascii_string();
216 return literal.length() == keyword.length() &&
217 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);
218 }
219
220 // Returns the location of the last seen octal literal.
221 Location octal_position() const { return octal_pos_; }
222 void clear_octal_position() { octal_pos_ = Location::invalid(); }
183 223
184 // Seek forward to the given position. This operation works for simple cases 224 // Seek forward to the given position. This operation works for simple cases
185 // such as seeking forward until simple delimiter tokens, which is what it is 225 // such as seeking forward until simple delimiter tokens, which is what it is
186 // used for. After this call, we will have the token at the given position as 226 // used for. After this call, we will have the token at the given position as
187 // the "next" token. The "current" token will be invalid. FIXME: for utf-8, 227 // the "next" token. The "current" token will be invalid. FIXME: for utf-8,
188 // we need to decide if pos is counted in characters or in bytes. 228 // we need to decide if pos is counted in characters or in bytes.
189 virtual void SeekForward(int pos) = 0; 229 virtual void SeekForward(int pos) = 0;
230 virtual void SetEnd(int pos) = 0;
190 231
191 // Scans the input as a regular expression pattern, previous character(s) must 232 // Scans the input as a regular expression pattern, previous character(s) must
192 // be /(=). Returns true if a pattern is scanned. FIXME: this won't work for 233 // be /(=). Returns true if a pattern is scanned. FIXME: this won't work for
193 // utf-8 newlines. 234 // utf-8 newlines.
194 virtual bool ScanRegExpPattern(bool seen_equal) = 0; 235 virtual bool ScanRegExpPattern(bool seen_equal) = 0;
195 // Returns true if regexp flags are scanned (always since flags can 236 // Returns true if regexp flags are scanned (always since flags can
196 // be empty). 237 // be empty).
197 virtual bool ScanRegExpFlags() = 0; 238 virtual bool ScanRegExpFlags() = 0;
198 239
199 protected: 240 protected:
200 struct TokenDesc { 241 struct TokenDesc {
201 Token::Value token; 242 Token::Value token;
202 int beg_pos; 243 int beg_pos;
203 int end_pos; 244 int end_pos;
204 bool has_escapes; 245 bool has_escapes;
205 }; 246 };
206 247
248 struct LiteralDesc {
249 int beg_pos;
250 bool is_ascii;
251 int length;
252 Vector<const char> ascii_string;
253 Vector<const uc16> utf16_string;
254 LiteralBuffer buffer;
255 bool Valid(int pos) { return beg_pos == pos; }
256 };
257
207 virtual void Scan() = 0; 258 virtual void Scan() = 0;
208 virtual void SetBufferBasedOnHandle() = 0; 259 virtual void SetBufferBasedOnHandle() = 0;
209 260
210 static void UpdateBuffersAfterGC(v8::Isolate*, GCType, GCCallbackFlags); 261 static void UpdateBuffersAfterGC(v8::Isolate*, GCType, GCCallbackFlags);
262 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0;
211 263
212 Isolate* isolate_; 264 Isolate* isolate_;
213 UnicodeCache* unicode_cache_; 265 UnicodeCache* unicode_cache_;
214 266
215 bool has_line_terminator_before_next_; 267 bool has_line_terminator_before_next_;
216 268
217 TokenDesc current_; // desc for current token (as returned by Next()) 269 TokenDesc current_; // desc for current token (as returned by Next())
218 TokenDesc next_; // desc for next token (one token look-ahead) 270 TokenDesc next_; // desc for next token (one token look-ahead)
219 271
272 LiteralDesc* current_literal_;
273 LiteralDesc* next_literal_;
274 LiteralDesc literals_[2];
275
276 Location octal_pos_;
277
220 bool harmony_numeric_literals_; 278 bool harmony_numeric_literals_;
221 bool harmony_modules_; 279 bool harmony_modules_;
222 bool harmony_scoping_; 280 bool harmony_scoping_;
223 281
224 private: 282 private:
225 static std::set<ScannerBase*>* scanners_; 283 static std::set<ScannerBase*>* scanners_;
226 }; 284 };
227 285
228 286
229 template<typename Char> 287 template<typename Char>
230 class ExperimentalScanner : public ScannerBase { 288 class ExperimentalScanner : public ScannerBase {
231 public: 289 public:
232 explicit ExperimentalScanner( 290 explicit ExperimentalScanner(
233 Handle<String> source, 291 Handle<String> source,
234 Isolate* isolate) 292 Isolate* isolate)
235 : ScannerBase(isolate), 293 : ScannerBase(isolate),
236 source_handle_(source), 294 source_handle_(source),
237 buffer_(NULL), 295 buffer_(NULL),
238 buffer_end_(NULL), 296 buffer_end_(NULL),
239 start_(NULL), 297 start_(NULL),
240 cursor_(NULL), 298 cursor_(NULL),
241 marker_(NULL) { 299 marker_(NULL) {
242 ASSERT(source->IsFlat()); 300 ASSERT(source->IsFlat());
243 SetBufferBasedOnHandle(); 301 SetBufferBasedOnHandle();
244 Scan(); 302 Scan();
245 } 303 }
246 304
247 virtual ~ExperimentalScanner() { } 305 virtual ~ExperimentalScanner() { }
248 306
307 protected:
249 virtual void Scan(); 308 virtual void Scan();
250 virtual void SeekForward(int pos); 309 virtual void SeekForward(int pos);
310 virtual void SetEnd(int pos);
251 virtual bool ScanRegExpPattern(bool seen_equal); 311 virtual bool ScanRegExpPattern(bool seen_equal);
252 virtual bool ScanRegExpFlags(); 312 virtual bool ScanRegExpFlags();
253 313
254 virtual void SetBufferBasedOnHandle() { 314 virtual void SetBufferBasedOnHandle() {
255 // We get a raw pointer from the Handle, but we also update it every time 315 // We get a raw pointer from the Handle, but we also update it every time
256 // there is a GC, so it is safe. 316 // there is a GC, so it is safe.
257 DisallowHeapAllocation no_gc; 317 DisallowHeapAllocation no_gc;
258 const Char* new_buffer = GetNewBufferBasedOnHandle(); 318 const Char* new_buffer = GetNewBufferBasedOnHandle();
259 if (new_buffer != buffer_) { 319 if (new_buffer != buffer_) {
260 int start_offset = start_ - buffer_; 320 int start_offset = start_ - buffer_;
261 int cursor_offset = cursor_ - buffer_; 321 int cursor_offset = cursor_ - buffer_;
262 int marker_offset = marker_ - buffer_; 322 int marker_offset = marker_ - buffer_;
263 buffer_ = new_buffer; 323 buffer_ = new_buffer;
264 buffer_end_ = buffer_ + source_handle_->length(); 324 buffer_end_ = buffer_ + source_handle_->length();
265 start_ = buffer_ + start_offset; 325 start_ = buffer_ + start_offset;
266 cursor_ = buffer_ + cursor_offset; 326 cursor_ = buffer_ + cursor_offset;
267 marker_ = buffer_ + marker_offset; 327 marker_ = buffer_ + marker_offset;
268 } 328 }
269 } 329 }
270 330
271 const Char* GetNewBufferBasedOnHandle() const; 331 const Char* GetNewBufferBasedOnHandle() const;
272 332
333 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal);
334
273 private: 335 private:
274 bool ValidIdentifierPart() { 336 bool ValidIdentifierPart() {
275 return unicode_cache_->IsIdentifierPart(ScanHexNumber(4)); 337 return unicode_cache_->IsIdentifierPart(ScanHexNumber(4));
276 } 338 }
277 339
278 bool ValidIdentifierStart() { 340 bool ValidIdentifierStart() {
279 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4)); 341 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4));
280 } 342 }
281 343
282 uc32 ScanHexNumber(int length); 344 uc32 ScanHexNumber(int length);
283 bool ScanLiteralUnicodeEscape(); 345 bool ScanLiteralUnicodeEscape();
284 346
347 const Char* ScanHexNumber(const Char* start,
348 const Char* end,
349 uc32* result);
350 const Char* ScanOctalEscape(const Char* start,
351 const Char* end,
352 uc32* result);
353 const Char* ScanIdentifierUnicodeEscape(const Char* start,
354 const Char* end,
355 uc32* result);
356 const Char* ScanEscape(const Char* start,
357 const Char* end,
358 LiteralBuffer* literal);
359
285 Handle<String> source_handle_; 360 Handle<String> source_handle_;
286 const Char* buffer_; 361 const Char* buffer_;
287 const Char* buffer_end_; 362 const Char* buffer_end_;
288 const Char* start_; 363 const Char* start_;
289 const Char* cursor_; 364 const Char* cursor_;
290 const Char* marker_; 365 const Char* marker_;
291 }; 366 };
292 367
293 368
294 template<typename Char> 369 template<typename Char>
295 void ExperimentalScanner<Char>::SeekForward(int pos) { 370 void ExperimentalScanner<Char>::SeekForward(int pos) {
296 cursor_ = buffer_ + pos; 371 cursor_ = buffer_ + pos;
297 start_ = cursor_; 372 start_ = cursor_;
298 marker_ = cursor_; 373 marker_ = cursor_;
299 has_line_terminator_before_next_ = false; 374 has_line_terminator_before_next_ = false;
300 Scan(); // Fills in next_. 375 Scan(); // Fills in next_.
301 } 376 }
302 377
303 378
304 template<typename Char> 379 template<typename Char>
380 void ExperimentalScanner<Char>::SetEnd(int pos) {
381 buffer_end_ = buffer_ + pos;
382 }
383
384
385 template<typename Char>
305 bool ExperimentalScanner<Char>::ScanRegExpPattern(bool seen_equal) { 386 bool ExperimentalScanner<Char>::ScanRegExpPattern(bool seen_equal) {
306 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags 387 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
307 bool in_character_class = false; 388 bool in_character_class = false;
308 389
309 // Previous token is either '/' or '/=', in the second case, the 390 // Previous token is either '/' or '/=', in the second case, the
310 // pattern starts at =. 391 // pattern starts at =.
311 next_.beg_pos = (cursor_ - buffer_) - (seen_equal ? 2 : 1); 392 next_.beg_pos = (cursor_ - buffer_) - (seen_equal ? 2 : 1);
312 next_.end_pos = (cursor_ - buffer_) - (seen_equal ? 1 : 0); 393 next_.end_pos = (cursor_ - buffer_) - (seen_equal ? 1 : 0);
313 394
314 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 395 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
353 if (++cursor_ >= buffer_end_) break; 434 if (++cursor_ >= buffer_end_) break;
354 } else { 435 } else {
355 if (!ScanLiteralUnicodeEscape()) break; 436 if (!ScanLiteralUnicodeEscape()) break;
356 if (++cursor_ >= buffer_end_) break; 437 if (++cursor_ >= buffer_end_) break;
357 } 438 }
358 } 439 }
359 next_.end_pos = cursor_ - buffer_ - 1; 440 next_.end_pos = cursor_ - buffer_ - 1;
360 return true; 441 return true;
361 } 442 }
362 443
444
363 template<typename Char> 445 template<typename Char>
364 uc32 ExperimentalScanner<Char>::ScanHexNumber(int length) { 446 uc32 ExperimentalScanner<Char>::ScanHexNumber(int length) {
365 // We have seen \uXXXX, let's see what it is. 447 // We have seen \uXXXX, let's see what it is.
366 uc32 x = 0; 448 uc32 x = 0;
367 for (const Char* s = cursor_ - length; s != cursor_; ++s) { 449 for (const Char* s = cursor_ - length; s != cursor_; ++s) {
368 int d = HexValue(*s); 450 int d = HexValue(*s);
369 if (d < 0) { 451 if (d < 0) {
370 return -1; 452 return -1;
371 } 453 }
372 x = x * 16 + d; 454 x = x * 16 + d;
373 } 455 }
374 return x; 456 return x;
375 } 457 }
376 458
459
460 template<typename Char>
461 const Char* ExperimentalScanner<Char>::ScanHexNumber(
462 const Char* cursor, const Char* end, uc32* result) {
463 uc32 x = 0;
464 for ( ; cursor < end; ++cursor) {
465 int d = HexValue(*cursor);
466 if (d < 0) {
467 *result = -1;
468 return NULL;
469 }
470 x = x * 16 + d;
471 }
472 *result = x;
473 return cursor;
474 }
475
476
477 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
478 // ECMA-262. Other JS VMs support them.
479 template<typename Char>
480 const Char* ExperimentalScanner<Char>::ScanOctalEscape(
481 const Char* start, const Char* end, uc32* result) {
482 uc32 x = *result - '0';
483 const Char* cursor;
484 for (cursor = start; cursor < end; cursor++) {
485 int d = *cursor - '0';
486 if (d < 0 || d > 7) break;
487 int nx = x * 8 + d;
488 if (nx >= 256) break;
489 x = nx;
490 }
491 // Anything except '\0' is an octal escape sequence, illegal in strict mode.
492 // Remember the position of octal escape sequences so that an error
493 // can be reported later (in strict mode).
494 // We don't report the error immediately, because the octal escape can
495 // occur before the "use strict" directive.
496 if (*result != '0' || cursor > start) {
497 octal_pos_ = Location(start - 1 - buffer_, cursor - 1 - buffer_);
498 }
499 *result = x;
500 return cursor;
501 }
502
503
377 template<typename Char> 504 template<typename Char>
378 bool ExperimentalScanner<Char>::ScanLiteralUnicodeEscape() { 505 bool ExperimentalScanner<Char>::ScanLiteralUnicodeEscape() {
379 ASSERT(cursor_ < buffer_end_); 506 ASSERT(cursor_ < buffer_end_);
380 Char primary_char = *(cursor_); 507 Char primary_char = *(cursor_);
381 ASSERT(primary_char == '\\'); 508 ASSERT(primary_char == '\\');
382 if (++cursor_ >= buffer_end_) return false; 509 if (++cursor_ >= buffer_end_) return false;
383 primary_char = *(cursor_); 510 primary_char = *(cursor_);
384 int i = 1; 511 int i = 1;
385 if (primary_char == 'u') { 512 if (primary_char == 'u') {
386 i++; 513 i++;
387 while (i < 6) { 514 while (i < 6) {
388 if (++cursor_ >= buffer_end_) return false; 515 if (++cursor_ >= buffer_end_) return false;
389 primary_char = *(cursor_); 516 primary_char = *(cursor_);
390 if (!IsHexDigit(primary_char)) break; 517 if (!IsHexDigit(primary_char)) break;
391 i++; 518 i++;
392 } 519 }
393 } 520 }
394 return i == 6; 521 return i == 6;
395 } 522 }
396 523
397 524
525 template<typename Char>
526 const Char* ExperimentalScanner<Char>::ScanIdentifierUnicodeEscape(
527 const Char* cursor, const Char* end, uc32* result) {
528 ASSERT(*cursor == '\\');
529 if (++cursor >= end) return NULL;
530 if (*cursor != 'u') return NULL;
531 ++cursor;
532 if (cursor + 4 > end) return NULL;
533 cursor = ScanHexNumber(cursor, cursor + 4, result);
534 return cursor;
535 }
536
537
538 template<typename Char>
539 const Char* ExperimentalScanner<Char>::ScanEscape(
540 const Char* cursor, const Char* end, LiteralBuffer* literal) {
541 ASSERT(*cursor == '\\');
542 if (++cursor >= end) return NULL;
543 uc32 c = *cursor;
544 if (++cursor > end) return NULL;
545 // Skip escaped newlines.
546 if (unicode_cache_->IsLineTerminator(c)) {
547 uc32 peek = *cursor;
548 // Allow CR+LF newlines in multiline string literals.
549 if (IsCarriageReturn(c) && IsLineFeed(peek)) cursor++;
550 // Allow LF+CR newlines in multiline string literals.
551 if (IsLineFeed(c) && IsCarriageReturn(peek)) cursor++;
552 return cursor;
553 }
554
555 switch (c) {
556 case '\'': // fall through
557 case '"' : // fall through
558 case '\\': break;
559 case 'b' : c = '\b'; break;
560 case 'f' : c = '\f'; break;
561 case 'n' : c = '\n'; break;
562 case 'r' : c = '\r'; break;
563 case 't' : c = '\t'; break;
564 case 'u' : {
565 if (end > cursor + 4) return NULL;
566 cursor = ScanHexNumber(cursor, cursor + 4, &c);
567 if (cursor == NULL) return NULL;
568 break;
569 }
570 case 'v' : c = '\v'; break;
571 case 'x' : {
572 if (end > cursor + 2) return NULL ;
573 cursor = ScanHexNumber(cursor, cursor + 2, &c);
574 if (cursor == NULL) return NULL;
575 break;
576 }
577 case '0' : // fall through
578 case '1' : // fall through
579 case '2' : // fall through
580 case '3' : // fall through
581 case '4' : // fall through
582 case '5' : // fall through
583 case '6' : // fall through
584 case '7' :
585 if (end > cursor + 2) end = cursor + 2;
586 cursor = ScanOctalEscape(cursor, end, &c); break;
587 }
588
589 // According to ECMA-262, section 7.8.4, characters not covered by the
590 // above cases should be illegal, but they are commonly handled as
591 // non-escaped characters by JS VMs.
592 literal->AddChar(c);
593 return cursor;
594 }
595
596
398 } } 597 } }
399 598
400 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H 599 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H
OLDNEW
« no previous file with comments | « no previous file | src/lexer/experimental-scanner.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698