| OLD | NEW |
| 1 // Copyright 2013 the V8 project authors. All rights reserved. | 1 // Copyright 2013 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 18 matching lines...) Expand all Loading... |
| 29 #define V8_LEXER_EXPERIMENTAL_SCANNER_H | 29 #define V8_LEXER_EXPERIMENTAL_SCANNER_H |
| 30 | 30 |
| 31 #include <set> | 31 #include <set> |
| 32 | 32 |
| 33 #include "compiler.h" | 33 #include "compiler.h" |
| 34 #include "isolate.h" | 34 #include "isolate.h" |
| 35 #include "scanner.h" // UnicodeCache. | 35 #include "scanner.h" // UnicodeCache. |
| 36 #include "token.h" | 36 #include "token.h" |
| 37 #include "utils.h" | 37 #include "utils.h" |
| 38 #include "v8stdint.h" | 38 #include "v8stdint.h" |
| 39 #include "char-predicates-inl.h" |
| 39 | 40 |
| 40 namespace v8 { | 41 namespace v8 { |
| 41 namespace internal { | 42 namespace internal { |
| 42 | 43 |
| 43 class UnicodeCache; | 44 class UnicodeCache; |
| 44 | 45 |
| 45 // Base class for scanners for different encodings. The meat is the pure virtual | 46 // Base class for scanners for different encodings. The meat is the pure virtual |
| 46 // Scan() which each of them specializes. | 47 // Scan() which each of them specializes. |
| 47 class ScannerBase { | 48 class ScannerBase { |
| 48 public: | 49 public: |
| 49 struct Location { | 50 struct Location { |
| 50 Location(int b, int e) : beg_pos(b), end_pos(e) { } | 51 Location(int b, int e) : beg_pos(b), end_pos(e) { } |
| 51 Location() : beg_pos(0), end_pos(0) { } | 52 Location() : beg_pos(0), end_pos(0) { } |
| 52 | 53 |
| 53 bool IsValid() const { | 54 bool IsValid() const { |
| 54 return beg_pos >= 0 && end_pos >= beg_pos; | 55 return beg_pos >= 0 && end_pos >= beg_pos; |
| 55 } | 56 } |
| 56 | 57 |
| 57 static Location invalid() { return Location(-1, -1); } | 58 static Location invalid() { return Location(-1, -1); } |
| 58 | 59 |
| 59 int beg_pos; | 60 int beg_pos; |
| 60 int end_pos; | 61 int end_pos; |
| 61 }; | 62 }; |
| 62 | 63 |
| 63 explicit ScannerBase(Isolate* isolate) | 64 explicit ScannerBase(Isolate* isolate) |
| 64 : isolate_(isolate), | 65 : isolate_(isolate), |
| 65 unicode_cache_(isolate->unicode_cache()), | 66 unicode_cache_(isolate->unicode_cache()), |
| 66 has_line_terminator_before_next_(true), | 67 has_line_terminator_before_next_(true), |
| 68 current_literal_(&literals_[0]), |
| 69 next_literal_(&literals_[1]), |
| 70 octal_pos_(Location::invalid()), |
| 67 harmony_numeric_literals_(false), | 71 harmony_numeric_literals_(false), |
| 68 harmony_modules_(false), | 72 harmony_modules_(false), |
| 69 harmony_scoping_(false) { | 73 harmony_scoping_(false) { |
| 70 if (!scanners_) { | 74 if (!scanners_) { |
| 71 scanners_ = new std::set<ScannerBase*>(); | 75 scanners_ = new std::set<ScannerBase*>(); |
| 72 isolate->heap()->AddGCEpilogueCallback(&ScannerBase::UpdateBuffersAfterGC, | 76 isolate->heap()->AddGCEpilogueCallback(&ScannerBase::UpdateBuffersAfterGC, |
| 73 kGCTypeAll, false); | 77 kGCTypeAll, false); |
| 74 } | 78 } |
| 75 scanners_->insert(this); | 79 scanners_->insert(this); |
| 76 } | 80 } |
| 77 | 81 |
| 78 virtual ~ScannerBase() { | 82 virtual ~ScannerBase() { |
| 79 scanners_->erase(this); | 83 scanners_->erase(this); |
| 80 if (scanners_->empty()) { | 84 if (scanners_->empty()) { |
| 81 isolate_->heap()->RemoveGCEpilogueCallback( | 85 isolate_->heap()->RemoveGCEpilogueCallback( |
| 82 &ScannerBase::UpdateBuffersAfterGC); | 86 &ScannerBase::UpdateBuffersAfterGC); |
| 83 delete scanners_; | 87 delete scanners_; |
| 84 scanners_ = NULL; | 88 scanners_ = NULL; |
| 85 } | 89 } |
| 86 } | 90 } |
| 87 | 91 |
| 88 // Returns the next token and advances input. | 92 // Returns the next token and advances input. |
| 89 Token::Value Next() { | 93 Token::Value Next() { |
| 90 has_line_terminator_before_next_ = false; | 94 has_line_terminator_before_next_ = false; |
| 91 current_ = next_; | 95 current_ = next_; |
| 96 std::swap(current_literal_, next_literal_); |
| 92 Scan(); // Virtual! Will fill in next_. | 97 Scan(); // Virtual! Will fill in next_. |
| 93 return current_.token; | 98 return current_.token; |
| 94 } | 99 } |
| 95 | 100 |
| 96 // Returns the current token again. | 101 // Returns the current token again. |
| 97 Token::Value current_token() { return current_.token; } | 102 Token::Value current_token() { return current_.token; } |
| 98 | 103 |
| 99 // Returns the location information for the current token | 104 // Returns the location information for the current token |
| 100 // (the token last returned by Next()). | 105 // (the token last returned by Next()). |
| 101 Location location() { | 106 Location location() { |
| (...skipping 29 matching lines...) Expand all Loading... |
| 131 } | 136 } |
| 132 | 137 |
| 133 // Returns true if there was a line terminator before the peek'ed token, | 138 // Returns true if there was a line terminator before the peek'ed token, |
| 134 // possibly inside a multi-line comment. | 139 // possibly inside a multi-line comment. |
| 135 bool HasAnyLineTerminatorBeforeNext() const { | 140 bool HasAnyLineTerminatorBeforeNext() const { |
| 136 return has_line_terminator_before_next_; | 141 return has_line_terminator_before_next_; |
| 137 // FIXME: do we need to distinguish between newlines inside and outside | 142 // FIXME: do we need to distinguish between newlines inside and outside |
| 138 // multiline comments? Atm doesn't look like we need to. | 143 // multiline comments? Atm doesn't look like we need to. |
| 139 } | 144 } |
| 140 | 145 |
| 141 // FIXME: implement these | |
| 142 Vector<const char> literal_ascii_string() { | 146 Vector<const char> literal_ascii_string() { |
| 143 return Vector<const char>(); // FIXME | 147 if (!current_literal_->Valid(current_.beg_pos)) { |
| 148 FillLiteral(current_, current_literal_); |
| 149 } |
| 150 return current_literal_->ascii_string; |
| 144 } | 151 } |
| 152 |
| 145 Vector<const uc16> literal_utf16_string() { | 153 Vector<const uc16> literal_utf16_string() { |
| 146 return Vector<const uc16>(); // FIXME | 154 if (!current_literal_->Valid(current_.beg_pos)) { |
| 155 FillLiteral(current_, current_literal_); |
| 156 } |
| 157 return current_literal_->utf16_string; |
| 147 } | 158 } |
| 159 |
| 160 int literal_length() { |
| 161 if (!current_literal_->Valid(current_.beg_pos)) { |
| 162 FillLiteral(current_, current_literal_); |
| 163 } |
| 164 return current_literal_->length; |
| 165 } |
| 166 |
| 148 bool is_literal_ascii() { | 167 bool is_literal_ascii() { |
| 149 return true; // FIXME | 168 if (!current_literal_->Valid(current_.beg_pos)) { |
| 169 FillLiteral(current_, current_literal_); |
| 170 } |
| 171 return current_literal_->is_ascii; |
| 150 } | 172 } |
| 173 |
| 151 bool is_literal_contextual_keyword(Vector<const char> keyword) { | 174 bool is_literal_contextual_keyword(Vector<const char> keyword) { |
| 152 return false; // FIXME | 175 if (!is_literal_ascii()) return false; |
| 176 Vector<const char> literal = literal_ascii_string(); |
| 177 return literal.length() == keyword.length() && |
| 178 (memcmp(literal.start(), keyword.start(), literal.length()) == 0); |
| 153 } | 179 } |
| 154 int literal_length() const { | 180 |
| 155 return 0; // FIXME | |
| 156 } | |
| 157 bool literal_contains_escapes() const { | 181 bool literal_contains_escapes() const { |
| 158 return false; // FIXME | 182 return current_.has_escapes; |
| 159 } | 183 } |
| 160 | 184 |
| 161 Vector<const char> next_literal_ascii_string() { | 185 Vector<const char> next_literal_ascii_string() { |
| 162 return Vector<const char>(); // FIXME | 186 if (!next_literal_->Valid(next_.beg_pos)) { |
| 163 } | 187 FillLiteral(next_, next_literal_); |
| 164 Vector<const uc16> next_literal_utf16_string() { | 188 } |
| 165 return Vector<const uc16>(); // FIXME | 189 return next_literal_->ascii_string; |
| 166 } | |
| 167 bool is_next_literal_ascii() { | |
| 168 return true; // FIXME | |
| 169 } | |
| 170 bool is_next_contextual_keyword(Vector<const char> keyword) { | |
| 171 return false; // FIXME | |
| 172 } | |
| 173 int next_literal_length() const { | |
| 174 return 0; // FIXME | |
| 175 } | 190 } |
| 176 | 191 |
| 177 uc32 ScanOctalEscape(uc32 c, int length) { return 0; } // FIXME | 192 Vector<const uc16> next_literal_utf16_string() { |
| 193 if (!next_literal_->Valid(next_.beg_pos)) { |
| 194 FillLiteral(next_, next_literal_); |
| 195 } |
| 196 return next_literal_->utf16_string; |
| 197 } |
| 178 | 198 |
| 179 Location octal_position() const { | 199 int next_literal_length() { |
| 180 return Location(0, 0); // FIXME | 200 if (!next_literal_->Valid(next_.beg_pos)) { |
| 201 FillLiteral(next_, next_literal_); |
| 202 } |
| 203 return next_literal_->length; |
| 181 } | 204 } |
| 182 void clear_octal_position() { } // FIXME | 205 |
| 206 bool is_next_literal_ascii() { |
| 207 if (!next_literal_->Valid(next_.beg_pos)) { |
| 208 FillLiteral(next_, next_literal_); |
| 209 } |
| 210 return next_literal_->is_ascii; |
| 211 } |
| 212 |
| 213 bool is_next_contextual_keyword(Vector<const char> keyword) { |
| 214 if (!is_next_literal_ascii()) return false; |
| 215 Vector<const char> literal = next_literal_ascii_string(); |
| 216 return literal.length() == keyword.length() && |
| 217 (memcmp(literal.start(), keyword.start(), literal.length()) == 0); |
| 218 } |
| 219 |
| 220 // Returns the location of the last seen octal literal. |
| 221 Location octal_position() const { return octal_pos_; } |
| 222 void clear_octal_position() { octal_pos_ = Location::invalid(); } |
| 183 | 223 |
| 184 // Seek forward to the given position. This operation works for simple cases | 224 // Seek forward to the given position. This operation works for simple cases |
| 185 // such as seeking forward until simple delimiter tokens, which is what it is | 225 // such as seeking forward until simple delimiter tokens, which is what it is |
| 186 // used for. After this call, we will have the token at the given position as | 226 // used for. After this call, we will have the token at the given position as |
| 187 // the "next" token. The "current" token will be invalid. FIXME: for utf-8, | 227 // the "next" token. The "current" token will be invalid. FIXME: for utf-8, |
| 188 // we need to decide if pos is counted in characters or in bytes. | 228 // we need to decide if pos is counted in characters or in bytes. |
| 189 virtual void SeekForward(int pos) = 0; | 229 virtual void SeekForward(int pos) = 0; |
| 230 virtual void SetEnd(int pos) = 0; |
| 190 | 231 |
| 191 // Scans the input as a regular expression pattern, previous character(s) must | 232 // Scans the input as a regular expression pattern, previous character(s) must |
| 192 // be /(=). Returns true if a pattern is scanned. FIXME: this won't work for | 233 // be /(=). Returns true if a pattern is scanned. FIXME: this won't work for |
| 193 // utf-8 newlines. | 234 // utf-8 newlines. |
| 194 virtual bool ScanRegExpPattern(bool seen_equal) = 0; | 235 virtual bool ScanRegExpPattern(bool seen_equal) = 0; |
| 195 // Returns true if regexp flags are scanned (always since flags can | 236 // Returns true if regexp flags are scanned (always since flags can |
| 196 // be empty). | 237 // be empty). |
| 197 virtual bool ScanRegExpFlags() = 0; | 238 virtual bool ScanRegExpFlags() = 0; |
| 198 | 239 |
| 199 protected: | 240 protected: |
| 200 struct TokenDesc { | 241 struct TokenDesc { |
| 201 Token::Value token; | 242 Token::Value token; |
| 202 int beg_pos; | 243 int beg_pos; |
| 203 int end_pos; | 244 int end_pos; |
| 204 bool has_escapes; | 245 bool has_escapes; |
| 205 }; | 246 }; |
| 206 | 247 |
| 248 struct LiteralDesc { |
| 249 int beg_pos; |
| 250 bool is_ascii; |
| 251 int length; |
| 252 Vector<const char> ascii_string; |
| 253 Vector<const uc16> utf16_string; |
| 254 LiteralBuffer buffer; |
| 255 bool Valid(int pos) { return beg_pos == pos; } |
| 256 }; |
| 257 |
| 207 virtual void Scan() = 0; | 258 virtual void Scan() = 0; |
| 208 virtual void SetBufferBasedOnHandle() = 0; | 259 virtual void SetBufferBasedOnHandle() = 0; |
| 209 | 260 |
| 210 static void UpdateBuffersAfterGC(v8::Isolate*, GCType, GCCallbackFlags); | 261 static void UpdateBuffersAfterGC(v8::Isolate*, GCType, GCCallbackFlags); |
| 262 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0; |
| 211 | 263 |
| 212 Isolate* isolate_; | 264 Isolate* isolate_; |
| 213 UnicodeCache* unicode_cache_; | 265 UnicodeCache* unicode_cache_; |
| 214 | 266 |
| 215 bool has_line_terminator_before_next_; | 267 bool has_line_terminator_before_next_; |
| 216 | 268 |
| 217 TokenDesc current_; // desc for current token (as returned by Next()) | 269 TokenDesc current_; // desc for current token (as returned by Next()) |
| 218 TokenDesc next_; // desc for next token (one token look-ahead) | 270 TokenDesc next_; // desc for next token (one token look-ahead) |
| 219 | 271 |
| 272 LiteralDesc* current_literal_; |
| 273 LiteralDesc* next_literal_; |
| 274 LiteralDesc literals_[2]; |
| 275 |
| 276 Location octal_pos_; |
| 277 |
| 220 bool harmony_numeric_literals_; | 278 bool harmony_numeric_literals_; |
| 221 bool harmony_modules_; | 279 bool harmony_modules_; |
| 222 bool harmony_scoping_; | 280 bool harmony_scoping_; |
| 223 | 281 |
| 224 private: | 282 private: |
| 225 static std::set<ScannerBase*>* scanners_; | 283 static std::set<ScannerBase*>* scanners_; |
| 226 }; | 284 }; |
| 227 | 285 |
| 228 | 286 |
| 229 template<typename Char> | 287 template<typename Char> |
| 230 class ExperimentalScanner : public ScannerBase { | 288 class ExperimentalScanner : public ScannerBase { |
| 231 public: | 289 public: |
| 232 explicit ExperimentalScanner( | 290 explicit ExperimentalScanner( |
| 233 Handle<String> source, | 291 Handle<String> source, |
| 234 Isolate* isolate) | 292 Isolate* isolate) |
| 235 : ScannerBase(isolate), | 293 : ScannerBase(isolate), |
| 236 source_handle_(source), | 294 source_handle_(source), |
| 237 buffer_(NULL), | 295 buffer_(NULL), |
| 238 buffer_end_(NULL), | 296 buffer_end_(NULL), |
| 239 start_(NULL), | 297 start_(NULL), |
| 240 cursor_(NULL), | 298 cursor_(NULL), |
| 241 marker_(NULL) { | 299 marker_(NULL) { |
| 242 ASSERT(source->IsFlat()); | 300 ASSERT(source->IsFlat()); |
| 243 SetBufferBasedOnHandle(); | 301 SetBufferBasedOnHandle(); |
| 244 Scan(); | 302 Scan(); |
| 245 } | 303 } |
| 246 | 304 |
| 247 virtual ~ExperimentalScanner() { } | 305 virtual ~ExperimentalScanner() { } |
| 248 | 306 |
| 307 protected: |
| 249 virtual void Scan(); | 308 virtual void Scan(); |
| 250 virtual void SeekForward(int pos); | 309 virtual void SeekForward(int pos); |
| 310 virtual void SetEnd(int pos); |
| 251 virtual bool ScanRegExpPattern(bool seen_equal); | 311 virtual bool ScanRegExpPattern(bool seen_equal); |
| 252 virtual bool ScanRegExpFlags(); | 312 virtual bool ScanRegExpFlags(); |
| 253 | 313 |
| 254 virtual void SetBufferBasedOnHandle() { | 314 virtual void SetBufferBasedOnHandle() { |
| 255 // We get a raw pointer from the Handle, but we also update it every time | 315 // We get a raw pointer from the Handle, but we also update it every time |
| 256 // there is a GC, so it is safe. | 316 // there is a GC, so it is safe. |
| 257 DisallowHeapAllocation no_gc; | 317 DisallowHeapAllocation no_gc; |
| 258 const Char* new_buffer = GetNewBufferBasedOnHandle(); | 318 const Char* new_buffer = GetNewBufferBasedOnHandle(); |
| 259 if (new_buffer != buffer_) { | 319 if (new_buffer != buffer_) { |
| 260 int start_offset = start_ - buffer_; | 320 int start_offset = start_ - buffer_; |
| 261 int cursor_offset = cursor_ - buffer_; | 321 int cursor_offset = cursor_ - buffer_; |
| 262 int marker_offset = marker_ - buffer_; | 322 int marker_offset = marker_ - buffer_; |
| 263 buffer_ = new_buffer; | 323 buffer_ = new_buffer; |
| 264 buffer_end_ = buffer_ + source_handle_->length(); | 324 buffer_end_ = buffer_ + source_handle_->length(); |
| 265 start_ = buffer_ + start_offset; | 325 start_ = buffer_ + start_offset; |
| 266 cursor_ = buffer_ + cursor_offset; | 326 cursor_ = buffer_ + cursor_offset; |
| 267 marker_ = buffer_ + marker_offset; | 327 marker_ = buffer_ + marker_offset; |
| 268 } | 328 } |
| 269 } | 329 } |
| 270 | 330 |
| 271 const Char* GetNewBufferBasedOnHandle() const; | 331 const Char* GetNewBufferBasedOnHandle() const; |
| 272 | 332 |
| 333 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal); |
| 334 |
| 273 private: | 335 private: |
| 274 bool ValidIdentifierPart() { | 336 bool ValidIdentifierPart() { |
| 275 return unicode_cache_->IsIdentifierPart(ScanHexNumber(4)); | 337 return unicode_cache_->IsIdentifierPart(ScanHexNumber(4)); |
| 276 } | 338 } |
| 277 | 339 |
| 278 bool ValidIdentifierStart() { | 340 bool ValidIdentifierStart() { |
| 279 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4)); | 341 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4)); |
| 280 } | 342 } |
| 281 | 343 |
| 282 uc32 ScanHexNumber(int length); | 344 uc32 ScanHexNumber(int length); |
| 283 bool ScanLiteralUnicodeEscape(); | 345 bool ScanLiteralUnicodeEscape(); |
| 284 | 346 |
| 347 const Char* ScanHexNumber(const Char* start, |
| 348 const Char* end, |
| 349 uc32* result); |
| 350 const Char* ScanOctalEscape(const Char* start, |
| 351 const Char* end, |
| 352 uc32* result); |
| 353 const Char* ScanIdentifierUnicodeEscape(const Char* start, |
| 354 const Char* end, |
| 355 uc32* result); |
| 356 const Char* ScanEscape(const Char* start, |
| 357 const Char* end, |
| 358 LiteralBuffer* literal); |
| 359 |
| 285 Handle<String> source_handle_; | 360 Handle<String> source_handle_; |
| 286 const Char* buffer_; | 361 const Char* buffer_; |
| 287 const Char* buffer_end_; | 362 const Char* buffer_end_; |
| 288 const Char* start_; | 363 const Char* start_; |
| 289 const Char* cursor_; | 364 const Char* cursor_; |
| 290 const Char* marker_; | 365 const Char* marker_; |
| 291 }; | 366 }; |
| 292 | 367 |
| 293 | 368 |
| 294 template<typename Char> | 369 template<typename Char> |
| 295 void ExperimentalScanner<Char>::SeekForward(int pos) { | 370 void ExperimentalScanner<Char>::SeekForward(int pos) { |
| 296 cursor_ = buffer_ + pos; | 371 cursor_ = buffer_ + pos; |
| 297 start_ = cursor_; | 372 start_ = cursor_; |
| 298 marker_ = cursor_; | 373 marker_ = cursor_; |
| 299 has_line_terminator_before_next_ = false; | 374 has_line_terminator_before_next_ = false; |
| 300 Scan(); // Fills in next_. | 375 Scan(); // Fills in next_. |
| 301 } | 376 } |
| 302 | 377 |
| 303 | 378 |
| 304 template<typename Char> | 379 template<typename Char> |
| 380 void ExperimentalScanner<Char>::SetEnd(int pos) { |
| 381 buffer_end_ = buffer_ + pos; |
| 382 } |
| 383 |
| 384 |
| 385 template<typename Char> |
| 305 bool ExperimentalScanner<Char>::ScanRegExpPattern(bool seen_equal) { | 386 bool ExperimentalScanner<Char>::ScanRegExpPattern(bool seen_equal) { |
| 306 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 387 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
| 307 bool in_character_class = false; | 388 bool in_character_class = false; |
| 308 | 389 |
| 309 // Previous token is either '/' or '/=', in the second case, the | 390 // Previous token is either '/' or '/=', in the second case, the |
| 310 // pattern starts at =. | 391 // pattern starts at =. |
| 311 next_.beg_pos = (cursor_ - buffer_) - (seen_equal ? 2 : 1); | 392 next_.beg_pos = (cursor_ - buffer_) - (seen_equal ? 2 : 1); |
| 312 next_.end_pos = (cursor_ - buffer_) - (seen_equal ? 1 : 0); | 393 next_.end_pos = (cursor_ - buffer_) - (seen_equal ? 1 : 0); |
| 313 | 394 |
| 314 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 395 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 353 if (++cursor_ >= buffer_end_) break; | 434 if (++cursor_ >= buffer_end_) break; |
| 354 } else { | 435 } else { |
| 355 if (!ScanLiteralUnicodeEscape()) break; | 436 if (!ScanLiteralUnicodeEscape()) break; |
| 356 if (++cursor_ >= buffer_end_) break; | 437 if (++cursor_ >= buffer_end_) break; |
| 357 } | 438 } |
| 358 } | 439 } |
| 359 next_.end_pos = cursor_ - buffer_ - 1; | 440 next_.end_pos = cursor_ - buffer_ - 1; |
| 360 return true; | 441 return true; |
| 361 } | 442 } |
| 362 | 443 |
| 444 |
| 363 template<typename Char> | 445 template<typename Char> |
| 364 uc32 ExperimentalScanner<Char>::ScanHexNumber(int length) { | 446 uc32 ExperimentalScanner<Char>::ScanHexNumber(int length) { |
| 365 // We have seen \uXXXX, let's see what it is. | 447 // We have seen \uXXXX, let's see what it is. |
| 366 uc32 x = 0; | 448 uc32 x = 0; |
| 367 for (const Char* s = cursor_ - length; s != cursor_; ++s) { | 449 for (const Char* s = cursor_ - length; s != cursor_; ++s) { |
| 368 int d = HexValue(*s); | 450 int d = HexValue(*s); |
| 369 if (d < 0) { | 451 if (d < 0) { |
| 370 return -1; | 452 return -1; |
| 371 } | 453 } |
| 372 x = x * 16 + d; | 454 x = x * 16 + d; |
| 373 } | 455 } |
| 374 return x; | 456 return x; |
| 375 } | 457 } |
| 376 | 458 |
| 459 |
| 460 template<typename Char> |
| 461 const Char* ExperimentalScanner<Char>::ScanHexNumber( |
| 462 const Char* cursor, const Char* end, uc32* result) { |
| 463 uc32 x = 0; |
| 464 for ( ; cursor < end; ++cursor) { |
| 465 int d = HexValue(*cursor); |
| 466 if (d < 0) { |
| 467 *result = -1; |
| 468 return NULL; |
| 469 } |
| 470 x = x * 16 + d; |
| 471 } |
| 472 *result = x; |
| 473 return cursor; |
| 474 } |
| 475 |
| 476 |
| 477 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of |
| 478 // ECMA-262. Other JS VMs support them. |
| 479 template<typename Char> |
| 480 const Char* ExperimentalScanner<Char>::ScanOctalEscape( |
| 481 const Char* start, const Char* end, uc32* result) { |
| 482 uc32 x = *result - '0'; |
| 483 const Char* cursor; |
| 484 for (cursor = start; cursor < end; cursor++) { |
| 485 int d = *cursor - '0'; |
| 486 if (d < 0 || d > 7) break; |
| 487 int nx = x * 8 + d; |
| 488 if (nx >= 256) break; |
| 489 x = nx; |
| 490 } |
| 491 // Anything except '\0' is an octal escape sequence, illegal in strict mode. |
| 492 // Remember the position of octal escape sequences so that an error |
| 493 // can be reported later (in strict mode). |
| 494 // We don't report the error immediately, because the octal escape can |
| 495 // occur before the "use strict" directive. |
| 496 if (*result != '0' || cursor > start) { |
| 497 octal_pos_ = Location(start - 1 - buffer_, cursor - 1 - buffer_); |
| 498 } |
| 499 *result = x; |
| 500 return cursor; |
| 501 } |
| 502 |
| 503 |
| 377 template<typename Char> | 504 template<typename Char> |
| 378 bool ExperimentalScanner<Char>::ScanLiteralUnicodeEscape() { | 505 bool ExperimentalScanner<Char>::ScanLiteralUnicodeEscape() { |
| 379 ASSERT(cursor_ < buffer_end_); | 506 ASSERT(cursor_ < buffer_end_); |
| 380 Char primary_char = *(cursor_); | 507 Char primary_char = *(cursor_); |
| 381 ASSERT(primary_char == '\\'); | 508 ASSERT(primary_char == '\\'); |
| 382 if (++cursor_ >= buffer_end_) return false; | 509 if (++cursor_ >= buffer_end_) return false; |
| 383 primary_char = *(cursor_); | 510 primary_char = *(cursor_); |
| 384 int i = 1; | 511 int i = 1; |
| 385 if (primary_char == 'u') { | 512 if (primary_char == 'u') { |
| 386 i++; | 513 i++; |
| 387 while (i < 6) { | 514 while (i < 6) { |
| 388 if (++cursor_ >= buffer_end_) return false; | 515 if (++cursor_ >= buffer_end_) return false; |
| 389 primary_char = *(cursor_); | 516 primary_char = *(cursor_); |
| 390 if (!IsHexDigit(primary_char)) break; | 517 if (!IsHexDigit(primary_char)) break; |
| 391 i++; | 518 i++; |
| 392 } | 519 } |
| 393 } | 520 } |
| 394 return i == 6; | 521 return i == 6; |
| 395 } | 522 } |
| 396 | 523 |
| 397 | 524 |
| 525 template<typename Char> |
| 526 const Char* ExperimentalScanner<Char>::ScanIdentifierUnicodeEscape( |
| 527 const Char* cursor, const Char* end, uc32* result) { |
| 528 ASSERT(*cursor == '\\'); |
| 529 if (++cursor >= end) return NULL; |
| 530 if (*cursor != 'u') return NULL; |
| 531 ++cursor; |
| 532 if (cursor + 4 > end) return NULL; |
| 533 cursor = ScanHexNumber(cursor, cursor + 4, result); |
| 534 return cursor; |
| 535 } |
| 536 |
| 537 |
| 538 template<typename Char> |
| 539 const Char* ExperimentalScanner<Char>::ScanEscape( |
| 540 const Char* cursor, const Char* end, LiteralBuffer* literal) { |
| 541 ASSERT(*cursor == '\\'); |
| 542 if (++cursor >= end) return NULL; |
| 543 uc32 c = *cursor; |
| 544 if (++cursor > end) return NULL; |
| 545 // Skip escaped newlines. |
| 546 if (unicode_cache_->IsLineTerminator(c)) { |
| 547 uc32 peek = *cursor; |
| 548 // Allow CR+LF newlines in multiline string literals. |
| 549 if (IsCarriageReturn(c) && IsLineFeed(peek)) cursor++; |
| 550 // Allow LF+CR newlines in multiline string literals. |
| 551 if (IsLineFeed(c) && IsCarriageReturn(peek)) cursor++; |
| 552 return cursor; |
| 553 } |
| 554 |
| 555 switch (c) { |
| 556 case '\'': // fall through |
| 557 case '"' : // fall through |
| 558 case '\\': break; |
| 559 case 'b' : c = '\b'; break; |
| 560 case 'f' : c = '\f'; break; |
| 561 case 'n' : c = '\n'; break; |
| 562 case 'r' : c = '\r'; break; |
| 563 case 't' : c = '\t'; break; |
| 564 case 'u' : { |
| 565 if (end > cursor + 4) return NULL; |
| 566 cursor = ScanHexNumber(cursor, cursor + 4, &c); |
| 567 if (cursor == NULL) return NULL; |
| 568 break; |
| 569 } |
| 570 case 'v' : c = '\v'; break; |
| 571 case 'x' : { |
| 572 if (end > cursor + 2) return NULL ; |
| 573 cursor = ScanHexNumber(cursor, cursor + 2, &c); |
| 574 if (cursor == NULL) return NULL; |
| 575 break; |
| 576 } |
| 577 case '0' : // fall through |
| 578 case '1' : // fall through |
| 579 case '2' : // fall through |
| 580 case '3' : // fall through |
| 581 case '4' : // fall through |
| 582 case '5' : // fall through |
| 583 case '6' : // fall through |
| 584 case '7' : |
| 585 if (end > cursor + 2) end = cursor + 2; |
| 586 cursor = ScanOctalEscape(cursor, end, &c); break; |
| 587 } |
| 588 |
| 589 // According to ECMA-262, section 7.8.4, characters not covered by the |
| 590 // above cases should be illegal, but they are commonly handled as |
| 591 // non-escaped characters by JS VMs. |
| 592 literal->AddChar(c); |
| 593 return cursor; |
| 594 } |
| 595 |
| 596 |
| 398 } } | 597 } } |
| 399 | 598 |
| 400 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H | 599 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H |
| OLD | NEW |