| OLD | NEW |
| 1 // Copyright 2013 the V8 project authors. All rights reserved. | 1 // Copyright 2013 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 39 matching lines...) | (...skipping 39 matching lines...) |
| 50 LexerSet lexers_; | 50 LexerSet lexers_; |
| 51 }; | 51 }; |
| 52 | 52 |
| 53 | 53 |
| 54 class LexerBase { | 54 class LexerBase { |
| 55 public: | 55 public: |
| 56 struct Location { | 56 struct Location { |
| 57 Location(int b, int e) : beg_pos(b), end_pos(e) { } | 57 Location(int b, int e) : beg_pos(b), end_pos(e) { } |
| 58 Location() : beg_pos(0), end_pos(0) { } | 58 Location() : beg_pos(0), end_pos(0) { } |
| 59 | 59 |
| 60 bool IsValid() const { | 60 bool IsValid() const { return beg_pos >= 0 && end_pos >= beg_pos; } |
| 61 return beg_pos >= 0 && end_pos >= beg_pos; | |
| 62 } | |
| 63 | |
| 64 static Location invalid() { return Location(-1, -1); } | 61 static Location invalid() { return Location(-1, -1); } |
| 65 | 62 |
| 66 int beg_pos; | 63 int beg_pos; |
| 67 int end_pos; | 64 int end_pos; |
| 68 }; | 65 }; |
| 69 | 66 |
| 70 explicit LexerBase(UnicodeCache* unicode_cache); | 67 explicit LexerBase(UnicodeCache* unicode_cache); |
| 71 | 68 |
| 72 virtual ~LexerBase(); | 69 virtual ~LexerBase(); |
| 73 | 70 |
| (...skipping 39 matching lines...) | (...skipping 39 matching lines...) |
| 113 | 110 |
| 114 // Returns true if there was a line terminator before the peek'ed token, | 111 // Returns true if there was a line terminator before the peek'ed token, |
| 115 // possibly inside a multi-line comment. | 112 // possibly inside a multi-line comment. |
| 116 bool HasAnyLineTerminatorBeforeNext() const { | 113 bool HasAnyLineTerminatorBeforeNext() const { |
| 117 return has_line_terminator_before_next_ || | 114 return has_line_terminator_before_next_ || |
| 118 has_multiline_comment_before_next_; | 115 has_multiline_comment_before_next_; |
| 119 } | 116 } |
| 120 | 117 |
| 121 Vector<const uint8_t> literal_one_byte_string() { | 118 Vector<const uint8_t> literal_one_byte_string() { |
| 122 EnsureCurrentLiteralIsValid(); | 119 EnsureCurrentLiteralIsValid(); |
| 123 return current_literal_->one_byte_string; | 120 return current_literal_->one_byte_string(); |
| 124 } | 121 } |
| 125 | 122 |
| 126 Vector<const uint16_t> literal_two_byte_string() { | 123 Vector<const uint16_t> literal_two_byte_string() { |
| 127 EnsureCurrentLiteralIsValid(); | 124 EnsureCurrentLiteralIsValid(); |
| 128 return current_literal_->two_byte_string; | 125 return current_literal_->two_byte_string(); |
| 129 } | 126 } |
| 130 | 127 |
| 131 int literal_length() { | 128 int literal_length() { |
| 132 EnsureCurrentLiteralIsValid(); | 129 EnsureCurrentLiteralIsValid(); |
| 133 return current_literal_->length; | 130 return current_literal_->length; |
| 134 } | 131 } |
| 135 | 132 |
| 136 bool is_literal_one_byte() { | 133 bool is_literal_one_byte() { |
| 137 EnsureCurrentLiteralIsValid(); | 134 EnsureCurrentLiteralIsValid(); |
| 138 return current_literal_->is_one_byte; | 135 return current_literal_->is_one_byte(); |
| 139 } | 136 } |
| 140 | 137 |
| 141 bool is_literal_contextual_keyword(Vector<const uint8_t> keyword) { | 138 bool is_literal_contextual_keyword(Vector<const uint8_t> keyword) { |
| 142 if (!is_literal_one_byte()) return false; | 139 if (!is_literal_one_byte()) return false; |
| 143 Vector<const uint8_t> literal = literal_one_byte_string(); | 140 Vector<const uint8_t> literal = literal_one_byte_string(); |
| 144 return literal.length() == keyword.length() && | 141 return literal.length() == keyword.length() && |
| 145 (memcmp(literal.start(), keyword.start(), literal.length()) == 0); | 142 (memcmp(literal.start(), keyword.start(), literal.length()) == 0); |
| 146 } | 143 } |
| 147 | 144 |
| 148 bool literal_contains_escapes() const { | 145 bool literal_contains_escapes() const { |
| 149 return current_.has_escapes; | 146 return current_.has_escapes; |
| 150 } | 147 } |
| 151 | 148 |
| 152 Vector<const uint8_t> next_literal_one_byte_string() { | 149 Vector<const uint8_t> next_literal_one_byte_string() { |
| 153 EnsureNextLiteralIsValid(); | 150 EnsureNextLiteralIsValid(); |
| 154 return next_literal_->one_byte_string; | 151 return next_literal_->one_byte_string(); |
| 155 } | 152 } |
| 156 | 153 |
| 157 Vector<const uint16_t> next_literal_two_byte_string() { | 154 Vector<const uint16_t> next_literal_two_byte_string() { |
| 158 EnsureNextLiteralIsValid(); | 155 EnsureNextLiteralIsValid(); |
| 159 return next_literal_->two_byte_string; | 156 return next_literal_->two_byte_string(); |
| 160 } | 157 } |
| 161 | 158 |
| 162 int next_literal_length() { | 159 int next_literal_length() { |
| 163 EnsureNextLiteralIsValid(); | 160 EnsureNextLiteralIsValid(); |
| 164 return next_literal_->length; | 161 return next_literal_->length; |
| 165 } | 162 } |
| 166 | 163 |
| 167 bool is_next_literal_one_byte() { | 164 bool is_next_literal_one_byte() { |
| 168 EnsureNextLiteralIsValid(); | 165 EnsureNextLiteralIsValid(); |
| 169 return next_literal_->is_one_byte; | 166 return next_literal_->is_one_byte(); |
| 170 } | 167 } |
| 171 | 168 |
| 172 bool is_next_contextual_keyword(Vector<const uint8_t> keyword) { | 169 bool is_next_contextual_keyword(Vector<const uint8_t> keyword) { |
| 173 if (!is_next_literal_one_byte()) return false; | 170 if (!is_next_literal_one_byte()) return false; |
| 174 Vector<const uint8_t> literal = next_literal_one_byte_string(); | 171 Vector<const uint8_t> literal = next_literal_one_byte_string(); |
| 175 return literal.length() == keyword.length() && | 172 return literal.length() == keyword.length() && |
| 176 (memcmp(literal.start(), keyword.start(), literal.length()) == 0); | 173 (memcmp(literal.start(), keyword.start(), literal.length()) == 0); |
| 177 } | 174 } |
| 178 | 175 |
| 179 bool HarmonyScoping() const { | 176 bool HarmonyScoping() const { |
| (...skipping 15 matching lines...) | (...skipping 15 matching lines...) |
| 195 bool HarmonyNumericLiterals() const { | 192 bool HarmonyNumericLiterals() const { |
| 196 return harmony_numeric_literals_; | 193 return harmony_numeric_literals_; |
| 197 } | 194 } |
| 198 | 195 |
| 199 void SetHarmonyNumericLiterals(bool numeric_literals) { | 196 void SetHarmonyNumericLiterals(bool numeric_literals) { |
| 200 harmony_numeric_literals_ = numeric_literals; | 197 harmony_numeric_literals_ = numeric_literals; |
| 201 } | 198 } |
| 202 | 199 |
| 203 UnicodeCache* unicode_cache() { return unicode_cache_; } | 200 UnicodeCache* unicode_cache() { return unicode_cache_; } |
| 204 | 201 |
| 202 class LiteralDesc { |
| 203 public: |
| 204 LiteralDesc() |
| 205 : beg_pos(-1), |
| 206 offset(0), |
| 207 length(0), |
| 208 is_one_byte_(false), |
| 209 is_in_buffer_(false), |
| 210 is_one_byte_string_owned_(false) // TODO(dcarney): move to buffer |
| 211 { } |
| 212 |
| 213 ~LiteralDesc() { |
| 214 if (is_one_byte_string_owned_) { |
| 215 one_byte_string_.Dispose(); |
| 216 } |
| 217 } |
| 218 |
| 219 inline bool is_one_byte() { return is_one_byte_; } |
| 220 inline Vector<const uint8_t> one_byte_string() { |
| 221 ASSERT(is_one_byte_); |
| 222 return one_byte_string_; |
| 223 } |
| 224 inline Vector<const uint16_t> two_byte_string() { |
| 225 ASSERT(!is_one_byte_); |
| 226 return two_byte_string_; |
| 227 } |
| 228 |
| 229 inline bool Valid(int pos) { return beg_pos == pos; } |
| 230 inline void Invalidate() { if (is_in_buffer_) beg_pos = -1; } |
| 231 |
| 232 // TODO(dcarney): make private as well. |
| 233 int beg_pos; |
| 234 int offset; |
| 235 int length; |
| 236 LiteralBuffer buffer; |
| 237 |
| 238 void SetOneByteString(Vector<const uint8_t> string, bool owned); |
| 239 void SetTwoByteString(Vector<const uint16_t> string); |
| 240 void SetStringFromLiteralBuffer(); |
| 241 |
| 242 private: |
| 243 bool is_one_byte_; |
| 244 bool is_in_buffer_; |
| 245 bool is_one_byte_string_owned_; |
| 246 Vector<const uint8_t> one_byte_string_; |
| 247 Vector<const uint16_t> two_byte_string_; |
| 248 |
| 249 DISALLOW_COPY_AND_ASSIGN(LiteralDesc); |
| 250 }; |
| 251 |
| 205 protected: | 252 protected: |
| 206 struct TokenDesc { | 253 struct TokenDesc { |
| 207 Token::Value token; | |
| 208 int beg_pos; | 254 int beg_pos; |
| 209 int end_pos; | 255 int end_pos; |
| 256 Token::Value token; |
| 210 bool has_escapes; | 257 bool has_escapes; |
| 211 bool is_onebyte; | 258 bool is_onebyte; |
| 212 }; | 259 }; |
| 213 | 260 |
| 214 struct LiteralDesc { | |
| 215 int beg_pos; | |
| 216 bool is_one_byte; | |
| 217 bool is_in_buffer; | |
| 218 int offset; | |
| 219 int length; | |
| 220 Vector<const uint8_t> one_byte_string; | |
| 221 Vector<const uint16_t> two_byte_string; | |
| 222 LiteralBuffer buffer; | |
| 223 LiteralDesc() : beg_pos(-1), is_one_byte(false), is_in_buffer(false), | |
| 224 offset(0), length(0) { } | |
| 225 bool Valid(int pos) { return beg_pos == pos; } | |
| 226 }; | |
| 227 | |
| 228 virtual void Scan() = 0; | 261 virtual void Scan() = 0; |
| 229 | |
| 230 virtual void UpdateBufferBasedOnHandle() = 0; | 262 virtual void UpdateBufferBasedOnHandle() = 0; |
| 231 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0; | 263 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0; |
| 232 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal) = 0; | 264 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal) = 0; |
| 233 virtual Handle<String> AllocateLiteral(LiteralDesc* literal, | 265 virtual Handle<String> AllocateLiteral(LiteralDesc* literal, |
| 234 PretenureFlag tenured) = 0; | 266 PretenureFlag tenured) = 0; |
| 235 | 267 |
| 236 void ResetLiterals() { | |
| 237 if (!current_literal_->is_in_buffer) current_literal_->beg_pos = -1; | |
| 238 if (!next_literal_->is_in_buffer) next_literal_->beg_pos = -1; | |
| 239 } | |
| 240 | |
| 241 void EnsureCurrentLiteralIsValid() { | 268 void EnsureCurrentLiteralIsValid() { |
| 242 if (!current_literal_->Valid(current_.beg_pos)) { | 269 if (!current_literal_->Valid(current_.beg_pos)) { |
| 243 FillLiteral(current_, current_literal_); | 270 FillLiteral(current_, current_literal_); |
| 244 } | 271 } |
| 245 } | 272 } |
| 246 | 273 |
| 247 void EnsureNextLiteralIsValid() { | 274 void EnsureNextLiteralIsValid() { |
| 248 if (!next_literal_->Valid(next_.beg_pos)) { | 275 if (!next_literal_->Valid(next_.beg_pos)) { |
| 249 FillLiteral(next_, next_literal_); | 276 FillLiteral(next_, next_literal_); |
| 250 } | 277 } |
| 251 } | 278 } |
| 252 | 279 |
| 253 UnicodeCache* unicode_cache_; | 280 UnicodeCache* unicode_cache_; |
| 281 LiteralDesc* current_literal_; |
| 282 LiteralDesc* next_literal_; |
| 283 LiteralDesc literals_[2]; |
| 254 | 284 |
| 285 TokenDesc current_; // desc for current token (as returned by Next()) |
| 286 TokenDesc next_; // desc for next token (one token look-ahead) |
| 287 |
| 288 // TODO(dcarney): encode flags in uint8_t |
| 255 bool has_line_terminator_before_next_; | 289 bool has_line_terminator_before_next_; |
| 256 // Whether there is a multiline comment *with a line break* before the next | 290 // Whether there is a multiline comment *with a line break* before the next |
| 257 // token. | 291 // token. |
| 258 bool has_multiline_comment_before_next_; | 292 bool has_multiline_comment_before_next_; |
| 259 | |
| 260 TokenDesc current_; // desc for current token (as returned by Next()) | |
| 261 TokenDesc next_; // desc for next token (one token look-ahead) | |
| 262 | |
| 263 LiteralDesc* current_literal_; | |
| 264 LiteralDesc* next_literal_; | |
| 265 LiteralDesc literals_[2]; | |
| 266 | |
| 267 bool harmony_numeric_literals_; | 293 bool harmony_numeric_literals_; |
| 268 bool harmony_modules_; | 294 bool harmony_modules_; |
| 269 bool harmony_scoping_; | 295 bool harmony_scoping_; |
| 270 | 296 |
| 271 friend class Scanner; | 297 friend class Scanner; |
| 272 friend class LexerGCHandler; | 298 friend class LexerGCHandler; |
| 273 }; | 299 }; |
| 274 | 300 |
| 275 | 301 |
| 276 template<typename Char> | 302 template<typename Char> |
| 277 class Lexer : public LexerBase { | 303 class Lexer : public LexerBase { |
| 278 public: | 304 public: |
| 279 Lexer(UnicodeCache* unicode_cache, | 305 Lexer(UnicodeCache* unicode_cache, |
| 280 Handle<String> source, | 306 Handle<String> source, |
| 281 int start_position, | 307 int start_position, |
| 282 int end_position); | 308 int end_position); |
| 283 Lexer(UnicodeCache* unicode_cache, const Char* source_ptr, int length); | 309 Lexer(UnicodeCache* unicode_cache, const Char* source_ptr, int length); |
| 284 virtual ~Lexer(); | 310 virtual ~Lexer(); |
| 285 | 311 |
| 286 virtual void SeekForward(int pos); | 312 virtual void SeekForward(int pos); |
| 287 virtual bool ScanRegExpPattern(bool seen_equal); | 313 virtual bool ScanRegExpPattern(bool seen_equal); |
| 288 virtual bool ScanRegExpFlags(); | 314 virtual bool ScanRegExpFlags(); |
| 289 virtual Location octal_position() const; | 315 virtual Location octal_position() const; |
| 290 virtual void clear_octal_position() { last_octal_end_ = NULL; } | 316 virtual void clear_octal_position() { last_octal_end_ = NULL; } |
| 291 | 317 |
| 292 protected: | 318 protected: |
| 293 virtual void Scan(); | 319 virtual void Scan(); |
| 294 | 320 |
| 321 private: |
| 322 uc32 ScanHexNumber(int length); |
| 323 |
| 324 bool ScanLiteralUnicodeEscape(); |
| 325 |
| 295 const Char* GetNewBufferBasedOnHandle() const; | 326 const Char* GetNewBufferBasedOnHandle() const; |
| 296 virtual void UpdateBufferBasedOnHandle(); | 327 virtual void UpdateBufferBasedOnHandle(); |
| 297 | 328 |
| 298 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal); | 329 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal); |
| 299 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal); | 330 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal); |
| 300 virtual Handle<String> AllocateLiteral(LiteralDesc* literal, | 331 virtual Handle<String> AllocateLiteral(LiteralDesc* literal, |
| 301 PretenureFlag tenured); | 332 PretenureFlag tenured); |
| 302 | 333 |
| 303 private: | 334 // Helper function for FillLiteral. |
| 304 uc32 ScanHexNumber(int length); | 335 template<bool is_one_byte> |
| 305 | 336 static void SetLiteral( |
| 306 bool ScanLiteralUnicodeEscape(); | 337 const Char* start, const Char* end, LiteralDesc* literal); |
| 307 | |
| 308 const Char* ScanHexNumber(const Char* start, | |
| 309 const Char* end, | |
| 310 uc32* result); | |
| 311 const Char* ScanOctalEscape(const Char* start, | |
| 312 const Char* end, | |
| 313 uc32* result); | |
| 314 const Char* ScanIdentifierUnicodeEscape(const Char* start, | |
| 315 const Char* end, | |
| 316 uc32* result); | |
| 317 const Char* ScanEscape(const Char* start, | |
| 318 const Char* end, | |
| 319 LiteralBuffer* literal); | |
| 320 | |
| 321 // Returns true if the literal of the token can be represented as a | |
| 322 // substring of the source. | |
| 323 bool IsSubstringOfSource(const TokenDesc& token); | |
| 324 | 338 |
| 325 bool CopyToLiteralBuffer(const Char* start, | 339 bool CopyToLiteralBuffer(const Char* start, |
| 326 const Char* end, | 340 const Char* end, |
| 327 const TokenDesc& token, | 341 const TokenDesc& token, |
| 328 LiteralDesc* literal); | 342 LiteralDesc* literal); |
| 329 | 343 |
| 330 // One of source_handle_ or source_ptr_ is set. | 344 // One of source_handle_ or source_ptr_ is set. |
| 331 // If source_ptr_ is set, isolate_ is 0 and no isolate accesses are allowed. | 345 // If source_ptr_ is set, isolate_ is 0 and no isolate accesses are allowed. |
| 332 Isolate* isolate_; | 346 Isolate* isolate_; |
| 333 const Handle<String> source_handle_; | 347 const Handle<String> source_handle_; |
| 334 const Char* const source_ptr_; | 348 const Char* const source_ptr_; |
| 335 const int start_position_; | |
| 336 const int end_position_; | 349 const int end_position_; |
| 337 // Stream variables. | 350 // Stream variables. |
| 338 const Char* buffer_; | 351 const Char* buffer_; |
| 339 const Char* buffer_end_; | 352 const Char* buffer_end_; |
| 340 const Char* start_; | 353 const Char* start_; |
| 341 const Char* cursor_; | 354 const Char* cursor_; |
| 342 // Where we have seen the last octal number or an octal escape inside a | 355 // Where we have seen the last octal number or an octal escape inside a |
| 343 // string. Used by octal_position(). | 356 // string. Used by octal_position(). |
| 344 const Char* last_octal_end_; | 357 const Char* last_octal_end_; |
| 345 }; | 358 }; |
| (...skipping 126 matching lines...) | (...skipping 126 matching lines...) |
| 472 bool harmony_scoping_; | 485 bool harmony_scoping_; |
| 473 }; | 486 }; |
| 474 | 487 |
| 475 | 488 |
| 476 #endif | 489 #endif |
| 477 | 490 |
| 478 | 491 |
| 479 } } | 492 } } |
| 480 | 493 |
| 481 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H | 494 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H |
| OLD | NEW |