OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #ifndef V8_PARSING_SCANNER_H_ | 7 #ifndef V8_PARSING_SCANNER_H_ |
8 #define V8_PARSING_SCANNER_H_ | 8 #define V8_PARSING_SCANNER_H_ |
9 | 9 |
10 #include "src/allocation.h" | 10 #include "src/allocation.h" |
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
141 static const int kBufferSize = 100; | 141 static const int kBufferSize = 100; |
142 | 142 |
143 UnicodeCache* unicode_constants_; | 143 UnicodeCache* unicode_constants_; |
144 // Backing store used to store strings used as hashmap keys. | 144 // Backing store used to store strings used as hashmap keys. |
145 SequenceCollector<unsigned char> backing_store_; | 145 SequenceCollector<unsigned char> backing_store_; |
146 base::HashMap map_; | 146 base::HashMap map_; |
147 // Buffer used for string->number->canonical string conversions. | 147 // Buffer used for string->number->canonical string conversions. |
148 char number_buffer_[kBufferSize]; | 148 char number_buffer_[kBufferSize]; |
149 }; | 149 }; |
150 | 150 |
151 // ---------------------------------------------------------------------------- | |
152 // LiteralBuffer - Collector of chars of literals. | |
153 | |
154 const int kMaxAscii = 127; | |
155 | |
156 class LiteralBuffer { | |
157 public: | |
158 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { } | |
159 | |
160 ~LiteralBuffer() { backing_store_.Dispose(); } | |
161 | |
162 INLINE(void AddChar(char code_unit)) { | |
163 if (position_ >= backing_store_.length()) ExpandBuffer(); | |
164 DCHECK(is_one_byte_); | |
165 DCHECK(IsValidAscii(code_unit)); | |
166 backing_store_[position_] = static_cast<byte>(code_unit); | |
167 position_ += kOneByteSize; | |
168 return; | |
169 } | |
170 | |
171 INLINE(void AddChar(uc32 code_unit)) { | |
172 if (position_ >= backing_store_.length()) ExpandBuffer(); | |
173 if (is_one_byte_) { | |
174 if (code_unit <= unibrow::Latin1::kMaxChar) { | |
175 backing_store_[position_] = static_cast<byte>(code_unit); | |
176 position_ += kOneByteSize; | |
177 return; | |
178 } | |
179 ConvertToTwoByte(); | |
180 } | |
181 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
182 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit; | |
183 position_ += kUC16Size; | |
184 } else { | |
185 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = | |
186 unibrow::Utf16::LeadSurrogate(code_unit); | |
187 position_ += kUC16Size; | |
188 if (position_ >= backing_store_.length()) ExpandBuffer(); | |
189 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = | |
190 unibrow::Utf16::TrailSurrogate(code_unit); | |
191 position_ += kUC16Size; | |
192 } | |
193 } | |
194 | |
195 bool is_one_byte() const { return is_one_byte_; } | |
196 | |
197 bool is_contextual_keyword(Vector<const char> keyword) const { | |
198 return is_one_byte() && keyword.length() == position_ && | |
199 (memcmp(keyword.start(), backing_store_.start(), position_) == 0); | |
200 } | |
201 | |
202 Vector<const uint16_t> two_byte_literal() const { | |
203 DCHECK(!is_one_byte_); | |
204 DCHECK((position_ & 0x1) == 0); | |
205 return Vector<const uint16_t>( | |
206 reinterpret_cast<const uint16_t*>(backing_store_.start()), | |
207 position_ >> 1); | |
208 } | |
209 | |
210 Vector<const uint8_t> one_byte_literal() const { | |
211 DCHECK(is_one_byte_); | |
212 return Vector<const uint8_t>( | |
213 reinterpret_cast<const uint8_t*>(backing_store_.start()), | |
214 position_); | |
215 } | |
216 | |
217 int length() const { | |
218 return is_one_byte_ ? position_ : (position_ >> 1); | |
219 } | |
220 | |
221 void ReduceLength(int delta) { | |
222 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size); | |
223 } | |
224 | |
225 void Reset() { | |
226 position_ = 0; | |
227 is_one_byte_ = true; | |
228 } | |
229 | |
230 Handle<String> Internalize(Isolate* isolate) const; | |
231 | |
232 void CopyFrom(const LiteralBuffer* other) { | |
233 if (other == nullptr) { | |
234 Reset(); | |
235 } else { | |
236 is_one_byte_ = other->is_one_byte_; | |
237 position_ = other->position_; | |
238 if (position_ < backing_store_.length()) { | |
239 std::copy(other->backing_store_.begin(), | |
240 other->backing_store_.begin() + position_, | |
241 backing_store_.begin()); | |
242 } else { | |
243 backing_store_.Dispose(); | |
244 backing_store_ = other->backing_store_.Clone(); | |
245 } | |
246 } | |
247 } | |
248 | |
249 private: | |
250 static const int kInitialCapacity = 16; | |
251 static const int kGrowthFactory = 4; | |
252 static const int kMinConversionSlack = 256; | |
253 static const int kMaxGrowth = 1 * MB; | |
254 | |
255 inline bool IsValidAscii(char code_unit) { | |
256 // Control characters and printable characters span the range of | |
257 // valid ASCII characters (0-127). Chars are unsigned on some | |
258 // platforms which causes compiler warnings if the validity check | |
259 // tests the lower bound >= 0 as it's always true. | |
260 return iscntrl(code_unit) || isprint(code_unit); | |
261 } | |
262 | |
263 inline int NewCapacity(int min_capacity) { | |
264 int capacity = Max(min_capacity, backing_store_.length()); | |
265 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); | |
266 return new_capacity; | |
267 } | |
268 | |
269 void ExpandBuffer() { | |
270 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity)); | |
271 MemCopy(new_store.start(), backing_store_.start(), position_); | |
272 backing_store_.Dispose(); | |
273 backing_store_ = new_store; | |
274 } | |
275 | |
276 void ConvertToTwoByte() { | |
277 DCHECK(is_one_byte_); | |
278 Vector<byte> new_store; | |
279 int new_content_size = position_ * kUC16Size; | |
280 if (new_content_size >= backing_store_.length()) { | |
281 // Ensure room for all currently read code units as UC16 as well | |
282 // as the code unit about to be stored. | |
283 new_store = Vector<byte>::New(NewCapacity(new_content_size)); | |
284 } else { | |
285 new_store = backing_store_; | |
286 } | |
287 uint8_t* src = backing_store_.start(); | |
288 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start()); | |
289 for (int i = position_ - 1; i >= 0; i--) { | |
290 dst[i] = src[i]; | |
291 } | |
292 if (new_store.start() != backing_store_.start()) { | |
293 backing_store_.Dispose(); | |
294 backing_store_ = new_store; | |
295 } | |
296 position_ = new_content_size; | |
297 is_one_byte_ = false; | |
298 } | |
299 | |
300 bool is_one_byte_; | |
301 int position_; | |
302 Vector<byte> backing_store_; | |
303 | |
304 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); | |
305 }; | |
306 | |
307 | 151 |
308 // ---------------------------------------------------------------------------- | 152 // ---------------------------------------------------------------------------- |
309 // JavaScript Scanner. | 153 // JavaScript Scanner. |
310 | 154 |
311 class Scanner { | 155 class Scanner { |
312 public: | 156 public: |
313 // Scoped helper for literal recording. Automatically drops the literal | |
314 // if aborting the scanning before it's complete. | |
315 class LiteralScope { | |
316 public: | |
317 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) { | |
318 scanner_->StartLiteral(); | |
319 } | |
320 ~LiteralScope() { | |
321 if (!complete_) scanner_->DropLiteral(); | |
322 } | |
323 void Complete() { | |
324 complete_ = true; | |
325 } | |
326 | |
327 private: | |
328 Scanner* scanner_; | |
329 bool complete_; | |
330 }; | |
331 | |
332 // Scoped helper for a re-settable bookmark. | 157 // Scoped helper for a re-settable bookmark. |
333 class BookmarkScope { | 158 class BookmarkScope { |
334 public: | 159 public: |
335 explicit BookmarkScope(Scanner* scanner) : scanner_(scanner) { | 160 explicit BookmarkScope(Scanner* scanner) : scanner_(scanner) { |
336 DCHECK_NOT_NULL(scanner_); | 161 DCHECK_NOT_NULL(scanner_); |
337 } | 162 } |
338 ~BookmarkScope() { scanner_->DropBookmark(); } | 163 ~BookmarkScope() { scanner_->DropBookmark(); } |
339 | 164 |
340 bool Set() { return scanner_->SetBookmark(); } | 165 bool Set() { return scanner_->SetBookmark(); } |
341 void Reset() { scanner_->ResetToBookmark(); } | 166 void Reset() { scanner_->ResetToBookmark(); } |
(...skipping 14 matching lines...) Expand all Loading... | |
356 bool IsValid() const { | 181 bool IsValid() const { |
357 return beg_pos >= 0 && end_pos >= beg_pos; | 182 return beg_pos >= 0 && end_pos >= beg_pos; |
358 } | 183 } |
359 | 184 |
360 static Location invalid() { return Location(-1, -1); } | 185 static Location invalid() { return Location(-1, -1); } |
361 | 186 |
362 int beg_pos; | 187 int beg_pos; |
363 int end_pos; | 188 int end_pos; |
364 }; | 189 }; |
365 | 190 |
191 private: | |
192 class LiteralScope; // Private helper class to start/end scanning literals. | |
193 class LiteralBuffer; // Private helper class to hold scanned literals. | |
194 | |
195 public: | |
marja
2016/08/05 07:05:46
We had some discussions about this part and... wha
vogelheim
2016/08/09 11:47:27
We concluded that I shouldn't do this. Will update
vogelheim
2016/08/09 11:57:24
Done. (As in: Removed this by reordering the decla
| |
366 // -1 is outside of the range of any real source code. | 196 // -1 is outside of the range of any real source code. |
367 static const int kNoOctalLocation = -1; | 197 static const int kNoOctalLocation = -1; |
368 | 198 |
369 explicit Scanner(UnicodeCache* scanner_contants); | 199 explicit Scanner(UnicodeCache* scanner_contants); |
370 | 200 |
371 void Initialize(Utf16CharacterStream* source); | 201 void Initialize(Utf16CharacterStream* source); |
372 | 202 |
373 // Returns the next token and advances input. | 203 // Returns the next token and advances input. |
374 Token::Value Next(); | 204 Token::Value Next(); |
375 // Returns the token following peek() | 205 // Returns the token following peek() |
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
477 // Scans the input as a regular expression pattern, previous | 307 // Scans the input as a regular expression pattern, previous |
478 // character(s) must be /(=). Returns true if a pattern is scanned. | 308 // character(s) must be /(=). Returns true if a pattern is scanned. |
479 bool ScanRegExpPattern(bool seen_equal); | 309 bool ScanRegExpPattern(bool seen_equal); |
480 // Scans the input as regular expression flags. Returns the flags on success. | 310 // Scans the input as regular expression flags. Returns the flags on success. |
481 Maybe<RegExp::Flags> ScanRegExpFlags(); | 311 Maybe<RegExp::Flags> ScanRegExpFlags(); |
482 | 312 |
483 // Scans the input as a template literal | 313 // Scans the input as a template literal |
484 Token::Value ScanTemplateStart(); | 314 Token::Value ScanTemplateStart(); |
485 Token::Value ScanTemplateContinuation(); | 315 Token::Value ScanTemplateContinuation(); |
486 | 316 |
487 const LiteralBuffer* source_url() const { return &source_url_; } | 317 Handle<String> SourceUrl(Isolate* isolate) const { |
488 const LiteralBuffer* source_mapping_url() const { | 318 Handle<String> tmp; |
489 return &source_mapping_url_; | 319 if (source_url_.length() > 0) tmp = source_url_.Internalize(isolate); |
320 return tmp; | |
321 } | |
322 | |
323 Handle<String> SourceMappingUrl(Isolate* isolate) const { | |
324 Handle<String> tmp; | |
325 if (source_mapping_url_.length() > 0) | |
326 tmp = source_mapping_url_.Internalize(isolate); | |
327 return tmp; | |
490 } | 328 } |
491 | 329 |
492 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; | 330 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; |
493 | 331 |
494 bool FoundHtmlComment() const { return found_html_comment_; } | 332 bool FoundHtmlComment() const { return found_html_comment_; } |
495 | 333 |
496 #define DECLARE_ACCESSORS(name) \ | 334 #define DECLARE_ACCESSORS(name) \ |
497 inline bool allow_##name() const { return allow_##name##_; } \ | 335 inline bool allow_##name() const { return allow_##name##_; } \ |
498 inline void set_allow_##name(bool allow) { allow_##name##_ = allow; } | 336 inline void set_allow_##name(bool allow) { allow_##name##_ = allow; } |
499 DECLARE_ACCESSORS(harmony_exponentiation_operator) | 337 DECLARE_ACCESSORS(harmony_exponentiation_operator) |
500 #undef ACCESSOR | 338 #undef ACCESSOR |
501 | 339 |
502 private: | 340 private: |
503 // The current and look-ahead token. | 341 // The current and look-ahead token. |
504 struct TokenDesc { | 342 struct TokenDesc { |
505 Token::Value token; | 343 Token::Value token; |
506 Location location; | 344 Location location; |
507 LiteralBuffer* literal_chars; | 345 LiteralBuffer* literal_chars; |
508 LiteralBuffer* raw_literal_chars; | 346 LiteralBuffer* raw_literal_chars; |
509 int smi_value_; | 347 int smi_value_; |
510 }; | 348 }; |
511 | 349 |
350 // Scoped helper for literal recording. Automatically drops the literal | |
351 // if aborting the scanning before it's complete. | |
352 class LiteralScope { | |
353 public: | |
354 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) { | |
355 scanner_->StartLiteral(); | |
356 } | |
357 ~LiteralScope() { | |
358 if (!complete_) scanner_->DropLiteral(); | |
359 } | |
360 void Complete() { complete_ = true; } | |
361 | |
362 private: | |
363 Scanner* scanner_; | |
364 bool complete_; | |
365 }; | |
366 | |
367 // LiteralBuffer - Collector of chars of literals. | |
368 class LiteralBuffer { | |
369 public: | |
370 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() {} | |
371 | |
372 ~LiteralBuffer() { backing_store_.Dispose(); } | |
373 | |
374 INLINE(void AddChar(char code_unit)) { | |
375 if (position_ >= backing_store_.length()) ExpandBuffer(); | |
376 DCHECK(is_one_byte_); | |
377 DCHECK(IsValidAscii(code_unit)); | |
378 backing_store_[position_] = static_cast<byte>(code_unit); | |
379 position_ += kOneByteSize; | |
380 return; | |
381 } | |
382 | |
383 INLINE(void AddChar(uc32 code_unit)) { | |
384 if (position_ >= backing_store_.length()) ExpandBuffer(); | |
385 if (is_one_byte_) { | |
386 if (code_unit <= unibrow::Latin1::kMaxChar) { | |
387 backing_store_[position_] = static_cast<byte>(code_unit); | |
388 position_ += kOneByteSize; | |
389 return; | |
390 } | |
391 ConvertToTwoByte(); | |
392 } | |
393 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
394 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit; | |
395 position_ += kUC16Size; | |
396 } else { | |
397 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = | |
398 unibrow::Utf16::LeadSurrogate(code_unit); | |
399 position_ += kUC16Size; | |
400 if (position_ >= backing_store_.length()) ExpandBuffer(); | |
401 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = | |
402 unibrow::Utf16::TrailSurrogate(code_unit); | |
403 position_ += kUC16Size; | |
404 } | |
405 } | |
406 | |
407 bool is_one_byte() const { return is_one_byte_; } | |
408 | |
409 bool is_contextual_keyword(Vector<const char> keyword) const { | |
410 return is_one_byte() && keyword.length() == position_ && | |
411 (memcmp(keyword.start(), backing_store_.start(), position_) == 0); | |
412 } | |
413 | |
414 Vector<const uint16_t> two_byte_literal() const { | |
415 DCHECK(!is_one_byte_); | |
416 DCHECK((position_ & 0x1) == 0); | |
417 return Vector<const uint16_t>( | |
418 reinterpret_cast<const uint16_t*>(backing_store_.start()), | |
419 position_ >> 1); | |
420 } | |
421 | |
422 Vector<const uint8_t> one_byte_literal() const { | |
423 DCHECK(is_one_byte_); | |
424 return Vector<const uint8_t>( | |
425 reinterpret_cast<const uint8_t*>(backing_store_.start()), position_); | |
426 } | |
427 | |
428 int length() const { return is_one_byte_ ? position_ : (position_ >> 1); } | |
429 | |
430 void ReduceLength(int delta) { | |
431 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size); | |
432 } | |
433 | |
434 void Reset() { | |
435 position_ = 0; | |
436 is_one_byte_ = true; | |
437 } | |
438 | |
439 Handle<String> Internalize(Isolate* isolate) const; | |
440 | |
441 void CopyFrom(const LiteralBuffer* other) { | |
442 if (other == nullptr) { | |
443 Reset(); | |
444 } else { | |
445 is_one_byte_ = other->is_one_byte_; | |
446 position_ = other->position_; | |
447 if (position_ < backing_store_.length()) { | |
448 std::copy(other->backing_store_.begin(), | |
449 other->backing_store_.begin() + position_, | |
450 backing_store_.begin()); | |
451 } else { | |
452 backing_store_.Dispose(); | |
453 backing_store_ = other->backing_store_.Clone(); | |
454 } | |
455 } | |
456 } | |
457 | |
458 private: | |
459 static const int kInitialCapacity = 16; | |
460 static const int kGrowthFactory = 4; | |
461 static const int kMinConversionSlack = 256; | |
462 static const int kMaxGrowth = 1 * MB; | |
463 | |
464 inline bool IsValidAscii(char code_unit) { | |
465 // Control characters and printable characters span the range of | |
466 // valid ASCII characters (0-127). Chars are unsigned on some | |
467 // platforms which causes compiler warnings if the validity check | |
468 // tests the lower bound >= 0 as it's always true. | |
469 return iscntrl(code_unit) || isprint(code_unit); | |
470 } | |
471 | |
472 inline int NewCapacity(int min_capacity) { | |
473 int capacity = Max(min_capacity, backing_store_.length()); | |
474 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); | |
475 return new_capacity; | |
476 } | |
477 | |
478 void ExpandBuffer() { | |
479 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity)); | |
480 MemCopy(new_store.start(), backing_store_.start(), position_); | |
481 backing_store_.Dispose(); | |
482 backing_store_ = new_store; | |
483 } | |
484 | |
485 void ConvertToTwoByte() { | |
486 DCHECK(is_one_byte_); | |
487 Vector<byte> new_store; | |
488 int new_content_size = position_ * kUC16Size; | |
489 if (new_content_size >= backing_store_.length()) { | |
490 // Ensure room for all currently read code units as UC16 as well | |
491 // as the code unit about to be stored. | |
492 new_store = Vector<byte>::New(NewCapacity(new_content_size)); | |
493 } else { | |
494 new_store = backing_store_; | |
495 } | |
496 uint8_t* src = backing_store_.start(); | |
497 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start()); | |
498 for (int i = position_ - 1; i >= 0; i--) { | |
499 dst[i] = src[i]; | |
500 } | |
501 if (new_store.start() != backing_store_.start()) { | |
502 backing_store_.Dispose(); | |
503 backing_store_ = new_store; | |
504 } | |
505 position_ = new_content_size; | |
506 is_one_byte_ = false; | |
507 } | |
508 | |
509 bool is_one_byte_; | |
510 int position_; | |
511 Vector<byte> backing_store_; | |
512 | |
513 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); | |
514 }; | |
515 | |
512 static const int kCharacterLookaheadBufferSize = 1; | 516 static const int kCharacterLookaheadBufferSize = 1; |
517 const int kMaxAscii = 127; | |
513 | 518 |
514 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | 519 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
515 template <bool capture_raw> | 520 template <bool capture_raw> |
516 uc32 ScanOctalEscape(uc32 c, int length); | 521 uc32 ScanOctalEscape(uc32 c, int length); |
517 | 522 |
518 // Call this after setting source_ to the input. | 523 // Call this after setting source_ to the input. |
519 void Init() { | 524 void Init() { |
520 // Set c0_ (one character ahead) | 525 // Set c0_ (one character ahead) |
521 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); | 526 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); |
522 Advance(); | 527 Advance(); |
(...skipping 308 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
831 bool allow_harmony_exponentiation_operator_; | 836 bool allow_harmony_exponentiation_operator_; |
832 | 837 |
833 MessageTemplate::Template scanner_error_; | 838 MessageTemplate::Template scanner_error_; |
834 Location scanner_error_location_; | 839 Location scanner_error_location_; |
835 }; | 840 }; |
836 | 841 |
837 } // namespace internal | 842 } // namespace internal |
838 } // namespace v8 | 843 } // namespace v8 |
839 | 844 |
840 #endif // V8_PARSING_SCANNER_H_ | 845 #endif // V8_PARSING_SCANNER_H_ |
OLD | NEW |