OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #ifndef V8_PARSING_SCANNER_H_ | 7 #ifndef V8_PARSING_SCANNER_H_ |
8 #define V8_PARSING_SCANNER_H_ | 8 #define V8_PARSING_SCANNER_H_ |
9 | 9 |
10 #include "src/allocation.h" | 10 #include "src/allocation.h" |
(...skipping 130 matching lines...) | |
141 static const int kBufferSize = 100; | 141 static const int kBufferSize = 100; |
142 | 142 |
143 UnicodeCache* unicode_constants_; | 143 UnicodeCache* unicode_constants_; |
144 // Backing store used to store strings used as hashmap keys. | 144 // Backing store used to store strings used as hashmap keys. |
145 SequenceCollector<unsigned char> backing_store_; | 145 SequenceCollector<unsigned char> backing_store_; |
146 base::HashMap map_; | 146 base::HashMap map_; |
147 // Buffer used for string->number->canonical string conversions. | 147 // Buffer used for string->number->canonical string conversions. |
148 char number_buffer_[kBufferSize]; | 148 char number_buffer_[kBufferSize]; |
149 }; | 149 }; |
150 | 150 |
151 // ---------------------------------------------------------------------------- | |
152 // LiteralBuffer - Collector of chars of literals. | |
153 | |
154 const int kMaxAscii = 127; | |
155 | |
156 class LiteralBuffer { | |
157 public: | |
158 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { } | |
159 | |
160 ~LiteralBuffer() { backing_store_.Dispose(); } | |
161 | |
162 INLINE(void AddChar(char code_unit)) { | |
163 if (position_ >= backing_store_.length()) ExpandBuffer(); | |
164 DCHECK(is_one_byte_); | |
165 DCHECK(IsValidAscii(code_unit)); | |
166 backing_store_[position_] = static_cast<byte>(code_unit); | |
167 position_ += kOneByteSize; | |
168 return; | |
169 } | |
170 | |
171 INLINE(void AddChar(uc32 code_unit)) { | |
172 if (position_ >= backing_store_.length()) ExpandBuffer(); | |
173 if (is_one_byte_) { | |
174 if (code_unit <= unibrow::Latin1::kMaxChar) { | |
175 backing_store_[position_] = static_cast<byte>(code_unit); | |
176 position_ += kOneByteSize; | |
177 return; | |
178 } | |
179 ConvertToTwoByte(); | |
180 } | |
181 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
182 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit; | |
183 position_ += kUC16Size; | |
184 } else { | |
185 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = | |
186 unibrow::Utf16::LeadSurrogate(code_unit); | |
187 position_ += kUC16Size; | |
188 if (position_ >= backing_store_.length()) ExpandBuffer(); | |
189 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = | |
190 unibrow::Utf16::TrailSurrogate(code_unit); | |
191 position_ += kUC16Size; | |
192 } | |
193 } | |
194 | |
195 bool is_one_byte() const { return is_one_byte_; } | |
196 | |
197 bool is_contextual_keyword(Vector<const char> keyword) const { | |
198 return is_one_byte() && keyword.length() == position_ && | |
199 (memcmp(keyword.start(), backing_store_.start(), position_) == 0); | |
200 } | |
201 | |
202 Vector<const uint16_t> two_byte_literal() const { | |
203 DCHECK(!is_one_byte_); | |
204 DCHECK((position_ & 0x1) == 0); | |
205 return Vector<const uint16_t>( | |
206 reinterpret_cast<const uint16_t*>(backing_store_.start()), | |
207 position_ >> 1); | |
208 } | |
209 | |
210 Vector<const uint8_t> one_byte_literal() const { | |
211 DCHECK(is_one_byte_); | |
212 return Vector<const uint8_t>( | |
213 reinterpret_cast<const uint8_t*>(backing_store_.start()), | |
214 position_); | |
215 } | |
216 | |
217 int length() const { | |
218 return is_one_byte_ ? position_ : (position_ >> 1); | |
219 } | |
220 | |
221 void ReduceLength(int delta) { | |
222 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size); | |
223 } | |
224 | |
225 void Reset() { | |
226 position_ = 0; | |
227 is_one_byte_ = true; | |
228 } | |
229 | |
230 Handle<String> Internalize(Isolate* isolate) const; | |
231 | |
232 void CopyFrom(const LiteralBuffer* other) { | |
233 if (other == nullptr) { | |
234 Reset(); | |
235 } else { | |
236 is_one_byte_ = other->is_one_byte_; | |
237 position_ = other->position_; | |
238 if (position_ < backing_store_.length()) { | |
239 std::copy(other->backing_store_.begin(), | |
240 other->backing_store_.begin() + position_, | |
241 backing_store_.begin()); | |
242 } else { | |
243 backing_store_.Dispose(); | |
244 backing_store_ = other->backing_store_.Clone(); | |
245 } | |
246 } | |
247 } | |
248 | |
249 private: | |
250 static const int kInitialCapacity = 16; | |
251 static const int kGrowthFactory = 4; | |
252 static const int kMinConversionSlack = 256; | |
253 static const int kMaxGrowth = 1 * MB; | |
254 | |
255 inline bool IsValidAscii(char code_unit) { | |
256 // Control characters and printable characters span the range of | |
257 // valid ASCII characters (0-127). Chars are unsigned on some | |
258 // platforms which causes compiler warnings if the validity check | |
259 // tests the lower bound >= 0 as it's always true. | |
260 return iscntrl(code_unit) || isprint(code_unit); | |
261 } | |
262 | |
263 inline int NewCapacity(int min_capacity) { | |
264 int capacity = Max(min_capacity, backing_store_.length()); | |
265 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); | |
266 return new_capacity; | |
267 } | |
268 | |
269 void ExpandBuffer() { | |
270 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity)); | |
271 MemCopy(new_store.start(), backing_store_.start(), position_); | |
272 backing_store_.Dispose(); | |
273 backing_store_ = new_store; | |
274 } | |
275 | |
276 void ConvertToTwoByte() { | |
277 DCHECK(is_one_byte_); | |
278 Vector<byte> new_store; | |
279 int new_content_size = position_ * kUC16Size; | |
280 if (new_content_size >= backing_store_.length()) { | |
281 // Ensure room for all currently read code units as UC16 as well | |
282 // as the code unit about to be stored. | |
283 new_store = Vector<byte>::New(NewCapacity(new_content_size)); | |
284 } else { | |
285 new_store = backing_store_; | |
286 } | |
287 uint8_t* src = backing_store_.start(); | |
288 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start()); | |
289 for (int i = position_ - 1; i >= 0; i--) { | |
290 dst[i] = src[i]; | |
291 } | |
292 if (new_store.start() != backing_store_.start()) { | |
293 backing_store_.Dispose(); | |
294 backing_store_ = new_store; | |
295 } | |
296 position_ = new_content_size; | |
297 is_one_byte_ = false; | |
298 } | |
299 | |
300 bool is_one_byte_; | |
301 int position_; | |
302 Vector<byte> backing_store_; | |
303 | |
304 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); | |
305 }; | |
306 | |
307 | 151 |
308 // ---------------------------------------------------------------------------- | 152 // ---------------------------------------------------------------------------- |
309 // JavaScript Scanner. | 153 // JavaScript Scanner. |
310 | 154 |
311 class Scanner { | 155 class Scanner { |
312 public: | 156 public: |
313 // Scoped helper for literal recording. Automatically drops the literal | |
314 // if aborting the scanning before it's complete. | |
315 class LiteralScope { | |
316 public: | |
317 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) { | |
318 scanner_->StartLiteral(); | |
319 } | |
320 ~LiteralScope() { | |
321 if (!complete_) scanner_->DropLiteral(); | |
322 } | |
323 void Complete() { | |
324 complete_ = true; | |
325 } | |
326 | |
327 private: | |
328 Scanner* scanner_; | |
329 bool complete_; | |
330 }; | |
331 | |
332 // Scoped helper for a re-settable bookmark. | 157 // Scoped helper for a re-settable bookmark. |
333 class BookmarkScope { | 158 class BookmarkScope { |
334 public: | 159 public: |
335 explicit BookmarkScope(Scanner* scanner) : scanner_(scanner) { | 160 explicit BookmarkScope(Scanner* scanner) : scanner_(scanner) { |
336 DCHECK_NOT_NULL(scanner_); | 161 DCHECK_NOT_NULL(scanner_); |
337 } | 162 } |
338 ~BookmarkScope() { scanner_->DropBookmark(); } | 163 ~BookmarkScope() { scanner_->DropBookmark(); } |
339 | 164 |
340 bool Set() { return scanner_->SetBookmark(); } | 165 bool Set() { return scanner_->SetBookmark(); } |
341 void Reset() { scanner_->ResetToBookmark(); } | 166 void Reset() { scanner_->ResetToBookmark(); } |
(...skipping 135 matching lines...) | |
477 // Scans the input as a regular expression pattern, previous | 302 // Scans the input as a regular expression pattern, previous |
478 // character(s) must be /(=). Returns true if a pattern is scanned. | 303 // character(s) must be /(=). Returns true if a pattern is scanned. |
479 bool ScanRegExpPattern(bool seen_equal); | 304 bool ScanRegExpPattern(bool seen_equal); |
480 // Scans the input as regular expression flags. Returns the flags on success. | 305 // Scans the input as regular expression flags. Returns the flags on success. |
481 Maybe<RegExp::Flags> ScanRegExpFlags(); | 306 Maybe<RegExp::Flags> ScanRegExpFlags(); |
482 | 307 |
483 // Scans the input as a template literal | 308 // Scans the input as a template literal |
484 Token::Value ScanTemplateStart(); | 309 Token::Value ScanTemplateStart(); |
485 Token::Value ScanTemplateContinuation(); | 310 Token::Value ScanTemplateContinuation(); |
486 | 311 |
487 const LiteralBuffer* source_url() const { return &source_url_; } | 312 Handle<String> SourceUrl(Isolate* isolate) const { |
488 const LiteralBuffer* source_mapping_url() const { | 313 Handle<String> tmp; |
489 return &source_mapping_url_; | 314 if (source_url_.length() > 0) tmp = source_url_.Internalize(isolate); |
315 return tmp; | |
316 } | |
317 | |
318 Handle<String> SourceMappingUrl(Isolate* isolate) const { | |
319 Handle<String> tmp; | |
320 if (source_mapping_url_.length() > 0) | |
321 tmp = source_mapping_url_.Internalize(isolate); | |
322 return tmp; | |
490 } | 323 } |
491 | 324 |
492 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; | 325 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; |
493 | 326 |
494 bool FoundHtmlComment() const { return found_html_comment_; } | 327 bool FoundHtmlComment() const { return found_html_comment_; } |
495 | 328 |
496 #define DECLARE_ACCESSORS(name) \ | 329 #define DECLARE_ACCESSORS(name) \ |
497 inline bool allow_##name() const { return allow_##name##_; } \ | 330 inline bool allow_##name() const { return allow_##name##_; } \ |
498 inline void set_allow_##name(bool allow) { allow_##name##_ = allow; } | 331 inline void set_allow_##name(bool allow) { allow_##name##_ = allow; } |
499 DECLARE_ACCESSORS(harmony_exponentiation_operator) | 332 DECLARE_ACCESSORS(harmony_exponentiation_operator) |
500 #undef ACCESSOR | 333 #undef ACCESSOR |
501 | 334 |
502 private: | 335 private: |
336 // Scoped helper for literal recording. Automatically drops the literal | |
337 // if aborting the scanning before it's complete. | |
338 class LiteralScope { | |
339 public: | |
340 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) { | |
341 scanner_->StartLiteral(); | |
342 } | |
343 ~LiteralScope() { | |
344 if (!complete_) scanner_->DropLiteral(); | |
345 } | |
346 void Complete() { complete_ = true; } | |
347 | |
348 private: | |
349 Scanner* scanner_; | |
350 bool complete_; | |
351 }; | |
352 | |
353 // LiteralBuffer - Collector of chars of literals. | |
354 class LiteralBuffer { | |
355 public: | |
356 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() {} | |
357 | |
358 ~LiteralBuffer() { backing_store_.Dispose(); } | |
359 | |
360 INLINE(void AddChar(char code_unit)) { | |
361 if (position_ >= backing_store_.length()) ExpandBuffer(); | |
362 DCHECK(is_one_byte_); | |
363 DCHECK(IsValidAscii(code_unit)); | |
364 backing_store_[position_] = static_cast<byte>(code_unit); | |
365 position_ += kOneByteSize; | |
366 return; | |
367 } | |
368 | |
369 INLINE(void AddChar(uc32 code_unit)) { | |
370 if (position_ >= backing_store_.length()) ExpandBuffer(); | |
371 if (is_one_byte_) { | |
372 if (code_unit <= unibrow::Latin1::kMaxChar) { | |
373 backing_store_[position_] = static_cast<byte>(code_unit); | |
374 position_ += kOneByteSize; | |
375 return; | |
376 } | |
377 ConvertToTwoByte(); | |
378 } | |
379 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) { | |
380 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit; | |
381 position_ += kUC16Size; | |
382 } else { | |
383 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = | |
384 unibrow::Utf16::LeadSurrogate(code_unit); | |
385 position_ += kUC16Size; | |
386 if (position_ >= backing_store_.length()) ExpandBuffer(); | |
387 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = | |
388 unibrow::Utf16::TrailSurrogate(code_unit); | |
389 position_ += kUC16Size; | |
390 } | |
391 } | |
392 | |
393 bool is_one_byte() const { return is_one_byte_; } | |
394 | |
395 bool is_contextual_keyword(Vector<const char> keyword) const { | |
396 return is_one_byte() && keyword.length() == position_ && | |
397 (memcmp(keyword.start(), backing_store_.start(), position_) == 0); | |
398 } | |
399 | |
400 Vector<const uint16_t> two_byte_literal() const { | |
401 DCHECK(!is_one_byte_); | |
402 DCHECK((position_ & 0x1) == 0); | |
403 return Vector<const uint16_t>( | |
404 reinterpret_cast<const uint16_t*>(backing_store_.start()), | |
405 position_ >> 1); | |
406 } | |
407 | |
408 Vector<const uint8_t> one_byte_literal() const { | |
409 DCHECK(is_one_byte_); | |
410 return Vector<const uint8_t>( | |
411 reinterpret_cast<const uint8_t*>(backing_store_.start()), position_); | |
412 } | |
413 | |
414 int length() const { return is_one_byte_ ? position_ : (position_ >> 1); } | |
415 | |
416 void ReduceLength(int delta) { | |
417 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size); | |
418 } | |
419 | |
420 void Reset() { | |
421 position_ = 0; | |
422 is_one_byte_ = true; | |
423 } | |
424 | |
425 Handle<String> Internalize(Isolate* isolate) const; | |
426 | |
427 void CopyFrom(const LiteralBuffer* other) { | |
428 if (other == nullptr) { | |
429 Reset(); | |
430 } else { | |
431 is_one_byte_ = other->is_one_byte_; | |
432 position_ = other->position_; | |
433 if (position_ < backing_store_.length()) { | |
434 std::copy(other->backing_store_.begin(), | |
435 other->backing_store_.begin() + position_, | |
436 backing_store_.begin()); | |
437 } else { | |
438 backing_store_.Dispose(); | |
439 backing_store_ = other->backing_store_.Clone(); | |
440 } | |
441 } | |
442 } | |
443 | |
444 private: | |
445 static const int kInitialCapacity = 16; | |
446 static const int kGrowthFactory = 4; | |
447 static const int kMinConversionSlack = 256; | |
448 static const int kMaxGrowth = 1 * MB; | |
449 | |
450 inline bool IsValidAscii(char code_unit) { | |
451 // Control characters and printable characters span the range of | |
452 // valid ASCII characters (0-127). Chars are unsigned on some | |
453 // platforms which causes compiler warnings if the validity check | |
454 // tests the lower bound >= 0 as it's always true. | |
455 return iscntrl(code_unit) || isprint(code_unit); | |
456 } | |
457 | |
458 inline int NewCapacity(int min_capacity) { | |
459 int capacity = Max(min_capacity, backing_store_.length()); | |
460 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); | |
461 return new_capacity; | |
462 } | |
463 | |
464 void ExpandBuffer() { | |
465 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity)); | |
466 MemCopy(new_store.start(), backing_store_.start(), position_); | |
467 backing_store_.Dispose(); | |
468 backing_store_ = new_store; | |
469 } | |
470 | |
471 void ConvertToTwoByte() { | |
472 DCHECK(is_one_byte_); | |
473 Vector<byte> new_store; | |
474 int new_content_size = position_ * kUC16Size; | |
475 if (new_content_size >= backing_store_.length()) { | |
476 // Ensure room for all currently read code units as UC16 as well | |
477 // as the code unit about to be stored. | |
478 new_store = Vector<byte>::New(NewCapacity(new_content_size)); | |
479 } else { | |
480 new_store = backing_store_; | |
481 } | |
482 uint8_t* src = backing_store_.start(); | |
483 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start()); | |
484 for (int i = position_ - 1; i >= 0; i--) { | |
485 dst[i] = src[i]; | |
486 } | |
487 if (new_store.start() != backing_store_.start()) { | |
488 backing_store_.Dispose(); | |
489 backing_store_ = new_store; | |
490 } | |
491 position_ = new_content_size; | |
492 is_one_byte_ = false; | |
493 } | |
494 | |
495 bool is_one_byte_; | |
496 int position_; | |
497 Vector<byte> backing_store_; | |
498 | |
499 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); | |
500 }; | |
501 | |
503 // The current and look-ahead token. | 502 // The current and look-ahead token. |
504 struct TokenDesc { | 503 struct TokenDesc { |
505 Token::Value token; | 504 Token::Value token; |
506 Location location; | 505 Location location; |
507 LiteralBuffer* literal_chars; | 506 LiteralBuffer* literal_chars; |
508 LiteralBuffer* raw_literal_chars; | 507 LiteralBuffer* raw_literal_chars; |
509 int smi_value_; | 508 int smi_value_; |
510 }; | 509 }; |
511 | 510 |
512 static const int kCharacterLookaheadBufferSize = 1; | 511 static const int kCharacterLookaheadBufferSize = 1; |
512 const int kMaxAscii = 127; | |
marja (2016/08/10 08:15:08):
Not a problem of this CL, but I was wondering don'… [comment preview truncated at 50 characters]
vogelheim (2016/08/10 08:24:07):
https://cs.chromium.org/search/?q=127+file:src/v8/ … [link preview truncated at 50 characters]
| |
513 | 513 |
514 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | 514 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
515 template <bool capture_raw> | 515 template <bool capture_raw> |
516 uc32 ScanOctalEscape(uc32 c, int length); | 516 uc32 ScanOctalEscape(uc32 c, int length); |
517 | 517 |
518 // Call this after setting source_ to the input. | 518 // Call this after setting source_ to the input. |
519 void Init() { | 519 void Init() { |
520 // Set c0_ (one character ahead) | 520 // Set c0_ (one character ahead) |
521 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); | 521 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); |
522 Advance(); | 522 Advance(); |
(...skipping 308 matching lines...) | |
831 bool allow_harmony_exponentiation_operator_; | 831 bool allow_harmony_exponentiation_operator_; |
832 | 832 |
833 MessageTemplate::Template scanner_error_; | 833 MessageTemplate::Template scanner_error_; |
834 Location scanner_error_location_; | 834 Location scanner_error_location_; |
835 }; | 835 }; |
836 | 836 |
837 } // namespace internal | 837 } // namespace internal |
838 } // namespace v8 | 838 } // namespace v8 |
839 | 839 |
840 #endif // V8_PARSING_SCANNER_H_ | 840 #endif // V8_PARSING_SCANNER_H_ |
OLD | NEW |