Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/scanner-base.h

Issue 5136002: Extract scanner base/JS/JSON and move base and JS to scanner-base. (Closed)
Patch Set: Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 19 matching lines...) Expand all
30 #ifndef V8_SCANNER_BASE_H_ 30 #ifndef V8_SCANNER_BASE_H_
31 #define V8_SCANNER_BASE_H_ 31 #define V8_SCANNER_BASE_H_
32 32
33 #include "globals.h" 33 #include "globals.h"
34 #include "checks.h" 34 #include "checks.h"
35 #include "allocation.h" 35 #include "allocation.h"
36 #include "token.h" 36 #include "token.h"
37 #include "unicode-inl.h" 37 #include "unicode-inl.h"
38 #include "char-predicates.h" 38 #include "char-predicates.h"
39 #include "utils.h" 39 #include "utils.h"
40 #include "list-inl.h"
40 41
41 namespace v8 { 42 namespace v8 {
42 namespace internal { 43 namespace internal {
43 44
44 // Interface through which the scanner reads characters from the input source. 45 // Returns the value (0 .. 15) of a hexadecimal character c.
46 // If c is not a legal hexadecimal character, returns a value < 0.
47 inline int HexValue(uc32 c) {
48 c -= '0';
49 if (static_cast<unsigned>(c) <= 9) return c;
50 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
51 if (static_cast<unsigned>(c) <= 6) return c + 10;
52 return -1;
53 }
54
55 // ----------------------------------------------------------------------------
56 // UTF16Buffer - scanner input source with pushback.
57
45 class UTF16Buffer { 58 class UTF16Buffer {
46 public: 59 public:
47 UTF16Buffer(); 60 UTF16Buffer();
48 virtual ~UTF16Buffer() {} 61 virtual ~UTF16Buffer() {}
49 62
50 virtual void PushBack(uc32 ch) = 0; 63 virtual void PushBack(uc32 ch) = 0;
51 // Returns a value < 0 when the buffer end is reached. 64 // Returns a value < 0 when the buffer end is reached.
52 virtual uc32 Advance() = 0; 65 virtual uc32 Advance() = 0;
53 virtual void SeekForward(int pos) = 0; 66 virtual void SeekForward(int pos) = 0;
54 67
55 int pos() const { return pos_; } 68 int pos() const { return pos_; }
56 69
70 static const int kNoEndPosition = 1;
71
57 protected: 72 protected:
73 // Initial value of end_ before the input stream is initialized.
74
58 int pos_; // Current position in the buffer. 75 int pos_; // Current position in the buffer.
59 int end_; // Position where scanning should stop (EOF). 76 int end_; // Position where scanning should stop (EOF).
60 }; 77 };
61 78
62 79
63 class ScannerConstants : AllStatic { 80 class ScannerConstants : AllStatic {
64 public: 81 public:
65 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; 82 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
66 83
67 static StaticResource<Utf8Decoder>* utf8_decoder() { 84 static StaticResource<Utf8Decoder>* utf8_decoder() {
68 return &utf8_decoder_; 85 return &utf8_decoder_;
69 } 86 }
70 87
71 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; 88 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
72 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; 89 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
73 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; 90 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
74 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; 91 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
75 92
76 static bool IsIdentifier(unibrow::CharacterStream* buffer); 93 static bool IsIdentifier(unibrow::CharacterStream* buffer);
77 94
78 private: 95 private:
79 static StaticResource<Utf8Decoder> utf8_decoder_; 96 static StaticResource<Utf8Decoder> utf8_decoder_;
80 }; 97 };
81 98
99 // ----------------------------------------------------------------------------
100 // LiteralCollector - Collector of chars of literals.
101
102 class LiteralCollector {
103 public:
104 LiteralCollector();
105 ~LiteralCollector();
106
107 inline void AddChar(uc32 c) {
108 if (recording_) {
109 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
110 buffer_.Add(static_cast<char>(c));
111 } else {
112 AddCharSlow(c);
113 }
114 }
115 }
116
117 void StartLiteral() {
118 buffer_.StartSequence();
119 recording_ = true;
120 }
121
122 Vector<const char> EndLiteral() {
123 if (recording_) {
124 recording_ = false;
125 buffer_.Add(kEndMarker);
126 Vector<char> sequence = buffer_.EndSequence();
127 return Vector<const char>(sequence.start(), sequence.length());
128 }
129 return Vector<const char>();
130 }
131
132 void DropLiteral() {
133 if (recording_) {
134 recording_ = false;
135 buffer_.DropSequence();
136 }
137 }
138
139 void Reset() {
140 buffer_.Reset();
141 }
142
143 // The end marker added after a parsed literal.
144 // Using zero allows the usage of strlen and similar functions on
145 // identifiers and numbers (but not strings, since they may contain zero
146 // bytes).
147 static const char kEndMarker = '\x00';
148 private:
149 static const int kInitialCapacity = 256;
150 SequenceCollector<char, 4> buffer_;
151 bool recording_;
152 void AddCharSlow(uc32 c);
153 };
154
155 // ----------------------------------------------------------------------------
156 // Scanner base-class.
157
158 // Generic functionality used by both JSON and JavaScript scanners.
159 class Scanner {
160 public:
161 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
162
163 class LiteralScope {
164 public:
165 explicit LiteralScope(Scanner* self);
166 ~LiteralScope();
167 void Complete();
168
169 private:
170 Scanner* scanner_;
171 bool complete_;
172 };
173
174 Scanner();
175
176 // Returns the current token again.
177 Token::Value current_token() { return current_.token; }
178
179 // One token look-ahead (past the token returned by Next()).
180 Token::Value peek() const { return next_.token; }
181
182 struct Location {
183 Location(int b, int e) : beg_pos(b), end_pos(e) { }
184 Location() : beg_pos(0), end_pos(0) { }
185 int beg_pos;
186 int end_pos;
187 };
188
189 // Returns the location information for the current token
190 // (the token returned by Next()).
191 Location location() const { return current_.location; }
192 Location peek_location() const { return next_.location; }
193
194 // Returns the literal string, if any, for the current token (the
195 // token returned by Next()). The string is 0-terminated and in
196 // UTF-8 format; they may contain 0-characters. Literal strings are
197 // collected for identifiers, strings, and numbers.
198 // These functions only give the correct result if the literal
199 // was scanned between calls to StartLiteral() and TerminateLiteral().
200 const char* literal_string() const {
201 return current_.literal_chars.start();
202 }
203
204 int literal_length() const {
205 // Excluding terminal '\x00' added by TerminateLiteral().
206 return current_.literal_chars.length() - 1;
207 }
208
209 Vector<const char> literal() const {
210 return Vector<const char>(literal_string(), literal_length());
211 }
212
213 // Returns the literal string for the next token (the token that
214 // would be returned if Next() were called).
215 const char* next_literal_string() const {
216 return next_.literal_chars.start();
217 }
218
219
220 // Returns the length of the next token (that would be returned if
221 // Next() were called).
222 int next_literal_length() const {
223 // Excluding terminal '\x00' added by TerminateLiteral().
224 return next_.literal_chars.length() - 1;
225 }
226
227 Vector<const char> next_literal() const {
228 return Vector<const char>(next_literal_string(), next_literal_length());
229 }
230
231 bool stack_overflow() { return stack_overflow_; }
232
233 static const int kCharacterLookaheadBufferSize = 1;
234
235 protected:
236 // The current and look-ahead token.
237 struct TokenDesc {
238 Token::Value token;
239 Location location;
240 Vector<const char> literal_chars;
241 };
242
243 // Call this after setting source_ to the input.
244 void Init() {
245 // Set c0_ (one character ahead)
246 ASSERT(kCharacterLookaheadBufferSize == 1);
247 Advance();
248 // Initialize current_ to not refer to a literal.
249 current_.literal_chars = Vector<const char>();
250 // Reset literal buffer.
251 literal_buffer_.Reset();
252 }
253
254 // Literal buffer support
255 inline void StartLiteral() {
256 literal_buffer_.StartLiteral();
257 }
258
259 inline void AddLiteralChar(uc32 c) {
260 literal_buffer_.AddChar(c);
261 }
262
263 // Complete scanning of a literal.
264 inline void TerminateLiteral() {
265 next_.literal_chars = literal_buffer_.EndLiteral();
266 }
267
268 // Stops scanning of a literal and drop the collected characters,
269 // e.g., due to an encountered error.
270 inline void DropLiteral() {
271 literal_buffer_.DropLiteral();
272 }
273
274 inline void AddLiteralCharAdvance() {
275 AddLiteralChar(c0_);
276 Advance();
277 }
278
279 // Low-level scanning support.
280 void Advance() { c0_ = source_->Advance(); }
281 void PushBack(uc32 ch) {
282 source_->PushBack(ch);
283 c0_ = ch;
284 }
285
286 inline Token::Value Select(Token::Value tok) {
287 Advance();
288 return tok;
289 }
290
291 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
292 Advance();
293 if (c0_ == next) {
294 Advance();
295 return then;
296 } else {
297 return else_;
298 }
299 }
300
301 uc32 ScanHexEscape(uc32 c, int length);
302 uc32 ScanOctalEscape(uc32 c, int length);
303
304 // Return the current source position.
305 int source_pos() {
306 return source_->pos() - kCharacterLookaheadBufferSize;
307 }
308
309 TokenDesc current_; // desc for current token (as returned by Next())
310 TokenDesc next_; // desc for next token (one token look-ahead)
311
312 // Input stream. Must be initialized to an UTF16Buffer.
313 UTF16Buffer* source_;
314
315 // Buffer to hold literal values (identifiers, strings, numbers)
316 // using '\x00'-terminated UTF-8 encoding. Handles allocation internally.
317 LiteralCollector literal_buffer_;
318
319 bool stack_overflow_;
320
321 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
322 uc32 c0_;
323 };
324
325 // ----------------------------------------------------------------------------
326 // JavaScriptScanner - base logic for JavaScript scanning.
327
328 class JavaScriptScanner : public Scanner {
329 public:
330 JavaScriptScanner();
331
332 // Returns the next token.
333 Token::Value Next();
334
335 // Returns true if there was a line terminator before the peek'ed token.
336 bool has_line_terminator_before_next() const {
337 return has_line_terminator_before_next_;
338 }
339
340 // Scans the input as a regular expression pattern, previous
341 // character(s) must be /(=). Returns true if a pattern is scanned.
342 bool ScanRegExpPattern(bool seen_equal);
343 // Returns true if regexp flags are scanned (always since flags can
344 // be empty).
345 bool ScanRegExpFlags();
346
347 // Tells whether the buffer contains an identifier (no escapes).
348 // Used for checking if a property name is an identifier.
349 static bool IsIdentifier(unibrow::CharacterStream* buffer);
350
351 // Seek forward to the given position. This operation does not
352 // work in general, for instance when there are pushed back
353 // characters, but works for seeking forward until simple delimiter
354 // tokens, which is what it is used for.
355 void SeekForward(int pos);
356
357 protected:
358 bool SkipWhiteSpace();
359 Token::Value SkipSingleLineComment();
360 Token::Value SkipMultiLineComment();
361
362 // Scans a single JavaScript token.
363 void Scan();
364
365 void ScanDecimalDigits();
366 Token::Value ScanNumber(bool seen_period);
367 Token::Value ScanIdentifier();
368
369 void ScanEscape();
370 Token::Value ScanString();
371
372 // Scans a possible HTML comment -- begins with '<!'.
373 Token::Value ScanHtmlComment();
374
375 // Decodes a unicode escape-sequence which is part of an identifier.
376 // If the escape sequence cannot be decoded the result is kBadChar.
377 uc32 ScanIdentifierUnicodeEscape();
378
379 bool has_line_terminator_before_next_;
380 };
381
382
383 // ----------------------------------------------------------------------------
384 // Keyword matching state machine.
82 385
83 class KeywordMatcher { 386 class KeywordMatcher {
84 // Incrementally recognize keywords. 387 // Incrementally recognize keywords.
85 // 388 //
86 // Recognized keywords: 389 // Recognized keywords:
87 // break case catch const* continue debugger* default delete do else 390 // break case catch const* continue debugger* default delete do else
88 // finally false for function if in instanceof native* new null 391 // finally false for function if in instanceof native* new null
89 // return switch this throw true try typeof var void while with 392 // return switch this throw true try typeof var void while with
90 // 393 //
91 // *: Actually "future reserved keywords". These are the only ones we 394 // *: Actually "future reserved keywords". These are the only ones we
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after
197 // keyword with the current prefix). 500 // keyword with the current prefix).
198 const char* keyword_; 501 const char* keyword_;
199 int counter_; 502 int counter_;
200 Token::Value keyword_token_; 503 Token::Value keyword_token_;
201 }; 504 };
202 505
203 506
204 } } // namespace v8::internal 507 } } // namespace v8::internal
205 508
206 #endif // V8_SCANNER_BASE_H_ 509 #endif // V8_SCANNER_BASE_H_
OLDNEW
« no previous file with comments | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698