Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/scanner.cc

Issue 379005: Fix lint issue. (Closed)
Patch Set: Created 11 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 24 matching lines...) Expand all
35 35
36 // ---------------------------------------------------------------------------- 36 // ----------------------------------------------------------------------------
37 // Character predicates 37 // Character predicates
38 38
39 39
40 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart; 40 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;
41 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart; 41 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;
42 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; 42 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;
43 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; 43 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;
44 44
45
45 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; 46 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
46 47
48
47 // ---------------------------------------------------------------------------- 49 // ----------------------------------------------------------------------------
48 // UTF8Buffer 50 // UTF8Buffer
49 51
50 UTF8Buffer::UTF8Buffer() : 52 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { }
51 data_(NULL), limit_(NULL) { 53
52 }
53 54
54 UTF8Buffer::~UTF8Buffer() { 55 UTF8Buffer::~UTF8Buffer() {
55 DeleteArray(data_); 56 DeleteArray(data_);
56 } 57 }
57 58
59
58 void UTF8Buffer::AddCharSlow(uc32 c) { 60 void UTF8Buffer::AddCharSlow(uc32 c) {
59 static const int kCapacityGrowthLimit = 1 * MB; 61 static const int kCapacityGrowthLimit = 1 * MB;
60 if (cursor_ > limit_) { 62 if (cursor_ > limit_) {
61 int old_capacity = Capacity(); 63 int old_capacity = Capacity();
62 int old_position = pos(); 64 int old_position = pos();
63 int new_capacity = Min(old_capacity * 3, old_capacity 65 int new_capacity =
64 + kCapacityGrowthLimit); 66 Min(old_capacity * 3, old_capacity + kCapacityGrowthLimit);
65 char* new_data = NewArray<char> (new_capacity); 67 char* new_data = NewArray<char>(new_capacity);
66 memcpy(new_data, data_, old_position); 68 memcpy(new_data, data_, old_position);
67 DeleteArray(data_); 69 DeleteArray(data_);
68 data_ = new_data; 70 data_ = new_data;
69 cursor_ = new_data + old_position; 71 cursor_ = new_data + old_position;
70 limit_ = ComputeLimit(new_data, new_capacity); 72 limit_ = ComputeLimit(new_data, new_capacity);
71 ASSERT(Capacity() == new_capacity && pos() == old_position); 73 ASSERT(Capacity() == new_capacity && pos() == old_position);
72 } 74 }
73 if (static_cast<unsigned> (c) <= unibrow::Utf8::kMaxOneByteChar) { 75 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
74 *cursor_++ = c; // Common case: 7-bit ASCII. 76 *cursor_++ = c; // Common case: 7-bit ASCII.
75 } else { 77 } else {
76 cursor_ += unibrow::Utf8::Encode(cursor_, c); 78 cursor_ += unibrow::Utf8::Encode(cursor_, c);
77 } 79 }
78 ASSERT(pos() <= Capacity()); 80 ASSERT(pos() <= Capacity());
79 } 81 }
80 82
83
81 // ---------------------------------------------------------------------------- 84 // ----------------------------------------------------------------------------
82 // UTF16Buffer 85 // UTF16Buffer
83 86
84 87
85 UTF16Buffer::UTF16Buffer() : 88 UTF16Buffer::UTF16Buffer()
86 pos_(0), size_(0) { 89 : pos_(0), size_(0) { }
87 } 90
88 91
89 Handle<String> UTF16Buffer::SubString(int start, int end) { 92 Handle<String> UTF16Buffer::SubString(int start, int end) {
90 return internal::SubString(data_, start, end); 93 return internal::SubString(data_, start, end);
91 } 94 }
92 95
96
93 // CharacterStreamUTF16Buffer 97 // CharacterStreamUTF16Buffer
94 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() : 98 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()
95 pushback_buffer_(0), last_(0), stream_(NULL) { 99 : pushback_buffer_(0), last_(0), stream_(NULL) { }
96 } 100
97 101
98 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data, 102 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,
99 unibrow::CharacterStream* input) { 103 unibrow::CharacterStream* input) {
100 data_ = data; 104 data_ = data;
101 pos_ = 0; 105 pos_ = 0;
102 stream_ = input; 106 stream_ = input;
103 } 107 }
104 108
109
105 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) { 110 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {
106 pushback_buffer()->Add(last_); 111 pushback_buffer()->Add(last_);
107 last_ = ch; 112 last_ = ch;
108 pos_--; 113 pos_--;
109 } 114 }
110 115
116
111 uc32 CharacterStreamUTF16Buffer::Advance() { 117 uc32 CharacterStreamUTF16Buffer::Advance() {
112 // NOTE: It is of importance to Persian / Farsi resources that we do 118 // NOTE: It is of importance to Persian / Farsi resources that we do
113 // *not* strip format control characters in the scanner; see 119 // *not* strip format control characters in the scanner; see
114 // 120 //
115 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152 121 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152
116 // 122 //
117 // So, even though ECMA-262, section 7.1, page 11, dictates that we 123 // So, even though ECMA-262, section 7.1, page 11, dictates that we
118 // must remove Unicode format-control characters, we do not. This is 124 // must remove Unicode format-control characters, we do not. This is
119 // in line with how IE and SpiderMonkey handles it. 125 // in line with how IE and SpiderMonkey handles it.
120 if (!pushback_buffer()->is_empty()) { 126 if (!pushback_buffer()->is_empty()) {
121 pos_++; 127 pos_++;
122 return last_ = pushback_buffer()->RemoveLast(); 128 return last_ = pushback_buffer()->RemoveLast();
123 } else if (stream_->has_more()) { 129 } else if (stream_->has_more()) {
124 pos_++; 130 pos_++;
125 uc32 next = stream_->GetNext(); 131 uc32 next = stream_->GetNext();
126 return last_ = next; 132 return last_ = next;
127 } else { 133 } else {
128 // Note: currently the following increment is necessary to avoid a 134 // Note: currently the following increment is necessary to avoid a
129 // test-parser problem! 135 // test-parser problem!
130 pos_++; 136 pos_++;
131 return last_ = static_cast<uc32> (-1); 137 return last_ = static_cast<uc32>(-1);
132 } 138 }
133 } 139 }
134 140
141
135 void CharacterStreamUTF16Buffer::SeekForward(int pos) { 142 void CharacterStreamUTF16Buffer::SeekForward(int pos) {
136 pos_ = pos; 143 pos_ = pos;
137 ASSERT(pushback_buffer()->is_empty()); 144 ASSERT(pushback_buffer()->is_empty());
138 stream_->Seek(pos); 145 stream_->Seek(pos);
139 } 146 }
140 147
148
141 // TwoByteStringUTF16Buffer 149 // TwoByteStringUTF16Buffer
142 TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer() : 150 TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer()
143 raw_data_(NULL) { 151 : raw_data_(NULL) { }
144 }
145 152
146 void TwoByteStringUTF16Buffer::Initialize(Handle<ExternalTwoByteString> data) { 153
154 void TwoByteStringUTF16Buffer::Initialize(
155 Handle<ExternalTwoByteString> data) {
147 ASSERT(!data.is_null()); 156 ASSERT(!data.is_null());
148 157
149 data_ = data; 158 data_ = data;
150 pos_ = 0; 159 pos_ = 0;
151 160
152 raw_data_ = data->resource()->data(); 161 raw_data_ = data->resource()->data();
153 size_ = data->length(); 162 size_ = data->length();
154 } 163 }
155 164
165
156 uc32 TwoByteStringUTF16Buffer::Advance() { 166 uc32 TwoByteStringUTF16Buffer::Advance() {
157 if (pos_ < size_) { 167 if (pos_ < size_) {
158 return raw_data_[pos_++]; 168 return raw_data_[pos_++];
159 } else { 169 } else {
160 // note: currently the following increment is necessary to avoid a 170 // note: currently the following increment is necessary to avoid a
161 // test-parser problem! 171 // test-parser problem!
162 pos_++; 172 pos_++;
163 return static_cast<uc32> (-1); 173 return static_cast<uc32>(-1);
164 } 174 }
165 } 175 }
166 176
177
167 void TwoByteStringUTF16Buffer::PushBack(uc32 ch) { 178 void TwoByteStringUTF16Buffer::PushBack(uc32 ch) {
168 pos_--; 179 pos_--;
169 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); 180 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
170 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); 181 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
171 } 182 }
172 183
184
173 void TwoByteStringUTF16Buffer::SeekForward(int pos) { 185 void TwoByteStringUTF16Buffer::SeekForward(int pos) {
174 pos_ = pos; 186 pos_ = pos;
175 } 187 }
176 188
189
177 // ---------------------------------------------------------------------------- 190 // ----------------------------------------------------------------------------
178 // Keyword Matcher 191 // Keyword Matcher
179 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { { "break", 192 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {
180 KEYWORD_PREFIX, Token::BREAK }, { NULL, C, Token::ILLEGAL }, { NULL, D, 193 { "break", KEYWORD_PREFIX, Token::BREAK },
181 Token::ILLEGAL }, { "else", KEYWORD_PREFIX, Token::ELSE }, { NULL, F, 194 { NULL, C, Token::ILLEGAL },
182 Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, 195 { NULL, D, Token::ILLEGAL },
183 UNMATCHABLE, Token::ILLEGAL }, { NULL, I, Token::ILLEGAL }, { NULL, 196 { "else", KEYWORD_PREFIX, Token::ELSE },
184 UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { 197 { NULL, F, Token::ILLEGAL },
185 NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, 198 { NULL, UNMATCHABLE, Token::ILLEGAL },
186 { NULL, N, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, 199 { NULL, UNMATCHABLE, Token::ILLEGAL },
187 UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { 200 { NULL, I, Token::ILLEGAL },
188 "return", KEYWORD_PREFIX, Token::RETURN }, { "switch", KEYWORD_PREFIX, 201 { NULL, UNMATCHABLE, Token::ILLEGAL },
189 Token::SWITCH }, { NULL, T, Token::ILLEGAL }, { NULL, UNMATCHABLE, 202 { NULL, UNMATCHABLE, Token::ILLEGAL },
190 Token::ILLEGAL }, { NULL, V, Token::ILLEGAL }, { NULL, W, 203 { NULL, UNMATCHABLE, Token::ILLEGAL },
191 Token::ILLEGAL } }; 204 { NULL, UNMATCHABLE, Token::ILLEGAL },
205 { NULL, N, Token::ILLEGAL },
206 { NULL, UNMATCHABLE, Token::ILLEGAL },
207 { NULL, UNMATCHABLE, Token::ILLEGAL },
208 { NULL, UNMATCHABLE, Token::ILLEGAL },
209 { "return", KEYWORD_PREFIX, Token::RETURN },
210 { "switch", KEYWORD_PREFIX, Token::SWITCH },
211 { NULL, T, Token::ILLEGAL },
212 { NULL, UNMATCHABLE, Token::ILLEGAL },
213 { NULL, V, Token::ILLEGAL },
214 { NULL, W, Token::ILLEGAL }
215 };
216
192 217
193 void KeywordMatcher::Step(uc32 input) { 218 void KeywordMatcher::Step(uc32 input) {
194 switch (state_) { 219 switch (state_) {
195 case INITIAL: { 220 case INITIAL: {
196 // matching the first character is the only state with significant fanout. 221 // matching the first character is the only state with significant fanout.
197 // Match only lower-case letters in range 'b'..'w'. 222 // Match only lower-case letters in range 'b'..'w'.
198 unsigned int offset = input - kFirstCharRangeMin; 223 unsigned int offset = input - kFirstCharRangeMin;
199 if (offset < kFirstCharRangeLength) { 224 if (offset < kFirstCharRangeLength) {
200 state_ = first_states_[offset].state; 225 state_ = first_states_[offset].state;
201 if (state_ == KEYWORD_PREFIX) { 226 if (state_ == KEYWORD_PREFIX) {
(...skipping 13 matching lines...) Expand all
215 state_ = KEYWORD_MATCHED; 240 state_ = KEYWORD_MATCHED;
216 token_ = keyword_token_; 241 token_ = keyword_token_;
217 } 242 }
218 return; 243 return;
219 } 244 }
220 break; 245 break;
221 case KEYWORD_MATCHED: 246 case KEYWORD_MATCHED:
222 token_ = Token::IDENTIFIER; 247 token_ = Token::IDENTIFIER;
223 break; 248 break;
224 case C: 249 case C:
225 if (MatchState(input, 'a', CA)) 250 if (MatchState(input, 'a', CA)) return;
226 return; 251 if (MatchState(input, 'o', CO)) return;
227 if (MatchState(input, 'o', CO))
228 return;
229 break; 252 break;
230 case CA: 253 case CA:
231 if (MatchKeywordStart(input, "case", 2, Token::CASE)) 254 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return;
232 return; 255 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return;
233 if (MatchKeywordStart(input, "catch", 2, Token::CATCH))
234 return;
235 break; 256 break;
236 case CO: 257 case CO:
237 if (MatchState(input, 'n', CON)) 258 if (MatchState(input, 'n', CON)) return;
238 return;
239 break; 259 break;
240 case CON: 260 case CON:
241 if (MatchKeywordStart(input, "const", 3, Token::CONST)) 261 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return;
242 return; 262 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return;
243 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE))
244 return;
245 break; 263 break;
246 case D: 264 case D:
247 if (MatchState(input, 'e', DE)) 265 if (MatchState(input, 'e', DE)) return;
248 return; 266 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return;
249 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO))
250 return;
251 break; 267 break;
252 case DE: 268 case DE:
253 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) 269 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return;
254 return; 270 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return;
255 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) 271 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return;
256 return;
257 if (MatchKeywordStart(input, "delete", 2, Token::DELETE))
258 return;
259 break; 272 break;
260 case F: 273 case F:
261 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) 274 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return;
262 return; 275 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return;
263 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) 276 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return;
264 return; 277 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return;
265 if (MatchKeywordStart(input, "for", 1, Token::FOR))
266 return;
267 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION))
268 return;
269 break; 278 break;
270 case I: 279 case I:
271 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) 280 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return;
272 return; 281 if (MatchKeyword(input, 'n', IN, Token::IN)) return;
273 if (MatchKeyword(input, 'n', IN, Token::IN))
274 return;
275 break; 282 break;
276 case IN: 283 case IN:
277 token_ = Token::IDENTIFIER; 284 token_ = Token::IDENTIFIER;
278 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) { 285 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) {
279 return; 286 return;
280 } 287 }
281 break; 288 break;
282 case N: 289 case N:
283 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) 290 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return;
284 return; 291 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return;
285 if (MatchKeywordStart(input, "new", 1, Token::NEW)) 292 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return;
286 return;
287 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL))
288 return;
289 break; 293 break;
290 case T: 294 case T:
291 if (MatchState(input, 'h', TH)) 295 if (MatchState(input, 'h', TH)) return;
292 return; 296 if (MatchState(input, 'r', TR)) return;
293 if (MatchState(input, 'r', TR)) 297 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return;
294 return;
295 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF))
296 return;
297 break; 298 break;
298 case TH: 299 case TH:
299 if (MatchKeywordStart(input, "this", 2, Token::THIS)) 300 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return;
300 return; 301 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return;
301 if (MatchKeywordStart(input, "throw", 2, Token::THROW))
302 return;
303 break; 302 break;
304 case TR: 303 case TR:
305 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) 304 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return;
306 return; 305 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return;
307 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY))
308 return;
309 break; 306 break;
310 case V: 307 case V:
311 if (MatchKeywordStart(input, "var", 1, Token::VAR)) 308 if (MatchKeywordStart(input, "var", 1, Token::VAR)) return;
312 return; 309 if (MatchKeywordStart(input, "void", 1, Token::VOID)) return;
313 if (MatchKeywordStart(input, "void", 1, Token::VOID))
314 return;
315 break; 310 break;
316 case W: 311 case W:
317 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) 312 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return;
318 return; 313 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;
319 if (MatchKeywordStart(input, "with", 1, Token::WITH))
320 return;
321 break; 314 break;
322 default: 315 default:
323 UNREACHABLE(); 316 UNREACHABLE();
324 } 317 }
325 // On fallthrough, it's a failure. 318 // On fallthrough, it's a failure.
326 state_ = UNMATCHABLE; 319 state_ = UNMATCHABLE;
327 } 320 }
328 321
322
329 // ---------------------------------------------------------------------------- 323 // ----------------------------------------------------------------------------
330 // Scanner 324 // Scanner
331 325
332 Scanner::Scanner(bool pre) : 326 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { }
333 stack_overflow_(false), is_pre_parsing_(pre) { 327
334 }
335 328
336 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, 329 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
337 int position) { 330 int position) {
338 // Initialize the source buffer. 331 // Initialize the source buffer.
339 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { 332 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
340 two_byte_string_buffer_.Initialize( 333 two_byte_string_buffer_.Initialize(
341 Handle<ExternalTwoByteString>::cast( 334 Handle<ExternalTwoByteString>::cast(source));
342 source));
343 source_ = &two_byte_string_buffer_; 335 source_ = &two_byte_string_buffer_;
344 } else { 336 } else {
345 char_stream_buffer_.Initialize(source, stream); 337 char_stream_buffer_.Initialize(source, stream);
346 source_ = &char_stream_buffer_; 338 source_ = &char_stream_buffer_;
347 } 339 }
348 340
349 position_ = position; 341 position_ = position;
350 342
351 // Set c0_ (one character ahead) 343 // Set c0_ (one character ahead)
352 ASSERT(kCharacterLookaheadBufferSize == 1); 344 ASSERT(kCharacterLookaheadBufferSize == 1);
353 Advance(); 345 Advance();
354 346
355 // Skip initial whitespace allowing HTML comment ends just like 347 // Skip initial whitespace allowing HTML comment ends just like
356 // after a newline and scan first token. 348 // after a newline and scan first token.
357 has_line_terminator_before_next_ = true; 349 has_line_terminator_before_next_ = true;
358 SkipWhiteSpace(); 350 SkipWhiteSpace();
359 Scan(); 351 Scan();
360 } 352 }
361 353
354
362 Handle<String> Scanner::SubString(int start, int end) { 355 Handle<String> Scanner::SubString(int start, int end) {
363 return source_->SubString(start - position_, end - position_); 356 return source_->SubString(start - position_, end - position_);
364 } 357 }
365 358
359
366 Token::Value Scanner::Next() { 360 Token::Value Scanner::Next() {
367 // BUG 1215673: Find a thread safe way to set a stack limit in 361 // BUG 1215673: Find a thread safe way to set a stack limit in
368 // pre-parse mode. Otherwise, we cannot safely pre-parse from other 362 // pre-parse mode. Otherwise, we cannot safely pre-parse from other
369 // threads. 363 // threads.
370 current_ = next_; 364 current_ = next_;
371 // Check for stack-overflow before returning any tokens. 365 // Check for stack-overflow before returning any tokens.
372 StackLimitCheck check; 366 StackLimitCheck check;
373 if (check.HasOverflowed()) { 367 if (check.HasOverflowed()) {
374 stack_overflow_ = true; 368 stack_overflow_ = true;
375 next_.token = Token::ILLEGAL; 369 next_.token = Token::ILLEGAL;
376 next_.literal_buffer = NULL; 370 next_.literal_buffer = NULL;
377 } else { 371 } else {
378 Scan(); 372 Scan();
379 } 373 }
380 return current_.token; 374 return current_.token;
381 } 375 }
382 376
377
383 void Scanner::StartLiteral() { 378 void Scanner::StartLiteral() {
384 // Use the first buffer unless it's currently in use by the current_ token. 379 // Use the first buffer unless it's currently in use by the current_ token.
385 // In most cases we won't have two literals/identifiers in a row, so 380 // In most cases, we won't have two literals/identifiers in a row so
386 // the second buffer won't be used very often and is unlikely to grow much. 381 // the second buffer won't be used very often and is unlikely to grow much.
387 UTF8Buffer* free_buffer = 382 UTF8Buffer* free_buffer =
388 (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_ 383 (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_
389 : &literal_buffer_2_; 384 : &literal_buffer_2_;
390 next_.literal_buffer = free_buffer; 385 next_.literal_buffer = free_buffer;
391 free_buffer->Reset(); 386 free_buffer->Reset();
392 } 387 }
393 388
389
394 void Scanner::AddChar(uc32 c) { 390 void Scanner::AddChar(uc32 c) {
395 next_.literal_buffer->AddChar(c); 391 next_.literal_buffer->AddChar(c);
396 } 392 }
397 393
394
398 void Scanner::TerminateLiteral() { 395 void Scanner::TerminateLiteral() {
399 AddChar(0); 396 AddChar(0);
400 } 397 }
401 398
399
402 void Scanner::AddCharAdvance() { 400 void Scanner::AddCharAdvance() {
403 AddChar(c0_); 401 AddChar(c0_);
404 Advance(); 402 Advance();
405 } 403 }
406 404
405
407 static inline bool IsByteOrderMark(uc32 c) { 406 static inline bool IsByteOrderMark(uc32 c) {
408 // The Unicode value U+FFFE is guaranteed never to be assigned as a 407 // The Unicode value U+FFFE is guaranteed never to be assigned as a
409 // Unicode character; this implies that in a Unicode context the 408 // Unicode character; this implies that in a Unicode context the
410 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF 409 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
411 // character expressed in little-endian byte order (since it could 410 // character expressed in little-endian byte order (since it could
412 // not be a U+FFFE character expressed in big-endian byte 411 // not be a U+FFFE character expressed in big-endian byte
413 // order). Nevertheless, we check for it to be compatible with 412 // order). Nevertheless, we check for it to be compatible with
414 // Spidermonkey. 413 // Spidermonkey.
415 return c == 0xFEFF || c == 0xFFFE; 414 return c == 0xFEFF || c == 0xFFFE;
416 } 415 }
417 416
417
418 bool Scanner::SkipWhiteSpace() { 418 bool Scanner::SkipWhiteSpace() {
419 int start_position = source_pos(); 419 int start_position = source_pos();
420 420
421 while (true) { 421 while (true) {
422 // We treat byte-order marks (BOMs) as whitespace for better 422 // We treat byte-order marks (BOMs) as whitespace for better
423 // compatibility with Spidermonkey and other JavaScript engines. 423 // compatibility with Spidermonkey and other JavaScript engines.
424 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { 424 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
425 // IsWhiteSpace() includes line terminators! 425 // IsWhiteSpace() includes line terminators!
426 if (kIsLineTerminator.get(c0_)) { 426 if (kIsLineTerminator.get(c0_)) {
427 // Ignore line terminators, but remember them. This is necessary 427 // Ignore line terminators, but remember them. This is necessary
(...skipping 10 matching lines...) Expand all
438 if (c0_ == '-' && has_line_terminator_before_next_) { 438 if (c0_ == '-' && has_line_terminator_before_next_) {
439 Advance(); 439 Advance();
440 if (c0_ == '-') { 440 if (c0_ == '-') {
441 Advance(); 441 Advance();
442 if (c0_ == '>') { 442 if (c0_ == '>') {
443 // Treat the rest of the line as a comment. 443 // Treat the rest of the line as a comment.
444 SkipSingleLineComment(); 444 SkipSingleLineComment();
445 // Continue skipping white space after the comment. 445 // Continue skipping white space after the comment.
446 continue; 446 continue;
447 } 447 }
448 PushBack('-'); // undo Advance() 448 PushBack('-'); // undo Advance()
449 } 449 }
450 PushBack('-'); // undo Advance() 450 PushBack('-'); // undo Advance()
451 } 451 }
452 // Return whether or not we skipped any characters. 452 // Return whether or not we skipped any characters.
453 return source_pos() != start_position; 453 return source_pos() != start_position;
454 } 454 }
455 } 455 }
456 456
457
457 Token::Value Scanner::SkipSingleLineComment() { 458 Token::Value Scanner::SkipSingleLineComment() {
458 Advance(); 459 Advance();
459 460
460 // The line terminator at the end of the line is not considered 461 // The line terminator at the end of the line is not considered
461 // to be part of the single-line comment; it is recognized 462 // to be part of the single-line comment; it is recognized
462 // separately by the lexical grammar and becomes part of the 463 // separately by the lexical grammar and becomes part of the
463 // stream of input elements for the syntactic grammar (see 464 // stream of input elements for the syntactic grammar (see
464 // ECMA-262, section 7.4, page 12). 465 // ECMA-262, section 7.4, page 12).
465 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { 466 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
466 Advance(); 467 Advance();
467 } 468 }
468 469
469 return Token::WHITESPACE; 470 return Token::WHITESPACE;
470 } 471 }
471 472
473
472 Token::Value Scanner::SkipMultiLineComment() { 474 Token::Value Scanner::SkipMultiLineComment() {
473 ASSERT(c0_ == '*'); 475 ASSERT(c0_ == '*');
474 Advance(); 476 Advance();
475 477
476 while (c0_ >= 0) { 478 while (c0_ >= 0) {
477 char ch = c0_; 479 char ch = c0_;
478 Advance(); 480 Advance();
479 // If we have reached the end of the multi-line comment, we 481 // If we have reached the end of the multi-line comment, we
480 // consume the '/' and insert a whitespace. This way all 482 // consume the '/' and insert a whitespace. This way all
481 // multi-line comments are treated as whitespace - even the ones 483 // multi-line comments are treated as whitespace - even the ones
482 // containing line terminators. This contradicts ECMA-262, section 484 // containing line terminators. This contradicts ECMA-262, section
483 // 7.4, page 12, that says that multi-line comments containing 485 // 7.4, page 12, that says that multi-line comments containing
484 // line terminators should be treated as a line terminator, but it 486 // line terminators should be treated as a line terminator, but it
485 // matches the behaviour of SpiderMonkey and KJS. 487 // matches the behaviour of SpiderMonkey and KJS.
486 if (ch == '*' && c0_ == '/') { 488 if (ch == '*' && c0_ == '/') {
487 c0_ = ' '; 489 c0_ = ' ';
488 return Token::WHITESPACE; 490 return Token::WHITESPACE;
489 } 491 }
490 } 492 }
491 493
492 // Unterminated multi-line comment. 494 // Unterminated multi-line comment.
493 return Token::ILLEGAL; 495 return Token::ILLEGAL;
494 } 496 }
495 497
498
496 Token::Value Scanner::ScanHtmlComment() { 499 Token::Value Scanner::ScanHtmlComment() {
497 // Check for <!-- comments. 500 // Check for <!-- comments.
498 ASSERT(c0_ == '!'); 501 ASSERT(c0_ == '!');
499 Advance(); 502 Advance();
500 if (c0_ == '-') { 503 if (c0_ == '-') {
501 Advance(); 504 Advance();
502 if (c0_ == '-') 505 if (c0_ == '-') return SkipSingleLineComment();
503 return SkipSingleLineComment(); 506 PushBack('-'); // undo Advance()
504 PushBack('-'); // undo Advance()
505 } 507 }
506 PushBack('!'); // undo Advance() 508 PushBack('!'); // undo Advance()
507 ASSERT(c0_ == '!'); 509 ASSERT(c0_ == '!');
508 return Token::LT; 510 return Token::LT;
509 } 511 }
510 512
513
511 void Scanner::Scan() { 514 void Scanner::Scan() {
512 Token::Value token; 515 Token::Value token;
513 has_line_terminator_before_next_ = false; 516 has_line_terminator_before_next_ = false;
514 do { 517 do {
515 // Remember the position of the next token 518 // Remember the position of the next token
516 next_.location.beg_pos = source_pos(); 519 next_.location.beg_pos = source_pos();
517 520
518 switch (c0_) { 521 switch (c0_) {
519 case ' ': 522 case ' ':
520 case '\t': 523 case '\t':
521 Advance(); 524 Advance();
522 token = Token::WHITESPACE; 525 token = Token::WHITESPACE;
523 break; 526 break;
524 527
525 case '\n': 528 case '\n':
526 Advance(); 529 Advance();
527 has_line_terminator_before_next_ = true; 530 has_line_terminator_before_next_ = true;
528 token = Token::WHITESPACE; 531 token = Token::WHITESPACE;
529 break; 532 break;
530 533
531 case '"': 534 case '"': case '\'':
532 case '\'':
533 token = ScanString(); 535 token = ScanString();
534 break; 536 break;
535 537
536 case '<': 538 case '<':
537 // < <= << <<= <!-- 539 // < <= << <<= <!--
538 Advance(); 540 Advance();
539 if (c0_ == '=') { 541 if (c0_ == '=') {
540 token = Select(Token::LTE); 542 token = Select(Token::LTE);
541 } else if (c0_ == '<') { 543 } else if (c0_ == '<') {
542 token = Select('=', Token::ASSIGN_SHL, Token::SHL); 544 token = Select('=', Token::ASSIGN_SHL, Token::SHL);
(...skipping 198 matching lines...) Expand 10 before | Expand all | Expand 10 after
741 } 743 }
742 744
743 // Continue scanning for tokens as long as we're just skipping 745 // Continue scanning for tokens as long as we're just skipping
744 // whitespace. 746 // whitespace.
745 } while (token == Token::WHITESPACE); 747 } while (token == Token::WHITESPACE);
746 748
747 next_.location.end_pos = source_pos(); 749 next_.location.end_pos = source_pos();
748 next_.token = token; 750 next_.token = token;
749 } 751 }
750 752
753
751 void Scanner::SeekForward(int pos) { 754 void Scanner::SeekForward(int pos) {
752 source_->SeekForward(pos - 1); 755 source_->SeekForward(pos - 1);
753 Advance(); 756 Advance();
754 Scan(); 757 Scan();
755 } 758 }
756 759
760
757 uc32 Scanner::ScanHexEscape(uc32 c, int length) { 761 uc32 Scanner::ScanHexEscape(uc32 c, int length) {
758 ASSERT(length <= 4); // prevent overflow 762 ASSERT(length <= 4); // prevent overflow
759 763
760 uc32 digits[4]; 764 uc32 digits[4];
761 uc32 x = 0; 765 uc32 x = 0;
762 for (int i = 0; i < length; i++) { 766 for (int i = 0; i < length; i++) {
763 digits[i] = c0_; 767 digits[i] = c0_;
764 int d = HexValue(c0_); 768 int d = HexValue(c0_);
765 if (d < 0) { 769 if (d < 0) {
766 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes 770 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes
767 // should be illegal, but other JS VMs just return the 771 // should be illegal, but other JS VMs just return the
768 // non-escaped version of the original character. 772 // non-escaped version of the original character.
769 773
770 // Push back digits read, except the last one (in c0_). 774 // Push back digits read, except the last one (in c0_).
771 for (int j = i - 1; j >= 0; j--) { 775 for (int j = i-1; j >= 0; j--) {
772 PushBack(digits[j]); 776 PushBack(digits[j]);
773 } 777 }
774 // Notice: No handling of error - treat it as "\u"->"u". 778 // Notice: No handling of error - treat it as "\u"->"u".
775 return c; 779 return c;
776 } 780 }
777 x = x * 16 + d; 781 x = x * 16 + d;
778 Advance(); 782 Advance();
779 } 783 }
780 784
781 return x; 785 return x;
782 } 786 }
783 787
788
784 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of 789 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
785 // ECMA-262. Other JS VMs support them. 790 // ECMA-262. Other JS VMs support them.
786 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { 791 uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
787 uc32 x = c - '0'; 792 uc32 x = c - '0';
788 for (int i = 0; i < length; i++) { 793 for (int i = 0; i < length; i++) {
789 int d = c0_ - '0'; 794 int d = c0_ - '0';
790 if (d < 0 || d > 7) 795 if (d < 0 || d > 7) break;
791 break;
792 int nx = x * 8 + d; 796 int nx = x * 8 + d;
793 if (nx >= 256) 797 if (nx >= 256) break;
794 break;
795 x = nx; 798 x = nx;
796 Advance(); 799 Advance();
797 } 800 }
798 return x; 801 return x;
799 } 802 }
800 803
804
801 void Scanner::ScanEscape() { 805 void Scanner::ScanEscape() {
802 uc32 c = c0_; 806 uc32 c = c0_;
803 Advance(); 807 Advance();
804 808
805 // Skip escaped newlines. 809 // Skip escaped newlines.
806 if (kIsLineTerminator.get(c)) { 810 if (kIsLineTerminator.get(c)) {
807 // Allow CR+LF newlines in multiline string literals. 811 // Allow CR+LF newlines in multiline string literals.
808 if (IsCarriageReturn(c) && IsLineFeed(c0_)) 812 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
809 Advance();
810 // Allow LF+CR newlines in multiline string literals. 813 // Allow LF+CR newlines in multiline string literals.
811 if (IsLineFeed(c) && IsCarriageReturn(c0_)) 814 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
812 Advance();
813 return; 815 return;
814 } 816 }
815 817
816 switch (c) { 818 switch (c) {
817 case '\'': // fall through 819 case '\'': // fall through
818 case '"': // fall through 820 case '"' : // fall through
819 case '\\': 821 case '\\': break;
820 break; 822 case 'b' : c = '\b'; break;
821 case 'b': 823 case 'f' : c = '\f'; break;
822 c = '\b'; 824 case 'n' : c = '\n'; break;
823 break; 825 case 'r' : c = '\r'; break;
824 case 'f': 826 case 't' : c = '\t'; break;
825 c = '\f'; 827 case 'u' : c = ScanHexEscape(c, 4); break;
826 break; 828 case 'v' : c = '\v'; break;
827 case 'n': 829 case 'x' : c = ScanHexEscape(c, 2); break;
828 c = '\n'; 830 case '0' : // fall through
829 break; 831 case '1' : // fall through
830 case 'r': 832 case '2' : // fall through
831 c = '\r'; 833 case '3' : // fall through
832 break; 834 case '4' : // fall through
833 case 't': 835 case '5' : // fall through
834 c = '\t'; 836 case '6' : // fall through
835 break; 837 case '7' : c = ScanOctalEscape(c, 2); break;
836 case 'u':
837 c = ScanHexEscape(c, 4);
838 break;
839 case 'v':
840 c = '\v';
841 break;
842 case 'x':
843 c = ScanHexEscape(c, 2);
844 break;
845 case '0': // fall through
846 case '1': // fall through
847 case '2': // fall through
848 case '3': // fall through
849 case '4': // fall through
850 case '5': // fall through
851 case '6': // fall through
852 case '7':
853 c = ScanOctalEscape(c, 2);
854 break;
855 } 838 }
856 839
857 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these 840 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
858 // should be illegal, but they are commonly handled 841 // should be illegal, but they are commonly handled
859 // as non-escaped characters by JS VMs. 842 // as non-escaped characters by JS VMs.
860 AddChar(c); 843 AddChar(c);
861 } 844 }
862 845
846
863 Token::Value Scanner::ScanString() { 847 Token::Value Scanner::ScanString() {
864 uc32 quote = c0_; 848 uc32 quote = c0_;
865 Advance(); // consume quote 849 Advance(); // consume quote
866 850
867 StartLiteral(); 851 StartLiteral();
868 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { 852 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
869 uc32 c = c0_; 853 uc32 c = c0_;
870 Advance(); 854 Advance();
871 if (c == '\\') { 855 if (c == '\\') {
872 if (c0_ < 0) 856 if (c0_ < 0) return Token::ILLEGAL;
873 return Token::ILLEGAL;
874 ScanEscape(); 857 ScanEscape();
875 } else { 858 } else {
876 AddChar(c); 859 AddChar(c);
877 } 860 }
878 } 861 }
879 if (c0_ != quote) { 862 if (c0_ != quote) {
880 return Token::ILLEGAL; 863 return Token::ILLEGAL;
881 } 864 }
882 TerminateLiteral(); 865 TerminateLiteral();
883 866
884 Advance(); // consume quote 867 Advance(); // consume quote
885 return Token::STRING; 868 return Token::STRING;
886 } 869 }
887 870
871
888 Token::Value Scanner::Select(Token::Value tok) { 872 Token::Value Scanner::Select(Token::Value tok) {
889 Advance(); 873 Advance();
890 return tok; 874 return tok;
891 } 875 }
892 876
877
893 Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) { 878 Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) {
894 Advance(); 879 Advance();
895 if (c0_ == next) { 880 if (c0_ == next) {
896 Advance(); 881 Advance();
897 return then; 882 return then;
898 } else { 883 } else {
899 return else_; 884 return else_;
900 } 885 }
901 } 886 }
902 887
888
903 // Consumes consecutive decimal digits (zero or more) via AddCharAdvance(); returns nothing. 889 // Consumes consecutive decimal digits (zero or more) via AddCharAdvance(); returns nothing.
904 void Scanner::ScanDecimalDigits() { 890 void Scanner::ScanDecimalDigits() {
905 while (IsDecimalDigit(c0_)) 891 while (IsDecimalDigit(c0_))
906 AddCharAdvance(); 892 AddCharAdvance();
907 } 893 }
908 894
895
909 Token::Value Scanner::ScanNumber(bool seen_period) { 896 Token::Value Scanner::ScanNumber(bool seen_period) {
910 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction 897 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
911 898
912 enum { 899 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
913 DECIMAL, HEX, OCTAL
914 } kind = DECIMAL;
915 900
916 StartLiteral(); 901 StartLiteral();
917 if (seen_period) { 902 if (seen_period) {
918 // we have already seen a decimal point of the float 903 // we have already seen a decimal point of the float
919 AddChar('.'); 904 AddChar('.');
920 ScanDecimalDigits(); // we know we have at least one digit 905 ScanDecimalDigits(); // we know we have at least one digit
921 906
922 } else { 907 } else {
923 // if the first character is '0' we must check for octals and hex 908 // if the first character is '0' we must check for octals and hex
924 if (c0_ == '0') { 909 if (c0_ == '0') {
925 AddCharAdvance(); 910 AddCharAdvance();
926 911
927 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number 912 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number
928 if (c0_ == 'x' || c0_ == 'X') { 913 if (c0_ == 'x' || c0_ == 'X') {
929 // hex number 914 // hex number
930 kind = HEX; 915 kind = HEX;
931 AddCharAdvance(); 916 AddCharAdvance();
932 if (!IsHexDigit(c0_)) 917 if (!IsHexDigit(c0_))
933 // we must have at least one hex digit after 'x'/'X' 918 // we must have at least one hex digit after 'x'/'X'
934 return Token::ILLEGAL; 919 return Token::ILLEGAL;
935 while (IsHexDigit(c0_)) 920 while (IsHexDigit(c0_))
936 AddCharAdvance(); 921 AddCharAdvance();
937 922
938 } else if ('0' <= c0_ && c0_ <= '7') { 923 } else if ('0' <= c0_ && c0_ <= '7') {
939 // (possible) octal number 924 // (possible) octal number
940 kind = OCTAL; 925 kind = OCTAL;
941 while (true) { 926 while (true) {
942 if (c0_ == '8' || c0_ == '9') { 927 if (c0_ == '8' || c0_ == '9') {
943 kind = DECIMAL; 928 kind = DECIMAL;
944 break; 929 break;
945 } 930 }
946 if (c0_ < '0' || '7' < c0_) 931 if (c0_ < '0' || '7' < c0_) break;
947 break;
948 AddCharAdvance(); 932 AddCharAdvance();
949 } 933 }
950 } 934 }
951 } 935 }
952 936
953 // Parse decimal digits and allow trailing fractional part. 937 // Parse decimal digits and allow trailing fractional part.
954 if (kind == DECIMAL) { 938 if (kind == DECIMAL) {
955 ScanDecimalDigits(); // optional 939 ScanDecimalDigits(); // optional
956 if (c0_ == '.') { 940 if (c0_ == '.') {
957 AddCharAdvance(); 941 AddCharAdvance();
958 ScanDecimalDigits(); // optional 942 ScanDecimalDigits(); // optional
959 } 943 }
960 } 944 }
961 } 945 }
962 946
963 // scan exponent, if any 947 // scan exponent, if any
964 if (c0_ == 'e' || c0_ == 'E') { 948 if (c0_ == 'e' || c0_ == 'E') {
965 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number 949 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
966 if (kind == OCTAL) 950 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed
967 return Token::ILLEGAL; // no exponent for octals allowed
968 // scan exponent 951 // scan exponent
969 AddCharAdvance(); 952 AddCharAdvance();
970 if (c0_ == '+' || c0_ == '-') 953 if (c0_ == '+' || c0_ == '-')
971 AddCharAdvance(); 954 AddCharAdvance();
972 if (!IsDecimalDigit(c0_)) 955 if (!IsDecimalDigit(c0_))
973 // we must have at least one decimal digit after 'e'/'E' 956 // we must have at least one decimal digit after 'e'/'E'
974 return Token::ILLEGAL; 957 return Token::ILLEGAL;
975 ScanDecimalDigits(); 958 ScanDecimalDigits();
976 } 959 }
977 TerminateLiteral(); 960 TerminateLiteral();
978 961
979 // The source character immediately following a numeric literal must 962 // The source character immediately following a numeric literal must
980 // not be an identifier start or a decimal digit; see ECMA-262 963 // not be an identifier start or a decimal digit; see ECMA-262
981 // section 7.8.3, page 17 (note that we read only one decimal digit 964 // section 7.8.3, page 17 (note that we read only one decimal digit
982 // if the value is 0). 965 // if the value is 0).
983 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) 966 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))
984 return Token::ILLEGAL; 967 return Token::ILLEGAL;
985 968
986 return Token::NUMBER; 969 return Token::NUMBER;
987 } 970 }
988 971
972
989 uc32 Scanner::ScanIdentifierUnicodeEscape() { 973 uc32 Scanner::ScanIdentifierUnicodeEscape() {
990 Advance(); 974 Advance();
991 if (c0_ != 'u') 975 if (c0_ != 'u') return unibrow::Utf8::kBadChar;
992 return unibrow::Utf8::kBadChar;
993 Advance(); 976 Advance();
994 uc32 c = ScanHexEscape('u', 4); 977 uc32 c = ScanHexEscape('u', 4);
995 // We do not allow a unicode escape sequence to start another 978 // We do not allow a unicode escape sequence to start another
996 // unicode escape sequence. 979 // unicode escape sequence.
997 if (c == '\\') 980 if (c == '\\') return unibrow::Utf8::kBadChar;
998 return unibrow::Utf8::kBadChar;
999 return c; 981 return c;
1000 } 982 }
1001 983
984
1002 Token::Value Scanner::ScanIdentifier() { 985 Token::Value Scanner::ScanIdentifier() {
1003 ASSERT(kIsIdentifierStart.get(c0_)); 986 ASSERT(kIsIdentifierStart.get(c0_));
1004 987
1005 StartLiteral(); 988 StartLiteral();
1006 KeywordMatcher keyword_match; 989 KeywordMatcher keyword_match;
1007 990
1008 // Scan identifier start character. 991 // Scan identifier start character.
1009 if (c0_ == '\\') { 992 if (c0_ == '\\') {
1010 uc32 c = ScanIdentifierUnicodeEscape(); 993 uc32 c = ScanIdentifierUnicodeEscape();
1011 // Only allow legal identifier start characters. 994 // Only allow legal identifier start characters.
1012 if (!kIsIdentifierStart.get(c)) 995 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;
1013 return Token::ILLEGAL;
1014 AddChar(c); 996 AddChar(c);
1015 keyword_match.Fail(); 997 keyword_match.Fail();
1016 } else { 998 } else {
1017 AddChar(c0_); 999 AddChar(c0_);
1018 keyword_match.AddChar(c0_); 1000 keyword_match.AddChar(c0_);
1019 Advance(); 1001 Advance();
1020 } 1002 }
1021 1003
1022 // Scan the rest of the identifier characters. 1004 // Scan the rest of the identifier characters.
1023 while (kIsIdentifierPart.get(c0_)) { 1005 while (kIsIdentifierPart.get(c0_)) {
1024 if (c0_ == '\\') { 1006 if (c0_ == '\\') {
1025 uc32 c = ScanIdentifierUnicodeEscape(); 1007 uc32 c = ScanIdentifierUnicodeEscape();
1026 // Only allow legal identifier part characters. 1008 // Only allow legal identifier part characters.
1027 if (!kIsIdentifierPart.get(c)) 1009 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;
1028 return Token::ILLEGAL;
1029 AddChar(c); 1010 AddChar(c);
1030 keyword_match.Fail(); 1011 keyword_match.Fail();
1031 } else { 1012 } else {
1032 AddChar(c0_); 1013 AddChar(c0_);
1033 keyword_match.AddChar(c0_); 1014 keyword_match.AddChar(c0_);
1034 Advance(); 1015 Advance();
1035 } 1016 }
1036 } 1017 }
1037 TerminateLiteral(); 1018 TerminateLiteral();
1038 1019
1039 return keyword_match.token(); 1020 return keyword_match.token();
1040 } 1021 }
1041 1022
1023
1024
1042 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { 1025 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {
1043 // Checks whether the buffer contains an identifier (no escape). 1026 // Checks whether the buffer contains an identifier (no escape).
1044 if (!buffer->has_more()) 1027 if (!buffer->has_more()) return false;
1045 return false; 1028 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;
1046 if (!kIsIdentifierStart.get(buffer->GetNext()))
1047 return false;
1048 while (buffer->has_more()) { 1029 while (buffer->has_more()) {
1049 if (!kIsIdentifierPart.get(buffer->GetNext())) 1030 if (!kIsIdentifierPart.get(buffer->GetNext())) return false;
1050 return false;
1051 } 1031 }
1052 return true; 1032 return true;
1053 } 1033 }
1054 1034
1035
1055 bool Scanner::ScanRegExpPattern(bool seen_equal) { 1036 bool Scanner::ScanRegExpPattern(bool seen_equal) {
1056 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags 1037 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
1057 bool in_character_class = false; 1038 bool in_character_class = false;
1058 1039
1059 // Previous token is either '/' or '/=', in the second case, the 1040 // Previous token is either '/' or '/=', in the second case, the
1060 // pattern starts at =. 1041 // pattern starts at =.
1061 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); 1042 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
1062 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); 1043 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
1063 1044
1064 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 1045 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1065 // the scanner should pass uninterpreted bodies to the RegExp 1046 // the scanner should pass uninterpreted bodies to the RegExp
1066 // constructor. 1047 // constructor.
1067 StartLiteral(); 1048 StartLiteral();
1068 if (seen_equal) 1049 if (seen_equal)
1069 AddChar('='); 1050 AddChar('=');
1070 1051
1071 while (c0_ != '/' || in_character_class) { 1052 while (c0_ != '/' || in_character_class) {
1072 if (kIsLineTerminator.get(c0_) || c0_ < 0) 1053 if (kIsLineTerminator.get(c0_) || c0_ < 0)
1073 return false; 1054 return false;
1074 if (c0_ == '\\') { // escaped character 1055 if (c0_ == '\\') { // escaped character
1075 AddCharAdvance(); 1056 AddCharAdvance();
1076 if (kIsLineTerminator.get(c0_) || c0_ < 0) 1057 if (kIsLineTerminator.get(c0_) || c0_ < 0)
1077 return false; 1058 return false;
1078 AddCharAdvance(); 1059 AddCharAdvance();
1079 } else { // unescaped character 1060 } else { // unescaped character
1080 if (c0_ == '[') 1061 if (c0_ == '[')
1081 in_character_class = true; 1062 in_character_class = true;
1082 if (c0_ == ']') 1063 if (c0_ == ']')
1083 in_character_class = false; 1064 in_character_class = false;
1084 AddCharAdvance(); 1065 AddCharAdvance();
1085 } 1066 }
1086 } 1067 }
1087 Advance(); // consume '/' 1068 Advance(); // consume '/'
1088 1069
1089 TerminateLiteral(); 1070 TerminateLiteral();
1090 1071
1091 return true; 1072 return true;
1092 } 1073 }
1093 1074
1094 bool Scanner::ScanRegExpFlags() { 1075 bool Scanner::ScanRegExpFlags() {
1095 // Scan regular expression flags. 1076 // Scan regular expression flags.
1096 StartLiteral(); 1077 StartLiteral();
1097 while (kIsIdentifierPart.get(c0_)) { 1078 while (kIsIdentifierPart.get(c0_)) {
1098 if (c0_ == '\\') { 1079 if (c0_ == '\\') {
1099 uc32 c = ScanIdentifierUnicodeEscape(); 1080 uc32 c = ScanIdentifierUnicodeEscape();
1100 if (c != static_cast<uc32> (unibrow::Utf8::kBadChar)) { 1081 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
1101 // We allow any escaped character, unlike the restriction on 1082 // We allow any escaped character, unlike the restriction on
1102 // IdentifierPart when it is used to build an IdentifierName. 1083 // IdentifierPart when it is used to build an IdentifierName.
1103 AddChar(c); 1084 AddChar(c);
1104 continue; 1085 continue;
1105 } 1086 }
1106 } 1087 }
1107 AddCharAdvance(); 1088 AddCharAdvance();
1108 } 1089 }
1109 TerminateLiteral(); 1090 TerminateLiteral();
1110 1091
1111 next_.location.end_pos = source_pos() - 1; 1092 next_.location.end_pos = source_pos() - 1;
1112 return true; 1093 return true;
1113 } 1094 }
1114 1095
1115 } 1096 } } // namespace v8::internal
1116 } // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698