OLD | NEW |
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 24 matching lines...) Expand all Loading... |
35 | 35 |
36 // ---------------------------------------------------------------------------- | 36 // ---------------------------------------------------------------------------- |
37 // Character predicates | 37 // Character predicates |
38 | 38 |
39 | 39 |
40 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart; | 40 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart; |
41 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart; | 41 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart; |
42 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; | 42 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; |
43 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; | 43 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; |
44 | 44 |
| 45 |
45 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; | 46 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; |
46 | 47 |
| 48 |
47 // ---------------------------------------------------------------------------- | 49 // ---------------------------------------------------------------------------- |
48 // UTF8Buffer | 50 // UTF8Buffer |
49 | 51 |
50 UTF8Buffer::UTF8Buffer() : | 52 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { } |
51 data_(NULL), limit_(NULL) { | 53 |
52 } | |
53 | 54 |
54 UTF8Buffer::~UTF8Buffer() { | 55 UTF8Buffer::~UTF8Buffer() { |
55 DeleteArray(data_); | 56 DeleteArray(data_); |
56 } | 57 } |
57 | 58 |
| 59 |
58 void UTF8Buffer::AddCharSlow(uc32 c) { | 60 void UTF8Buffer::AddCharSlow(uc32 c) { |
59 static const int kCapacityGrowthLimit = 1 * MB; | 61 static const int kCapacityGrowthLimit = 1 * MB; |
60 if (cursor_ > limit_) { | 62 if (cursor_ > limit_) { |
61 int old_capacity = Capacity(); | 63 int old_capacity = Capacity(); |
62 int old_position = pos(); | 64 int old_position = pos(); |
63 int new_capacity = Min(old_capacity * 3, old_capacity | 65 int new_capacity = |
64 + kCapacityGrowthLimit); | 66 Min(old_capacity * 3, old_capacity + kCapacityGrowthLimit); |
65 char* new_data = NewArray<char> (new_capacity); | 67 char* new_data = NewArray<char>(new_capacity); |
66 memcpy(new_data, data_, old_position); | 68 memcpy(new_data, data_, old_position); |
67 DeleteArray(data_); | 69 DeleteArray(data_); |
68 data_ = new_data; | 70 data_ = new_data; |
69 cursor_ = new_data + old_position; | 71 cursor_ = new_data + old_position; |
70 limit_ = ComputeLimit(new_data, new_capacity); | 72 limit_ = ComputeLimit(new_data, new_capacity); |
71 ASSERT(Capacity() == new_capacity && pos() == old_position); | 73 ASSERT(Capacity() == new_capacity && pos() == old_position); |
72 } | 74 } |
73 if (static_cast<unsigned> (c) <= unibrow::Utf8::kMaxOneByteChar) { | 75 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { |
74 *cursor_++ = c; // Common case: 7-bit ASCII. | 76 *cursor_++ = c; // Common case: 7-bit ASCII. |
75 } else { | 77 } else { |
76 cursor_ += unibrow::Utf8::Encode(cursor_, c); | 78 cursor_ += unibrow::Utf8::Encode(cursor_, c); |
77 } | 79 } |
78 ASSERT(pos() <= Capacity()); | 80 ASSERT(pos() <= Capacity()); |
79 } | 81 } |
80 | 82 |
| 83 |
81 // ---------------------------------------------------------------------------- | 84 // ---------------------------------------------------------------------------- |
82 // UTF16Buffer | 85 // UTF16Buffer |
83 | 86 |
84 | 87 |
85 UTF16Buffer::UTF16Buffer() : | 88 UTF16Buffer::UTF16Buffer() |
86 pos_(0), size_(0) { | 89 : pos_(0), size_(0) { } |
87 } | 90 |
88 | 91 |
89 Handle<String> UTF16Buffer::SubString(int start, int end) { | 92 Handle<String> UTF16Buffer::SubString(int start, int end) { |
90 return internal::SubString(data_, start, end); | 93 return internal::SubString(data_, start, end); |
91 } | 94 } |
92 | 95 |
| 96 |
93 // CharacterStreamUTF16Buffer | 97 // CharacterStreamUTF16Buffer |
94 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() : | 98 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() |
95 pushback_buffer_(0), last_(0), stream_(NULL) { | 99 : pushback_buffer_(0), last_(0), stream_(NULL) { } |
96 } | 100 |
97 | 101 |
98 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data, | 102 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data, |
99 unibrow::CharacterStream* input) { | 103 unibrow::CharacterStream* input) { |
100 data_ = data; | 104 data_ = data; |
101 pos_ = 0; | 105 pos_ = 0; |
102 stream_ = input; | 106 stream_ = input; |
103 } | 107 } |
104 | 108 |
| 109 |
105 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) { | 110 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) { |
106 pushback_buffer()->Add(last_); | 111 pushback_buffer()->Add(last_); |
107 last_ = ch; | 112 last_ = ch; |
108 pos_--; | 113 pos_--; |
109 } | 114 } |
110 | 115 |
| 116 |
111 uc32 CharacterStreamUTF16Buffer::Advance() { | 117 uc32 CharacterStreamUTF16Buffer::Advance() { |
112 // NOTE: It is of importance to Persian / Farsi resources that we do | 118 // NOTE: It is of importance to Persian / Farsi resources that we do |
113 // *not* strip format control characters in the scanner; see | 119 // *not* strip format control characters in the scanner; see |
114 // | 120 // |
115 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152 | 121 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152 |
116 // | 122 // |
117 // So, even though ECMA-262, section 7.1, page 11, dictates that we | 123 // So, even though ECMA-262, section 7.1, page 11, dictates that we |
118 // must remove Unicode format-control characters, we do not. This is | 124 // must remove Unicode format-control characters, we do not. This is |
119 // in line with how IE and SpiderMonkey handles it. | 125 // in line with how IE and SpiderMonkey handles it. |
120 if (!pushback_buffer()->is_empty()) { | 126 if (!pushback_buffer()->is_empty()) { |
121 pos_++; | 127 pos_++; |
122 return last_ = pushback_buffer()->RemoveLast(); | 128 return last_ = pushback_buffer()->RemoveLast(); |
123 } else if (stream_->has_more()) { | 129 } else if (stream_->has_more()) { |
124 pos_++; | 130 pos_++; |
125 uc32 next = stream_->GetNext(); | 131 uc32 next = stream_->GetNext(); |
126 return last_ = next; | 132 return last_ = next; |
127 } else { | 133 } else { |
128 // Note: currently the following increment is necessary to avoid a | 134 // Note: currently the following increment is necessary to avoid a |
129 // test-parser problem! | 135 // test-parser problem! |
130 pos_++; | 136 pos_++; |
131 return last_ = static_cast<uc32> (-1); | 137 return last_ = static_cast<uc32>(-1); |
132 } | 138 } |
133 } | 139 } |
134 | 140 |
| 141 |
135 void CharacterStreamUTF16Buffer::SeekForward(int pos) { | 142 void CharacterStreamUTF16Buffer::SeekForward(int pos) { |
136 pos_ = pos; | 143 pos_ = pos; |
137 ASSERT(pushback_buffer()->is_empty()); | 144 ASSERT(pushback_buffer()->is_empty()); |
138 stream_->Seek(pos); | 145 stream_->Seek(pos); |
139 } | 146 } |
140 | 147 |
| 148 |
141 // TwoByteStringUTF16Buffer | 149 // TwoByteStringUTF16Buffer |
142 TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer() : | 150 TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer() |
143 raw_data_(NULL) { | 151 : raw_data_(NULL) { } |
144 } | |
145 | 152 |
146 void TwoByteStringUTF16Buffer::Initialize(Handle<ExternalTwoByteString> data) { | 153 |
| 154 void TwoByteStringUTF16Buffer::Initialize( |
| 155 Handle<ExternalTwoByteString> data) { |
147 ASSERT(!data.is_null()); | 156 ASSERT(!data.is_null()); |
148 | 157 |
149 data_ = data; | 158 data_ = data; |
150 pos_ = 0; | 159 pos_ = 0; |
151 | 160 |
152 raw_data_ = data->resource()->data(); | 161 raw_data_ = data->resource()->data(); |
153 size_ = data->length(); | 162 size_ = data->length(); |
154 } | 163 } |
155 | 164 |
| 165 |
156 uc32 TwoByteStringUTF16Buffer::Advance() { | 166 uc32 TwoByteStringUTF16Buffer::Advance() { |
157 if (pos_ < size_) { | 167 if (pos_ < size_) { |
158 return raw_data_[pos_++]; | 168 return raw_data_[pos_++]; |
159 } else { | 169 } else { |
160 // note: currently the following increment is necessary to avoid a | 170 // note: currently the following increment is necessary to avoid a |
161 // test-parser problem! | 171 // test-parser problem! |
162 pos_++; | 172 pos_++; |
163 return static_cast<uc32> (-1); | 173 return static_cast<uc32>(-1); |
164 } | 174 } |
165 } | 175 } |
166 | 176 |
| 177 |
167 void TwoByteStringUTF16Buffer::PushBack(uc32 ch) { | 178 void TwoByteStringUTF16Buffer::PushBack(uc32 ch) { |
168 pos_--; | 179 pos_--; |
169 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); | 180 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); |
170 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); | 181 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); |
171 } | 182 } |
172 | 183 |
| 184 |
173 void TwoByteStringUTF16Buffer::SeekForward(int pos) { | 185 void TwoByteStringUTF16Buffer::SeekForward(int pos) { |
174 pos_ = pos; | 186 pos_ = pos; |
175 } | 187 } |
176 | 188 |
| 189 |
177 // ---------------------------------------------------------------------------- | 190 // ---------------------------------------------------------------------------- |
178 // Keyword Matcher | 191 // Keyword Matcher |
179 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { { "break", | 192 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { |
180 KEYWORD_PREFIX, Token::BREAK }, { NULL, C, Token::ILLEGAL }, { NULL, D, | 193 { "break", KEYWORD_PREFIX, Token::BREAK }, |
181 Token::ILLEGAL }, { "else", KEYWORD_PREFIX, Token::ELSE }, { NULL, F, | 194 { NULL, C, Token::ILLEGAL }, |
182 Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, | 195 { NULL, D, Token::ILLEGAL }, |
183 UNMATCHABLE, Token::ILLEGAL }, { NULL, I, Token::ILLEGAL }, { NULL, | 196 { "else", KEYWORD_PREFIX, Token::ELSE }, |
184 UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { | 197 { NULL, F, Token::ILLEGAL }, |
185 NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, | 198 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
186 { NULL, N, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, | 199 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
187 UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { | 200 { NULL, I, Token::ILLEGAL }, |
188 "return", KEYWORD_PREFIX, Token::RETURN }, { "switch", KEYWORD_PREFIX, | 201 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
189 Token::SWITCH }, { NULL, T, Token::ILLEGAL }, { NULL, UNMATCHABLE, | 202 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
190 Token::ILLEGAL }, { NULL, V, Token::ILLEGAL }, { NULL, W, | 203 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
191 Token::ILLEGAL } }; | 204 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 205 { NULL, N, Token::ILLEGAL }, |
| 206 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 207 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 208 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 209 { "return", KEYWORD_PREFIX, Token::RETURN }, |
| 210 { "switch", KEYWORD_PREFIX, Token::SWITCH }, |
| 211 { NULL, T, Token::ILLEGAL }, |
| 212 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 213 { NULL, V, Token::ILLEGAL }, |
| 214 { NULL, W, Token::ILLEGAL } |
| 215 }; |
| 216 |
192 | 217 |
193 void KeywordMatcher::Step(uc32 input) { | 218 void KeywordMatcher::Step(uc32 input) { |
194 switch (state_) { | 219 switch (state_) { |
195 case INITIAL: { | 220 case INITIAL: { |
196 // matching the first character is the only state with significant fanout. | 221 // matching the first character is the only state with significant fanout. |
197 // Match only lower-case letters in range 'b'..'w'. | 222 // Match only lower-case letters in range 'b'..'w'. |
198 unsigned int offset = input - kFirstCharRangeMin; | 223 unsigned int offset = input - kFirstCharRangeMin; |
199 if (offset < kFirstCharRangeLength) { | 224 if (offset < kFirstCharRangeLength) { |
200 state_ = first_states_[offset].state; | 225 state_ = first_states_[offset].state; |
201 if (state_ == KEYWORD_PREFIX) { | 226 if (state_ == KEYWORD_PREFIX) { |
(...skipping 13 matching lines...) Expand all Loading... |
215 state_ = KEYWORD_MATCHED; | 240 state_ = KEYWORD_MATCHED; |
216 token_ = keyword_token_; | 241 token_ = keyword_token_; |
217 } | 242 } |
218 return; | 243 return; |
219 } | 244 } |
220 break; | 245 break; |
221 case KEYWORD_MATCHED: | 246 case KEYWORD_MATCHED: |
222 token_ = Token::IDENTIFIER; | 247 token_ = Token::IDENTIFIER; |
223 break; | 248 break; |
224 case C: | 249 case C: |
225 if (MatchState(input, 'a', CA)) | 250 if (MatchState(input, 'a', CA)) return; |
226 return; | 251 if (MatchState(input, 'o', CO)) return; |
227 if (MatchState(input, 'o', CO)) | |
228 return; | |
229 break; | 252 break; |
230 case CA: | 253 case CA: |
231 if (MatchKeywordStart(input, "case", 2, Token::CASE)) | 254 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return; |
232 return; | 255 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return; |
233 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) | |
234 return; | |
235 break; | 256 break; |
236 case CO: | 257 case CO: |
237 if (MatchState(input, 'n', CON)) | 258 if (MatchState(input, 'n', CON)) return; |
238 return; | |
239 break; | 259 break; |
240 case CON: | 260 case CON: |
241 if (MatchKeywordStart(input, "const", 3, Token::CONST)) | 261 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return; |
242 return; | 262 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return; |
243 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) | |
244 return; | |
245 break; | 263 break; |
246 case D: | 264 case D: |
247 if (MatchState(input, 'e', DE)) | 265 if (MatchState(input, 'e', DE)) return; |
248 return; | 266 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return; |
249 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) | |
250 return; | |
251 break; | 267 break; |
252 case DE: | 268 case DE: |
253 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) | 269 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return; |
254 return; | 270 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return; |
255 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) | 271 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return; |
256 return; | |
257 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) | |
258 return; | |
259 break; | 272 break; |
260 case F: | 273 case F: |
261 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) | 274 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return; |
262 return; | 275 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return; |
263 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) | 276 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return; |
264 return; | 277 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return; |
265 if (MatchKeywordStart(input, "for", 1, Token::FOR)) | |
266 return; | |
267 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) | |
268 return; | |
269 break; | 278 break; |
270 case I: | 279 case I: |
271 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) | 280 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return; |
272 return; | 281 if (MatchKeyword(input, 'n', IN, Token::IN)) return; |
273 if (MatchKeyword(input, 'n', IN, Token::IN)) | |
274 return; | |
275 break; | 282 break; |
276 case IN: | 283 case IN: |
277 token_ = Token::IDENTIFIER; | 284 token_ = Token::IDENTIFIER; |
278 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) { | 285 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) { |
279 return; | 286 return; |
280 } | 287 } |
281 break; | 288 break; |
282 case N: | 289 case N: |
283 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) | 290 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return; |
284 return; | 291 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return; |
285 if (MatchKeywordStart(input, "new", 1, Token::NEW)) | 292 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return; |
286 return; | |
287 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) | |
288 return; | |
289 break; | 293 break; |
290 case T: | 294 case T: |
291 if (MatchState(input, 'h', TH)) | 295 if (MatchState(input, 'h', TH)) return; |
292 return; | 296 if (MatchState(input, 'r', TR)) return; |
293 if (MatchState(input, 'r', TR)) | 297 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return; |
294 return; | |
295 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) | |
296 return; | |
297 break; | 298 break; |
298 case TH: | 299 case TH: |
299 if (MatchKeywordStart(input, "this", 2, Token::THIS)) | 300 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return; |
300 return; | 301 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return; |
301 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) | |
302 return; | |
303 break; | 302 break; |
304 case TR: | 303 case TR: |
305 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) | 304 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return; |
306 return; | 305 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return; |
307 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) | |
308 return; | |
309 break; | 306 break; |
310 case V: | 307 case V: |
311 if (MatchKeywordStart(input, "var", 1, Token::VAR)) | 308 if (MatchKeywordStart(input, "var", 1, Token::VAR)) return; |
312 return; | 309 if (MatchKeywordStart(input, "void", 1, Token::VOID)) return; |
313 if (MatchKeywordStart(input, "void", 1, Token::VOID)) | |
314 return; | |
315 break; | 310 break; |
316 case W: | 311 case W: |
317 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) | 312 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return; |
318 return; | 313 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; |
319 if (MatchKeywordStart(input, "with", 1, Token::WITH)) | |
320 return; | |
321 break; | 314 break; |
322 default: | 315 default: |
323 UNREACHABLE(); | 316 UNREACHABLE(); |
324 } | 317 } |
325 // On fallthrough, it's a failure. | 318 // On fallthrough, it's a failure. |
326 state_ = UNMATCHABLE; | 319 state_ = UNMATCHABLE; |
327 } | 320 } |
328 | 321 |
| 322 |
329 // ---------------------------------------------------------------------------- | 323 // ---------------------------------------------------------------------------- |
330 // Scanner | 324 // Scanner |
331 | 325 |
332 Scanner::Scanner(bool pre) : | 326 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { } |
333 stack_overflow_(false), is_pre_parsing_(pre) { | 327 |
334 } | |
335 | 328 |
336 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, | 329 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, |
337 int position) { | 330 int position) { |
338 // Initialize the source buffer. | 331 // Initialize the source buffer. |
339 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { | 332 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { |
340 two_byte_string_buffer_.Initialize( | 333 two_byte_string_buffer_.Initialize( |
341 Handle<ExternalTwoByteString>::cast( | 334 Handle<ExternalTwoByteString>::cast(source)); |
342 source)); | |
343 source_ = &two_byte_string_buffer_; | 335 source_ = &two_byte_string_buffer_; |
344 } else { | 336 } else { |
345 char_stream_buffer_.Initialize(source, stream); | 337 char_stream_buffer_.Initialize(source, stream); |
346 source_ = &char_stream_buffer_; | 338 source_ = &char_stream_buffer_; |
347 } | 339 } |
348 | 340 |
349 position_ = position; | 341 position_ = position; |
350 | 342 |
351 // Set c0_ (one character ahead) | 343 // Set c0_ (one character ahead) |
352 ASSERT(kCharacterLookaheadBufferSize == 1); | 344 ASSERT(kCharacterLookaheadBufferSize == 1); |
353 Advance(); | 345 Advance(); |
354 | 346 |
355 // Skip initial whitespace allowing HTML comment ends just like | 347 // Skip initial whitespace allowing HTML comment ends just like |
356 // after a newline and scan first token. | 348 // after a newline and scan first token. |
357 has_line_terminator_before_next_ = true; | 349 has_line_terminator_before_next_ = true; |
358 SkipWhiteSpace(); | 350 SkipWhiteSpace(); |
359 Scan(); | 351 Scan(); |
360 } | 352 } |
361 | 353 |
| 354 |
362 Handle<String> Scanner::SubString(int start, int end) { | 355 Handle<String> Scanner::SubString(int start, int end) { |
363 return source_->SubString(start - position_, end - position_); | 356 return source_->SubString(start - position_, end - position_); |
364 } | 357 } |
365 | 358 |
| 359 |
366 Token::Value Scanner::Next() { | 360 Token::Value Scanner::Next() { |
367 // BUG 1215673: Find a thread safe way to set a stack limit in | 361 // BUG 1215673: Find a thread safe way to set a stack limit in |
368 // pre-parse mode. Otherwise, we cannot safely pre-parse from other | 362 // pre-parse mode. Otherwise, we cannot safely pre-parse from other |
369 // threads. | 363 // threads. |
370 current_ = next_; | 364 current_ = next_; |
371 // Check for stack-overflow before returning any tokens. | 365 // Check for stack-overflow before returning any tokens. |
372 StackLimitCheck check; | 366 StackLimitCheck check; |
373 if (check.HasOverflowed()) { | 367 if (check.HasOverflowed()) { |
374 stack_overflow_ = true; | 368 stack_overflow_ = true; |
375 next_.token = Token::ILLEGAL; | 369 next_.token = Token::ILLEGAL; |
376 next_.literal_buffer = NULL; | 370 next_.literal_buffer = NULL; |
377 } else { | 371 } else { |
378 Scan(); | 372 Scan(); |
379 } | 373 } |
380 return current_.token; | 374 return current_.token; |
381 } | 375 } |
382 | 376 |
| 377 |
383 void Scanner::StartLiteral() { | 378 void Scanner::StartLiteral() { |
384 // Use the first buffer unless it's currently in use by the current_ token. | 379 // Use the first buffer unless it's currently in use by the current_ token. |
385 // In most cases we won't have two literals/identifiers in a row, so | 380 // In most cases, we won't have two literals/identifiers in a row so |
386 // the second buffer won't be used very often and is unlikely to grow much. | 381 // the second buffer won't be used very often and is unlikely to grow much. |
387 UTF8Buffer* free_buffer = | 382 UTF8Buffer* free_buffer = |
388 (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_ | 383 (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_ |
389 : &literal_buffer_2_; | 384 : &literal_buffer_2_; |
390 next_.literal_buffer = free_buffer; | 385 next_.literal_buffer = free_buffer; |
391 free_buffer->Reset(); | 386 free_buffer->Reset(); |
392 } | 387 } |
393 | 388 |
| 389 |
394 void Scanner::AddChar(uc32 c) { | 390 void Scanner::AddChar(uc32 c) { |
395 next_.literal_buffer->AddChar(c); | 391 next_.literal_buffer->AddChar(c); |
396 } | 392 } |
397 | 393 |
| 394 |
398 void Scanner::TerminateLiteral() { | 395 void Scanner::TerminateLiteral() { |
399 AddChar(0); | 396 AddChar(0); |
400 } | 397 } |
401 | 398 |
| 399 |
402 void Scanner::AddCharAdvance() { | 400 void Scanner::AddCharAdvance() { |
403 AddChar(c0_); | 401 AddChar(c0_); |
404 Advance(); | 402 Advance(); |
405 } | 403 } |
406 | 404 |
| 405 |
407 static inline bool IsByteOrderMark(uc32 c) { | 406 static inline bool IsByteOrderMark(uc32 c) { |
408 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 407 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
409 // Unicode character; this implies that in a Unicode context the | 408 // Unicode character; this implies that in a Unicode context the |
410 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 409 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
411 // character expressed in little-endian byte order (since it could | 410 // character expressed in little-endian byte order (since it could |
412 // not be a U+FFFE character expressed in big-endian byte | 411 // not be a U+FFFE character expressed in big-endian byte |
413 // order). Nevertheless, we check for it to be compatible with | 412 // order). Nevertheless, we check for it to be compatible with |
414 // Spidermonkey. | 413 // Spidermonkey. |
415 return c == 0xFEFF || c == 0xFFFE; | 414 return c == 0xFEFF || c == 0xFFFE; |
416 } | 415 } |
417 | 416 |
| 417 |
418 bool Scanner::SkipWhiteSpace() { | 418 bool Scanner::SkipWhiteSpace() { |
419 int start_position = source_pos(); | 419 int start_position = source_pos(); |
420 | 420 |
421 while (true) { | 421 while (true) { |
422 // We treat byte-order marks (BOMs) as whitespace for better | 422 // We treat byte-order marks (BOMs) as whitespace for better |
423 // compatibility with Spidermonkey and other JavaScript engines. | 423 // compatibility with Spidermonkey and other JavaScript engines. |
424 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | 424 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { |
425 // IsWhiteSpace() includes line terminators! | 425 // IsWhiteSpace() includes line terminators! |
426 if (kIsLineTerminator.get(c0_)) { | 426 if (kIsLineTerminator.get(c0_)) { |
427 // Ignore line terminators, but remember them. This is necessary | 427 // Ignore line terminators, but remember them. This is necessary |
(...skipping 10 matching lines...) Expand all Loading... |
438 if (c0_ == '-' && has_line_terminator_before_next_) { | 438 if (c0_ == '-' && has_line_terminator_before_next_) { |
439 Advance(); | 439 Advance(); |
440 if (c0_ == '-') { | 440 if (c0_ == '-') { |
441 Advance(); | 441 Advance(); |
442 if (c0_ == '>') { | 442 if (c0_ == '>') { |
443 // Treat the rest of the line as a comment. | 443 // Treat the rest of the line as a comment. |
444 SkipSingleLineComment(); | 444 SkipSingleLineComment(); |
445 // Continue skipping white space after the comment. | 445 // Continue skipping white space after the comment. |
446 continue; | 446 continue; |
447 } | 447 } |
448 PushBack('-'); // undo Advance() | 448 PushBack('-'); // undo Advance() |
449 } | 449 } |
450 PushBack('-'); // undo Advance() | 450 PushBack('-'); // undo Advance() |
451 } | 451 } |
452 // Return whether or not we skipped any characters. | 452 // Return whether or not we skipped any characters. |
453 return source_pos() != start_position; | 453 return source_pos() != start_position; |
454 } | 454 } |
455 } | 455 } |
456 | 456 |
| 457 |
457 Token::Value Scanner::SkipSingleLineComment() { | 458 Token::Value Scanner::SkipSingleLineComment() { |
458 Advance(); | 459 Advance(); |
459 | 460 |
460 // The line terminator at the end of the line is not considered | 461 // The line terminator at the end of the line is not considered |
461 // to be part of the single-line comment; it is recognized | 462 // to be part of the single-line comment; it is recognized |
462 // separately by the lexical grammar and becomes part of the | 463 // separately by the lexical grammar and becomes part of the |
463 // stream of input elements for the syntactic grammar (see | 464 // stream of input elements for the syntactic grammar (see |
464 // ECMA-262, section 7.4, page 12). | 465 // ECMA-262, section 7.4, page 12). |
465 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 466 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { |
466 Advance(); | 467 Advance(); |
467 } | 468 } |
468 | 469 |
469 return Token::WHITESPACE; | 470 return Token::WHITESPACE; |
470 } | 471 } |
471 | 472 |
| 473 |
472 Token::Value Scanner::SkipMultiLineComment() { | 474 Token::Value Scanner::SkipMultiLineComment() { |
473 ASSERT(c0_ == '*'); | 475 ASSERT(c0_ == '*'); |
474 Advance(); | 476 Advance(); |
475 | 477 |
476 while (c0_ >= 0) { | 478 while (c0_ >= 0) { |
477 char ch = c0_; | 479 char ch = c0_; |
478 Advance(); | 480 Advance(); |
479 // If we have reached the end of the multi-line comment, we | 481 // If we have reached the end of the multi-line comment, we |
480 // consume the '/' and insert a whitespace. This way all | 482 // consume the '/' and insert a whitespace. This way all |
481 // multi-line comments are treated as whitespace - even the ones | 483 // multi-line comments are treated as whitespace - even the ones |
482 // containing line terminators. This contradicts ECMA-262, section | 484 // containing line terminators. This contradicts ECMA-262, section |
483 // 7.4, page 12, that says that multi-line comments containing | 485 // 7.4, page 12, that says that multi-line comments containing |
484 // line terminators should be treated as a line terminator, but it | 486 // line terminators should be treated as a line terminator, but it |
485 // matches the behaviour of SpiderMonkey and KJS. | 487 // matches the behaviour of SpiderMonkey and KJS. |
486 if (ch == '*' && c0_ == '/') { | 488 if (ch == '*' && c0_ == '/') { |
487 c0_ = ' '; | 489 c0_ = ' '; |
488 return Token::WHITESPACE; | 490 return Token::WHITESPACE; |
489 } | 491 } |
490 } | 492 } |
491 | 493 |
492 // Unterminated multi-line comment. | 494 // Unterminated multi-line comment. |
493 return Token::ILLEGAL; | 495 return Token::ILLEGAL; |
494 } | 496 } |
495 | 497 |
| 498 |
496 Token::Value Scanner::ScanHtmlComment() { | 499 Token::Value Scanner::ScanHtmlComment() { |
497 // Check for <!-- comments. | 500 // Check for <!-- comments. |
498 ASSERT(c0_ == '!'); | 501 ASSERT(c0_ == '!'); |
499 Advance(); | 502 Advance(); |
500 if (c0_ == '-') { | 503 if (c0_ == '-') { |
501 Advance(); | 504 Advance(); |
502 if (c0_ == '-') | 505 if (c0_ == '-') return SkipSingleLineComment(); |
503 return SkipSingleLineComment(); | 506 PushBack('-'); // undo Advance() |
504 PushBack('-'); // undo Advance() | |
505 } | 507 } |
506 PushBack('!'); // undo Advance() | 508 PushBack('!'); // undo Advance() |
507 ASSERT(c0_ == '!'); | 509 ASSERT(c0_ == '!'); |
508 return Token::LT; | 510 return Token::LT; |
509 } | 511 } |
510 | 512 |
| 513 |
511 void Scanner::Scan() { | 514 void Scanner::Scan() { |
512 Token::Value token; | 515 Token::Value token; |
513 has_line_terminator_before_next_ = false; | 516 has_line_terminator_before_next_ = false; |
514 do { | 517 do { |
515 // Remember the position of the next token | 518 // Remember the position of the next token |
516 next_.location.beg_pos = source_pos(); | 519 next_.location.beg_pos = source_pos(); |
517 | 520 |
518 switch (c0_) { | 521 switch (c0_) { |
519 case ' ': | 522 case ' ': |
520 case '\t': | 523 case '\t': |
521 Advance(); | 524 Advance(); |
522 token = Token::WHITESPACE; | 525 token = Token::WHITESPACE; |
523 break; | 526 break; |
524 | 527 |
525 case '\n': | 528 case '\n': |
526 Advance(); | 529 Advance(); |
527 has_line_terminator_before_next_ = true; | 530 has_line_terminator_before_next_ = true; |
528 token = Token::WHITESPACE; | 531 token = Token::WHITESPACE; |
529 break; | 532 break; |
530 | 533 |
531 case '"': | 534 case '"': case '\'': |
532 case '\'': | |
533 token = ScanString(); | 535 token = ScanString(); |
534 break; | 536 break; |
535 | 537 |
536 case '<': | 538 case '<': |
537 // < <= << <<= <!-- | 539 // < <= << <<= <!-- |
538 Advance(); | 540 Advance(); |
539 if (c0_ == '=') { | 541 if (c0_ == '=') { |
540 token = Select(Token::LTE); | 542 token = Select(Token::LTE); |
541 } else if (c0_ == '<') { | 543 } else if (c0_ == '<') { |
542 token = Select('=', Token::ASSIGN_SHL, Token::SHL); | 544 token = Select('=', Token::ASSIGN_SHL, Token::SHL); |
(...skipping 198 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
741 } | 743 } |
742 | 744 |
743 // Continue scanning for tokens as long as we're just skipping | 745 // Continue scanning for tokens as long as we're just skipping |
744 // whitespace. | 746 // whitespace. |
745 } while (token == Token::WHITESPACE); | 747 } while (token == Token::WHITESPACE); |
746 | 748 |
747 next_.location.end_pos = source_pos(); | 749 next_.location.end_pos = source_pos(); |
748 next_.token = token; | 750 next_.token = token; |
749 } | 751 } |
750 | 752 |
| 753 |
751 void Scanner::SeekForward(int pos) { | 754 void Scanner::SeekForward(int pos) { |
752 source_->SeekForward(pos - 1); | 755 source_->SeekForward(pos - 1); |
753 Advance(); | 756 Advance(); |
754 Scan(); | 757 Scan(); |
755 } | 758 } |
756 | 759 |
| 760 |
757 uc32 Scanner::ScanHexEscape(uc32 c, int length) { | 761 uc32 Scanner::ScanHexEscape(uc32 c, int length) { |
758 ASSERT(length <= 4); // prevent overflow | 762 ASSERT(length <= 4); // prevent overflow |
759 | 763 |
760 uc32 digits[4]; | 764 uc32 digits[4]; |
761 uc32 x = 0; | 765 uc32 x = 0; |
762 for (int i = 0; i < length; i++) { | 766 for (int i = 0; i < length; i++) { |
763 digits[i] = c0_; | 767 digits[i] = c0_; |
764 int d = HexValue(c0_); | 768 int d = HexValue(c0_); |
765 if (d < 0) { | 769 if (d < 0) { |
766 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes | 770 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes |
767 // should be illegal, but other JS VMs just return the | 771 // should be illegal, but other JS VMs just return the |
768 // non-escaped version of the original character. | 772 // non-escaped version of the original character. |
769 | 773 |
770 // Push back digits read, except the last one (in c0_). | 774 // Push back digits read, except the last one (in c0_). |
771 for (int j = i - 1; j >= 0; j--) { | 775 for (int j = i-1; j >= 0; j--) { |
772 PushBack(digits[j]); | 776 PushBack(digits[j]); |
773 } | 777 } |
774 // Notice: No handling of error - treat it as "\u"->"u". | 778 // Notice: No handling of error - treat it as "\u"->"u". |
775 return c; | 779 return c; |
776 } | 780 } |
777 x = x * 16 + d; | 781 x = x * 16 + d; |
778 Advance(); | 782 Advance(); |
779 } | 783 } |
780 | 784 |
781 return x; | 785 return x; |
782 } | 786 } |
783 | 787 |
| 788 |
784 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of | 789 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of |
785 // ECMA-262. Other JS VMs support them. | 790 // ECMA-262. Other JS VMs support them. |
786 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { | 791 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { |
787 uc32 x = c - '0'; | 792 uc32 x = c - '0'; |
788 for (int i = 0; i < length; i++) { | 793 for (int i = 0; i < length; i++) { |
789 int d = c0_ - '0'; | 794 int d = c0_ - '0'; |
790 if (d < 0 || d > 7) | 795 if (d < 0 || d > 7) break; |
791 break; | |
792 int nx = x * 8 + d; | 796 int nx = x * 8 + d; |
793 if (nx >= 256) | 797 if (nx >= 256) break; |
794 break; | |
795 x = nx; | 798 x = nx; |
796 Advance(); | 799 Advance(); |
797 } | 800 } |
798 return x; | 801 return x; |
799 } | 802 } |
800 | 803 |
| 804 |
801 void Scanner::ScanEscape() { | 805 void Scanner::ScanEscape() { |
802 uc32 c = c0_; | 806 uc32 c = c0_; |
803 Advance(); | 807 Advance(); |
804 | 808 |
805 // Skip escaped newlines. | 809 // Skip escaped newlines. |
806 if (kIsLineTerminator.get(c)) { | 810 if (kIsLineTerminator.get(c)) { |
807 // Allow CR+LF newlines in multiline string literals. | 811 // Allow CR+LF newlines in multiline string literals. |
808 if (IsCarriageReturn(c) && IsLineFeed(c0_)) | 812 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
809 Advance(); | |
810 // Allow LF+CR newlines in multiline string literals. | 813 // Allow LF+CR newlines in multiline string literals. |
811 if (IsLineFeed(c) && IsCarriageReturn(c0_)) | 814 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); |
812 Advance(); | |
813 return; | 815 return; |
814 } | 816 } |
815 | 817 |
816 switch (c) { | 818 switch (c) { |
817 case '\'': // fall through | 819 case '\'': // fall through |
818 case '"': // fall through | 820 case '"' : // fall through |
819 case '\\': | 821 case '\\': break; |
820 break; | 822 case 'b' : c = '\b'; break; |
821 case 'b': | 823 case 'f' : c = '\f'; break; |
822 c = '\b'; | 824 case 'n' : c = '\n'; break; |
823 break; | 825 case 'r' : c = '\r'; break; |
824 case 'f': | 826 case 't' : c = '\t'; break; |
825 c = '\f'; | 827 case 'u' : c = ScanHexEscape(c, 4); break; |
826 break; | 828 case 'v' : c = '\v'; break; |
827 case 'n': | 829 case 'x' : c = ScanHexEscape(c, 2); break; |
828 c = '\n'; | 830 case '0' : // fall through |
829 break; | 831 case '1' : // fall through |
830 case 'r': | 832 case '2' : // fall through |
831 c = '\r'; | 833 case '3' : // fall through |
832 break; | 834 case '4' : // fall through |
833 case 't': | 835 case '5' : // fall through |
834 c = '\t'; | 836 case '6' : // fall through |
835 break; | 837 case '7' : c = ScanOctalEscape(c, 2); break; |
836 case 'u': | |
837 c = ScanHexEscape(c, 4); | |
838 break; | |
839 case 'v': | |
840 c = '\v'; | |
841 break; | |
842 case 'x': | |
843 c = ScanHexEscape(c, 2); | |
844 break; | |
845 case '0': // fall through | |
846 case '1': // fall through | |
847 case '2': // fall through | |
848 case '3': // fall through | |
849 case '4': // fall through | |
850 case '5': // fall through | |
851 case '6': // fall through | |
852 case '7': | |
853 c = ScanOctalEscape(c, 2); | |
854 break; | |
855 } | 838 } |
856 | 839 |
857 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these | 840 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these |
858 // should be illegal, but they are commonly handled | 841 // should be illegal, but they are commonly handled |
859 // as non-escaped characters by JS VMs. | 842 // as non-escaped characters by JS VMs. |
860 AddChar(c); | 843 AddChar(c); |
861 } | 844 } |
862 | 845 |
| 846 |
863 Token::Value Scanner::ScanString() { | 847 Token::Value Scanner::ScanString() { |
864 uc32 quote = c0_; | 848 uc32 quote = c0_; |
865 Advance(); // consume quote | 849 Advance(); // consume quote |
866 | 850 |
867 StartLiteral(); | 851 StartLiteral(); |
868 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 852 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { |
869 uc32 c = c0_; | 853 uc32 c = c0_; |
870 Advance(); | 854 Advance(); |
871 if (c == '\\') { | 855 if (c == '\\') { |
872 if (c0_ < 0) | 856 if (c0_ < 0) return Token::ILLEGAL; |
873 return Token::ILLEGAL; | |
874 ScanEscape(); | 857 ScanEscape(); |
875 } else { | 858 } else { |
876 AddChar(c); | 859 AddChar(c); |
877 } | 860 } |
878 } | 861 } |
879 if (c0_ != quote) { | 862 if (c0_ != quote) { |
880 return Token::ILLEGAL; | 863 return Token::ILLEGAL; |
881 } | 864 } |
882 TerminateLiteral(); | 865 TerminateLiteral(); |
883 | 866 |
884 Advance(); // consume quote | 867 Advance(); // consume quote |
885 return Token::STRING; | 868 return Token::STRING; |
886 } | 869 } |
887 | 870 |
| 871 |
888 Token::Value Scanner::Select(Token::Value tok) { | 872 Token::Value Scanner::Select(Token::Value tok) { |
889 Advance(); | 873 Advance(); |
890 return tok; | 874 return tok; |
891 } | 875 } |
892 | 876 |
| 877 |
893 Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) { | 878 Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) { |
894 Advance(); | 879 Advance(); |
895 if (c0_ == next) { | 880 if (c0_ == next) { |
896 Advance(); | 881 Advance(); |
897 return then; | 882 return then; |
898 } else { | 883 } else { |
899 return else_; | 884 return else_; |
900 } | 885 } |
901 } | 886 } |
902 | 887 |
| 888 |
903 // Returns true if any decimal digits were scanned, returns false otherwise. | 889 // Returns true if any decimal digits were scanned, returns false otherwise. |
904 void Scanner::ScanDecimalDigits() { | 890 void Scanner::ScanDecimalDigits() { |
905 while (IsDecimalDigit(c0_)) | 891 while (IsDecimalDigit(c0_)) |
906 AddCharAdvance(); | 892 AddCharAdvance(); |
907 } | 893 } |
908 | 894 |
| 895 |
909 Token::Value Scanner::ScanNumber(bool seen_period) { | 896 Token::Value Scanner::ScanNumber(bool seen_period) { |
910 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 897 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
911 | 898 |
912 enum { | 899 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; |
913 DECIMAL, HEX, OCTAL | |
914 } kind = DECIMAL; | |
915 | 900 |
916 StartLiteral(); | 901 StartLiteral(); |
917 if (seen_period) { | 902 if (seen_period) { |
918 // we have already seen a decimal point of the float | 903 // we have already seen a decimal point of the float |
919 AddChar('.'); | 904 AddChar('.'); |
920 ScanDecimalDigits(); // we know we have at least one digit | 905 ScanDecimalDigits(); // we know we have at least one digit |
921 | 906 |
922 } else { | 907 } else { |
923 // if the first character is '0' we must check for octals and hex | 908 // if the first character is '0' we must check for octals and hex |
924 if (c0_ == '0') { | 909 if (c0_ == '0') { |
925 AddCharAdvance(); | 910 AddCharAdvance(); |
926 | 911 |
927 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number | 912 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number |
928 if (c0_ == 'x' || c0_ == 'X') { | 913 if (c0_ == 'x' || c0_ == 'X') { |
929 // hex number | 914 // hex number |
930 kind = HEX; | 915 kind = HEX; |
931 AddCharAdvance(); | 916 AddCharAdvance(); |
932 if (!IsHexDigit(c0_)) | 917 if (!IsHexDigit(c0_)) |
933 // we must have at least one hex digit after 'x'/'X' | 918 // we must have at least one hex digit after 'x'/'X' |
934 return Token::ILLEGAL; | 919 return Token::ILLEGAL; |
935 while (IsHexDigit(c0_)) | 920 while (IsHexDigit(c0_)) |
936 AddCharAdvance(); | 921 AddCharAdvance(); |
937 | 922 |
938 } else if ('0' <= c0_ && c0_ <= '7') { | 923 } else if ('0' <= c0_ && c0_ <= '7') { |
939 // (possible) octal number | 924 // (possible) octal number |
940 kind = OCTAL; | 925 kind = OCTAL; |
941 while (true) { | 926 while (true) { |
942 if (c0_ == '8' || c0_ == '9') { | 927 if (c0_ == '8' || c0_ == '9') { |
943 kind = DECIMAL; | 928 kind = DECIMAL; |
944 break; | 929 break; |
945 } | 930 } |
946 if (c0_ < '0' || '7' < c0_) | 931 if (c0_ < '0' || '7' < c0_) break; |
947 break; | |
948 AddCharAdvance(); | 932 AddCharAdvance(); |
949 } | 933 } |
950 } | 934 } |
951 } | 935 } |
952 | 936 |
953 // Parse decimal digits and allow trailing fractional part. | 937 // Parse decimal digits and allow trailing fractional part. |
954 if (kind == DECIMAL) { | 938 if (kind == DECIMAL) { |
955 ScanDecimalDigits(); // optional | 939 ScanDecimalDigits(); // optional |
956 if (c0_ == '.') { | 940 if (c0_ == '.') { |
957 AddCharAdvance(); | 941 AddCharAdvance(); |
958 ScanDecimalDigits(); // optional | 942 ScanDecimalDigits(); // optional |
959 } | 943 } |
960 } | 944 } |
961 } | 945 } |
962 | 946 |
963 // scan exponent, if any | 947 // scan exponent, if any |
964 if (c0_ == 'e' || c0_ == 'E') { | 948 if (c0_ == 'e' || c0_ == 'E') { |
965 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number | 949 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number |
966 if (kind == OCTAL) | 950 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed |
967 return Token::ILLEGAL; // no exponent for octals allowed | |
968 // scan exponent | 951 // scan exponent |
969 AddCharAdvance(); | 952 AddCharAdvance(); |
970 if (c0_ == '+' || c0_ == '-') | 953 if (c0_ == '+' || c0_ == '-') |
971 AddCharAdvance(); | 954 AddCharAdvance(); |
972 if (!IsDecimalDigit(c0_)) | 955 if (!IsDecimalDigit(c0_)) |
973 // we must have at least one decimal digit after 'e'/'E' | 956 // we must have at least one decimal digit after 'e'/'E' |
974 return Token::ILLEGAL; | 957 return Token::ILLEGAL; |
975 ScanDecimalDigits(); | 958 ScanDecimalDigits(); |
976 } | 959 } |
977 TerminateLiteral(); | 960 TerminateLiteral(); |
978 | 961 |
979 // The source character immediately following a numeric literal must | 962 // The source character immediately following a numeric literal must |
980 // not be an identifier start or a decimal digit; see ECMA-262 | 963 // not be an identifier start or a decimal digit; see ECMA-262 |
981 // section 7.8.3, page 17 (note that we read only one decimal digit | 964 // section 7.8.3, page 17 (note that we read only one decimal digit |
982 // if the value is 0). | 965 // if the value is 0). |
983 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) | 966 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) |
984 return Token::ILLEGAL; | 967 return Token::ILLEGAL; |
985 | 968 |
986 return Token::NUMBER; | 969 return Token::NUMBER; |
987 } | 970 } |
988 | 971 |
| 972 |
989 uc32 Scanner::ScanIdentifierUnicodeEscape() { | 973 uc32 Scanner::ScanIdentifierUnicodeEscape() { |
990 Advance(); | 974 Advance(); |
991 if (c0_ != 'u') | 975 if (c0_ != 'u') return unibrow::Utf8::kBadChar; |
992 return unibrow::Utf8::kBadChar; | |
993 Advance(); | 976 Advance(); |
994 uc32 c = ScanHexEscape('u', 4); | 977 uc32 c = ScanHexEscape('u', 4); |
995 // We do not allow a unicode escape sequence to start another | 978 // We do not allow a unicode escape sequence to start another |
996 // unicode escape sequence. | 979 // unicode escape sequence. |
997 if (c == '\\') | 980 if (c == '\\') return unibrow::Utf8::kBadChar; |
998 return unibrow::Utf8::kBadChar; | |
999 return c; | 981 return c; |
1000 } | 982 } |
1001 | 983 |
| 984 |
1002 Token::Value Scanner::ScanIdentifier() { | 985 Token::Value Scanner::ScanIdentifier() { |
1003 ASSERT(kIsIdentifierStart.get(c0_)); | 986 ASSERT(kIsIdentifierStart.get(c0_)); |
1004 | 987 |
1005 StartLiteral(); | 988 StartLiteral(); |
1006 KeywordMatcher keyword_match; | 989 KeywordMatcher keyword_match; |
1007 | 990 |
1008 // Scan identifier start character. | 991 // Scan identifier start character. |
1009 if (c0_ == '\\') { | 992 if (c0_ == '\\') { |
1010 uc32 c = ScanIdentifierUnicodeEscape(); | 993 uc32 c = ScanIdentifierUnicodeEscape(); |
1011 // Only allow legal identifier start characters. | 994 // Only allow legal identifier start characters. |
1012 if (!kIsIdentifierStart.get(c)) | 995 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; |
1013 return Token::ILLEGAL; | |
1014 AddChar(c); | 996 AddChar(c); |
1015 keyword_match.Fail(); | 997 keyword_match.Fail(); |
1016 } else { | 998 } else { |
1017 AddChar(c0_); | 999 AddChar(c0_); |
1018 keyword_match.AddChar(c0_); | 1000 keyword_match.AddChar(c0_); |
1019 Advance(); | 1001 Advance(); |
1020 } | 1002 } |
1021 | 1003 |
1022 // Scan the rest of the identifier characters. | 1004 // Scan the rest of the identifier characters. |
1023 while (kIsIdentifierPart.get(c0_)) { | 1005 while (kIsIdentifierPart.get(c0_)) { |
1024 if (c0_ == '\\') { | 1006 if (c0_ == '\\') { |
1025 uc32 c = ScanIdentifierUnicodeEscape(); | 1007 uc32 c = ScanIdentifierUnicodeEscape(); |
1026 // Only allow legal identifier part characters. | 1008 // Only allow legal identifier part characters. |
1027 if (!kIsIdentifierPart.get(c)) | 1009 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; |
1028 return Token::ILLEGAL; | |
1029 AddChar(c); | 1010 AddChar(c); |
1030 keyword_match.Fail(); | 1011 keyword_match.Fail(); |
1031 } else { | 1012 } else { |
1032 AddChar(c0_); | 1013 AddChar(c0_); |
1033 keyword_match.AddChar(c0_); | 1014 keyword_match.AddChar(c0_); |
1034 Advance(); | 1015 Advance(); |
1035 } | 1016 } |
1036 } | 1017 } |
1037 TerminateLiteral(); | 1018 TerminateLiteral(); |
1038 | 1019 |
1039 return keyword_match.token(); | 1020 return keyword_match.token(); |
1040 } | 1021 } |
1041 | 1022 |
| 1023 |
| 1024 |
1042 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { | 1025 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { |
1043 // Checks whether the buffer contains an identifier (no escape). | 1026 // Checks whether the buffer contains an identifier (no escape). |
1044 if (!buffer->has_more()) | 1027 if (!buffer->has_more()) return false; |
1045 return false; | 1028 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; |
1046 if (!kIsIdentifierStart.get(buffer->GetNext())) | |
1047 return false; | |
1048 while (buffer->has_more()) { | 1029 while (buffer->has_more()) { |
1049 if (!kIsIdentifierPart.get(buffer->GetNext())) | 1030 if (!kIsIdentifierPart.get(buffer->GetNext())) return false; |
1050 return false; | |
1051 } | 1031 } |
1052 return true; | 1032 return true; |
1053 } | 1033 } |
1054 | 1034 |
| 1035 |
1055 bool Scanner::ScanRegExpPattern(bool seen_equal) { | 1036 bool Scanner::ScanRegExpPattern(bool seen_equal) { |
1056 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 1037 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
1057 bool in_character_class = false; | 1038 bool in_character_class = false; |
1058 | 1039 |
1059 // Previous token is either '/' or '/=', in the second case, the | 1040 // Previous token is either '/' or '/=', in the second case, the |
1060 // pattern starts at =. | 1041 // pattern starts at =. |
1061 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 1042 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
1062 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 1043 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
1063 | 1044 |
1064 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 1045 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
1065 // the scanner should pass uninterpreted bodies to the RegExp | 1046 // the scanner should pass uninterpreted bodies to the RegExp |
1066 // constructor. | 1047 // constructor. |
1067 StartLiteral(); | 1048 StartLiteral(); |
1068 if (seen_equal) | 1049 if (seen_equal) |
1069 AddChar('='); | 1050 AddChar('='); |
1070 | 1051 |
1071 while (c0_ != '/' || in_character_class) { | 1052 while (c0_ != '/' || in_character_class) { |
1072 if (kIsLineTerminator.get(c0_) || c0_ < 0) | 1053 if (kIsLineTerminator.get(c0_) || c0_ < 0) |
1073 return false; | 1054 return false; |
1074 if (c0_ == '\\') { // escaped character | 1055 if (c0_ == '\\') { // escaped character |
1075 AddCharAdvance(); | 1056 AddCharAdvance(); |
1076 if (kIsLineTerminator.get(c0_) || c0_ < 0) | 1057 if (kIsLineTerminator.get(c0_) || c0_ < 0) |
1077 return false; | 1058 return false; |
1078 AddCharAdvance(); | 1059 AddCharAdvance(); |
1079 } else { // unescaped character | 1060 } else { // unescaped character |
1080 if (c0_ == '[') | 1061 if (c0_ == '[') |
1081 in_character_class = true; | 1062 in_character_class = true; |
1082 if (c0_ == ']') | 1063 if (c0_ == ']') |
1083 in_character_class = false; | 1064 in_character_class = false; |
1084 AddCharAdvance(); | 1065 AddCharAdvance(); |
1085 } | 1066 } |
1086 } | 1067 } |
1087 Advance(); // consume '/' | 1068 Advance(); // consume '/' |
1088 | 1069 |
1089 TerminateLiteral(); | 1070 TerminateLiteral(); |
1090 | 1071 |
1091 return true; | 1072 return true; |
1092 } | 1073 } |
1093 | 1074 |
1094 bool Scanner::ScanRegExpFlags() { | 1075 bool Scanner::ScanRegExpFlags() { |
1095 // Scan regular expression flags. | 1076 // Scan regular expression flags. |
1096 StartLiteral(); | 1077 StartLiteral(); |
1097 while (kIsIdentifierPart.get(c0_)) { | 1078 while (kIsIdentifierPart.get(c0_)) { |
1098 if (c0_ == '\\') { | 1079 if (c0_ == '\\') { |
1099 uc32 c = ScanIdentifierUnicodeEscape(); | 1080 uc32 c = ScanIdentifierUnicodeEscape(); |
1100 if (c != static_cast<uc32> (unibrow::Utf8::kBadChar)) { | 1081 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { |
1101 // We allow any escaped character, unlike the restriction on | 1082 // We allow any escaped character, unlike the restriction on |
1102 // IdentifierPart when it is used to build an IdentifierName. | 1083 // IdentifierPart when it is used to build an IdentifierName. |
1103 AddChar(c); | 1084 AddChar(c); |
1104 continue; | 1085 continue; |
1105 } | 1086 } |
1106 } | 1087 } |
1107 AddCharAdvance(); | 1088 AddCharAdvance(); |
1108 } | 1089 } |
1109 TerminateLiteral(); | 1090 TerminateLiteral(); |
1110 | 1091 |
1111 next_.location.end_pos = source_pos() - 1; | 1092 next_.location.end_pos = source_pos() - 1; |
1112 return true; | 1093 return true; |
1113 } | 1094 } |
1114 | 1095 |
1115 } | 1096 } } // namespace v8::internal |
1116 } // namespace v8::internal | |
OLD | NEW |