| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 24 matching lines...) Expand all Loading... |
| 35 | 35 |
| 36 // ---------------------------------------------------------------------------- | 36 // ---------------------------------------------------------------------------- |
| 37 // Character predicates | 37 // Character predicates |
| 38 | 38 |
| 39 | 39 |
| 40 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart; | 40 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart; |
| 41 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart; | 41 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart; |
| 42 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; | 42 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; |
| 43 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; | 43 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; |
| 44 | 44 |
| 45 |
| 45 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; | 46 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; |
| 46 | 47 |
| 48 |
| 47 // ---------------------------------------------------------------------------- | 49 // ---------------------------------------------------------------------------- |
| 48 // UTF8Buffer | 50 // UTF8Buffer |
| 49 | 51 |
| 50 UTF8Buffer::UTF8Buffer() : | 52 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { } |
| 51 data_(NULL), limit_(NULL) { | 53 |
| 52 } | |
| 53 | 54 |
| 54 UTF8Buffer::~UTF8Buffer() { | 55 UTF8Buffer::~UTF8Buffer() { |
| 55 DeleteArray(data_); | 56 DeleteArray(data_); |
| 56 } | 57 } |
| 57 | 58 |
| 59 |
| 58 void UTF8Buffer::AddCharSlow(uc32 c) { | 60 void UTF8Buffer::AddCharSlow(uc32 c) { |
| 59 static const int kCapacityGrowthLimit = 1 * MB; | 61 static const int kCapacityGrowthLimit = 1 * MB; |
| 60 if (cursor_ > limit_) { | 62 if (cursor_ > limit_) { |
| 61 int old_capacity = Capacity(); | 63 int old_capacity = Capacity(); |
| 62 int old_position = pos(); | 64 int old_position = pos(); |
| 63 int new_capacity = Min(old_capacity * 3, old_capacity | 65 int new_capacity = |
| 64 + kCapacityGrowthLimit); | 66 Min(old_capacity * 3, old_capacity + kCapacityGrowthLimit); |
| 65 char* new_data = NewArray<char> (new_capacity); | 67 char* new_data = NewArray<char>(new_capacity); |
| 66 memcpy(new_data, data_, old_position); | 68 memcpy(new_data, data_, old_position); |
| 67 DeleteArray(data_); | 69 DeleteArray(data_); |
| 68 data_ = new_data; | 70 data_ = new_data; |
| 69 cursor_ = new_data + old_position; | 71 cursor_ = new_data + old_position; |
| 70 limit_ = ComputeLimit(new_data, new_capacity); | 72 limit_ = ComputeLimit(new_data, new_capacity); |
| 71 ASSERT(Capacity() == new_capacity && pos() == old_position); | 73 ASSERT(Capacity() == new_capacity && pos() == old_position); |
| 72 } | 74 } |
| 73 if (static_cast<unsigned> (c) <= unibrow::Utf8::kMaxOneByteChar) { | 75 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { |
| 74 *cursor_++ = c; // Common case: 7-bit ASCII. | 76 *cursor_++ = c; // Common case: 7-bit ASCII. |
| 75 } else { | 77 } else { |
| 76 cursor_ += unibrow::Utf8::Encode(cursor_, c); | 78 cursor_ += unibrow::Utf8::Encode(cursor_, c); |
| 77 } | 79 } |
| 78 ASSERT(pos() <= Capacity()); | 80 ASSERT(pos() <= Capacity()); |
| 79 } | 81 } |
| 80 | 82 |
| 83 |
| 81 // ---------------------------------------------------------------------------- | 84 // ---------------------------------------------------------------------------- |
| 82 // UTF16Buffer | 85 // UTF16Buffer |
| 83 | 86 |
| 84 | 87 |
| 85 UTF16Buffer::UTF16Buffer() : | 88 UTF16Buffer::UTF16Buffer() |
| 86 pos_(0), size_(0) { | 89 : pos_(0), size_(0) { } |
| 87 } | 90 |
| 88 | 91 |
| 89 Handle<String> UTF16Buffer::SubString(int start, int end) { | 92 Handle<String> UTF16Buffer::SubString(int start, int end) { |
| 90 return internal::SubString(data_, start, end); | 93 return internal::SubString(data_, start, end); |
| 91 } | 94 } |
| 92 | 95 |
| 96 |
| 93 // CharacterStreamUTF16Buffer | 97 // CharacterStreamUTF16Buffer |
| 94 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() : | 98 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() |
| 95 pushback_buffer_(0), last_(0), stream_(NULL) { | 99 : pushback_buffer_(0), last_(0), stream_(NULL) { } |
| 96 } | 100 |
| 97 | 101 |
| 98 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data, | 102 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data, |
| 99 unibrow::CharacterStream* input) { | 103 unibrow::CharacterStream* input) { |
| 100 data_ = data; | 104 data_ = data; |
| 101 pos_ = 0; | 105 pos_ = 0; |
| 102 stream_ = input; | 106 stream_ = input; |
| 103 } | 107 } |
| 104 | 108 |
| 109 |
| 105 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) { | 110 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) { |
| 106 pushback_buffer()->Add(last_); | 111 pushback_buffer()->Add(last_); |
| 107 last_ = ch; | 112 last_ = ch; |
| 108 pos_--; | 113 pos_--; |
| 109 } | 114 } |
| 110 | 115 |
| 116 |
| 111 uc32 CharacterStreamUTF16Buffer::Advance() { | 117 uc32 CharacterStreamUTF16Buffer::Advance() { |
| 112 // NOTE: It is of importance to Persian / Farsi resources that we do | 118 // NOTE: It is of importance to Persian / Farsi resources that we do |
| 113 // *not* strip format control characters in the scanner; see | 119 // *not* strip format control characters in the scanner; see |
| 114 // | 120 // |
| 115 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152 | 121 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152 |
| 116 // | 122 // |
| 117 // So, even though ECMA-262, section 7.1, page 11, dictates that we | 123 // So, even though ECMA-262, section 7.1, page 11, dictates that we |
| 118 // must remove Unicode format-control characters, we do not. This is | 124 // must remove Unicode format-control characters, we do not. This is |
| 119 // in line with how IE and SpiderMonkey handles it. | 125 // in line with how IE and SpiderMonkey handles it. |
| 120 if (!pushback_buffer()->is_empty()) { | 126 if (!pushback_buffer()->is_empty()) { |
| 121 pos_++; | 127 pos_++; |
| 122 return last_ = pushback_buffer()->RemoveLast(); | 128 return last_ = pushback_buffer()->RemoveLast(); |
| 123 } else if (stream_->has_more()) { | 129 } else if (stream_->has_more()) { |
| 124 pos_++; | 130 pos_++; |
| 125 uc32 next = stream_->GetNext(); | 131 uc32 next = stream_->GetNext(); |
| 126 return last_ = next; | 132 return last_ = next; |
| 127 } else { | 133 } else { |
| 128 // Note: currently the following increment is necessary to avoid a | 134 // Note: currently the following increment is necessary to avoid a |
| 129 // test-parser problem! | 135 // test-parser problem! |
| 130 pos_++; | 136 pos_++; |
| 131 return last_ = static_cast<uc32> (-1); | 137 return last_ = static_cast<uc32>(-1); |
| 132 } | 138 } |
| 133 } | 139 } |
| 134 | 140 |
| 141 |
| 135 void CharacterStreamUTF16Buffer::SeekForward(int pos) { | 142 void CharacterStreamUTF16Buffer::SeekForward(int pos) { |
| 136 pos_ = pos; | 143 pos_ = pos; |
| 137 ASSERT(pushback_buffer()->is_empty()); | 144 ASSERT(pushback_buffer()->is_empty()); |
| 138 stream_->Seek(pos); | 145 stream_->Seek(pos); |
| 139 } | 146 } |
| 140 | 147 |
| 148 |
| 141 // TwoByteStringUTF16Buffer | 149 // TwoByteStringUTF16Buffer |
| 142 TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer() : | 150 TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer() |
| 143 raw_data_(NULL) { | 151 : raw_data_(NULL) { } |
| 144 } | |
| 145 | 152 |
| 146 void TwoByteStringUTF16Buffer::Initialize(Handle<ExternalTwoByteString> data) { | 153 |
| 154 void TwoByteStringUTF16Buffer::Initialize( |
| 155 Handle<ExternalTwoByteString> data) { |
| 147 ASSERT(!data.is_null()); | 156 ASSERT(!data.is_null()); |
| 148 | 157 |
| 149 data_ = data; | 158 data_ = data; |
| 150 pos_ = 0; | 159 pos_ = 0; |
| 151 | 160 |
| 152 raw_data_ = data->resource()->data(); | 161 raw_data_ = data->resource()->data(); |
| 153 size_ = data->length(); | 162 size_ = data->length(); |
| 154 } | 163 } |
| 155 | 164 |
| 165 |
| 156 uc32 TwoByteStringUTF16Buffer::Advance() { | 166 uc32 TwoByteStringUTF16Buffer::Advance() { |
| 157 if (pos_ < size_) { | 167 if (pos_ < size_) { |
| 158 return raw_data_[pos_++]; | 168 return raw_data_[pos_++]; |
| 159 } else { | 169 } else { |
| 160 // note: currently the following increment is necessary to avoid a | 170 // note: currently the following increment is necessary to avoid a |
| 161 // test-parser problem! | 171 // test-parser problem! |
| 162 pos_++; | 172 pos_++; |
| 163 return static_cast<uc32> (-1); | 173 return static_cast<uc32>(-1); |
| 164 } | 174 } |
| 165 } | 175 } |
| 166 | 176 |
| 177 |
| 167 void TwoByteStringUTF16Buffer::PushBack(uc32 ch) { | 178 void TwoByteStringUTF16Buffer::PushBack(uc32 ch) { |
| 168 pos_--; | 179 pos_--; |
| 169 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); | 180 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); |
| 170 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); | 181 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); |
| 171 } | 182 } |
| 172 | 183 |
| 184 |
| 173 void TwoByteStringUTF16Buffer::SeekForward(int pos) { | 185 void TwoByteStringUTF16Buffer::SeekForward(int pos) { |
| 174 pos_ = pos; | 186 pos_ = pos; |
| 175 } | 187 } |
| 176 | 188 |
| 189 |
| 177 // ---------------------------------------------------------------------------- | 190 // ---------------------------------------------------------------------------- |
| 178 // Keyword Matcher | 191 // Keyword Matcher |
| 179 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { { "break", | 192 KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { |
| 180 KEYWORD_PREFIX, Token::BREAK }, { NULL, C, Token::ILLEGAL }, { NULL, D, | 193 { "break", KEYWORD_PREFIX, Token::BREAK }, |
| 181 Token::ILLEGAL }, { "else", KEYWORD_PREFIX, Token::ELSE }, { NULL, F, | 194 { NULL, C, Token::ILLEGAL }, |
| 182 Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, | 195 { NULL, D, Token::ILLEGAL }, |
| 183 UNMATCHABLE, Token::ILLEGAL }, { NULL, I, Token::ILLEGAL }, { NULL, | 196 { "else", KEYWORD_PREFIX, Token::ELSE }, |
| 184 UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { | 197 { NULL, F, Token::ILLEGAL }, |
| 185 NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, | 198 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 186 { NULL, N, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, | 199 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 187 UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { | 200 { NULL, I, Token::ILLEGAL }, |
| 188 "return", KEYWORD_PREFIX, Token::RETURN }, { "switch", KEYWORD_PREFIX, | 201 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 189 Token::SWITCH }, { NULL, T, Token::ILLEGAL }, { NULL, UNMATCHABLE, | 202 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 190 Token::ILLEGAL }, { NULL, V, Token::ILLEGAL }, { NULL, W, | 203 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 191 Token::ILLEGAL } }; | 204 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 205 { NULL, N, Token::ILLEGAL }, |
| 206 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 207 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 208 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 209 { "return", KEYWORD_PREFIX, Token::RETURN }, |
| 210 { "switch", KEYWORD_PREFIX, Token::SWITCH }, |
| 211 { NULL, T, Token::ILLEGAL }, |
| 212 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 213 { NULL, V, Token::ILLEGAL }, |
| 214 { NULL, W, Token::ILLEGAL } |
| 215 }; |
| 216 |
| 192 | 217 |
| 193 void KeywordMatcher::Step(uc32 input) { | 218 void KeywordMatcher::Step(uc32 input) { |
| 194 switch (state_) { | 219 switch (state_) { |
| 195 case INITIAL: { | 220 case INITIAL: { |
| 196 // matching the first character is the only state with significant fanout. | 221 // matching the first character is the only state with significant fanout. |
| 197 // Match only lower-case letters in range 'b'..'w'. | 222 // Match only lower-case letters in range 'b'..'w'. |
| 198 unsigned int offset = input - kFirstCharRangeMin; | 223 unsigned int offset = input - kFirstCharRangeMin; |
| 199 if (offset < kFirstCharRangeLength) { | 224 if (offset < kFirstCharRangeLength) { |
| 200 state_ = first_states_[offset].state; | 225 state_ = first_states_[offset].state; |
| 201 if (state_ == KEYWORD_PREFIX) { | 226 if (state_ == KEYWORD_PREFIX) { |
| (...skipping 13 matching lines...) Expand all Loading... |
| 215 state_ = KEYWORD_MATCHED; | 240 state_ = KEYWORD_MATCHED; |
| 216 token_ = keyword_token_; | 241 token_ = keyword_token_; |
| 217 } | 242 } |
| 218 return; | 243 return; |
| 219 } | 244 } |
| 220 break; | 245 break; |
| 221 case KEYWORD_MATCHED: | 246 case KEYWORD_MATCHED: |
| 222 token_ = Token::IDENTIFIER; | 247 token_ = Token::IDENTIFIER; |
| 223 break; | 248 break; |
| 224 case C: | 249 case C: |
| 225 if (MatchState(input, 'a', CA)) | 250 if (MatchState(input, 'a', CA)) return; |
| 226 return; | 251 if (MatchState(input, 'o', CO)) return; |
| 227 if (MatchState(input, 'o', CO)) | |
| 228 return; | |
| 229 break; | 252 break; |
| 230 case CA: | 253 case CA: |
| 231 if (MatchKeywordStart(input, "case", 2, Token::CASE)) | 254 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return; |
| 232 return; | 255 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return; |
| 233 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) | |
| 234 return; | |
| 235 break; | 256 break; |
| 236 case CO: | 257 case CO: |
| 237 if (MatchState(input, 'n', CON)) | 258 if (MatchState(input, 'n', CON)) return; |
| 238 return; | |
| 239 break; | 259 break; |
| 240 case CON: | 260 case CON: |
| 241 if (MatchKeywordStart(input, "const", 3, Token::CONST)) | 261 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return; |
| 242 return; | 262 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return; |
| 243 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) | |
| 244 return; | |
| 245 break; | 263 break; |
| 246 case D: | 264 case D: |
| 247 if (MatchState(input, 'e', DE)) | 265 if (MatchState(input, 'e', DE)) return; |
| 248 return; | 266 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return; |
| 249 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) | |
| 250 return; | |
| 251 break; | 267 break; |
| 252 case DE: | 268 case DE: |
| 253 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) | 269 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return; |
| 254 return; | 270 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return; |
| 255 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) | 271 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return; |
| 256 return; | |
| 257 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) | |
| 258 return; | |
| 259 break; | 272 break; |
| 260 case F: | 273 case F: |
| 261 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) | 274 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return; |
| 262 return; | 275 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return; |
| 263 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) | 276 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return; |
| 264 return; | 277 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return; |
| 265 if (MatchKeywordStart(input, "for", 1, Token::FOR)) | |
| 266 return; | |
| 267 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) | |
| 268 return; | |
| 269 break; | 278 break; |
| 270 case I: | 279 case I: |
| 271 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) | 280 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return; |
| 272 return; | 281 if (MatchKeyword(input, 'n', IN, Token::IN)) return; |
| 273 if (MatchKeyword(input, 'n', IN, Token::IN)) | |
| 274 return; | |
| 275 break; | 282 break; |
| 276 case IN: | 283 case IN: |
| 277 token_ = Token::IDENTIFIER; | 284 token_ = Token::IDENTIFIER; |
| 278 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) { | 285 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) { |
| 279 return; | 286 return; |
| 280 } | 287 } |
| 281 break; | 288 break; |
| 282 case N: | 289 case N: |
| 283 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) | 290 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return; |
| 284 return; | 291 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return; |
| 285 if (MatchKeywordStart(input, "new", 1, Token::NEW)) | 292 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return; |
| 286 return; | |
| 287 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) | |
| 288 return; | |
| 289 break; | 293 break; |
| 290 case T: | 294 case T: |
| 291 if (MatchState(input, 'h', TH)) | 295 if (MatchState(input, 'h', TH)) return; |
| 292 return; | 296 if (MatchState(input, 'r', TR)) return; |
| 293 if (MatchState(input, 'r', TR)) | 297 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return; |
| 294 return; | |
| 295 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) | |
| 296 return; | |
| 297 break; | 298 break; |
| 298 case TH: | 299 case TH: |
| 299 if (MatchKeywordStart(input, "this", 2, Token::THIS)) | 300 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return; |
| 300 return; | 301 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return; |
| 301 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) | |
| 302 return; | |
| 303 break; | 302 break; |
| 304 case TR: | 303 case TR: |
| 305 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) | 304 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return; |
| 306 return; | 305 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return; |
| 307 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) | |
| 308 return; | |
| 309 break; | 306 break; |
| 310 case V: | 307 case V: |
| 311 if (MatchKeywordStart(input, "var", 1, Token::VAR)) | 308 if (MatchKeywordStart(input, "var", 1, Token::VAR)) return; |
| 312 return; | 309 if (MatchKeywordStart(input, "void", 1, Token::VOID)) return; |
| 313 if (MatchKeywordStart(input, "void", 1, Token::VOID)) | |
| 314 return; | |
| 315 break; | 310 break; |
| 316 case W: | 311 case W: |
| 317 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) | 312 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return; |
| 318 return; | 313 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; |
| 319 if (MatchKeywordStart(input, "with", 1, Token::WITH)) | |
| 320 return; | |
| 321 break; | 314 break; |
| 322 default: | 315 default: |
| 323 UNREACHABLE(); | 316 UNREACHABLE(); |
| 324 } | 317 } |
| 325 // On fallthrough, it's a failure. | 318 // On fallthrough, it's a failure. |
| 326 state_ = UNMATCHABLE; | 319 state_ = UNMATCHABLE; |
| 327 } | 320 } |
| 328 | 321 |
| 322 |
| 329 // ---------------------------------------------------------------------------- | 323 // ---------------------------------------------------------------------------- |
| 330 // Scanner | 324 // Scanner |
| 331 | 325 |
| 332 Scanner::Scanner(bool pre) : | 326 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { } |
| 333 stack_overflow_(false), is_pre_parsing_(pre) { | 327 |
| 334 } | |
| 335 | 328 |
| 336 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, | 329 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, |
| 337 int position) { | 330 int position) { |
| 338 // Initialize the source buffer. | 331 // Initialize the source buffer. |
| 339 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { | 332 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { |
| 340 two_byte_string_buffer_.Initialize( | 333 two_byte_string_buffer_.Initialize( |
| 341 Handle<ExternalTwoByteString>::cast( | 334 Handle<ExternalTwoByteString>::cast(source)); |
| 342 source)); | |
| 343 source_ = &two_byte_string_buffer_; | 335 source_ = &two_byte_string_buffer_; |
| 344 } else { | 336 } else { |
| 345 char_stream_buffer_.Initialize(source, stream); | 337 char_stream_buffer_.Initialize(source, stream); |
| 346 source_ = &char_stream_buffer_; | 338 source_ = &char_stream_buffer_; |
| 347 } | 339 } |
| 348 | 340 |
| 349 position_ = position; | 341 position_ = position; |
| 350 | 342 |
| 351 // Set c0_ (one character ahead) | 343 // Set c0_ (one character ahead) |
| 352 ASSERT(kCharacterLookaheadBufferSize == 1); | 344 ASSERT(kCharacterLookaheadBufferSize == 1); |
| 353 Advance(); | 345 Advance(); |
| 354 | 346 |
| 355 // Skip initial whitespace allowing HTML comment ends just like | 347 // Skip initial whitespace allowing HTML comment ends just like |
| 356 // after a newline and scan first token. | 348 // after a newline and scan first token. |
| 357 has_line_terminator_before_next_ = true; | 349 has_line_terminator_before_next_ = true; |
| 358 SkipWhiteSpace(); | 350 SkipWhiteSpace(); |
| 359 Scan(); | 351 Scan(); |
| 360 } | 352 } |
| 361 | 353 |
| 354 |
| 362 Handle<String> Scanner::SubString(int start, int end) { | 355 Handle<String> Scanner::SubString(int start, int end) { |
| 363 return source_->SubString(start - position_, end - position_); | 356 return source_->SubString(start - position_, end - position_); |
| 364 } | 357 } |
| 365 | 358 |
| 359 |
| 366 Token::Value Scanner::Next() { | 360 Token::Value Scanner::Next() { |
| 367 // BUG 1215673: Find a thread safe way to set a stack limit in | 361 // BUG 1215673: Find a thread safe way to set a stack limit in |
| 368 // pre-parse mode. Otherwise, we cannot safely pre-parse from other | 362 // pre-parse mode. Otherwise, we cannot safely pre-parse from other |
| 369 // threads. | 363 // threads. |
| 370 current_ = next_; | 364 current_ = next_; |
| 371 // Check for stack-overflow before returning any tokens. | 365 // Check for stack-overflow before returning any tokens. |
| 372 StackLimitCheck check; | 366 StackLimitCheck check; |
| 373 if (check.HasOverflowed()) { | 367 if (check.HasOverflowed()) { |
| 374 stack_overflow_ = true; | 368 stack_overflow_ = true; |
| 375 next_.token = Token::ILLEGAL; | 369 next_.token = Token::ILLEGAL; |
| 376 next_.literal_buffer = NULL; | 370 next_.literal_buffer = NULL; |
| 377 } else { | 371 } else { |
| 378 Scan(); | 372 Scan(); |
| 379 } | 373 } |
| 380 return current_.token; | 374 return current_.token; |
| 381 } | 375 } |
| 382 | 376 |
| 377 |
| 383 void Scanner::StartLiteral() { | 378 void Scanner::StartLiteral() { |
| 384 // Use the first buffer unless it's currently in use by the current_ token. | 379 // Use the first buffer unless it's currently in use by the current_ token. |
| 385 // In most cases we won't have two literals/identifiers in a row, so | 380 // In most cases, we won't have two literals/identifiers in a row so |
| 386 // the second buffer won't be used very often and is unlikely to grow much. | 381 // the second buffer won't be used very often and is unlikely to grow much. |
| 387 UTF8Buffer* free_buffer = | 382 UTF8Buffer* free_buffer = |
| 388 (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_ | 383 (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_ |
| 389 : &literal_buffer_2_; | 384 : &literal_buffer_2_; |
| 390 next_.literal_buffer = free_buffer; | 385 next_.literal_buffer = free_buffer; |
| 391 free_buffer->Reset(); | 386 free_buffer->Reset(); |
| 392 } | 387 } |
| 393 | 388 |
| 389 |
| 394 void Scanner::AddChar(uc32 c) { | 390 void Scanner::AddChar(uc32 c) { |
| 395 next_.literal_buffer->AddChar(c); | 391 next_.literal_buffer->AddChar(c); |
| 396 } | 392 } |
| 397 | 393 |
| 394 |
| 398 void Scanner::TerminateLiteral() { | 395 void Scanner::TerminateLiteral() { |
| 399 AddChar(0); | 396 AddChar(0); |
| 400 } | 397 } |
| 401 | 398 |
| 399 |
| 402 void Scanner::AddCharAdvance() { | 400 void Scanner::AddCharAdvance() { |
| 403 AddChar(c0_); | 401 AddChar(c0_); |
| 404 Advance(); | 402 Advance(); |
| 405 } | 403 } |
| 406 | 404 |
| 405 |
| 407 static inline bool IsByteOrderMark(uc32 c) { | 406 static inline bool IsByteOrderMark(uc32 c) { |
| 408 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 407 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
| 409 // Unicode character; this implies that in a Unicode context the | 408 // Unicode character; this implies that in a Unicode context the |
| 410 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 409 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
| 411 // character expressed in little-endian byte order (since it could | 410 // character expressed in little-endian byte order (since it could |
| 412 // not be a U+FFFE character expressed in big-endian byte | 411 // not be a U+FFFE character expressed in big-endian byte |
| 413 // order). Nevertheless, we check for it to be compatible with | 412 // order). Nevertheless, we check for it to be compatible with |
| 414 // Spidermonkey. | 413 // Spidermonkey. |
| 415 return c == 0xFEFF || c == 0xFFFE; | 414 return c == 0xFEFF || c == 0xFFFE; |
| 416 } | 415 } |
| 417 | 416 |
| 417 |
| 418 bool Scanner::SkipWhiteSpace() { | 418 bool Scanner::SkipWhiteSpace() { |
| 419 int start_position = source_pos(); | 419 int start_position = source_pos(); |
| 420 | 420 |
| 421 while (true) { | 421 while (true) { |
| 422 // We treat byte-order marks (BOMs) as whitespace for better | 422 // We treat byte-order marks (BOMs) as whitespace for better |
| 423 // compatibility with Spidermonkey and other JavaScript engines. | 423 // compatibility with Spidermonkey and other JavaScript engines. |
| 424 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | 424 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { |
| 425 // IsWhiteSpace() includes line terminators! | 425 // IsWhiteSpace() includes line terminators! |
| 426 if (kIsLineTerminator.get(c0_)) { | 426 if (kIsLineTerminator.get(c0_)) { |
| 427 // Ignore line terminators, but remember them. This is necessary | 427 // Ignore line terminators, but remember them. This is necessary |
| (...skipping 10 matching lines...) Expand all Loading... |
| 438 if (c0_ == '-' && has_line_terminator_before_next_) { | 438 if (c0_ == '-' && has_line_terminator_before_next_) { |
| 439 Advance(); | 439 Advance(); |
| 440 if (c0_ == '-') { | 440 if (c0_ == '-') { |
| 441 Advance(); | 441 Advance(); |
| 442 if (c0_ == '>') { | 442 if (c0_ == '>') { |
| 443 // Treat the rest of the line as a comment. | 443 // Treat the rest of the line as a comment. |
| 444 SkipSingleLineComment(); | 444 SkipSingleLineComment(); |
| 445 // Continue skipping white space after the comment. | 445 // Continue skipping white space after the comment. |
| 446 continue; | 446 continue; |
| 447 } | 447 } |
| 448 PushBack('-'); // undo Advance() | 448 PushBack('-'); // undo Advance() |
| 449 } | 449 } |
| 450 PushBack('-'); // undo Advance() | 450 PushBack('-'); // undo Advance() |
| 451 } | 451 } |
| 452 // Return whether or not we skipped any characters. | 452 // Return whether or not we skipped any characters. |
| 453 return source_pos() != start_position; | 453 return source_pos() != start_position; |
| 454 } | 454 } |
| 455 } | 455 } |
| 456 | 456 |
| 457 |
| 457 Token::Value Scanner::SkipSingleLineComment() { | 458 Token::Value Scanner::SkipSingleLineComment() { |
| 458 Advance(); | 459 Advance(); |
| 459 | 460 |
| 460 // The line terminator at the end of the line is not considered | 461 // The line terminator at the end of the line is not considered |
| 461 // to be part of the single-line comment; it is recognized | 462 // to be part of the single-line comment; it is recognized |
| 462 // separately by the lexical grammar and becomes part of the | 463 // separately by the lexical grammar and becomes part of the |
| 463 // stream of input elements for the syntactic grammar (see | 464 // stream of input elements for the syntactic grammar (see |
| 464 // ECMA-262, section 7.4, page 12). | 465 // ECMA-262, section 7.4, page 12). |
| 465 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 466 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { |
| 466 Advance(); | 467 Advance(); |
| 467 } | 468 } |
| 468 | 469 |
| 469 return Token::WHITESPACE; | 470 return Token::WHITESPACE; |
| 470 } | 471 } |
| 471 | 472 |
| 473 |
| 472 Token::Value Scanner::SkipMultiLineComment() { | 474 Token::Value Scanner::SkipMultiLineComment() { |
| 473 ASSERT(c0_ == '*'); | 475 ASSERT(c0_ == '*'); |
| 474 Advance(); | 476 Advance(); |
| 475 | 477 |
| 476 while (c0_ >= 0) { | 478 while (c0_ >= 0) { |
| 477 char ch = c0_; | 479 char ch = c0_; |
| 478 Advance(); | 480 Advance(); |
| 479 // If we have reached the end of the multi-line comment, we | 481 // If we have reached the end of the multi-line comment, we |
| 480 // consume the '/' and insert a whitespace. This way all | 482 // consume the '/' and insert a whitespace. This way all |
| 481 // multi-line comments are treated as whitespace - even the ones | 483 // multi-line comments are treated as whitespace - even the ones |
| 482 // containing line terminators. This contradicts ECMA-262, section | 484 // containing line terminators. This contradicts ECMA-262, section |
| 483 // 7.4, page 12, that says that multi-line comments containing | 485 // 7.4, page 12, that says that multi-line comments containing |
| 484 // line terminators should be treated as a line terminator, but it | 486 // line terminators should be treated as a line terminator, but it |
| 485 // matches the behaviour of SpiderMonkey and KJS. | 487 // matches the behaviour of SpiderMonkey and KJS. |
| 486 if (ch == '*' && c0_ == '/') { | 488 if (ch == '*' && c0_ == '/') { |
| 487 c0_ = ' '; | 489 c0_ = ' '; |
| 488 return Token::WHITESPACE; | 490 return Token::WHITESPACE; |
| 489 } | 491 } |
| 490 } | 492 } |
| 491 | 493 |
| 492 // Unterminated multi-line comment. | 494 // Unterminated multi-line comment. |
| 493 return Token::ILLEGAL; | 495 return Token::ILLEGAL; |
| 494 } | 496 } |
| 495 | 497 |
| 498 |
| 496 Token::Value Scanner::ScanHtmlComment() { | 499 Token::Value Scanner::ScanHtmlComment() { |
| 497 // Check for <!-- comments. | 500 // Check for <!-- comments. |
| 498 ASSERT(c0_ == '!'); | 501 ASSERT(c0_ == '!'); |
| 499 Advance(); | 502 Advance(); |
| 500 if (c0_ == '-') { | 503 if (c0_ == '-') { |
| 501 Advance(); | 504 Advance(); |
| 502 if (c0_ == '-') | 505 if (c0_ == '-') return SkipSingleLineComment(); |
| 503 return SkipSingleLineComment(); | 506 PushBack('-'); // undo Advance() |
| 504 PushBack('-'); // undo Advance() | |
| 505 } | 507 } |
| 506 PushBack('!'); // undo Advance() | 508 PushBack('!'); // undo Advance() |
| 507 ASSERT(c0_ == '!'); | 509 ASSERT(c0_ == '!'); |
| 508 return Token::LT; | 510 return Token::LT; |
| 509 } | 511 } |
| 510 | 512 |
| 513 |
| 511 void Scanner::Scan() { | 514 void Scanner::Scan() { |
| 512 Token::Value token; | 515 Token::Value token; |
| 513 has_line_terminator_before_next_ = false; | 516 has_line_terminator_before_next_ = false; |
| 514 do { | 517 do { |
| 515 // Remember the position of the next token | 518 // Remember the position of the next token |
| 516 next_.location.beg_pos = source_pos(); | 519 next_.location.beg_pos = source_pos(); |
| 517 | 520 |
| 518 switch (c0_) { | 521 switch (c0_) { |
| 519 case ' ': | 522 case ' ': |
| 520 case '\t': | 523 case '\t': |
| 521 Advance(); | 524 Advance(); |
| 522 token = Token::WHITESPACE; | 525 token = Token::WHITESPACE; |
| 523 break; | 526 break; |
| 524 | 527 |
| 525 case '\n': | 528 case '\n': |
| 526 Advance(); | 529 Advance(); |
| 527 has_line_terminator_before_next_ = true; | 530 has_line_terminator_before_next_ = true; |
| 528 token = Token::WHITESPACE; | 531 token = Token::WHITESPACE; |
| 529 break; | 532 break; |
| 530 | 533 |
| 531 case '"': | 534 case '"': case '\'': |
| 532 case '\'': | |
| 533 token = ScanString(); | 535 token = ScanString(); |
| 534 break; | 536 break; |
| 535 | 537 |
| 536 case '<': | 538 case '<': |
| 537 // < <= << <<= <!-- | 539 // < <= << <<= <!-- |
| 538 Advance(); | 540 Advance(); |
| 539 if (c0_ == '=') { | 541 if (c0_ == '=') { |
| 540 token = Select(Token::LTE); | 542 token = Select(Token::LTE); |
| 541 } else if (c0_ == '<') { | 543 } else if (c0_ == '<') { |
| 542 token = Select('=', Token::ASSIGN_SHL, Token::SHL); | 544 token = Select('=', Token::ASSIGN_SHL, Token::SHL); |
| (...skipping 198 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 741 } | 743 } |
| 742 | 744 |
| 743 // Continue scanning for tokens as long as we're just skipping | 745 // Continue scanning for tokens as long as we're just skipping |
| 744 // whitespace. | 746 // whitespace. |
| 745 } while (token == Token::WHITESPACE); | 747 } while (token == Token::WHITESPACE); |
| 746 | 748 |
| 747 next_.location.end_pos = source_pos(); | 749 next_.location.end_pos = source_pos(); |
| 748 next_.token = token; | 750 next_.token = token; |
| 749 } | 751 } |
| 750 | 752 |
| 753 |
| 751 void Scanner::SeekForward(int pos) { | 754 void Scanner::SeekForward(int pos) { |
| 752 source_->SeekForward(pos - 1); | 755 source_->SeekForward(pos - 1); |
| 753 Advance(); | 756 Advance(); |
| 754 Scan(); | 757 Scan(); |
| 755 } | 758 } |
| 756 | 759 |
| 760 |
| 757 uc32 Scanner::ScanHexEscape(uc32 c, int length) { | 761 uc32 Scanner::ScanHexEscape(uc32 c, int length) { |
| 758 ASSERT(length <= 4); // prevent overflow | 762 ASSERT(length <= 4); // prevent overflow |
| 759 | 763 |
| 760 uc32 digits[4]; | 764 uc32 digits[4]; |
| 761 uc32 x = 0; | 765 uc32 x = 0; |
| 762 for (int i = 0; i < length; i++) { | 766 for (int i = 0; i < length; i++) { |
| 763 digits[i] = c0_; | 767 digits[i] = c0_; |
| 764 int d = HexValue(c0_); | 768 int d = HexValue(c0_); |
| 765 if (d < 0) { | 769 if (d < 0) { |
| 766 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes | 770 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes |
| 767 // should be illegal, but other JS VMs just return the | 771 // should be illegal, but other JS VMs just return the |
| 768 // non-escaped version of the original character. | 772 // non-escaped version of the original character. |
| 769 | 773 |
| 770 // Push back digits read, except the last one (in c0_). | 774 // Push back digits read, except the last one (in c0_). |
| 771 for (int j = i - 1; j >= 0; j--) { | 775 for (int j = i-1; j >= 0; j--) { |
| 772 PushBack(digits[j]); | 776 PushBack(digits[j]); |
| 773 } | 777 } |
| 774 // Notice: No handling of error - treat it as "\u"->"u". | 778 // Notice: No handling of error - treat it as "\u"->"u". |
| 775 return c; | 779 return c; |
| 776 } | 780 } |
| 777 x = x * 16 + d; | 781 x = x * 16 + d; |
| 778 Advance(); | 782 Advance(); |
| 779 } | 783 } |
| 780 | 784 |
| 781 return x; | 785 return x; |
| 782 } | 786 } |
| 783 | 787 |
| 788 |
| 784 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of | 789 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of |
| 785 // ECMA-262. Other JS VMs support them. | 790 // ECMA-262. Other JS VMs support them. |
| 786 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { | 791 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { |
| 787 uc32 x = c - '0'; | 792 uc32 x = c - '0'; |
| 788 for (int i = 0; i < length; i++) { | 793 for (int i = 0; i < length; i++) { |
| 789 int d = c0_ - '0'; | 794 int d = c0_ - '0'; |
| 790 if (d < 0 || d > 7) | 795 if (d < 0 || d > 7) break; |
| 791 break; | |
| 792 int nx = x * 8 + d; | 796 int nx = x * 8 + d; |
| 793 if (nx >= 256) | 797 if (nx >= 256) break; |
| 794 break; | |
| 795 x = nx; | 798 x = nx; |
| 796 Advance(); | 799 Advance(); |
| 797 } | 800 } |
| 798 return x; | 801 return x; |
| 799 } | 802 } |
| 800 | 803 |
| 804 |
| 801 void Scanner::ScanEscape() { | 805 void Scanner::ScanEscape() { |
| 802 uc32 c = c0_; | 806 uc32 c = c0_; |
| 803 Advance(); | 807 Advance(); |
| 804 | 808 |
| 805 // Skip escaped newlines. | 809 // Skip escaped newlines. |
| 806 if (kIsLineTerminator.get(c)) { | 810 if (kIsLineTerminator.get(c)) { |
| 807 // Allow CR+LF newlines in multiline string literals. | 811 // Allow CR+LF newlines in multiline string literals. |
| 808 if (IsCarriageReturn(c) && IsLineFeed(c0_)) | 812 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
| 809 Advance(); | |
| 810 // Allow LF+CR newlines in multiline string literals. | 813 // Allow LF+CR newlines in multiline string literals. |
| 811 if (IsLineFeed(c) && IsCarriageReturn(c0_)) | 814 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); |
| 812 Advance(); | |
| 813 return; | 815 return; |
| 814 } | 816 } |
| 815 | 817 |
| 816 switch (c) { | 818 switch (c) { |
| 817 case '\'': // fall through | 819 case '\'': // fall through |
| 818 case '"': // fall through | 820 case '"' : // fall through |
| 819 case '\\': | 821 case '\\': break; |
| 820 break; | 822 case 'b' : c = '\b'; break; |
| 821 case 'b': | 823 case 'f' : c = '\f'; break; |
| 822 c = '\b'; | 824 case 'n' : c = '\n'; break; |
| 823 break; | 825 case 'r' : c = '\r'; break; |
| 824 case 'f': | 826 case 't' : c = '\t'; break; |
| 825 c = '\f'; | 827 case 'u' : c = ScanHexEscape(c, 4); break; |
| 826 break; | 828 case 'v' : c = '\v'; break; |
| 827 case 'n': | 829 case 'x' : c = ScanHexEscape(c, 2); break; |
| 828 c = '\n'; | 830 case '0' : // fall through |
| 829 break; | 831 case '1' : // fall through |
| 830 case 'r': | 832 case '2' : // fall through |
| 831 c = '\r'; | 833 case '3' : // fall through |
| 832 break; | 834 case '4' : // fall through |
| 833 case 't': | 835 case '5' : // fall through |
| 834 c = '\t'; | 836 case '6' : // fall through |
| 835 break; | 837 case '7' : c = ScanOctalEscape(c, 2); break; |
| 836 case 'u': | |
| 837 c = ScanHexEscape(c, 4); | |
| 838 break; | |
| 839 case 'v': | |
| 840 c = '\v'; | |
| 841 break; | |
| 842 case 'x': | |
| 843 c = ScanHexEscape(c, 2); | |
| 844 break; | |
| 845 case '0': // fall through | |
| 846 case '1': // fall through | |
| 847 case '2': // fall through | |
| 848 case '3': // fall through | |
| 849 case '4': // fall through | |
| 850 case '5': // fall through | |
| 851 case '6': // fall through | |
| 852 case '7': | |
| 853 c = ScanOctalEscape(c, 2); | |
| 854 break; | |
| 855 } | 838 } |
| 856 | 839 |
| 857 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these | 840 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these |
| 858 // should be illegal, but they are commonly handled | 841 // should be illegal, but they are commonly handled |
| 859 // as non-escaped characters by JS VMs. | 842 // as non-escaped characters by JS VMs. |
| 860 AddChar(c); | 843 AddChar(c); |
| 861 } | 844 } |
| 862 | 845 |
| 846 |
| 863 Token::Value Scanner::ScanString() { | 847 Token::Value Scanner::ScanString() { |
| 864 uc32 quote = c0_; | 848 uc32 quote = c0_; |
| 865 Advance(); // consume quote | 849 Advance(); // consume quote |
| 866 | 850 |
| 867 StartLiteral(); | 851 StartLiteral(); |
| 868 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 852 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { |
| 869 uc32 c = c0_; | 853 uc32 c = c0_; |
| 870 Advance(); | 854 Advance(); |
| 871 if (c == '\\') { | 855 if (c == '\\') { |
| 872 if (c0_ < 0) | 856 if (c0_ < 0) return Token::ILLEGAL; |
| 873 return Token::ILLEGAL; | |
| 874 ScanEscape(); | 857 ScanEscape(); |
| 875 } else { | 858 } else { |
| 876 AddChar(c); | 859 AddChar(c); |
| 877 } | 860 } |
| 878 } | 861 } |
| 879 if (c0_ != quote) { | 862 if (c0_ != quote) { |
| 880 return Token::ILLEGAL; | 863 return Token::ILLEGAL; |
| 881 } | 864 } |
| 882 TerminateLiteral(); | 865 TerminateLiteral(); |
| 883 | 866 |
| 884 Advance(); // consume quote | 867 Advance(); // consume quote |
| 885 return Token::STRING; | 868 return Token::STRING; |
| 886 } | 869 } |
| 887 | 870 |
| 871 |
| 888 Token::Value Scanner::Select(Token::Value tok) { | 872 Token::Value Scanner::Select(Token::Value tok) { |
| 889 Advance(); | 873 Advance(); |
| 890 return tok; | 874 return tok; |
| 891 } | 875 } |
| 892 | 876 |
| 877 |
| 893 Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) { | 878 Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) { |
| 894 Advance(); | 879 Advance(); |
| 895 if (c0_ == next) { | 880 if (c0_ == next) { |
| 896 Advance(); | 881 Advance(); |
| 897 return then; | 882 return then; |
| 898 } else { | 883 } else { |
| 899 return else_; | 884 return else_; |
| 900 } | 885 } |
| 901 } | 886 } |
| 902 | 887 |
| 888 |
| 903 // Returns true if any decimal digits were scanned, returns false otherwise. | 889 // Returns true if any decimal digits were scanned, returns false otherwise. |
| 904 void Scanner::ScanDecimalDigits() { | 890 void Scanner::ScanDecimalDigits() { |
| 905 while (IsDecimalDigit(c0_)) | 891 while (IsDecimalDigit(c0_)) |
| 906 AddCharAdvance(); | 892 AddCharAdvance(); |
| 907 } | 893 } |
| 908 | 894 |
| 895 |
| 909 Token::Value Scanner::ScanNumber(bool seen_period) { | 896 Token::Value Scanner::ScanNumber(bool seen_period) { |
| 910 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 897 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
| 911 | 898 |
| 912 enum { | 899 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; |
| 913 DECIMAL, HEX, OCTAL | |
| 914 } kind = DECIMAL; | |
| 915 | 900 |
| 916 StartLiteral(); | 901 StartLiteral(); |
| 917 if (seen_period) { | 902 if (seen_period) { |
| 918 // we have already seen a decimal point of the float | 903 // we have already seen a decimal point of the float |
| 919 AddChar('.'); | 904 AddChar('.'); |
| 920 ScanDecimalDigits(); // we know we have at least one digit | 905 ScanDecimalDigits(); // we know we have at least one digit |
| 921 | 906 |
| 922 } else { | 907 } else { |
| 923 // if the first character is '0' we must check for octals and hex | 908 // if the first character is '0' we must check for octals and hex |
| 924 if (c0_ == '0') { | 909 if (c0_ == '0') { |
| 925 AddCharAdvance(); | 910 AddCharAdvance(); |
| 926 | 911 |
| 927 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number | 912 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number |
| 928 if (c0_ == 'x' || c0_ == 'X') { | 913 if (c0_ == 'x' || c0_ == 'X') { |
| 929 // hex number | 914 // hex number |
| 930 kind = HEX; | 915 kind = HEX; |
| 931 AddCharAdvance(); | 916 AddCharAdvance(); |
| 932 if (!IsHexDigit(c0_)) | 917 if (!IsHexDigit(c0_)) |
| 933 // we must have at least one hex digit after 'x'/'X' | 918 // we must have at least one hex digit after 'x'/'X' |
| 934 return Token::ILLEGAL; | 919 return Token::ILLEGAL; |
| 935 while (IsHexDigit(c0_)) | 920 while (IsHexDigit(c0_)) |
| 936 AddCharAdvance(); | 921 AddCharAdvance(); |
| 937 | 922 |
| 938 } else if ('0' <= c0_ && c0_ <= '7') { | 923 } else if ('0' <= c0_ && c0_ <= '7') { |
| 939 // (possible) octal number | 924 // (possible) octal number |
| 940 kind = OCTAL; | 925 kind = OCTAL; |
| 941 while (true) { | 926 while (true) { |
| 942 if (c0_ == '8' || c0_ == '9') { | 927 if (c0_ == '8' || c0_ == '9') { |
| 943 kind = DECIMAL; | 928 kind = DECIMAL; |
| 944 break; | 929 break; |
| 945 } | 930 } |
| 946 if (c0_ < '0' || '7' < c0_) | 931 if (c0_ < '0' || '7' < c0_) break; |
| 947 break; | |
| 948 AddCharAdvance(); | 932 AddCharAdvance(); |
| 949 } | 933 } |
| 950 } | 934 } |
| 951 } | 935 } |
| 952 | 936 |
| 953 // Parse decimal digits and allow trailing fractional part. | 937 // Parse decimal digits and allow trailing fractional part. |
| 954 if (kind == DECIMAL) { | 938 if (kind == DECIMAL) { |
| 955 ScanDecimalDigits(); // optional | 939 ScanDecimalDigits(); // optional |
| 956 if (c0_ == '.') { | 940 if (c0_ == '.') { |
| 957 AddCharAdvance(); | 941 AddCharAdvance(); |
| 958 ScanDecimalDigits(); // optional | 942 ScanDecimalDigits(); // optional |
| 959 } | 943 } |
| 960 } | 944 } |
| 961 } | 945 } |
| 962 | 946 |
| 963 // scan exponent, if any | 947 // scan exponent, if any |
| 964 if (c0_ == 'e' || c0_ == 'E') { | 948 if (c0_ == 'e' || c0_ == 'E') { |
| 965 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number | 949 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number |
| 966 if (kind == OCTAL) | 950 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed |
| 967 return Token::ILLEGAL; // no exponent for octals allowed | |
| 968 // scan exponent | 951 // scan exponent |
| 969 AddCharAdvance(); | 952 AddCharAdvance(); |
| 970 if (c0_ == '+' || c0_ == '-') | 953 if (c0_ == '+' || c0_ == '-') |
| 971 AddCharAdvance(); | 954 AddCharAdvance(); |
| 972 if (!IsDecimalDigit(c0_)) | 955 if (!IsDecimalDigit(c0_)) |
| 973 // we must have at least one decimal digit after 'e'/'E' | 956 // we must have at least one decimal digit after 'e'/'E' |
| 974 return Token::ILLEGAL; | 957 return Token::ILLEGAL; |
| 975 ScanDecimalDigits(); | 958 ScanDecimalDigits(); |
| 976 } | 959 } |
| 977 TerminateLiteral(); | 960 TerminateLiteral(); |
| 978 | 961 |
| 979 // The source character immediately following a numeric literal must | 962 // The source character immediately following a numeric literal must |
| 980 // not be an identifier start or a decimal digit; see ECMA-262 | 963 // not be an identifier start or a decimal digit; see ECMA-262 |
| 981 // section 7.8.3, page 17 (note that we read only one decimal digit | 964 // section 7.8.3, page 17 (note that we read only one decimal digit |
| 982 // if the value is 0). | 965 // if the value is 0). |
| 983 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) | 966 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) |
| 984 return Token::ILLEGAL; | 967 return Token::ILLEGAL; |
| 985 | 968 |
| 986 return Token::NUMBER; | 969 return Token::NUMBER; |
| 987 } | 970 } |
| 988 | 971 |
| 972 |
| 989 uc32 Scanner::ScanIdentifierUnicodeEscape() { | 973 uc32 Scanner::ScanIdentifierUnicodeEscape() { |
| 990 Advance(); | 974 Advance(); |
| 991 if (c0_ != 'u') | 975 if (c0_ != 'u') return unibrow::Utf8::kBadChar; |
| 992 return unibrow::Utf8::kBadChar; | |
| 993 Advance(); | 976 Advance(); |
| 994 uc32 c = ScanHexEscape('u', 4); | 977 uc32 c = ScanHexEscape('u', 4); |
| 995 // We do not allow a unicode escape sequence to start another | 978 // We do not allow a unicode escape sequence to start another |
| 996 // unicode escape sequence. | 979 // unicode escape sequence. |
| 997 if (c == '\\') | 980 if (c == '\\') return unibrow::Utf8::kBadChar; |
| 998 return unibrow::Utf8::kBadChar; | |
| 999 return c; | 981 return c; |
| 1000 } | 982 } |
| 1001 | 983 |
| 984 |
| 1002 Token::Value Scanner::ScanIdentifier() { | 985 Token::Value Scanner::ScanIdentifier() { |
| 1003 ASSERT(kIsIdentifierStart.get(c0_)); | 986 ASSERT(kIsIdentifierStart.get(c0_)); |
| 1004 | 987 |
| 1005 StartLiteral(); | 988 StartLiteral(); |
| 1006 KeywordMatcher keyword_match; | 989 KeywordMatcher keyword_match; |
| 1007 | 990 |
| 1008 // Scan identifier start character. | 991 // Scan identifier start character. |
| 1009 if (c0_ == '\\') { | 992 if (c0_ == '\\') { |
| 1010 uc32 c = ScanIdentifierUnicodeEscape(); | 993 uc32 c = ScanIdentifierUnicodeEscape(); |
| 1011 // Only allow legal identifier start characters. | 994 // Only allow legal identifier start characters. |
| 1012 if (!kIsIdentifierStart.get(c)) | 995 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; |
| 1013 return Token::ILLEGAL; | |
| 1014 AddChar(c); | 996 AddChar(c); |
| 1015 keyword_match.Fail(); | 997 keyword_match.Fail(); |
| 1016 } else { | 998 } else { |
| 1017 AddChar(c0_); | 999 AddChar(c0_); |
| 1018 keyword_match.AddChar(c0_); | 1000 keyword_match.AddChar(c0_); |
| 1019 Advance(); | 1001 Advance(); |
| 1020 } | 1002 } |
| 1021 | 1003 |
| 1022 // Scan the rest of the identifier characters. | 1004 // Scan the rest of the identifier characters. |
| 1023 while (kIsIdentifierPart.get(c0_)) { | 1005 while (kIsIdentifierPart.get(c0_)) { |
| 1024 if (c0_ == '\\') { | 1006 if (c0_ == '\\') { |
| 1025 uc32 c = ScanIdentifierUnicodeEscape(); | 1007 uc32 c = ScanIdentifierUnicodeEscape(); |
| 1026 // Only allow legal identifier part characters. | 1008 // Only allow legal identifier part characters. |
| 1027 if (!kIsIdentifierPart.get(c)) | 1009 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; |
| 1028 return Token::ILLEGAL; | |
| 1029 AddChar(c); | 1010 AddChar(c); |
| 1030 keyword_match.Fail(); | 1011 keyword_match.Fail(); |
| 1031 } else { | 1012 } else { |
| 1032 AddChar(c0_); | 1013 AddChar(c0_); |
| 1033 keyword_match.AddChar(c0_); | 1014 keyword_match.AddChar(c0_); |
| 1034 Advance(); | 1015 Advance(); |
| 1035 } | 1016 } |
| 1036 } | 1017 } |
| 1037 TerminateLiteral(); | 1018 TerminateLiteral(); |
| 1038 | 1019 |
| 1039 return keyword_match.token(); | 1020 return keyword_match.token(); |
| 1040 } | 1021 } |
| 1041 | 1022 |
| 1023 |
| 1024 |
| 1042 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { | 1025 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { |
| 1043 // Checks whether the buffer contains an identifier (no escape). | 1026 // Checks whether the buffer contains an identifier (no escape). |
| 1044 if (!buffer->has_more()) | 1027 if (!buffer->has_more()) return false; |
| 1045 return false; | 1028 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; |
| 1046 if (!kIsIdentifierStart.get(buffer->GetNext())) | |
| 1047 return false; | |
| 1048 while (buffer->has_more()) { | 1029 while (buffer->has_more()) { |
| 1049 if (!kIsIdentifierPart.get(buffer->GetNext())) | 1030 if (!kIsIdentifierPart.get(buffer->GetNext())) return false; |
| 1050 return false; | |
| 1051 } | 1031 } |
| 1052 return true; | 1032 return true; |
| 1053 } | 1033 } |
| 1054 | 1034 |
| 1035 |
| 1055 bool Scanner::ScanRegExpPattern(bool seen_equal) { | 1036 bool Scanner::ScanRegExpPattern(bool seen_equal) { |
| 1056 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 1037 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
| 1057 bool in_character_class = false; | 1038 bool in_character_class = false; |
| 1058 | 1039 |
| 1059 // Previous token is either '/' or '/=', in the second case, the | 1040 // Previous token is either '/' or '/=', in the second case, the |
| 1060 // pattern starts at =. | 1041 // pattern starts at =. |
| 1061 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 1042 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
| 1062 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 1043 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
| 1063 | 1044 |
| 1064 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 1045 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| 1065 // the scanner should pass uninterpreted bodies to the RegExp | 1046 // the scanner should pass uninterpreted bodies to the RegExp |
| 1066 // constructor. | 1047 // constructor. |
| 1067 StartLiteral(); | 1048 StartLiteral(); |
| 1068 if (seen_equal) | 1049 if (seen_equal) |
| 1069 AddChar('='); | 1050 AddChar('='); |
| 1070 | 1051 |
| 1071 while (c0_ != '/' || in_character_class) { | 1052 while (c0_ != '/' || in_character_class) { |
| 1072 if (kIsLineTerminator.get(c0_) || c0_ < 0) | 1053 if (kIsLineTerminator.get(c0_) || c0_ < 0) |
| 1073 return false; | 1054 return false; |
| 1074 if (c0_ == '\\') { // escaped character | 1055 if (c0_ == '\\') { // escaped character |
| 1075 AddCharAdvance(); | 1056 AddCharAdvance(); |
| 1076 if (kIsLineTerminator.get(c0_) || c0_ < 0) | 1057 if (kIsLineTerminator.get(c0_) || c0_ < 0) |
| 1077 return false; | 1058 return false; |
| 1078 AddCharAdvance(); | 1059 AddCharAdvance(); |
| 1079 } else { // unescaped character | 1060 } else { // unescaped character |
| 1080 if (c0_ == '[') | 1061 if (c0_ == '[') |
| 1081 in_character_class = true; | 1062 in_character_class = true; |
| 1082 if (c0_ == ']') | 1063 if (c0_ == ']') |
| 1083 in_character_class = false; | 1064 in_character_class = false; |
| 1084 AddCharAdvance(); | 1065 AddCharAdvance(); |
| 1085 } | 1066 } |
| 1086 } | 1067 } |
| 1087 Advance(); // consume '/' | 1068 Advance(); // consume '/' |
| 1088 | 1069 |
| 1089 TerminateLiteral(); | 1070 TerminateLiteral(); |
| 1090 | 1071 |
| 1091 return true; | 1072 return true; |
| 1092 } | 1073 } |
| 1093 | 1074 |
| 1094 bool Scanner::ScanRegExpFlags() { | 1075 bool Scanner::ScanRegExpFlags() { |
| 1095 // Scan regular expression flags. | 1076 // Scan regular expression flags. |
| 1096 StartLiteral(); | 1077 StartLiteral(); |
| 1097 while (kIsIdentifierPart.get(c0_)) { | 1078 while (kIsIdentifierPart.get(c0_)) { |
| 1098 if (c0_ == '\\') { | 1079 if (c0_ == '\\') { |
| 1099 uc32 c = ScanIdentifierUnicodeEscape(); | 1080 uc32 c = ScanIdentifierUnicodeEscape(); |
| 1100 if (c != static_cast<uc32> (unibrow::Utf8::kBadChar)) { | 1081 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { |
| 1101 // We allow any escaped character, unlike the restriction on | 1082 // We allow any escaped character, unlike the restriction on |
| 1102 // IdentifierPart when it is used to build an IdentifierName. | 1083 // IdentifierPart when it is used to build an IdentifierName. |
| 1103 AddChar(c); | 1084 AddChar(c); |
| 1104 continue; | 1085 continue; |
| 1105 } | 1086 } |
| 1106 } | 1087 } |
| 1107 AddCharAdvance(); | 1088 AddCharAdvance(); |
| 1108 } | 1089 } |
| 1109 TerminateLiteral(); | 1090 TerminateLiteral(); |
| 1110 | 1091 |
| 1111 next_.location.end_pos = source_pos() - 1; | 1092 next_.location.end_pos = source_pos() - 1; |
| 1112 return true; | 1093 return true; |
| 1113 } | 1094 } |
| 1114 | 1095 |
| 1115 } | 1096 } } // namespace v8::internal |
| 1116 } // namespace v8::internal | |
| OLD | NEW |