OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
73 // ---------------------------------------------------------------------------- | 73 // ---------------------------------------------------------------------------- |
74 // JavaScriptScanner | 74 // JavaScriptScanner |
75 | 75 |
76 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants) | 76 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants) |
77 : Scanner(scanner_contants), octal_pos_(Location::invalid()) { } | 77 : Scanner(scanner_contants), octal_pos_(Location::invalid()) { } |
78 | 78 |
79 | 79 |
80 Token::Value JavaScriptScanner::Next() { | 80 Token::Value JavaScriptScanner::Next() { |
81 current_ = next_; | 81 current_ = next_; |
82 has_line_terminator_before_next_ = false; | 82 has_line_terminator_before_next_ = false; |
83 next_is_first_on_line_ = false; | |
William Hesse
2011/06/21 11:37:41
Could we call these things something more parallel
| |
83 Scan(); | 84 Scan(); |
84 return current_.token; | 85 return current_.token; |
85 } | 86 } |
86 | 87 |
87 | 88 |
88 static inline bool IsByteOrderMark(uc32 c) { | 89 static inline bool IsByteOrderMark(uc32 c) { |
89 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 90 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
90 // Unicode character; this implies that in a Unicode context the | 91 // Unicode character; this implies that in a Unicode context the |
91 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 92 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
92 // character expressed in little-endian byte order (since it could | 93 // character expressed in little-endian byte order (since it could |
93 // not be a U+FFFE character expressed in big-endian byte | 94 // not be a U+FFFE character expressed in big-endian byte |
94 // order). Nevertheless, we check for it to be compatible with | 95 // order). Nevertheless, we check for it to be compatible with |
95 // Spidermonkey. | 96 // Spidermonkey. |
96 return c == 0xFEFF || c == 0xFFFE; | 97 return c == 0xFEFF || c == 0xFFFE; |
97 } | 98 } |
98 | 99 |
99 | 100 |
100 bool JavaScriptScanner::SkipWhiteSpace() { | 101 bool JavaScriptScanner::SkipWhiteSpace() { |
101 int start_position = source_pos(); | 102 int start_position = source_pos(); |
102 | 103 |
103 while (true) { | 104 while (true) { |
104 // We treat byte-order marks (BOMs) as whitespace for better | 105 // We treat byte-order marks (BOMs) as whitespace for better |
105 // compatibility with Spidermonkey and other JavaScript engines. | 106 // compatibility with Spidermonkey and other JavaScript engines. |
106 while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) { | 107 while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) { |
107 // IsWhiteSpace() includes line terminators! | 108 // IsWhiteSpace() includes line terminators! |
108 if (unicode_cache_->IsLineTerminator(c0_)) { | 109 if (unicode_cache_->IsLineTerminator(c0_)) { |
109 // Ignore line terminators, but remember them. This is necessary | 110 // Ignore line terminators, but remember them. This is necessary |
110 // for automatic semicolon insertion. | 111 // for automatic semicolon insertion. |
111 has_line_terminator_before_next_ = true; | 112 has_line_terminator_before_next_ = true; |
113 next_is_first_on_line_ = true; | |
112 } | 114 } |
113 Advance(); | 115 Advance(); |
114 } | 116 } |
115 | 117 |
116 // If there is an HTML comment end '-->' at the beginning of a | 118 // If there is an HTML comment end '-->' at the beginning of a |
117 // line (with only whitespace in front of it), we treat the rest | 119 // line (with only whitespace in front of it), we treat the rest |
118 // of the line as a comment. This is in line with the way | 120 // of the line as a comment. This is in line with the way |
119 // SpiderMonkey handles it. | 121 // SpiderMonkey handles it. |
120 if (c0_ == '-' && has_line_terminator_before_next_) { | 122 if (c0_ == '-' && next_is_first_on_line_) { |
121 Advance(); | 123 Advance(); |
122 if (c0_ == '-') { | 124 if (c0_ == '-') { |
123 Advance(); | 125 Advance(); |
124 if (c0_ == '>') { | 126 if (c0_ == '>') { |
125 // Treat the rest of the line as a comment. | 127 // Treat the rest of the line as a comment. |
126 SkipSingleLineComment(); | 128 SkipSingleLineComment(); |
127 // Continue skipping white space after the comment. | 129 // Continue skipping white space after the comment. |
128 continue; | 130 continue; |
129 } | 131 } |
130 PushBack('-'); // undo Advance() | 132 PushBack('-'); // undo Advance() |
(...skipping 29 matching lines...) Expand all Loading... | |
160 while (c0_ >= 0) { | 162 while (c0_ >= 0) { |
161 char ch = c0_; | 163 char ch = c0_; |
162 Advance(); | 164 Advance(); |
163 if (unicode_cache_->IsLineTerminator(ch)) { | 165 if (unicode_cache_->IsLineTerminator(ch)) { |
164 // Following ECMA-262, section 7.4, a comment containing | 166 // Following ECMA-262, section 7.4, a comment containing |
165 // a newline will make the comment count as a line-terminator. | 167 // a newline will make the comment count as a line-terminator. |
166 has_line_terminator_before_next_ = true; | 168 has_line_terminator_before_next_ = true; |
167 } | 169 } |
168 // If we have reached the end of the multi-line comment, we | 170 // If we have reached the end of the multi-line comment, we |
169 // consume the '/' and insert a whitespace. This way all | 171 // consume the '/' and insert a whitespace. This way all |
170 // multi-line comments are treated as whitespace. | 172 // multi-line comments are treated as whitespace (except |
173 // when checking whether there is non-whitespace before a | |
174 // --> comment). | |
171 if (ch == '*' && c0_ == '/') { | 175 if (ch == '*' && c0_ == '/') { |
172 c0_ = ' '; | 176 c0_ = ' '; |
173 return Token::WHITESPACE; | 177 return Token::WHITESPACE; |
174 } | 178 } |
175 } | 179 } |
176 | 180 |
177 // Unterminated multi-line comment. | 181 // Unterminated multi-line comment. |
178 return Token::ILLEGAL; | 182 return Token::ILLEGAL; |
179 } | 183 } |
180 | 184 |
(...skipping 23 matching lines...) Expand all Loading... | |
204 switch (c0_) { | 208 switch (c0_) { |
205 case ' ': | 209 case ' ': |
206 case '\t': | 210 case '\t': |
207 Advance(); | 211 Advance(); |
208 token = Token::WHITESPACE; | 212 token = Token::WHITESPACE; |
209 break; | 213 break; |
210 | 214 |
211 case '\n': | 215 case '\n': |
212 Advance(); | 216 Advance(); |
213 has_line_terminator_before_next_ = true; | 217 has_line_terminator_before_next_ = true; |
218 next_is_first_on_line_ = true; | |
214 token = Token::WHITESPACE; | 219 token = Token::WHITESPACE; |
215 break; | 220 break; |
216 | 221 |
217 case '"': case '\'': | 222 case '"': case '\'': |
218 token = ScanString(); | 223 token = ScanString(); |
219 break; | 224 break; |
220 | 225 |
221 case '<': | 226 case '<': |
222 // < <= << <<= <!-- | 227 // < <= << <<= <!-- |
223 Advance(); | 228 Advance(); |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
282 } else { | 287 } else { |
283 token = Token::ADD; | 288 token = Token::ADD; |
284 } | 289 } |
285 break; | 290 break; |
286 | 291 |
287 case '-': | 292 case '-': |
288 // - -- --> -= | 293 // - -- --> -= |
289 Advance(); | 294 Advance(); |
290 if (c0_ == '-') { | 295 if (c0_ == '-') { |
291 Advance(); | 296 Advance(); |
292 if (c0_ == '>' && has_line_terminator_before_next_) { | 297 if (c0_ == '>' && next_is_first_on_line_) { |
293 // For compatibility with SpiderMonkey, we skip lines that | 298 // For compatibility with SpiderMonkey, we skip lines that |
294 // start with an HTML comment end '-->'. | 299 // start with an HTML comment end '-->'. |
295 token = SkipSingleLineComment(); | 300 token = SkipSingleLineComment(); |
296 } else { | 301 } else { |
297 token = Token::DEC; | 302 token = Token::DEC; |
298 } | 303 } |
299 } else if (c0_ == '=') { | 304 } else if (c0_ == '=') { |
300 token = Select(Token::ASSIGN_SUB); | 305 token = Select(Token::ASSIGN_SUB); |
301 } else { | 306 } else { |
302 token = Token::SUB; | 307 token = Token::SUB; |
(...skipping 640 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
943 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; | 948 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; |
944 break; | 949 break; |
945 case UNMATCHABLE: | 950 case UNMATCHABLE: |
946 break; | 951 break; |
947 } | 952 } |
948 // On fallthrough, it's a failure. | 953 // On fallthrough, it's a failure. |
949 state_ = UNMATCHABLE; | 954 state_ = UNMATCHABLE; |
950 } | 955 } |
951 | 956 |
952 } } // namespace v8::internal | 957 } } // namespace v8::internal |
OLD | NEW |