| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 112 } | 112 } |
| 113 | 113 |
| 114 | 114 |
| 115 void UTF16Buffer::PushBack(uc32 ch) { | 115 void UTF16Buffer::PushBack(uc32 ch) { |
| 116 pushback_buffer()->Add(last_); | 116 pushback_buffer()->Add(last_); |
| 117 last_ = ch; | 117 last_ = ch; |
| 118 pos_--; | 118 pos_--; |
| 119 } | 119 } |
| 120 | 120 |
| 121 | 121 |
| 122 static inline bool IsByteOrderMark(uc32 c) { |
| 123 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
| 124 // Unicode character; this implies that in a Unicode context the |
| 125 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
| 126 // character expressed in little-endian byte order (since it could |
| 127 // not be a U+FFFE character expressed in big-endian byte |
| 128 // order). Nevertheless, we check for it to be compatible with |
| 129 // SpiderMonkey. |
| 130 return c == 0xFEFF || c == 0xFFFE; |
| 131 } |
| 132 |
| 133 |
| 122 uc32 UTF16Buffer::Advance() { | 134 uc32 UTF16Buffer::Advance() { |
| 123 // NOTE: It is of importance to Persian / Farsi resources that we do | 135 // NOTE: It is of importance to Persian / Farsi resources that we do |
| 124 // *not* strip format control characters in the scanner; see | 136 // *not* strip format control characters in the scanner; see |
| 125 // | 137 // |
| 126 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152 | 138 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152 |
| 127 // | 139 // |
| 128 // So, even though ECMA-262, section 7.1, page 11, dictates that we | 140 // So, even though ECMA-262, section 7.1, page 11, dictates that we |
| 129 // must remove Unicode format-control characters, we do not. This is | 141 // must remove Unicode format-control characters, we only remove the BOM. |
| 130 // in line with how IE and SpiderMonkey handles it. | 142 // This is in line with how Safari handles it. |
| 131 if (!pushback_buffer()->is_empty()) { | 143 if (!pushback_buffer()->is_empty()) { |
| 132 pos_++; | 144 pos_++; |
| 133 return last_ = pushback_buffer()->RemoveLast(); | 145 return last_ = pushback_buffer()->RemoveLast(); |
| 134 } else if (stream_->has_more()) { | |
| 135 pos_++; | |
| 136 uc32 next = stream_->GetNext(); | |
| 137 return last_ = next; | |
| 138 } else { | 146 } else { |
| 147 while (stream_->has_more()) { |
| 148 pos_++; |
| 149 uc32 next = stream_->GetNext(); |
| 150 if (!IsByteOrderMark(next)) return last_ = next; |
| 151 } |
| 139 // note: currently the following increment is necessary to avoid a | 152 // note: currently the following increment is necessary to avoid a |
| 140 // test-parser problem! | 153 // test-parser problem! |
| 141 pos_++; | 154 pos_++; |
| 142 return last_ = static_cast<uc32>(-1); | 155 return last_ = static_cast<uc32>(-1); |
| 143 } | 156 } |
| 144 } | 157 } |
| 145 | 158 |
| 146 | 159 |
| 147 void UTF16Buffer::SeekForward(int pos) { | 160 void UTF16Buffer::SeekForward(int pos) { |
| 148 pos_ = pos; | 161 pos_ = pos; |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 227 c0_ = source_.Advance(); | 240 c0_ = source_.Advance(); |
| 228 } | 241 } |
| 229 | 242 |
| 230 | 243 |
| 231 void Scanner::PushBack(uc32 ch) { | 244 void Scanner::PushBack(uc32 ch) { |
| 232 source_.PushBack(ch); | 245 source_.PushBack(ch); |
| 233 c0_ = ch; | 246 c0_ = ch; |
| 234 } | 247 } |
| 235 | 248 |
| 236 | 249 |
| 237 static inline bool IsByteOrderMark(uc32 c) { | |
| 238 // The Unicode value U+FFFE is guaranteed never to be assigned as a | |
| 239 // Unicode character; this implies that in a Unicode context the | |
| 240 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | |
| 241 // character expressed in little-endian byte order (since it could | |
| 242 // not be a U+FFFE character expressed in big-endian byte | |
| 243 // order). Nevertheless, we check for it to be compatible with | |
| 244 // Spidermonkey. | |
| 245 return c == 0xFEFF || c == 0xFFFE; | |
| 246 } | |
| 247 | |
| 248 | |
| 249 void Scanner::SkipWhiteSpace(bool initial) { | 250 void Scanner::SkipWhiteSpace(bool initial) { |
| 250 has_line_terminator_before_next_ = initial; | 251 has_line_terminator_before_next_ = initial; |
| 251 | 252 |
| 252 while (true) { | 253 while (true) { |
| 253 // We treat byte-order marks (BOMs) as whitespace for better | 254 while (kIsWhiteSpace.get(c0_)) { |
| 254 // compatibility with Spidermonkey and other JavaScript engines. | |
| 255 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | |
| 256 // IsWhiteSpace() includes line terminators! | 255 // IsWhiteSpace() includes line terminators! |
| 257 if (kIsLineTerminator.get(c0_)) | 256 if (kIsLineTerminator.get(c0_)) |
| 258 // Ignore line terminators, but remember them. This is necessary | 257 // Ignore line terminators, but remember them. This is necessary |
| 259 // for automatic semicolon insertion. | 258 // for automatic semicolon insertion. |
| 260 has_line_terminator_before_next_ = true; | 259 has_line_terminator_before_next_ = true; |
| 261 Advance(); | 260 Advance(); |
| 262 } | 261 } |
| 263 | 262 |
| 264 // If there is an HTML comment end '-->' at the beginning of a | 263 // If there is an HTML comment end '-->' at the beginning of a |
| 265 // line (with only whitespace in front of it), we treat the rest | 264 // line (with only whitespace in front of it), we treat the rest |
| (...skipping 565 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 831 StartLiteral(); | 830 StartLiteral(); |
| 832 while (kIsIdentifierPart.get(c0_)) | 831 while (kIsIdentifierPart.get(c0_)) |
| 833 AddCharAdvance(); | 832 AddCharAdvance(); |
| 834 TerminateLiteral(); | 833 TerminateLiteral(); |
| 835 | 834 |
| 836 next_.location.end_pos = source_pos() - 1; | 835 next_.location.end_pos = source_pos() - 1; |
| 837 return true; | 836 return true; |
| 838 } | 837 } |
| 839 | 838 |
| 840 } } // namespace v8::internal | 839 } } // namespace v8::internal |
| OLD | NEW |