OLD | NEW |
---|---|
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 74 matching lines...) | |
85 } | 85 } |
86 ASSERT(pos() <= Capacity()); | 86 ASSERT(pos() <= Capacity()); |
87 } | 87 } |
88 | 88 |
89 | 89 |
90 // ---------------------------------------------------------------------------- | 90 // ---------------------------------------------------------------------------- |
91 // UTF16Buffer | 91 // UTF16Buffer |
92 | 92 |
93 | 93 |
94 UTF16Buffer::UTF16Buffer() | 94 UTF16Buffer::UTF16Buffer() |
95 : pos_(0), | 95 : pos_(0), size_(0) { } |
Kasper Lund
2009/08/18 06:49:41
4 space indent.
Feng Qian
2009/08/18 07:14:10
Done.
Feng Qian
2009/08/18 07:14:10
Done.
| |
96 pushback_buffer_(0), | |
97 last_(0), | |
98 stream_(NULL) { } | |
99 | |
100 | |
101 void UTF16Buffer::Initialize(Handle<String> data, | |
102 unibrow::CharacterStream* input) { | |
103 data_ = data; | |
104 pos_ = 0; | |
105 stream_ = input; | |
106 } | |
107 | 96 |
108 | 97 |
109 Handle<String> UTF16Buffer::SubString(int start, int end) { | 98 Handle<String> UTF16Buffer::SubString(int start, int end) { |
110 return internal::SubString(data_, start, end); | 99 return internal::SubString(data_, start, end); |
111 } | 100 } |
112 | 101 |
113 | 102 |
114 void UTF16Buffer::PushBack(uc32 ch) { | 103 // CharacterStreamUTF16Buffer |
104 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() | |
105 : pushback_buffer_(0), last_(0), stream_(NULL) { } | |
Kasper Lund
2009/08/18 06:49:41
4 space indent.
Feng Qian
2009/08/18 07:14:10
Done.
Feng Qian
2009/08/18 07:14:10
Done.
| |
106 | |
107 | |
108 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data, | |
109 unibrow::CharacterStream* input) { | |
110 data_ = data; | |
111 pos_ = 0; | |
112 stream_ = input; | |
113 } | |
114 | |
115 | |
116 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) { | |
115 pushback_buffer()->Add(last_); | 117 pushback_buffer()->Add(last_); |
116 last_ = ch; | 118 last_ = ch; |
117 pos_--; | 119 pos_--; |
118 } | 120 } |
119 | 121 |
120 | 122 |
121 uc32 UTF16Buffer::Advance() { | 123 uc32 CharacterStreamUTF16Buffer::Advance() { |
122 // NOTE: It is of importance to Persian / Farsi resources that we do | 124 // NOTE: It is of importance to Persian / Farsi resources that we do |
123 // *not* strip format control characters in the scanner; see | 125 // *not* strip format control characters in the scanner; see |
124 // | 126 // |
125 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152 | 127 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152 |
126 // | 128 // |
127 // So, even though ECMA-262, section 7.1, page 11, dictates that we | 129 // So, even though ECMA-262, section 7.1, page 11, dictates that we |
128 // must remove Unicode format-control characters, we do not. This is | 130 // must remove Unicode format-control characters, we do not. This is |
129 // in line with how IE and SpiderMonkey handles it. | 131 // in line with how IE and SpiderMonkey handles it. |
130 if (!pushback_buffer()->is_empty()) { | 132 if (!pushback_buffer()->is_empty()) { |
131 pos_++; | 133 pos_++; |
132 return last_ = pushback_buffer()->RemoveLast(); | 134 return last_ = pushback_buffer()->RemoveLast(); |
133 } else if (stream_->has_more()) { | 135 } else if (stream_->has_more()) { |
134 pos_++; | 136 pos_++; |
135 uc32 next = stream_->GetNext(); | 137 uc32 next = stream_->GetNext(); |
136 return last_ = next; | 138 return last_ = next; |
137 } else { | 139 } else { |
138 // note: currently the following increment is necessary to avoid a | 140 // note: currently the following increment is necessary to avoid a |
139 // test-parser problem! | 141 // test-parser problem! |
140 pos_++; | 142 pos_++; |
141 return last_ = static_cast<uc32>(-1); | 143 return last_ = static_cast<uc32>(-1); |
142 } | 144 } |
143 } | 145 } |
144 | 146 |
145 | 147 |
146 void UTF16Buffer::SeekForward(int pos) { | 148 void CharacterStreamUTF16Buffer::SeekForward(int pos) { |
147 pos_ = pos; | 149 pos_ = pos; |
148 ASSERT(pushback_buffer()->is_empty()); | 150 ASSERT(pushback_buffer()->is_empty()); |
149 stream_->Seek(pos); | 151 stream_->Seek(pos); |
150 } | 152 } |
151 | 153 |
152 | 154 |
155 // TwoByteStringUTF16Buffer | |
156 TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer() | |
157 : raw_data_(NULL) { } | |
Kasper Lund
2009/08/18 06:49:41
4 space indent.
Feng Qian
2009/08/18 07:14:10
Done.
| |
158 | |
159 | |
160 void TwoByteStringUTF16Buffer::Initialize( | |
161 Handle<ExternalTwoByteString> data) { | |
162 ASSERT(!data.is_null() && StringShape(*data).IsExternalTwoByte()); | |
Kasper Lund
2009/08/18 06:49:41
StringShape(*data).IsExternalTwoByte() -> data->IsExternalTwoByteString()
Feng Qian
2009/08/18 07:14:10
IsExternalTwoByte check is unnecessary here, removed.
| |
163 | |
164 data_ = data; | |
165 pos_ = 0; | |
166 | |
167 raw_data_ = data->resource()->data(); | |
168 size_ = data->length(); | |
169 } | |
170 | |
171 | |
172 uc32 TwoByteStringUTF16Buffer::Advance() { | |
173 if (pos_ < size_) { | |
174 return raw_data_[pos_++]; | |
175 } else { | |
176 // note: currently the following increment is necessary to avoid a | |
Kasper Lund
2009/08/18 06:49:41
note -> Note
Feng Qian
2009/08/18 07:14:10
Done.
| |
177 // test-parser problem! | |
178 pos_++; | |
179 return static_cast<uc32>(-1); | |
180 } | |
181 } | |
182 | |
183 | |
184 void TwoByteStringUTF16Buffer::PushBack(uc32 ch) { | |
185 pos_--; | |
186 ASSERT(pos_ >= 0 && raw_data_[pos_] == ch); | |
187 } | |
188 | |
189 | |
190 void TwoByteStringUTF16Buffer::SeekForward(int pos) { | |
191 pos_ = pos; | |
192 } | |
193 | |
194 | |
153 // ---------------------------------------------------------------------------- | 195 // ---------------------------------------------------------------------------- |
154 // Scanner | 196 // Scanner |
155 | 197 |
156 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { | 198 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { |
157 Token::Initialize(); | 199 Token::Initialize(); |
158 } | 200 } |
159 | 201 |
160 | 202 |
161 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, | 203 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, |
162 int position) { | 204 int position) { |
163 // Initialize the source buffer. | 205 // Initialize the source buffer. |
164 source_.Initialize(source, stream); | 206 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { |
Kasper Lund
2009/08/18 06:49:41
StringShape(*source).IsExternalTwoByte() -> source->IsExternalTwoByteString()
Feng Qian
2009/08/18 07:14:10
IsExternalTwoByteString is only implemented in #if... (reply truncated in this view)
| |
207 two_byte_string_buffer_.Initialize( | |
208 Handle<ExternalTwoByteString>::cast(source)); | |
209 source_ = &two_byte_string_buffer_; | |
210 } else { | |
211 char_stream_buffer_.Initialize(source, stream); | |
212 source_ = &char_stream_buffer_; | |
213 } | |
214 | |
165 position_ = position; | 215 position_ = position; |
166 | 216 |
167 // Reset literals buffer | 217 // Reset literals buffer |
168 literals_.Reset(); | 218 literals_.Reset(); |
169 | 219 |
170 // Set c0_ (one character ahead) | 220 // Set c0_ (one character ahead) |
171 ASSERT(kCharacterLookaheadBufferSize == 1); | 221 ASSERT(kCharacterLookaheadBufferSize == 1); |
172 Advance(); | 222 Advance(); |
173 | 223 |
174 // Skip initial whitespace allowing HTML comment ends just like | 224 // Skip initial whitespace allowing HTML comment ends just like |
175 // after a newline and scan first token. | 225 // after a newline and scan first token. |
176 has_line_terminator_before_next_ = true; | 226 has_line_terminator_before_next_ = true; |
177 SkipWhiteSpace(); | 227 SkipWhiteSpace(); |
178 Scan(); | 228 Scan(); |
179 } | 229 } |
180 | 230 |
181 | 231 |
182 Handle<String> Scanner::SubString(int start, int end) { | 232 Handle<String> Scanner::SubString(int start, int end) { |
183 return source_.SubString(start - position_, end - position_); | 233 return source_->SubString(start - position_, end - position_); |
184 } | 234 } |
185 | 235 |
186 | 236 |
187 Token::Value Scanner::Next() { | 237 Token::Value Scanner::Next() { |
188 // BUG 1215673: Find a thread safe way to set a stack limit in | 238 // BUG 1215673: Find a thread safe way to set a stack limit in |
189 // pre-parse mode. Otherwise, we cannot safely pre-parse from other | 239 // pre-parse mode. Otherwise, we cannot safely pre-parse from other |
190 // threads. | 240 // threads. |
191 current_ = next_; | 241 current_ = next_; |
192 // Check for stack-overflow before returning any tokens. | 242 // Check for stack-overflow before returning any tokens. |
193 StackLimitCheck check; | 243 StackLimitCheck check; |
(...skipping 22 matching lines...) | |
216 AddChar(0); | 266 AddChar(0); |
217 } | 267 } |
218 | 268 |
219 | 269 |
220 void Scanner::AddCharAdvance() { | 270 void Scanner::AddCharAdvance() { |
221 AddChar(c0_); | 271 AddChar(c0_); |
222 Advance(); | 272 Advance(); |
223 } | 273 } |
224 | 274 |
225 | 275 |
226 void Scanner::Advance() { | |
227 c0_ = source_.Advance(); | |
228 } | |
229 | |
230 | |
231 void Scanner::PushBack(uc32 ch) { | |
232 source_.PushBack(ch); | |
233 c0_ = ch; | |
234 } | |
235 | |
236 | |
237 static inline bool IsByteOrderMark(uc32 c) { | 276 static inline bool IsByteOrderMark(uc32 c) { |
238 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 277 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
239 // Unicode character; this implies that in a Unicode context the | 278 // Unicode character; this implies that in a Unicode context the |
240 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 279 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
241 // character expressed in little-endian byte order (since it could | 280 // character expressed in little-endian byte order (since it could |
242 // not be a U+FFFE character expressed in big-endian byte | 281 // not be a U+FFFE character expressed in big-endian byte |
243 // order). Nevertheless, we check for it to be compatible with | 282 // order). Nevertheless, we check for it to be compatible with |
244 // Spidermonkey. | 283 // Spidermonkey. |
245 return c == 0xFEFF || c == 0xFFFE; | 284 return c == 0xFEFF || c == 0xFFFE; |
246 } | 285 } |
(...skipping 329 matching lines...) | |
576 // Continue scanning for tokens as long as we're just skipping | 615 // Continue scanning for tokens as long as we're just skipping |
577 // whitespace. | 616 // whitespace. |
578 } while (token == Token::WHITESPACE); | 617 } while (token == Token::WHITESPACE); |
579 | 618 |
580 next_.location.end_pos = source_pos(); | 619 next_.location.end_pos = source_pos(); |
581 next_.token = token; | 620 next_.token = token; |
582 } | 621 } |
583 | 622 |
584 | 623 |
585 void Scanner::SeekForward(int pos) { | 624 void Scanner::SeekForward(int pos) { |
586 source_.SeekForward(pos - 1); | 625 source_->SeekForward(pos - 1); |
587 Advance(); | 626 Advance(); |
588 Scan(); | 627 Scan(); |
589 } | 628 } |
590 | 629 |
591 | 630 |
592 uc32 Scanner::ScanHexEscape(uc32 c, int length) { | 631 uc32 Scanner::ScanHexEscape(uc32 c, int length) { |
593 ASSERT(length <= 4); // prevent overflow | 632 ASSERT(length <= 4); // prevent overflow |
594 | 633 |
595 uc32 digits[4]; | 634 uc32 digits[4]; |
596 uc32 x = 0; | 635 uc32 x = 0; |
(...skipping 329 matching lines...) | |
926 } | 965 } |
927 AddCharAdvance(); | 966 AddCharAdvance(); |
928 } | 967 } |
929 TerminateLiteral(); | 968 TerminateLiteral(); |
930 | 969 |
931 next_.location.end_pos = source_pos() - 1; | 970 next_.location.end_pos = source_pos() - 1; |
932 return true; | 971 return true; |
933 } | 972 } |
934 | 973 |
935 } } // namespace v8::internal | 974 } } // namespace v8::internal |
OLD | NEW |