Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(337)

Side by Side Diff: src/scanner.cc

Issue 165403: Streamline the scanner for external two byte string input. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 11 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« src/scanner.h ('K') | « src/scanner.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
85 } 85 }
86 ASSERT(pos() <= Capacity()); 86 ASSERT(pos() <= Capacity());
87 } 87 }
88 88
89 89
90 // ---------------------------------------------------------------------------- 90 // ----------------------------------------------------------------------------
91 // UTF16Buffer 91 // UTF16Buffer
92 92
93 93
94 UTF16Buffer::UTF16Buffer() 94 UTF16Buffer::UTF16Buffer()
95 : pos_(0), 95 : pos_(0), size_(0) { }
Kasper Lund 2009/08/18 06:49:41 4 space indent.
Feng Qian 2009/08/18 07:14:10 Done.
Feng Qian 2009/08/18 07:14:10 Done.
96 pushback_buffer_(0),
97 last_(0),
98 stream_(NULL) { }
99
100
101 void UTF16Buffer::Initialize(Handle<String> data,
102 unibrow::CharacterStream* input) {
103 data_ = data;
104 pos_ = 0;
105 stream_ = input;
106 }
107 96
108 97
109 Handle<String> UTF16Buffer::SubString(int start, int end) { 98 Handle<String> UTF16Buffer::SubString(int start, int end) {
110 return internal::SubString(data_, start, end); 99 return internal::SubString(data_, start, end);
111 } 100 }
112 101
113 102
114 void UTF16Buffer::PushBack(uc32 ch) { 103 // CharacterStreamUTF16Buffer
104 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()
105 : pushback_buffer_(0), last_(0), stream_(NULL) { }
Kasper Lund 2009/08/18 06:49:41 4 space indent.
Feng Qian 2009/08/18 07:14:10 Done.
Feng Qian 2009/08/18 07:14:10 Done.
106
107
108 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,
109 unibrow::CharacterStream* input) {
110 data_ = data;
111 pos_ = 0;
112 stream_ = input;
113 }
114
115
116 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {
115 pushback_buffer()->Add(last_); 117 pushback_buffer()->Add(last_);
116 last_ = ch; 118 last_ = ch;
117 pos_--; 119 pos_--;
118 } 120 }
119 121
120 122
121 uc32 UTF16Buffer::Advance() { 123 uc32 CharacterStreamUTF16Buffer::Advance() {
122 // NOTE: It is of importance to Persian / Farsi resources that we do 124 // NOTE: It is of importance to Persian / Farsi resources that we do
123 // *not* strip format control characters in the scanner; see 125 // *not* strip format control characters in the scanner; see
124 // 126 //
125 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152 127 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152
126 // 128 //
127 // So, even though ECMA-262, section 7.1, page 11, dictates that we 129 // So, even though ECMA-262, section 7.1, page 11, dictates that we
128 // must remove Unicode format-control characters, we do not. This is 130 // must remove Unicode format-control characters, we do not. This is
129 // in line with how IE and SpiderMonkey handles it. 131 // in line with how IE and SpiderMonkey handles it.
130 if (!pushback_buffer()->is_empty()) { 132 if (!pushback_buffer()->is_empty()) {
131 pos_++; 133 pos_++;
132 return last_ = pushback_buffer()->RemoveLast(); 134 return last_ = pushback_buffer()->RemoveLast();
133 } else if (stream_->has_more()) { 135 } else if (stream_->has_more()) {
134 pos_++; 136 pos_++;
135 uc32 next = stream_->GetNext(); 137 uc32 next = stream_->GetNext();
136 return last_ = next; 138 return last_ = next;
137 } else { 139 } else {
138 // note: currently the following increment is necessary to avoid a 140 // note: currently the following increment is necessary to avoid a
139 // test-parser problem! 141 // test-parser problem!
140 pos_++; 142 pos_++;
141 return last_ = static_cast<uc32>(-1); 143 return last_ = static_cast<uc32>(-1);
142 } 144 }
143 } 145 }
144 146
145 147
146 void UTF16Buffer::SeekForward(int pos) { 148 void CharacterStreamUTF16Buffer::SeekForward(int pos) {
147 pos_ = pos; 149 pos_ = pos;
148 ASSERT(pushback_buffer()->is_empty()); 150 ASSERT(pushback_buffer()->is_empty());
149 stream_->Seek(pos); 151 stream_->Seek(pos);
150 } 152 }
151 153
152 154
155 // TwoByteStringUTF16Buffer
156 TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer()
157 : raw_data_(NULL) { }
Kasper Lund 2009/08/18 06:49:41 4 space indent.
Feng Qian 2009/08/18 07:14:10 Done.
158
159
160 void TwoByteStringUTF16Buffer::Initialize(
161 Handle<ExternalTwoByteString> data) {
162 ASSERT(!data.is_null() && StringShape(*data).IsExternalTwoByte());
Kasper Lund 2009/08/18 06:49:41 StringShape(*data).IsExternalTwoByte() -> data->Is
Feng Qian 2009/08/18 07:14:10 IsExternalTwoByte check is unnecessary here, remov
163
164 data_ = data;
165 pos_ = 0;
166
167 raw_data_ = data->resource()->data();
168 size_ = data->length();
169 }
170
171
172 uc32 TwoByteStringUTF16Buffer::Advance() {
173 if (pos_ < size_) {
174 return raw_data_[pos_++];
175 } else {
176 // note: currently the following increment is necessary to avoid a
Kasper Lund 2009/08/18 06:49:41 note -> Note
Feng Qian 2009/08/18 07:14:10 Done.
177 // test-parser problem!
178 pos_++;
179 return static_cast<uc32>(-1);
180 }
181 }
182
183
184 void TwoByteStringUTF16Buffer::PushBack(uc32 ch) {
185 pos_--;
186 ASSERT(pos_ >= 0 && raw_data_[pos_] == ch);
187 }
188
189
190 void TwoByteStringUTF16Buffer::SeekForward(int pos) {
191 pos_ = pos;
192 }
193
194
153 // ---------------------------------------------------------------------------- 195 // ----------------------------------------------------------------------------
154 // Scanner 196 // Scanner
155 197
156 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { 198 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) {
157 Token::Initialize(); 199 Token::Initialize();
158 } 200 }
159 201
160 202
161 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, 203 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
162 int position) { 204 int position) {
163 // Initialize the source buffer. 205 // Initialize the source buffer.
164 source_.Initialize(source, stream); 206 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
Kasper Lund 2009/08/18 06:49:41 StringShape(*source).IsExternalTwoByte() -> source
Feng Qian 2009/08/18 07:14:10 IsExternalTwoByteString is only implemented in #if
207 two_byte_string_buffer_.Initialize(
208 Handle<ExternalTwoByteString>::cast(source));
209 source_ = &two_byte_string_buffer_;
210 } else {
211 char_stream_buffer_.Initialize(source, stream);
212 source_ = &char_stream_buffer_;
213 }
214
165 position_ = position; 215 position_ = position;
166 216
167 // Reset literals buffer 217 // Reset literals buffer
168 literals_.Reset(); 218 literals_.Reset();
169 219
170 // Set c0_ (one character ahead) 220 // Set c0_ (one character ahead)
171 ASSERT(kCharacterLookaheadBufferSize == 1); 221 ASSERT(kCharacterLookaheadBufferSize == 1);
172 Advance(); 222 Advance();
173 223
174 // Skip initial whitespace allowing HTML comment ends just like 224 // Skip initial whitespace allowing HTML comment ends just like
175 // after a newline and scan first token. 225 // after a newline and scan first token.
176 has_line_terminator_before_next_ = true; 226 has_line_terminator_before_next_ = true;
177 SkipWhiteSpace(); 227 SkipWhiteSpace();
178 Scan(); 228 Scan();
179 } 229 }
180 230
181 231
182 Handle<String> Scanner::SubString(int start, int end) { 232 Handle<String> Scanner::SubString(int start, int end) {
183 return source_.SubString(start - position_, end - position_); 233 return source_->SubString(start - position_, end - position_);
184 } 234 }
185 235
186 236
187 Token::Value Scanner::Next() { 237 Token::Value Scanner::Next() {
188 // BUG 1215673: Find a thread safe way to set a stack limit in 238 // BUG 1215673: Find a thread safe way to set a stack limit in
189 // pre-parse mode. Otherwise, we cannot safely pre-parse from other 239 // pre-parse mode. Otherwise, we cannot safely pre-parse from other
190 // threads. 240 // threads.
191 current_ = next_; 241 current_ = next_;
192 // Check for stack-overflow before returning any tokens. 242 // Check for stack-overflow before returning any tokens.
193 StackLimitCheck check; 243 StackLimitCheck check;
(...skipping 22 matching lines...) Expand all
216 AddChar(0); 266 AddChar(0);
217 } 267 }
218 268
219 269
220 void Scanner::AddCharAdvance() { 270 void Scanner::AddCharAdvance() {
221 AddChar(c0_); 271 AddChar(c0_);
222 Advance(); 272 Advance();
223 } 273 }
224 274
225 275
226 void Scanner::Advance() {
227 c0_ = source_.Advance();
228 }
229
230
231 void Scanner::PushBack(uc32 ch) {
232 source_.PushBack(ch);
233 c0_ = ch;
234 }
235
236
237 static inline bool IsByteOrderMark(uc32 c) { 276 static inline bool IsByteOrderMark(uc32 c) {
238 // The Unicode value U+FFFE is guaranteed never to be assigned as a 277 // The Unicode value U+FFFE is guaranteed never to be assigned as a
239 // Unicode character; this implies that in a Unicode context the 278 // Unicode character; this implies that in a Unicode context the
240 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF 279 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
241 // character expressed in little-endian byte order (since it could 280 // character expressed in little-endian byte order (since it could
242 // not be a U+FFFE character expressed in big-endian byte 281 // not be a U+FFFE character expressed in big-endian byte
243 // order). Nevertheless, we check for it to be compatible with 282 // order). Nevertheless, we check for it to be compatible with
244 // Spidermonkey. 283 // Spidermonkey.
245 return c == 0xFEFF || c == 0xFFFE; 284 return c == 0xFEFF || c == 0xFFFE;
246 } 285 }
(...skipping 329 matching lines...) Expand 10 before | Expand all | Expand 10 after
576 // Continue scanning for tokens as long as we're just skipping 615 // Continue scanning for tokens as long as we're just skipping
577 // whitespace. 616 // whitespace.
578 } while (token == Token::WHITESPACE); 617 } while (token == Token::WHITESPACE);
579 618
580 next_.location.end_pos = source_pos(); 619 next_.location.end_pos = source_pos();
581 next_.token = token; 620 next_.token = token;
582 } 621 }
583 622
584 623
585 void Scanner::SeekForward(int pos) { 624 void Scanner::SeekForward(int pos) {
586 source_.SeekForward(pos - 1); 625 source_->SeekForward(pos - 1);
587 Advance(); 626 Advance();
588 Scan(); 627 Scan();
589 } 628 }
590 629
591 630
592 uc32 Scanner::ScanHexEscape(uc32 c, int length) { 631 uc32 Scanner::ScanHexEscape(uc32 c, int length) {
593 ASSERT(length <= 4); // prevent overflow 632 ASSERT(length <= 4); // prevent overflow
594 633
595 uc32 digits[4]; 634 uc32 digits[4];
596 uc32 x = 0; 635 uc32 x = 0;
(...skipping 329 matching lines...) Expand 10 before | Expand all | Expand 10 after
926 } 965 }
927 AddCharAdvance(); 966 AddCharAdvance();
928 } 967 }
929 TerminateLiteral(); 968 TerminateLiteral();
930 969
931 next_.location.end_pos = source_pos() - 1; 970 next_.location.end_pos = source_pos() - 1;
932 return true; 971 return true;
933 } 972 }
934 973
935 } } // namespace v8::internal 974 } } // namespace v8::internal
OLDNEW
« src/scanner.h ('K') | « src/scanner.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698