Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(369)

Side by Side Diff: src/scanner.h

Issue 165403: Streamline the scanner for external two byte string input. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 11 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
66 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize; 66 return (data + capacity) - unibrow::Utf8::kMaxEncodedSize;
67 } 67 }
68 68
69 void AddCharSlow(uc32 c); 69 void AddCharSlow(uc32 c);
70 }; 70 };
71 71
72 72
73 class UTF16Buffer { 73 class UTF16Buffer {
74 public: 74 public:
75 UTF16Buffer(); 75 UTF16Buffer();
76 virtual ~UTF16Buffer() {}
76 77
77 void Initialize(Handle<String> data, unibrow::CharacterStream* stream); 78 virtual void PushBack(uc32 ch) = 0;
Kasper Lund (2009/08/18 06:49:41): Ideally, these functions could be made non-virtual…
Feng Qian (2009/08/18 07:14:10): Let's do it later. On 2009/08/18 06:49:41, Kasper…
78 void PushBack(uc32 ch); 79 // returns a value < 0 when the buffer end is reached
79 uc32 Advance(); // returns a value < 0 when the buffer end is reached 80 virtual uc32 Advance() = 0;
80 uint16_t CharAt(int index); 81 virtual void SeekForward(int pos) = 0;
82
81 int pos() const { return pos_; } 83 int pos() const { return pos_; }
82 int size() const { return size_; } 84 int size() const { return size_; }
83 Handle<String> SubString(int start, int end); 85 Handle<String> SubString(int start, int end);
84 List<uc32>* pushback_buffer() { return &pushback_buffer_; }
85 void SeekForward(int pos);
86 86
87 private: 87 protected:
88 Handle<String> data_; 88 Handle<String> data_;
89 int pos_; 89 int pos_;
90 int size_; 90 int size_;
91 };
92
93
94 class CharacterStreamUTF16Buffer: public UTF16Buffer {
95 public:
96 CharacterStreamUTF16Buffer();
97 virtual ~CharacterStreamUTF16Buffer() {}
98 void Initialize(Handle<String> data, unibrow::CharacterStream* stream);
99 virtual void PushBack(uc32 ch);
100 virtual uc32 Advance();
101 virtual void SeekForward(int pos);
102
103 private:
91 List<uc32> pushback_buffer_; 104 List<uc32> pushback_buffer_;
92 uc32 last_; 105 uc32 last_;
93 unibrow::CharacterStream* stream_; 106 unibrow::CharacterStream* stream_;
107
108 List<uc32>* pushback_buffer() { return &pushback_buffer_; }
109 };
110
111
112 class TwoByteStringUTF16Buffer: public UTF16Buffer {
113 public:
114 TwoByteStringUTF16Buffer();
115 virtual ~TwoByteStringUTF16Buffer() {}
116 void Initialize(Handle<ExternalTwoByteString> data);
117 virtual void PushBack(uc32 ch);
118 virtual uc32 Advance();
119 virtual void SeekForward(int pos);
120
121 private:
122 const uint16_t* raw_data_;
94 }; 123 };
95 124
96 125
97 class Scanner { 126 class Scanner {
98 public: 127 public:
99 128
100 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; 129 typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
101 130
102 // Construction 131 // Construction
103 explicit Scanner(bool is_pre_parsing); 132 explicit Scanner(bool is_pre_parsing);
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
177 // Tells whether the buffer contains an identifier (no escapes). 206 // Tells whether the buffer contains an identifier (no escapes).
178 // Used for checking if a property name is an identifier. 207 // Used for checking if a property name is an identifier.
179 static bool IsIdentifier(unibrow::CharacterStream* buffer); 208 static bool IsIdentifier(unibrow::CharacterStream* buffer);
180 209
181 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; 210 static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
182 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; 211 static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
183 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator; 212 static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
184 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; 213 static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
185 214
186 private: 215 private:
216 CharacterStreamUTF16Buffer char_stream_buffer_;
217 TwoByteStringUTF16Buffer two_byte_string_buffer_;
218
187 // Source. 219 // Source.
188 UTF16Buffer source_; 220 UTF16Buffer* source_;
189 int position_; 221 int position_;
190 222
191 // Buffer to hold literal values (identifiers, strings, numbers) 223 // Buffer to hold literal values (identifiers, strings, numbers)
192 // using 0-terminated UTF-8 encoding. 224 // using 0-terminated UTF-8 encoding.
193 UTF8Buffer literals_; 225 UTF8Buffer literals_;
194 226
195 bool stack_overflow_; 227 bool stack_overflow_;
196 static StaticResource<Utf8Decoder> utf8_decoder_; 228 static StaticResource<Utf8Decoder> utf8_decoder_;
197 229
198 // One Unicode character look-ahead; c0_ < 0 at the end of the input. 230 // One Unicode character look-ahead; c0_ < 0 at the end of the input.
(...skipping 13 matching lines...) Expand all
212 244
213 static const int kCharacterLookaheadBufferSize = 1; 245 static const int kCharacterLookaheadBufferSize = 1;
214 246
215 // Literal buffer support 247 // Literal buffer support
216 void StartLiteral(); 248 void StartLiteral();
217 void AddChar(uc32 ch); 249 void AddChar(uc32 ch);
218 void AddCharAdvance(); 250 void AddCharAdvance();
219 void TerminateLiteral(); 251 void TerminateLiteral();
220 252
221 // Low-level scanning support. 253 // Low-level scanning support.
222 void Advance(); 254 void Advance() { c0_ = source_->Advance(); }
223 void PushBack(uc32 ch); 255 void PushBack(uc32 ch) {
256 source_->PushBack(ch);
257 c0_ = ch;
258 }
224 259
225 bool SkipWhiteSpace(); 260 bool SkipWhiteSpace();
226 Token::Value SkipSingleLineComment(); 261 Token::Value SkipSingleLineComment();
227 Token::Value SkipMultiLineComment(); 262 Token::Value SkipMultiLineComment();
228 263
229 inline Token::Value Select(Token::Value tok); 264 inline Token::Value Select(Token::Value tok);
230 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_); 265 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_);
231 266
232 void Scan(); 267 void Scan();
233 void ScanDecimalDigits(); 268 void ScanDecimalDigits();
234 Token::Value ScanNumber(bool seen_period); 269 Token::Value ScanNumber(bool seen_period);
235 Token::Value ScanIdentifier(); 270 Token::Value ScanIdentifier();
236 uc32 ScanHexEscape(uc32 c, int length); 271 uc32 ScanHexEscape(uc32 c, int length);
237 uc32 ScanOctalEscape(uc32 c, int length); 272 uc32 ScanOctalEscape(uc32 c, int length);
238 void ScanEscape(); 273 void ScanEscape();
239 Token::Value ScanString(); 274 Token::Value ScanString();
240 275
241 // Scans a possible HTML comment -- begins with '<!'. 276 // Scans a possible HTML comment -- begins with '<!'.
242 Token::Value ScanHtmlComment(); 277 Token::Value ScanHtmlComment();
243 278
244 // Return the current source position. 279 // Return the current source position.
245 int source_pos() { 280 int source_pos() {
246 return source_.pos() - kCharacterLookaheadBufferSize + position_; 281 return source_->pos() - kCharacterLookaheadBufferSize + position_;
247 } 282 }
248 283
249 // Decodes a unicode escape-sequence which is part of an identifier. 284 // Decodes a unicode escape-sequence which is part of an identifier.
250 // If the escape sequence cannot be decoded the result is kBadRune. 285 // If the escape sequence cannot be decoded the result is kBadRune.
251 uc32 ScanIdentifierUnicodeEscape(); 286 uc32 ScanIdentifierUnicodeEscape();
252 }; 287 };
253 288
254 } } // namespace v8::internal 289 } } // namespace v8::internal
255 290
256 #endif // V8_SCANNER_H_ 291 #endif // V8_SCANNER_H_
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698