src/scanner.cc - Issue 165403: Streamline the scanner for external two byte string input.

Side by Side Diff: src/scanner.cc

Issue 165403: Streamline the scanner for external two byte string input. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: '' Created 11 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 74 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
85 }	85 }

86 ASSERT(pos() <= Capacity());	86 ASSERT(pos() <= Capacity());

87 }	87 }

88	88

89	89

90 // ----------------------------------------------------------------------------	90 // ----------------------------------------------------------------------------

91 // UTF16Buffer	91 // UTF16Buffer

92	92

93	93

94 UTF16Buffer::UTF16Buffer()	94 UTF16Buffer::UTF16Buffer()

95 : pos_(0),	95 : pos_(0), size_(0) { }
	Kasper Lund 2009/08/18 06:49:41: 4 space indent.
	Feng Qian 2009/08/18 07:14:10: On 2009/08/18 06:49:41, Kasper Lund wrote: > 4 space indent. -- Done.
96 pushback_buffer_(0),

97 last_(0),

98 stream_(NULL) { }

99

100

101 void UTF16Buffer::Initialize(Handle<String> data,

102 unibrow::CharacterStream* input) {

103 data_ = data;

104 pos_ = 0;

105 stream_ = input;

106 }

107	96

108	97

109 Handle<String> UTF16Buffer::SubString(int start, int end) {	98 Handle<String> UTF16Buffer::SubString(int start, int end) {

110 return internal::SubString(data_, start, end);	99 return internal::SubString(data_, start, end);

111 }	100 }

112	101

113	102

114 void UTF16Buffer::PushBack(uc32 ch) {	103 // CharacterStreamUTF16Buffer

	104 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()

	105 : pushback_buffer_(0), last_(0), stream_(NULL) { }
	Kasper Lund 2009/08/18 06:49:41: 4 space indent.
	Feng Qian 2009/08/18 07:14:10: On 2009/08/18 06:49:41, Kasper Lund wrote: > 4 space indent. -- Done.
	106

	107

	108 void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,

	109 unibrow::CharacterStream* input) {

	110 data_ = data;

	111 pos_ = 0;

	112 stream_ = input;

	113 }

	114

	115

	116 void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {

115 pushback_buffer()->Add(last_);	117 pushback_buffer()->Add(last_);

116 last_ = ch;	118 last_ = ch;

117 pos_--;	119 pos_--;

118 }	120 }

119	121

120	122

121 uc32 UTF16Buffer::Advance() {	123 uc32 CharacterStreamUTF16Buffer::Advance() {

122 // NOTE: It is of importance to Persian / Farsi resources that we do	124 // NOTE: It is of importance to Persian / Farsi resources that we do

123 // not strip format control characters in the scanner; see	125 // not strip format control characters in the scanner; see

124 //	126 //

125 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152	127 // https://bugzilla.mozilla.org/show_bug.cgi?id=274152

126 //	128 //

127 // So, even though ECMA-262, section 7.1, page 11, dictates that we	129 // So, even though ECMA-262, section 7.1, page 11, dictates that we

128 // must remove Unicode format-control characters, we do not. This is	130 // must remove Unicode format-control characters, we do not. This is

129 // in line with how IE and SpiderMonkey handles it.	131 // in line with how IE and SpiderMonkey handles it.

130 if (!pushback_buffer()->is_empty()) {	132 if (!pushback_buffer()->is_empty()) {

131 pos_++;	133 pos_++;

132 return last_ = pushback_buffer()->RemoveLast();	134 return last_ = pushback_buffer()->RemoveLast();

133 } else if (stream_->has_more()) {	135 } else if (stream_->has_more()) {

134 pos_++;	136 pos_++;

135 uc32 next = stream_->GetNext();	137 uc32 next = stream_->GetNext();

136 return last_ = next;	138 return last_ = next;

137 } else {	139 } else {

138 // note: currently the following increment is necessary to avoid a	140 // note: currently the following increment is necessary to avoid a

139 // test-parser problem!	141 // test-parser problem!

140 pos_++;	142 pos_++;

141 return last_ = static_cast<uc32>(-1);	143 return last_ = static_cast<uc32>(-1);

142 }	144 }

143 }	145 }

144	146

145	147

146 void UTF16Buffer::SeekForward(int pos) {	148 void CharacterStreamUTF16Buffer::SeekForward(int pos) {

147 pos_ = pos;	149 pos_ = pos;

148 ASSERT(pushback_buffer()->is_empty());	150 ASSERT(pushback_buffer()->is_empty());

149 stream_->Seek(pos);	151 stream_->Seek(pos);

150 }	152 }

151	153

152	154

	155 // TwoByteStringUTF16Buffer

	156 TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer()

	157 : raw_data_(NULL) { }
	Kasper Lund 2009/08/18 06:49:41: 4 space indent.
	Feng Qian 2009/08/18 07:14:10: On 2009/08/18 06:49:41, Kasper Lund wrote: > 4 space indent. -- Done.
	158

	159

	160 void TwoByteStringUTF16Buffer::Initialize(

	161 Handle<ExternalTwoByteString> data) {

	162 ASSERT(!data.is_null() && StringShape(*data).IsExternalTwoByte());
	Kasper Lund 2009/08/18 06:49:41: StringShape(*data).IsExternalTwoByte() -> data->IsExternalTwoByteString()
	Feng Qian 2009/08/18 07:14:10: On 2009/08/18 06:49:41, Kasper Lund wrote: > StringShape(*data).IsExternalTwoByte() -> data->IsExternalTwoByteString() -- IsExternalTwoByte check is unnecessary here, removed.
	163

	164 data_ = data;

	165 pos_ = 0;

	166

	167 raw_data_ = data->resource()->data();

	168 size_ = data->length();

	169 }

	170

	171

	172 uc32 TwoByteStringUTF16Buffer::Advance() {

	173 if (pos_ < size_) {

	174 return raw_data_[pos_++];

	175 } else {

	176 // note: currently the following increment is necessary to avoid a
	Kasper Lund 2009/08/18 06:49:41: note -> Note
	Feng Qian 2009/08/18 07:14:10: On 2009/08/18 06:49:41, Kasper Lund wrote: > note -> Note -- Done.
	177 // test-parser problem!

	178 pos_++;

	179 return static_cast<uc32>(-1);

	180 }

	181 }

	182

	183

	184 void TwoByteStringUTF16Buffer::PushBack(uc32 ch) {

	185 pos_--;

	186 ASSERT(pos_ >= 0 && raw_data_[pos_] == ch);

	187 }

	188

	189

	190 void TwoByteStringUTF16Buffer::SeekForward(int pos) {

	191 pos_ = pos;

	192 }

	193

	194

153 // ----------------------------------------------------------------------------	195 // ----------------------------------------------------------------------------

154 // Scanner	196 // Scanner

155	197

156 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) {	198 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) {

157 Token::Initialize();	199 Token::Initialize();

158 }	200 }

159	201

160	202

161 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,	203 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,

162 int position) {	204 int position) {

163 // Initialize the source buffer.	205 // Initialize the source buffer.

164 source_.Initialize(source, stream);	206 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
	Kasper Lund 2009/08/18 06:49:41: StringShape(*source).IsExternalTwoByte() -> source->IsExternalTwoByteString()?
	Feng Qian 2009/08/18 07:14:10: On 2009/08/18 06:49:41, Kasper Lund wrote: > StringShape(*source).IsExternalTwoByte() -> source->IsExternalTwoByteString()? -- IsExternalTwoByteString is only implemented in #if DEBUG, and comments in objects-inl.h states that one should consider use StringShape instead of moving the implementation out of #if DEBUG
	207 two_byte_string_buffer_.Initialize(

	208 Handle<ExternalTwoByteString>::cast(source));

	209 source_ = &two_byte_string_buffer_;

	210 } else {

	211 char_stream_buffer_.Initialize(source, stream);

	212 source_ = &char_stream_buffer_;

	213 }

	214

165 position_ = position;	215 position_ = position;

166	216

167 // Reset literals buffer	217 // Reset literals buffer

168 literals_.Reset();	218 literals_.Reset();

169	219

170 // Set c0_ (one character ahead)	220 // Set c0_ (one character ahead)

171 ASSERT(kCharacterLookaheadBufferSize == 1);	221 ASSERT(kCharacterLookaheadBufferSize == 1);

172 Advance();	222 Advance();

173	223

174 // Skip initial whitespace allowing HTML comment ends just like	224 // Skip initial whitespace allowing HTML comment ends just like

175 // after a newline and scan first token.	225 // after a newline and scan first token.

176 has_line_terminator_before_next_ = true;	226 has_line_terminator_before_next_ = true;

177 SkipWhiteSpace();	227 SkipWhiteSpace();

178 Scan();	228 Scan();

179 }	229 }

180	230

181	231

182 Handle<String> Scanner::SubString(int start, int end) {	232 Handle<String> Scanner::SubString(int start, int end) {

183 return source_.SubString(start - position_, end - position_);	233 return source_->SubString(start - position_, end - position_);

184 }	234 }

185	235

186	236

187 Token::Value Scanner::Next() {	237 Token::Value Scanner::Next() {

188 // BUG 1215673: Find a thread safe way to set a stack limit in	238 // BUG 1215673: Find a thread safe way to set a stack limit in

189 // pre-parse mode. Otherwise, we cannot safely pre-parse from other	239 // pre-parse mode. Otherwise, we cannot safely pre-parse from other

190 // threads.	240 // threads.

191 current_ = next_;	241 current_ = next_;

192 // Check for stack-overflow before returning any tokens.	242 // Check for stack-overflow before returning any tokens.

193 StackLimitCheck check;	243 StackLimitCheck check;

(...skipping 22 matching lines...) Expand all Loading...
216 AddChar(0);	266 AddChar(0);

217 }	267 }

218	268

219	269

220 void Scanner::AddCharAdvance() {	270 void Scanner::AddCharAdvance() {

221 AddChar(c0_);	271 AddChar(c0_);

222 Advance();	272 Advance();

223 }	273 }

224	274

225	275

226 void Scanner::Advance() {

227 c0_ = source_.Advance();

228 }

229

230

231 void Scanner::PushBack(uc32 ch) {

232 source_.PushBack(ch);

233 c0_ = ch;

234 }

235

236

237 static inline bool IsByteOrderMark(uc32 c) {	276 static inline bool IsByteOrderMark(uc32 c) {

238 // The Unicode value U+FFFE is guaranteed never to be assigned as a	277 // The Unicode value U+FFFE is guaranteed never to be assigned as a

239 // Unicode character; this implies that in a Unicode context the	278 // Unicode character; this implies that in a Unicode context the

240 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	279 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

241 // character expressed in little-endian byte order (since it could	280 // character expressed in little-endian byte order (since it could

242 // not be a U+FFFE character expressed in big-endian byte	281 // not be a U+FFFE character expressed in big-endian byte

243 // order). Nevertheless, we check for it to be compatible with	282 // order). Nevertheless, we check for it to be compatible with

244 // Spidermonkey.	283 // Spidermonkey.

245 return c == 0xFEFF || c == 0xFFFE;	284 return c == 0xFEFF || c == 0xFFFE;

246 }	285 }

(...skipping 329 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
576 // Continue scanning for tokens as long as we're just skipping	615 // Continue scanning for tokens as long as we're just skipping

577 // whitespace.	616 // whitespace.

578 } while (token == Token::WHITESPACE);	617 } while (token == Token::WHITESPACE);

579	618

580 next_.location.end_pos = source_pos();	619 next_.location.end_pos = source_pos();

581 next_.token = token;	620 next_.token = token;

582 }	621 }

583	622

584	623

585 void Scanner::SeekForward(int pos) {	624 void Scanner::SeekForward(int pos) {

586 source_.SeekForward(pos - 1);	625 source_->SeekForward(pos - 1);

587 Advance();	626 Advance();

588 Scan();	627 Scan();

589 }	628 }

590	629

591	630

592 uc32 Scanner::ScanHexEscape(uc32 c, int length) {	631 uc32 Scanner::ScanHexEscape(uc32 c, int length) {

593 ASSERT(length <= 4); // prevent overflow	632 ASSERT(length <= 4); // prevent overflow

594	633

595 uc32 digits[4];	634 uc32 digits[4];

596 uc32 x = 0;	635 uc32 x = 0;

(...skipping 329 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
926 }	965 }

927 AddCharAdvance();	966 AddCharAdvance();

928 }	967 }

929 TerminateLiteral();	968 TerminateLiteral();

930	969

931 next_.location.end_pos = source_pos() - 1;	970 next_.location.end_pos = source_pos() - 1;

932 return true;	971 return true;

933 }	972 }

934	973

935 } } // namespace v8::internal	974 } } // namespace v8::internal

OLD	NEW

« src/scanner.h ('K') | « src/scanner.h ('k') | no next file » | no next file with comments »