src/lexer/experimental-scanner.h - Issue 140913009: Experimental lexer: fix internalization and allocation of literals.

Side by Side Diff: src/lexer/experimental-scanner.h

Issue 140913009: Experimental lexer: fix internalization and allocation of literals. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: minor fixes Created 6 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2013 the V8 project authors. All rights reserved.	1 // Copyright 2013 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 139 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
150 harmony_numeric_literals_ = numeric_literals;	150 harmony_numeric_literals_ = numeric_literals;

151 }	151 }

152	152

153 // Returns true if there was a line terminator before the peek'ed token,	153 // Returns true if there was a line terminator before the peek'ed token,

154 // possibly inside a multi-line comment.	154 // possibly inside a multi-line comment.

155 bool HasAnyLineTerminatorBeforeNext() const {	155 bool HasAnyLineTerminatorBeforeNext() const {

156 return has_line_terminator_before_next_ \|\|	156 return has_line_terminator_before_next_ \|\|

157 has_multiline_comment_before_next_;	157 has_multiline_comment_before_next_;

158 }	158 }

159	159

	160 Handle<String> GetLiteralSymbol() {

	161 EnsureCurrentLiteralIsValid();

	162 return InternalizeLiteral(current_literal_);

	163 }

	164

	165 Handle<String> GetLiteralString(PretenureFlag tenured) {

	166 EnsureCurrentLiteralIsValid();

	167 return AllocateLiteral(current_literal_, tenured);

	168 }

	169

	170 Handle<String> GetNextLiteralString(PretenureFlag tenured) {

	171 EnsureNextLiteralIsValid();

	172 return AllocateLiteral(next_literal_, tenured);

	173 }

	174

160 Vector<const char> literal_ascii_string() {	175 Vector<const char> literal_ascii_string() {

161 if (!current_literal_->Valid(current_.beg_pos)) {	176 EnsureCurrentLiteralIsValid();

162 FillLiteral(current_, current_literal_);

163 }

164 return current_literal_->ascii_string;	177 return current_literal_->ascii_string;

165 }	178 }

166	179

167 Vector<const uc16> literal_utf16_string() {	180 Vector<const uc16> literal_utf16_string() {

168 if (!current_literal_->Valid(current_.beg_pos)) {	181 EnsureCurrentLiteralIsValid();

169 FillLiteral(current_, current_literal_);

170 }

171 return current_literal_->utf16_string;	182 return current_literal_->utf16_string;

172 }	183 }

173	184

174 int literal_length() {	185 int literal_length() {

175 if (!current_literal_->Valid(current_.beg_pos)) {	186 EnsureCurrentLiteralIsValid();

176 FillLiteral(current_, current_literal_);

177 }

178 return current_literal_->length;	187 return current_literal_->length;

179 }	188 }

180	189

181 // This should be is_onebyte or is_latin1; it doesn't mean ASCII for real.	190 // This should be is_onebyte or is_latin1; it doesn't mean ASCII for real.

182 bool is_literal_ascii() {	191 bool is_literal_ascii() {

183 if (!current_literal_->Valid(current_.beg_pos)) {	192 EnsureCurrentLiteralIsValid();

184 FillLiteral(current_, current_literal_);

185 }

186 return current_literal_->is_ascii;	193 return current_literal_->is_ascii;

187 }	194 }

188	195

189 bool is_literal_contextual_keyword(Vector<const char> keyword) {	196 bool is_literal_contextual_keyword(Vector<const char> keyword) {

190 if (!is_literal_ascii()) return false;	197 if (!is_literal_ascii()) return false;

191 Vector<const char> literal = literal_ascii_string();	198 Vector<const char> literal = literal_ascii_string();

192 return literal.length() == keyword.length() &&	199 return literal.length() == keyword.length() &&

193 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);	200 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);

194 }	201 }

195	202

196 bool literal_contains_escapes() const {	203 bool literal_contains_escapes() const {

197 return current_.has_escapes;	204 return current_.has_escapes;

198 }	205 }

199	206

200 Vector<const char> next_literal_ascii_string() {	207 Vector<const char> next_literal_ascii_string() {

201 if (!next_literal_->Valid(next_.beg_pos)) {	208 EnsureNextLiteralIsValid();

202 FillLiteral(next_, next_literal_);

203 }

204 return next_literal_->ascii_string;	209 return next_literal_->ascii_string;

205 }	210 }

206	211

207 Vector<const uc16> next_literal_utf16_string() {	212 Vector<const uc16> next_literal_utf16_string() {

208 if (!next_literal_->Valid(next_.beg_pos)) {	213 EnsureNextLiteralIsValid();

209 FillLiteral(next_, next_literal_);

210 }

211 return next_literal_->utf16_string;	214 return next_literal_->utf16_string;

212 }	215 }

213	216

214 int next_literal_length() {	217 int next_literal_length() {

215 if (!next_literal_->Valid(next_.beg_pos)) {	218 EnsureNextLiteralIsValid();

216 FillLiteral(next_, next_literal_);

217 }

218 return next_literal_->length;	219 return next_literal_->length;

219 }	220 }

220	221

221 bool is_next_literal_ascii() {	222 bool is_next_literal_ascii() {

222 if (!next_literal_->Valid(next_.beg_pos)) {	223 EnsureNextLiteralIsValid();

223 FillLiteral(next_, next_literal_);

224 }

225 return next_literal_->is_ascii;	224 return next_literal_->is_ascii;

226 }	225 }

227	226

228 bool is_next_contextual_keyword(Vector<const char> keyword) {	227 bool is_next_contextual_keyword(Vector<const char> keyword) {

229 if (!is_next_literal_ascii()) return false;	228 if (!is_next_literal_ascii()) return false;

230 Vector<const char> literal = next_literal_ascii_string();	229 Vector<const char> literal = next_literal_ascii_string();

231 return literal.length() == keyword.length() &&	230 return literal.length() == keyword.length() &&

232 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);	231 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);

233 }	232 }

234	233

235 protected:	234 protected:

236 struct TokenDesc {	235 struct TokenDesc {

237 Token::Value token;	236 Token::Value token;

238 int beg_pos;	237 int beg_pos;

239 int end_pos;	238 int end_pos;

240 bool has_escapes;	239 bool has_escapes;

241 };	240 };

242	241

243 struct LiteralDesc {	242 struct LiteralDesc {

244 int beg_pos;	243 int beg_pos;

245 bool is_ascii;	244 bool is_ascii;

	245 bool is_in_buffer;

	246 int offset;

246 int length;	247 int length;

247 Vector<const char> ascii_string;	248 Vector<const char> ascii_string;

248 Vector<const uc16> utf16_string;	249 Vector<const uc16> utf16_string;

249 LiteralBuffer buffer;	250 LiteralBuffer buffer;

250 LiteralDesc() : beg_pos(-1), is_ascii(false), length(0) { }	251 LiteralDesc() : beg_pos(-1), is_ascii(false), is_in_buffer(false),

	252 offset(0), length(0) { }

251 bool Valid(int pos) { return beg_pos == pos; }	253 bool Valid(int pos) { return beg_pos == pos; }

252 };	254 };

253	255

254 virtual void Scan() = 0;	256 virtual void Scan() = 0;

255 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0;	257 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0;

256	258

257 void ResetLiterals() {	259 void ResetLiterals() {

258 current_literal_->beg_pos = -1;	260 if (!current_literal_->is_in_buffer) current_literal_->beg_pos = -1;

259 next_literal_->beg_pos = -1;	261 if (!next_literal_->is_in_buffer) next_literal_->beg_pos = -1;
	marja 2014/01/20 07:54:33: Shouldn't we reset is_in_buffer here too? Why not?
	ulan 2014/01/20 09:15:00: On 2014/01/20 07:54:33, marja wrote: > Shouldn't we reset is_in_buffer here too? Why not? — beg_pos == -1 indicates an invalid buffer; for an invalid buffer we do not look at is_in_buffer.
260 }	262 }

261	263

	264 void EnsureCurrentLiteralIsValid() {

	265 if (!current_literal_->Valid(current_.beg_pos)) {

	266 FillLiteral(current_, current_literal_);

	267 }

	268 }

	269

	270 void EnsureNextLiteralIsValid() {

	271 if (!next_literal_->Valid(next_.beg_pos)) {

	272 FillLiteral(next_, next_literal_);

	273 }

	274 }

	275

	276 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal) = 0;

	277 virtual Handle<String> AllocateLiteral(LiteralDesc* literal,

	278 PretenureFlag tenured) = 0;

	279

262 Isolate* isolate_;	280 Isolate* isolate_;

263 UnicodeCache* unicode_cache_;	281 UnicodeCache* unicode_cache_;

264	282

265 bool has_line_terminator_before_next_;	283 bool has_line_terminator_before_next_;

266 // Whether there is a multiline comment with a line break before the next	284 // Whether there is a multiline comment with a line break before the next

267 // token.	285 // token.

268 bool has_multiline_comment_before_next_;	286 bool has_multiline_comment_before_next_;

269	287

270 TokenDesc current_; // desc for current token (as returned by Next())	288 TokenDesc current_; // desc for current token (as returned by Next())

271 TokenDesc next_; // desc for next token (one token look-ahead)	289 TokenDesc next_; // desc for next token (one token look-ahead)

(...skipping 60 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
332 ResetLiterals();	350 ResetLiterals();

333 }	351 }

334 }	352 }

335	353

336 protected:	354 protected:

337 virtual void Scan();	355 virtual void Scan();

338	356

339 const Char* GetNewBufferBasedOnHandle() const;	357 const Char* GetNewBufferBasedOnHandle() const;

340	358

341 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal);	359 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal);

	360 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal);

	361 virtual Handle<String> AllocateLiteral(LiteralDesc* literal,

	362 PretenureFlag tenured);

	363

342	364

343 private:	365 private:

344 bool ValidIdentifierPart() {	366 bool ValidIdentifierPart() {

345 return unicode_cache_->IsIdentifierPart(ScanHexNumber(4));	367 return unicode_cache_->IsIdentifierPart(ScanHexNumber(4));

346 }	368 }

347	369

348 bool ValidIdentifierStart() {	370 bool ValidIdentifierStart() {

349 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4));	371 return unicode_cache_->IsIdentifierStart(ScanHexNumber(4));

350 }	372 }

351	373

352 uc32 ScanHexNumber(int length);	374 uc32 ScanHexNumber(int length);

353 bool ScanLiteralUnicodeEscape();	375 bool ScanLiteralUnicodeEscape();

354	376

355 const Char* ScanHexNumber(const Char* start,	377 const Char* ScanHexNumber(const Char* start,

356 const Char* end,	378 const Char* end,

357 uc32* result);	379 uc32* result);

358 const Char* ScanOctalEscape(const Char* start,	380 const Char* ScanOctalEscape(const Char* start,

359 const Char* end,	381 const Char* end,

360 uc32* result);	382 uc32* result);

361 const Char* ScanIdentifierUnicodeEscape(const Char* start,	383 const Char* ScanIdentifierUnicodeEscape(const Char* start,

362 const Char* end,	384 const Char* end,

363 uc32* result);	385 uc32* result);

364 const Char* ScanEscape(const Char* start,	386 const Char* ScanEscape(const Char* start,

365 const Char* end,	387 const Char* end,

366 LiteralBuffer* literal);	388 LiteralBuffer* literal);

367	389

	390 bool IsSubstringOfSource(const TokenDesc& token);

	391

	392 bool CopyToLiteralBuffer(const Char* start,

	393 const Char* end,

	394 const TokenDesc& token,

	395 LiteralDesc* literal);

	396

368 Handle<String> source_handle_;	397 Handle<String> source_handle_;

369 const Char* buffer_;	398 const Char* buffer_;

370 const Char* buffer_end_;	399 const Char* buffer_end_;

371 const Char* start_;	400 const Char* start_;

372 const Char* cursor_;	401 const Char* cursor_;

373	402

374 // Where we have seen the last octal number or an octal escape inside a	403 // Where we have seen the last octal number or an octal escape inside a

375 // string. Used by octal_position().	404 // string. Used by octal_position().

376 const Char* last_octal_end_;	405 const Char* last_octal_end_;

377 };	406 };

(...skipping 228 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
606 // character.	635 // character.

607 const Char* temp_cursor = last_octal_end_ - 1;	636 const Char* temp_cursor = last_octal_end_ - 1;

608 while (temp_cursor >= buffer_ && temp_cursor >= '0' && temp_cursor <= '7')	637 while (temp_cursor >= buffer_ && temp_cursor >= '0' && temp_cursor <= '7')

609 --temp_cursor;	638 --temp_cursor;

610 return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_);	639 return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_);

611 }	640 }

612	641

613 } }	642 } }

614	643

615 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H	644 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H

OLD	NEW

« no previous file with comments | « src/factory.cc ('k') | src/lexer/experimental-scanner.cc » ('j') | src/lexer/experimental-scanner.cc » ('J')