src/lexer/lexer.h - Issue 187603004: Experimental parser: make utf8 sort of work

Side by Side Diff: src/lexer/lexer.h

Issue 187603004: Experimental parser: make utf8 sort of work (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2013 the V8 project authors. All rights reserved.	1 // Copyright 2013 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
50 LexerSet lexers_;	50 LexerSet lexers_;

51 };	51 };

52	52

53	53

54 class LexerBase {	54 class LexerBase {

55 public:	55 public:

56 struct Location {	56 struct Location {

57 Location(int b, int e) : beg_pos(b), end_pos(e) { }	57 Location(int b, int e) : beg_pos(b), end_pos(e) { }

58 Location() : beg_pos(0), end_pos(0) { }	58 Location() : beg_pos(0), end_pos(0) { }

59	59

60 bool IsValid() const {	60 bool IsValid() const { return beg_pos >= 0 && end_pos >= beg_pos; }

61 return beg_pos >= 0 && end_pos >= beg_pos;

62 }

63

64 static Location invalid() { return Location(-1, -1); }	61 static Location invalid() { return Location(-1, -1); }

65	62

66 int beg_pos;	63 int beg_pos;

67 int end_pos;	64 int end_pos;

68 };	65 };

69	66

70 explicit LexerBase(UnicodeCache* unicode_cache);	67 explicit LexerBase(UnicodeCache* unicode_cache);

71	68

72 virtual ~LexerBase();	69 virtual ~LexerBase();

73	70

(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
113	110

114 // Returns true if there was a line terminator before the peek'ed token,	111 // Returns true if there was a line terminator before the peek'ed token,

115 // possibly inside a multi-line comment.	112 // possibly inside a multi-line comment.

116 bool HasAnyLineTerminatorBeforeNext() const {	113 bool HasAnyLineTerminatorBeforeNext() const {

117 return has_line_terminator_before_next_ ||	114 return has_line_terminator_before_next_ ||

118 has_multiline_comment_before_next_;	115 has_multiline_comment_before_next_;

119 }	116 }

120	117

121 Vector<const uint8_t> literal_one_byte_string() {	118 Vector<const uint8_t> literal_one_byte_string() {

122 EnsureCurrentLiteralIsValid();	119 EnsureCurrentLiteralIsValid();

123 return current_literal_->one_byte_string;	120 return current_literal_->one_byte_string();

124 }	121 }

125	122

126 Vector<const uint16_t> literal_two_byte_string() {	123 Vector<const uint16_t> literal_two_byte_string() {

127 EnsureCurrentLiteralIsValid();	124 EnsureCurrentLiteralIsValid();

128 return current_literal_->two_byte_string;	125 return current_literal_->two_byte_string();

129 }	126 }

130	127

131 int literal_length() {	128 int literal_length() {

132 EnsureCurrentLiteralIsValid();	129 EnsureCurrentLiteralIsValid();

133 return current_literal_->length;	130 return current_literal_->length;

134 }	131 }

135	132

136 bool is_literal_one_byte() {	133 bool is_literal_one_byte() {

137 EnsureCurrentLiteralIsValid();	134 EnsureCurrentLiteralIsValid();

138 return current_literal_->is_one_byte;	135 return current_literal_->is_one_byte();

139 }	136 }

140	137

141 bool is_literal_contextual_keyword(Vector<const uint8_t> keyword) {	138 bool is_literal_contextual_keyword(Vector<const uint8_t> keyword) {

142 if (!is_literal_one_byte()) return false;	139 if (!is_literal_one_byte()) return false;

143 Vector<const uint8_t> literal = literal_one_byte_string();	140 Vector<const uint8_t> literal = literal_one_byte_string();

144 return literal.length() == keyword.length() &&	141 return literal.length() == keyword.length() &&

145 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);	142 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);

146 }	143 }

147	144

148 bool literal_contains_escapes() const {	145 bool literal_contains_escapes() const {

149 return current_.has_escapes;	146 return current_.has_escapes;

150 }	147 }

151	148

152 Vector<const uint8_t> next_literal_one_byte_string() {	149 Vector<const uint8_t> next_literal_one_byte_string() {

153 EnsureNextLiteralIsValid();	150 EnsureNextLiteralIsValid();

154 return next_literal_->one_byte_string;	151 return next_literal_->one_byte_string();

155 }	152 }

156	153

157 Vector<const uint16_t> next_literal_two_byte_string() {	154 Vector<const uint16_t> next_literal_two_byte_string() {

158 EnsureNextLiteralIsValid();	155 EnsureNextLiteralIsValid();

159 return next_literal_->two_byte_string;	156 return next_literal_->two_byte_string();

160 }	157 }

161	158

162 int next_literal_length() {	159 int next_literal_length() {

163 EnsureNextLiteralIsValid();	160 EnsureNextLiteralIsValid();

164 return next_literal_->length;	161 return next_literal_->length;

165 }	162 }

166	163

167 bool is_next_literal_one_byte() {	164 bool is_next_literal_one_byte() {

168 EnsureNextLiteralIsValid();	165 EnsureNextLiteralIsValid();

169 return next_literal_->is_one_byte;	166 return next_literal_->is_one_byte();

170 }	167 }

171	168

172 bool is_next_contextual_keyword(Vector<const uint8_t> keyword) {	169 bool is_next_contextual_keyword(Vector<const uint8_t> keyword) {

173 if (!is_next_literal_one_byte()) return false;	170 if (!is_next_literal_one_byte()) return false;

174 Vector<const uint8_t> literal = next_literal_one_byte_string();	171 Vector<const uint8_t> literal = next_literal_one_byte_string();

175 return literal.length() == keyword.length() &&	172 return literal.length() == keyword.length() &&

176 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);	173 (memcmp(literal.start(), keyword.start(), literal.length()) == 0);

177 }	174 }

178	175

179 bool HarmonyScoping() const {	176 bool HarmonyScoping() const {

(...skipping 15 matching lines...) Expand all Loading...
195 bool HarmonyNumericLiterals() const {	192 bool HarmonyNumericLiterals() const {

196 return harmony_numeric_literals_;	193 return harmony_numeric_literals_;

197 }	194 }

198	195

199 void SetHarmonyNumericLiterals(bool numeric_literals) {	196 void SetHarmonyNumericLiterals(bool numeric_literals) {

200 harmony_numeric_literals_ = numeric_literals;	197 harmony_numeric_literals_ = numeric_literals;

201 }	198 }

202	199

203 UnicodeCache* unicode_cache() { return unicode_cache_; }	200 UnicodeCache* unicode_cache() { return unicode_cache_; }

204	201

	202 class LiteralDesc {

	203 public:

	204 LiteralDesc()

	205 : beg_pos(-1),

	206 offset(0),

	207 length(0),

	208 is_one_byte_(false),

	209 is_in_buffer_(false),

	210 is_one_byte_string_owned_(false) // TODO(dcarney): move to buffer

	211 { }

	212

	213 ~LiteralDesc() {

	214 if (is_one_byte_string_owned_) {

	215 one_byte_string_.Dispose();

	216 }

	217 }

	218

	219 inline bool is_one_byte() { return is_one_byte_; }

	220 inline Vector<const uint8_t> one_byte_string() {

	221 ASSERT(is_one_byte_);

	222 return one_byte_string_;

	223 }

	224 inline Vector<const uint16_t> two_byte_string() {

	225 ASSERT(!is_one_byte_);

	226 return two_byte_string_;

	227 }

	228

	229 inline bool Valid(int pos) { return beg_pos == pos; }

	230 inline void Invalidate() { if (is_in_buffer_) beg_pos = -1; }

	231

	232 // TODO(dcarney): make private as well.

	233 int beg_pos;

	234 int offset;

	235 int length;

	236 LiteralBuffer buffer;

	237

	238 void SetOneByteString(Vector<const uint8_t> string, bool owned);

	239 void SetTwoByteString(Vector<const uint16_t> string);

	240 void SetStringFromLiteralBuffer();

	241

	242 private:

	243 bool is_one_byte_;

	244 bool is_in_buffer_;

	245 bool is_one_byte_string_owned_;

	246 Vector<const uint8_t> one_byte_string_;

	247 Vector<const uint16_t> two_byte_string_;

	248

	249 DISALLOW_COPY_AND_ASSIGN(LiteralDesc);

	250 };

	251

205 protected:	252 protected:

206 struct TokenDesc {	253 struct TokenDesc {

207 Token::Value token;

208 int beg_pos;	254 int beg_pos;

209 int end_pos;	255 int end_pos;

	256 Token::Value token;

210 bool has_escapes;	257 bool has_escapes;

211 bool is_onebyte;	258 bool is_onebyte;

212 };	259 };

213	260

214 struct LiteralDesc {

215 int beg_pos;

216 bool is_one_byte;

217 bool is_in_buffer;

218 int offset;

219 int length;

220 Vector<const uint8_t> one_byte_string;

221 Vector<const uint16_t> two_byte_string;

222 LiteralBuffer buffer;

223 LiteralDesc() : beg_pos(-1), is_one_byte(false), is_in_buffer(false),

224 offset(0), length(0) { }

225 bool Valid(int pos) { return beg_pos == pos; }

226 };

227

228 virtual void Scan() = 0;	261 virtual void Scan() = 0;

229

230 virtual void UpdateBufferBasedOnHandle() = 0;	262 virtual void UpdateBufferBasedOnHandle() = 0;

231 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0;	263 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0;

232 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal) = 0;	264 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal) = 0;

233 virtual Handle<String> AllocateLiteral(LiteralDesc* literal,	265 virtual Handle<String> AllocateLiteral(LiteralDesc* literal,

234 PretenureFlag tenured) = 0;	266 PretenureFlag tenured) = 0;

235	267

236 void ResetLiterals() {

237 if (!current_literal_->is_in_buffer) current_literal_->beg_pos = -1;

238 if (!next_literal_->is_in_buffer) next_literal_->beg_pos = -1;

239 }

240

241 void EnsureCurrentLiteralIsValid() {	268 void EnsureCurrentLiteralIsValid() {

242 if (!current_literal_->Valid(current_.beg_pos)) {	269 if (!current_literal_->Valid(current_.beg_pos)) {

243 FillLiteral(current_, current_literal_);	270 FillLiteral(current_, current_literal_);

244 }	271 }

245 }	272 }

246	273

247 void EnsureNextLiteralIsValid() {	274 void EnsureNextLiteralIsValid() {

248 if (!next_literal_->Valid(next_.beg_pos)) {	275 if (!next_literal_->Valid(next_.beg_pos)) {

249 FillLiteral(next_, next_literal_);	276 FillLiteral(next_, next_literal_);

250 }	277 }

251 }	278 }

252	279

253 UnicodeCache* unicode_cache_;	280 UnicodeCache* unicode_cache_;

	281 LiteralDesc* current_literal_;

	282 LiteralDesc* next_literal_;

	283 LiteralDesc literals_[2];

254	284

	285 TokenDesc current_; // desc for current token (as returned by Next())

	286 TokenDesc next_; // desc for next token (one token look-ahead)

	287

	288 // TODO(dcarney): encode flags in uint8_t

255 bool has_line_terminator_before_next_;	289 bool has_line_terminator_before_next_;

256 // Whether there is a multiline comment with a line break before the next	290 // Whether there is a multiline comment with a line break before the next

257 // token.	291 // token.

258 bool has_multiline_comment_before_next_;	292 bool has_multiline_comment_before_next_;

259

260 TokenDesc current_; // desc for current token (as returned by Next())

261 TokenDesc next_; // desc for next token (one token look-ahead)

262

263 LiteralDesc* current_literal_;

264 LiteralDesc* next_literal_;

265 LiteralDesc literals_[2];

266

267 bool harmony_numeric_literals_;	293 bool harmony_numeric_literals_;

268 bool harmony_modules_;	294 bool harmony_modules_;

269 bool harmony_scoping_;	295 bool harmony_scoping_;

270	296

271 friend class Scanner;	297 friend class Scanner;

272 friend class LexerGCHandler;	298 friend class LexerGCHandler;

273 };	299 };

274	300

275	301

276 template<typename Char>	302 template<typename Char>

277 class Lexer : public LexerBase {	303 class Lexer : public LexerBase {

278 public:	304 public:

279 Lexer(UnicodeCache* unicode_cache,	305 Lexer(UnicodeCache* unicode_cache,

280 Handle<String> source,	306 Handle<String> source,

281 int start_position,	307 int start_position,

282 int end_position);	308 int end_position);

283 Lexer(UnicodeCache* unicode_cache, const Char* source_ptr, int length);	309 Lexer(UnicodeCache* unicode_cache, const Char* source_ptr, int length);

284 virtual ~Lexer();	310 virtual ~Lexer();

285	311

286 virtual void SeekForward(int pos);	312 virtual void SeekForward(int pos);

287 virtual bool ScanRegExpPattern(bool seen_equal);	313 virtual bool ScanRegExpPattern(bool seen_equal);

288 virtual bool ScanRegExpFlags();	314 virtual bool ScanRegExpFlags();

289 virtual Location octal_position() const;	315 virtual Location octal_position() const;

290 virtual void clear_octal_position() { last_octal_end_ = NULL; }	316 virtual void clear_octal_position() { last_octal_end_ = NULL; }

291	317

292 protected:	318 protected:

293 virtual void Scan();	319 virtual void Scan();

294	320

	321 private:

	322 uc32 ScanHexNumber(int length);

	323

	324 bool ScanLiteralUnicodeEscape();

	325

295 const Char* GetNewBufferBasedOnHandle() const;	326 const Char* GetNewBufferBasedOnHandle() const;

296 virtual void UpdateBufferBasedOnHandle();	327 virtual void UpdateBufferBasedOnHandle();

297	328

298 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal);	329 virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal);

299 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal);	330 virtual Handle<String> InternalizeLiteral(LiteralDesc* literal);

300 virtual Handle<String> AllocateLiteral(LiteralDesc* literal,	331 virtual Handle<String> AllocateLiteral(LiteralDesc* literal,

301 PretenureFlag tenured);	332 PretenureFlag tenured);

302	333

303 private:	334 // Helper function for FillLiteral.

304 uc32 ScanHexNumber(int length);	335 template<bool is_one_byte>

305	336 static void SetLiteral(

306 bool ScanLiteralUnicodeEscape();	337 const Char* start, const Char* end, LiteralDesc* literal);

307

308 const Char* ScanHexNumber(const Char* start,

309 const Char* end,

310 uc32* result);

311 const Char* ScanOctalEscape(const Char* start,

312 const Char* end,

313 uc32* result);

314 const Char* ScanIdentifierUnicodeEscape(const Char* start,

315 const Char* end,

316 uc32* result);

317 const Char* ScanEscape(const Char* start,

318 const Char* end,

319 LiteralBuffer* literal);

320

321 // Returns true if the literal of the token can be represented as a

322 // substring of the source.

323 bool IsSubstringOfSource(const TokenDesc& token);

324	338

325 bool CopyToLiteralBuffer(const Char* start,	339 bool CopyToLiteralBuffer(const Char* start,

326 const Char* end,	340 const Char* end,

327 const TokenDesc& token,	341 const TokenDesc& token,

328 LiteralDesc* literal);	342 LiteralDesc* literal);

329	343

330 // One of source_handle_ or source_ptr_ is set.	344 // One of source_handle_ or source_ptr_ is set.

331 // If source_ptr_ is set, isolate_ is 0 and no isolate accesses are allowed.	345 // If source_ptr_ is set, isolate_ is 0 and no isolate accesses are allowed.

332 Isolate* isolate_;	346 Isolate* isolate_;

333 const Handle<String> source_handle_;	347 const Handle<String> source_handle_;

334 const Char* const source_ptr_;	348 const Char* const source_ptr_;

335 const int start_position_;

336 const int end_position_;	349 const int end_position_;

337 // Stream variables.	350 // Stream variables.

338 const Char* buffer_;	351 const Char* buffer_;

339 const Char* buffer_end_;	352 const Char* buffer_end_;

340 const Char* start_;	353 const Char* start_;

341 const Char* cursor_;	354 const Char* cursor_;

342 // Where we have seen the last octal number or an octal escape inside a	355 // Where we have seen the last octal number or an octal escape inside a

343 // string. Used by octal_position().	356 // string. Used by octal_position().

344 const Char* last_octal_end_;	357 const Char* last_octal_end_;

345 };	358 };

(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
472 bool harmony_scoping_;	485 bool harmony_scoping_;

473 };	486 };

474	487

475	488

476 #endif	489 #endif

477	490

478	491

479 } }	492 } }

480	493

481 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H	494 #endif // V8_LEXER_EXPERIMENTAL_SCANNER_H

OLD	NEW

« no previous file with comments | « no previous file | src/lexer/lexer.cc » ('j') | no next file with comments »