src/asmjs/asm-scanner.cc - Issue 2751693002: [wasm][asm.js] Adding custom asm.js lexer.

Side by Side Diff: src/asmjs/asm-scanner.cc

Issue 2751693002: [wasm][asm.js] Adding custom asm.js lexer. (Closed)

Patch Set: fix Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 // Copyright 2017 the V8 project authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "src/asmjs/asm-scanner.h"

	6

	7 #include "src/conversions.h"

	8 #include "src/flags.h"

	9 #include "src/parsing/scanner.h"

	10 #include "src/unicode-cache.h"

	11

	12 namespace v8 {

	13 namespace internal {

	14

	namespace {

	// Cap on the number of identifiers the scanner will assign token ids to.
	// Keeping the count below this bound ensures that both global and local
	// identifiers can be given a token id in the range of an int32_t.
	constexpr int kMaxIdentifierCount = 0xf000000;

	}  // namespace

	20

	21 AsmJsScanner::AsmJsScanner()

	22 : token_(kUninitialized),

	23 preceding_token_(kUninitialized),

	24 next_token_(kUninitialized),

	25 rewind_(false),

	26 in_local_scope_(false),

	27 global_count_(0),

	28 double_value_(0.0),

	29 unsigned_value_(0),

	30 preceded_by_newline_(false) {

	31 #define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name;

	32 STDLIB_MATH_FUNCTION_LIST(V)

	33 STDLIB_ARRAY_TYPE_LIST(V)

	34 #undef V

	35 #define V(name) property_names_[#name] = kToken_##name;

	36 STDLIB_MATH_VALUE_LIST(V)

	37 STDLIB_OTHER_LIST(V)

	38 #undef V

	39 #define V(name) global_names_[#name] = kToken_##name;

	40 KEYWORD_NAME_LIST(V)

	41 #undef V

	42 }

	43

	44 void AsmJsScanner::SetStream(std::unique_ptr<Utf16CharacterStream> stream) {

	45 stream_ = std::move(stream);

	46 Next();

	47 }

	48

	49 void AsmJsScanner::Next() {

	50 if (rewind_) {

	51 preceding_token_ = token_;

	52 token_ = next_token_;

	53 next_token_ = kUninitialized;

	54 rewind_ = false;

	55 return;

	56 }

	57

	58 if (token_ == kEndOfInput \|\| token_ == kParseError) {

	59 return;

	60 }

	61

	62 #if DEBUG

	63 if (FLAG_trace_asm_scanner) {

	64 if (Token() == kDouble) {

	65 PrintF("%lf ", AsDouble());

	66 } else if (Token() == kUnsigned) {

	67 PrintF("%" PRIu64 " ", AsUnsigned());

	68 } else {

	69 std::string name = Name(Token());

	70 PrintF("%s ", name.c_str());

	71 }

	72 }

	73 #endif

	74

	75 preceded_by_newline_ = false;

	76 preceding_token_ = token_;

	77 for (;;) {

	78 uc32 ch = stream_->Advance();

	79 switch (ch) {

	80 case ' ':

	81 case '\t':

	82 case '\r':

	83 // Ignore whitespace.

	84 break;

	85

	86 case '\n':

	87 // Track when we've passed a newline for optional semicolon support,

	88 // but keep scanning.

	89 preceded_by_newline_ = true;

	90 break;

	91

	92 case kEndOfInput:

	93 token_ = kEndOfInput;

	94 return;

	95

	96 case '\'':

	97 case '"':

	98 ConsumeString(ch);

	99 return;

	100

	101 case '/':

	102 ch = stream_->Advance();

	103 if (ch == '/') {

	104 ConsumeCPPComment();

	105 } else if (ch == '*') {

	106 if (!ConsumeCComment()) {

	107 token_ = kParseError;

	108 return;

	109 }

	110 } else {

	111 stream_->Back();

	112 token_ = '/';

	113 return;

	114 }

	115 // Breaks out of switch, but loops again (i.e. the case when we parsed

	116 // a comment, but need to continue to look for the next token).

	117 break;

	118

	119 case '<':

	120 case '>':

	121 case '=':

	122 case '!':

	123 ConsumeCompareOrShift(ch);

	124 return;

	125

	126 #define V(single_char_token) case single_char_token:

	127 SIMPLE_SINGLE_TOKEN_LIST(V)

	128 #undef V

	129 // Use fixed token IDs for ASCII.

	130 token_ = ch;

	131 return;

	132

	133 default:

	134 if (IsIdentifierStart(ch)) {

	135 ConsumeIdentifier(ch);

	136 } else if (IsNumberStart(ch)) {

	137 ConsumeNumber(ch);

	138 } else {

	139 // TODO(bradnelson): Support unicode (probably via UnicodeCache).

	140 token_ = kParseError;

	141 }

	142 return;

	143 }

	144 }

	145 }

	146

	147 void AsmJsScanner::Rewind() {

	148 DCHECK(!rewind_);

	149 next_token_ = token_;

	150 token_ = preceding_token_;

	151 preceding_token_ = kUninitialized;

	152 rewind_ = true;

	153 preceded_by_newline_ = false;

	154 identifier_string_.clear();

	155 }

	156

	157 void AsmJsScanner::ResetLocals() { local_names_.clear(); }

	158

	#if DEBUG
	// Only used for debugging.
	//
	// Returns a printable name for |token|:
	//   - printable ASCII tokens (32..126) map to the character itself;
	//   - identifier tokens map to the spelling recorded in the local, global
	//     or property name tables (searched in that order);
	//   - remaining tokens are resolved through LONG_SYMBOL_NAME_LIST and
	//     SPECIAL_TOKEN_LIST.
	// Any other value is a programming error and hits UNREACHABLE().
	std::string AsmJsScanner::Name(token_t token) const {
	  if (token >= 32 && token < 127) {
	    return std::string(1, static_cast<char>(token));
	  }
	  // The tables map name -> token, so a reverse lookup is a linear scan.
	  for (const auto& entry : local_names_) {
	    if (entry.second == token) {
	      return entry.first;
	    }
	  }
	  for (const auto& entry : global_names_) {
	    if (entry.second == token) {
	      return entry.first;
	    }
	  }
	  for (const auto& entry : property_names_) {
	    if (entry.second == token) {
	      return entry.first;
	    }
	  }
	  switch (token) {
	#define V(rawname, name) \
	  case kToken_##name:    \
	    return rawname;
	    LONG_SYMBOL_NAME_LIST(V)
	#undef V
	#define V(name, value, string_name) \
	  case name:                        \
	    return string_name;
	    SPECIAL_TOKEN_LIST(V)
	#undef V
	    default:
	      break;
	  }
	  UNREACHABLE();
	  return "{unreachable}";
	}
	#endif

	197

	198 int AsmJsScanner::GetPosition() const {

	199 DCHECK(!rewind_);

	200 return static_cast<int>(stream_->pos());

	201 }

	202

	203 void AsmJsScanner::Seek(int pos) {

	204 stream_->Seek(pos);

	205 preceding_token_ = kUninitialized;

	206 token_ = kUninitialized;

	207 next_token_ = kUninitialized;

	208 rewind_ = false;

	209 Next();

	210 }

	211

	212 void AsmJsScanner::ConsumeIdentifier(uc32 ch) {

	213 // Consume characters while still part of the identifier.

	214 identifier_string_.clear();

	215 while (IsIdentifierPart(ch)) {

	216 identifier_string_ += ch;

	217 ch = stream_->Advance();

	218 }

	219 // Go back one for next time.

	220 stream_->Back();

	221

	222 // Decode what the identifier means.

	223 if (preceding_token_ == '.') {

	224 auto i = property_names_.find(identifier_string_);

	225 if (i != property_names_.end()) {

	226 token_ = i->second;

	227 return;

	228 }

	229 } else {

	230 {

	231 auto i = local_names_.find(identifier_string_);

	232 if (i != local_names_.end()) {

	233 token_ = i->second;

	234 return;

	235 }

	236 }

	237 if (!in_local_scope_) {

	238 auto i = global_names_.find(identifier_string_);

	239 if (i != global_names_.end()) {

	240 token_ = i->second;

	241 return;

	242 }

	243 }

	244 }

	245 if (preceding_token_ == '.') {

	246 CHECK(global_count_ < kMaxIdentifierCount);

	247 token_ = kGlobalsStart + global_count_++;

	248 property_names_[identifier_string_] = token_;

	249 } else if (in_local_scope_) {

	250 CHECK(local_names_.size() < kMaxIdentifierCount);

	251 token_ = kLocalsStart - static_cast<token_t>(local_names_.size());

	252 local_names_[identifier_string_] = token_;

	253 } else {

	254 CHECK(global_count_ < kMaxIdentifierCount);

	255 token_ = kGlobalsStart + global_count_++;

	256 global_names_[identifier_string_] = token_;

	257 }

	258 }

	259

	260 void AsmJsScanner::ConsumeNumber(uc32 ch) {

	261 std::string number;

	262 number = ch;

	263 bool has_dot = ch == '.';

	264 for (;;) {

	265 ch = stream_->Advance();

	266 if ((ch >= '0' && ch <= '9') \|\| (ch >= 'a' && ch <= 'f') \|\|

	267 (ch >= 'A' && ch <= 'F') \|\| ch == '.' \|\| ch == 'b' \|\| ch == 'o' \|\|

	268 ch == 'x' \|\|

	269 ((ch == '-' \|\| ch == '+') && (number[number.size() - 1] == 'e' \|\|

	270 number[number.size() - 1] == 'E'))) {

	271 // TODO(bradnelson): Test weird cases ending in -.

	272 if (ch == '.') {

	273 has_dot = true;

	274 }

	275 number.push_back(ch);

	276 } else {

	277 break;

	278 }

	279 }

	280 stream_->Back();

	281 // Special case the most common number.

	282 if (number.size() == 1 && number[0] == '0') {

	283 unsigned_value_ = 0;

	284 token_ = kUnsigned;

	285 return;

	286 }

	287 // Pick out dot.

	288 if (number.size() == 1 && number[0] == '.') {

	289 token_ = '.';

	290 return;

	291 }

	292 // Decode numbers.

	293 UnicodeCache cache;

	294 double_value_ = StringToDouble(

	295 &cache,

	296 Vector<uint8_t>(

	297 const_cast<uint8_t>(reinterpret_cast<const uint8_t>(number.data())),

	298 number.size()),

	299 ALLOW_HEX \| ALLOW_OCTAL \| ALLOW_BINARY \| ALLOW_IMPLICIT_OCTAL);

	300 if (std::isnan(double_value_)) {

	301 // Check if string to number conversion didn't consume all the characters.

	302 // This happens if the character filter let through something invalid

	303 // like: 0123ef for example.

	304 // TODO(bradnelson): Check if this happens often enough to be a perf

	305 // problem.

	306 if (number[0] == '.') {

	307 for (size_t k = 1; k < number.size(); ++k) {

	308 stream_->Back();

	309 }

	310 token_ = '.';

	311 return;

	312 }

	313 // Anything else that doesn't parse is an error.

	314 token_ = kParseError;

	315 return;

	316 }

	317 if (has_dot) {

	318 token_ = kDouble;

	319 } else {

	320 unsigned_value_ = static_cast<uint32_t>(double_value_);

	321 token_ = kUnsigned;

	322 }

	323 }

	324

	325 bool AsmJsScanner::ConsumeCComment() {

	326 for (;;) {

	327 uc32 ch = stream_->Advance();

	328 while (ch == '*') {

	329 ch = stream_->Advance();

	330 if (ch == '/') {

	331 return true;

	332 }

	333 if (ch == kEndOfInput) {
	vogelheim 2017/03/16 12:46:47 I think you can just drop this if. If ch is kEndO I think you can just drop this if. If ch is kEndOfInput here, it would exit the while loop and run into the if right after it, which does the same check. bradn 2017/03/16 17:03:15 Done. Show quoted text On 2017/03/16 12:46:47, vogelheim wrote: > I think you can just drop this if. > > If ch is kEndOfInput here, it would exit the while loop and run into the if > right after it, which does the same check. Done.
	334 return false;

	335 }

	336 }

	337 if (ch == kEndOfInput) {

	338 return false;

	339 }

	340 }

	341 }

	342

	343 void AsmJsScanner::ConsumeCPPComment() {

	344 for (;;) {

	345 uc32 ch = stream_->Advance();

	346 if (ch == '\n' \|\| ch == kEndOfInput) {

	347 return;

	348 }

	349 }

	350 }

	351

	352 void AsmJsScanner::ConsumeString(uc32 quote) {

	353 // Only string allowed is 'use asm' / "use asm".

	354 const char* expected = "use asm";

	355 for (; *expected != '\0'; ++expected) {

	356 if (stream_->Advance() != *expected) {

	357 token_ = kParseError;

	358 return;

	359 }

	360 }

	361 if (stream_->Advance() != quote) {

	362 token_ = kParseError;

	363 return;

	364 }

	365 token_ = kToken_UseAsm;

	366 }

	367

	368 void AsmJsScanner::ConsumeCompareOrShift(uc32 ch) {

	369 uc32 next_ch = stream_->Advance();

	370 if (next_ch == '=') {

	371 switch (ch) {

	372 case '<':

	373 token_ = kToken_LE;

	374 break;

	375 case '>':

	376 token_ = kToken_GE;

	377 break;

	378 case '=':

	379 token_ = kToken_EQ;

	380 break;

	381 case '!':

	382 token_ = kToken_NE;

	383 break;

	384 default:

	385 UNREACHABLE();

	386 }

	387 } else if (ch == '<' && next_ch == '<') {

	388 token_ = kToken_SHL;

	389 } else if (ch == '>' && next_ch == '>') {

	390 if (stream_->Advance() == '>') {

	391 token_ = kToken_SHR;

	392 } else {

	393 token_ = kToken_SAR;

	394 stream_->Back();

	395 }

	396 } else {

	397 stream_->Back();

	398 token_ = ch;

	399 }

	400 }

	401

	402 bool AsmJsScanner::IsIdentifierStart(uc32 ch) {

	403 return (ch >= 'A' && ch <= 'Z') \|\| (ch >= 'a' && ch <= 'z') \|\| ch == '_' \|\|

	404 ch == '$';

	405 }

	406

	407 bool AsmJsScanner::IsIdentifierPart(uc32 ch) {

	408 return IsIdentifierStart(ch) \|\| (ch >= '0' && ch <= '9');

	409 }

	410

	411 bool AsmJsScanner::IsNumberStart(uc32 ch) {

	412 return ch == '.' \|\| (ch >= '0' && ch <= '9');

	413 }

	414

	415 } // namespace internal

	416 } // namespace v8

OLD	NEW

« src/asmjs/asm-scanner.h ('K') | « src/asmjs/asm-scanner.h ('k') | src/flag-definitions.h » ('j') | src/flag-definitions.h » ('J')