Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2017 the V8 project authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "src/asmjs/asm-lexer.h" | |
| 6 | |
| 7 #include <stdlib.h> | |
| 8 | |
| 9 #include "src/objects.h" | |
| marja (2017/03/15 12:34:49): Why is objects.h needed?<br>bradn (2017/03/16 00:21:46): There was a `Handle<String>` used in scanner.h inlin… | |
| 10 #include "src/parsing/scanner-character-streams.h" | |
| vogelheim (2017/03/15 12:07:41): I don't see scanner-character-streams.h being used<br>bradn (2017/03/16 00:21:47): Dropped. | |
| 11 #include "src/parsing/scanner.h" | |
| marja (2017/03/15 12:34:49): Hmm, you're still including scanner.h even though…<br>bradn (2017/03/16 00:21:47): That was in the header. This is needed here becaus…<br>marja (2017/03/16 17:05:33): My orig. comment suggested moving the streams out… | |
| 12 | |
| 13 namespace v8 { | |
| 14 namespace internal { | |
| 15 | |
| 16 namespace { | |
| 17 // Cap number of identifiers to ensure we can assign both global and | |
| 18 // local ones a token id in the range of an int32_t. | |
| 19 static const int kMaxIdentifierCount = 0xf000000; | |
| 20 }; | |
| 21 | |
| 22 AsmJsLexer::AsmJsLexer() | |
| 23 : token_(0), | |
| 24 preceding_token_(0), | |
| 25 next_token_(0), | |
| 26 rewind_(false), | |
| 27 in_local_scope_(false), | |
| 28 global_count_(0), | |
| 29 double_value_(0.0), | |
| 30 unsigned_value_(0), | |
| 31 preceded_by_newline_(false) { | |
| 32 #define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name; | |
| 33 STDLIB_MATH_FUNCTION_LIST(V) | |
| 34 STDLIB_ARRAY_TYPE_LIST(V) | |
| 35 #undef V | |
| 36 #define V(name) property_names_[#name] = kToken_##name; | |
| 37 STDLIB_MATH_VALUE_LIST(V) | |
| 38 STDLIB_OTHER_LIST(V) | |
| 39 #undef V | |
| 40 #define V(name) global_names_[#name] = kToken_##name; | |
| 41 KEYWORD_NAME_LIST(V) | |
| 42 #undef V | |
| 43 } | |
| 44 | |
| 45 void AsmJsLexer::SetStream(std::unique_ptr<Utf16CharacterStream> stream) { | |
| 46 stream_ = std::move(stream); | |
| 47 Next(); | |
| 48 } | |
| 49 | |
| 50 void AsmJsLexer::Next() { | |
| vogelheim (2017/03/15 12:07:40): I find this method much nicer to read now. Thanks.<br>bradn (2017/03/16 00:21:47): :-) | |
| 51 if (rewind_) { | |
| 52 preceding_token_ = token_; | |
| 53 token_ = next_token_; | |
| 54 next_token_ = 0; | |
| 55 rewind_ = false; | |
| 56 return; | |
| 57 } | |
| 58 | |
| 59 if (token_ == kEndOfInput || token_ == kParseError) { | |
| 60 return; | |
| 61 } | |
| 62 | |
| 63 #if DEBUG | |
| 64 if (FLAG_trace_asm_lexer) { | |
| 65 if (Token() != 0) { | |
| vogelheim (2017/03/15 12:07:41): nitpick: No real problem here, but this logic is a…<br>bradn (2017/03/16 00:21:47): Done. | |
| 66 if (Token() == kDouble) { | |
| 67 PrintF("%lf ", AsDouble()); | |
| 68 } else if (Token() == kUnsigned) { | |
| 69 PrintF("%" PRIu64 " ", AsUnsigned()); | |
| 70 } else { | |
| 71 std::string name = Name(Token()); | |
| 72 PrintF("%s ", name.c_str()); | |
| 73 } | |
| 74 } | |
| 75 } | |
| 76 #endif | |
| 77 | |
| 78 preceded_by_newline_ = false; | |
| 79 preceding_token_ = token_; | |
| 80 for (;;) { | |
| 81 uc32 ch = stream_->Advance(); | |
| 82 switch (ch) { | |
| 83 case ' ': | |
| 84 case '\t': | |
| 85 case '\n': | |
| 86 case '\r': | |
| 87 // Ignore whitespace, track when we've passed a newline for optional | |
| 88 // semicolon support. | |
| 89 if (ch == '\n') { | |
| vogelheim (2017/03/15 12:07:41): nitpick: This is weird. If you have a switch-case…<br>bradn (2017/03/16 00:21:46): Hah, yeah good point (missed that in the refactor) | |
| 90 preceded_by_newline_ = true; | |
| 91 } | |
| 92 break; | |
| 93 | |
| 94 case kEndOfInput: | |
| 95 token_ = kEndOfInput; | |
| 96 return; | |
| 97 | |
| 98 case '\'': | |
| 99 case '"': | |
| 100 ConsumeString(ch); | |
| 101 return; | |
| 102 | |
| 103 case '/': | |
| 104 ch = stream_->Advance(); | |
| 105 if (ch == '/') { | |
| 106 ConsumeCComment(); | |
| 107 } else if (ch == '*') { | |
| 108 ConsumeCPPComment(); | |
| 109 } else { | |
| 110 stream_->Back(); | |
| 111 token_ = '/'; | |
| 112 return; | |
| 113 } | |
| 114 // Breaks out of switch, but loops again (i.e. the case when we parsed | |
| 115 // a comment, but need to continue to look for the next token). | |
| 116 break; | |
| 117 | |
| 118 case '<': | |
| 119 case '>': | |
| 120 case '=': | |
| 121 case '!': | |
| 122 ConsumeCompareOrShift(ch); | |
| 123 return; | |
| 124 | |
| 125 default: | |
| 126 if (IsIdentifierStart(ch)) { | |
| 127 ConsumeIdentifier(ch); | |
| 128 } else if (IsNumberStart(ch)) { | |
| 129 ConsumeNumber(ch); | |
| 130 } else if (ch >= 32 && ch < 127) { | |
| vogelheim (2017/03/15 12:07:41): [Not sure this is an issue, but... ] How many of…<br>bradn (2017/03/16 00:21:47): Listed out the single char ones. | |
| 131 // Use fixed token IDs for ASCII. | |
| 132 token_ = ch; | |
| 133 } else { | |
| 134 // TODO(bradnelson): Support unicode (probably via UnicodeCache). | |
| 135 token_ = kParseError; | |
| 136 } | |
| 137 return; | |
| 138 } | |
| 139 } | |
| 140 } | |
| 141 | |
| 142 void AsmJsLexer::Rewind() { | |
| 143 DCHECK(!rewind_); | |
| 144 next_token_ = token_; | |
| 145 token_ = preceding_token_; | |
| 146 preceding_token_ = 0; | |
| 147 rewind_ = true; | |
| 148 preceded_by_newline_ = false; | |
| 149 identifier_string_.clear(); | |
| 150 } | |
| 151 | |
| 152 void AsmJsLexer::ResetLocals() { local_names_.clear(); } | |
| 153 | |
| 154 #if DEBUG | |
| 155 // Only used for debugging. | |
| 156 std::string AsmJsLexer::Name(token_t token) const { | |
| 157 // TODO(bradnelson): Make thread safe. | |
| 158 if (token >= 32 && token < 127) { | |
| 159 return std::string(1, static_cast<char>(token)); | |
| 160 } | |
| 161 for (auto& i : local_names_) { | |
| 162 if (i.second == token) { | |
| 163 return i.first.c_str(); | |
| Karl (2017/03/15 15:04:13): Why not just: `return i.first;`<br>bradn (2017/03/16 00:21:46): Done. | |
| 164 } | |
| 165 } | |
| 166 for (auto& i : global_names_) { | |
| 167 if (i.second == token) { | |
| 168 return i.first.c_str(); | |
| Karl (2017/03/15 15:04:13): Same here.<br>bradn (2017/03/16 00:21:47): Done. | |
| 169 } | |
| 170 } | |
| 171 for (auto& i : property_names_) { | |
| 172 if (i.second == token) { | |
| 173 return i.first.c_str(); | |
| Karl (2017/03/15 15:04:13): Same here.<br>bradn (2017/03/16 00:21:46): Done. | |
| 174 } | |
| 175 } | |
| 176 switch (token) { | |
| 177 #define V(rawname, name) \ | |
| 178 case kToken_##name: \ | |
| 179 return rawname; | |
| 180 LONG_SYMBOL_NAME_LIST(V) | |
| 181 #undef V | |
| 182 default: | |
| 183 break; | |
| 184 } | |
| 185 if (token == kUnsigned) { | |
| vogelheim (2017/03/15 12:07:40): Why not handle all of these inside the switch righ…<br>bradn (2017/03/16 00:21:46): Done. | |
| 186 return "{unsigned value}"; | |
| 187 } else if (token == kDouble) { | |
| 188 return "{double value}"; | |
| 189 } else if (token == kParseError) { | |
| 190 return "{parse error}"; | |
| 191 } else if (token == kEndOfInput) { | |
| 192 return "{end of input}"; | |
| 193 } | |
| 194 UNREACHABLE(); | |
| 195 return "{unreachable}"; | |
| 196 } | |
| 197 #endif | |
| 198 | |
| 199 int AsmJsLexer::GetPosition() const { return static_cast<int>(stream_->pos()); } | |
| vogelheim (2017/03/15 12:07:40): Does this work if `rewind_` is set? If not, maybe ad…<br>bradn (2017/03/16 00:21:46): Done. | |
| 200 | |
| 201 void AsmJsLexer::Seek(int pos) { | |
| 202 stream_->Seek(pos); | |
| 203 preceding_token_ = 0; | |
| 204 token_ = 0; | |
| 205 next_token_ = 0; | |
| 206 rewind_ = false; | |
| 207 Next(); | |
| 208 } | |
| 209 | |
| 210 void AsmJsLexer::ConsumeIdentifier(uc32 ch) { | |
| 211 // Consume characters while still part of the identifier. | |
| 212 identifier_string_ = ""; | |
| vogelheim (2017/03/15 12:07:41): `identifier_string_.clear();` (STL is bizarre, but.…<br>bradn (2017/03/16 00:21:46): Yep. Done. | |
| 213 while (IsIdentifierPart(ch)) { | |
| 214 identifier_string_ += ch; | |
| 215 ch = stream_->Advance(); | |
| 216 } | |
| 217 // Go back one for next time. | |
| 218 stream_->Back(); | |
| 219 | |
| 220 // Decode what the identifier means. | |
| 221 if (preceding_token_ == '.') { | |
| 222 auto i = property_names_.find(identifier_string_); | |
| 223 if (i != property_names_.end()) { | |
| 224 token_ = i->second; | |
| 225 return; | |
| 226 } | |
| 227 } else { | |
| 228 { | |
| 229 auto i = local_names_.find(identifier_string_); | |
| 230 if (i != local_names_.end()) { | |
| 231 token_ = i->second; | |
| 232 return; | |
| 233 } | |
| 234 } | |
| 235 if (!in_local_scope_) { | |
| 236 auto i = global_names_.find(identifier_string_); | |
| 237 if (i != global_names_.end()) { | |
| 238 token_ = i->second; | |
| 239 return; | |
| 240 } | |
| 241 } | |
| 242 } | |
| 243 if (preceding_token_ == '.') { | |
| 244 CHECK(global_count_ < kMaxIdentifierCount); | |
| 245 token_ = kGlobalsStart + global_count_++; | |
| 246 property_names_[identifier_string_] = token_; | |
| 247 } else if (in_local_scope_) { | |
| 248 CHECK(local_names_.size() < kMaxIdentifierCount); | |
| 249 token_ = kLocalsStart - static_cast<token_t>(local_names_.size()); | |
| 250 local_names_[identifier_string_] = token_; | |
| 251 } else { | |
| 252 CHECK(global_count_ < kMaxIdentifierCount); | |
| 253 token_ = kGlobalsStart + global_count_++; | |
| 254 global_names_[identifier_string_] = token_; | |
| 255 } | |
| 256 } | |
| 257 | |
| 258 void AsmJsLexer::ConsumeNumber(uc32 ch) { | |
| 259 std::string number; | |
| 260 number = ch; | |
| 261 bool has_dot = ch == '.'; | |
| 262 for (;;) { | |
| 263 ch = stream_->Advance(); | |
| 264 if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || | |
| 265 (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'x' || | |
| 266 ((ch == '-' || ch == '+') && (number[number.size() - 1] == 'e' || | |
| 267 number[number.size() - 1] == 'E'))) { | |
| 268 // TODO(bradnelson): Test weird cases ending in -. | |
| 269 if (ch == '.') { | |
| 270 has_dot = true; | |
| 271 } | |
| 272 number += ch; | |
| 273 } else { | |
| 274 break; | |
| 275 } | |
| 276 } | |
| 277 stream_->Back(); | |
| 278 // Special case the most common number. | |
| 279 if (number == "0") { | |
| 280 unsigned_value_ = 0; | |
| 281 token_ = kUnsigned; | |
| 282 return; | |
| 283 } | |
| 284 // Pick out dot. | |
| 285 if (number == ".") { | |
| 286 token_ = '.'; | |
| 287 return; | |
| 288 } | |
| 289 // Decode numbers. | |
| 290 // TODO(bradnelson): Replace strto* with shared code with scanner.cc | |
| 291 char* end; | |
| 292 if (has_dot) { | |
| 293 double_value_ = strtod(number.c_str(), &end); | |
| 294 token_ = kDouble; | |
| 295 } else { | |
| 296 if (number.size() > 2 && number[0] == '0' && number[1] == 'x') { | |
| 297 // Decode 0x* as hex. | |
| 298 unsigned_value_ = strtoul(number.c_str() + 2, &end, 16); | |
| 299 } else if (number.size() > 1 && number[0] == '0') { | |
| 300 // Decode 0* as octal. | |
| 301 unsigned_value_ = strtoul(number.c_str() + 1, &end, 8); | |
| 302 } else { | |
| 303 // Decode the rest as double. | |
| 304 // This can come up in asm.js as for example 1e2 is used to encode 100. | |
| 305 double_value_ = strtod(number.c_str(), &end); | |
| 306 unsigned_value_ = static_cast<uint32_t>(double_value_); | |
| 307 } | |
| 308 token_ = kUnsigned; | |
| 309 } | |
| 310 // Check if string to number conversion didn't consume all the characters. | |
| 311 // This happens if the character filter let through something invalid | |
| 312 // like: 0123ef for example. | |
| 313 // TODO(bradnelson): Check if this happens often enough to be a perf problem. | |
| 314 if (end != number.c_str() + number.size()) { | |
| 315 // If things didn't parse fully, but start with a '.', back out the other | |
| 316 // characters and emit the '.' token. | |
| 317 if (number[0] == '.') { | |
| 318 for (size_t k = 1; k < number.size(); ++k) { | |
| 319 stream_->Back(); | |
| 320 } | |
| 321 token_ = '.'; | |
| 322 return; | |
| 323 } | |
| 324 // Anything else that doesn't parse is an error. | |
| 325 token_ = kParseError; | |
| 326 return; | |
| 327 } | |
| 328 } | |
| 329 | |
| 330 void AsmJsLexer::ConsumeCComment() { | |
| 331 for (;;) { | |
| 332 uc32 ch = stream_->Advance(); | |
| 333 if (ch == '\n' || ch == kEndOfInput) { | |
| 334 break; | |
| 335 } | |
| 336 } | |
| 337 } | |
| 338 | |
| 339 void AsmJsLexer::ConsumeCPPComment() { | |
| 340 for (;;) { | |
| 341 uc32 ch = stream_->Advance(); | |
| 342 if (ch == '*') { | |
| vogelheim (2017/03/15 12:07:41): Your choice, but I think this if-branch would be a…<br>bradn (2017/03/16 00:21:46): Ah, yeah, that's better. Done. | |
| 343 ch = stream_->Advance(); | |
| 344 if (ch == '/') { | |
| 345 break; | |
| 346 } | |
| 347 if (ch == '*') { | |
| 348 stream_->Back(); | |
| 349 } | |
| 350 } else if (ch == kEndOfInput) { | |
| vogelheim (2017/03/15 12:07:40): I think this potentially swallows a syntax error w…<br>bradn (2017/03/16 00:21:46): Ah, yes. Fixed and added a test. | |
| 351 break; | |
| 352 } | |
| 353 } | |
| 354 } | |
| 355 | |
| 356 void AsmJsLexer::ConsumeString(uc32 quote) { | |
| 357 // Only string allowed is 'use asm' / "use asm". | |
| 358 const char* expected = "use asm"; | |
| 359 for (; *expected != '\0'; ++expected) { | |
| 360 if (stream_->Advance() != *expected) { | |
| 361 token_ = kParseError; | |
| 362 return; | |
| 363 } | |
| 364 } | |
| 365 if (stream_->Advance() != quote) { | |
| 366 token_ = kParseError; | |
| 367 return; | |
| 368 } | |
| 369 token_ = kToken_UseAsm; | |
| 370 } | |
| 371 | |
| 372 void AsmJsLexer::ConsumeCompareOrShift(uc32 ch) { | |
| 373 uc32 next_ch = stream_->Advance(); | |
| 374 if (next_ch == '=') { | |
| 375 switch (ch) { | |
| 376 case '<': | |
| 377 token_ = kToken_LE; | |
| 378 break; | |
| 379 case '>': | |
| 380 token_ = kToken_GE; | |
| 381 break; | |
| 382 case '=': | |
| 383 token_ = kToken_EQ; | |
| 384 break; | |
| 385 case '!': | |
| 386 token_ = kToken_NE; | |
| 387 break; | |
| 388 default: | |
| 389 UNREACHABLE(); | |
| 390 } | |
| 391 } else if (ch == '<' && next_ch == '<') { | |
| 392 token_ = kToken_SHL; | |
| 393 } else if (ch == '>' && next_ch == '>') { | |
| 394 if (stream_->Advance() == '>') { | |
| 395 token_ = kToken_SHR; | |
| 396 } else { | |
| 397 token_ = kToken_SAR; | |
| 398 stream_->Back(); | |
| 399 } | |
| 400 } else { | |
| 401 stream_->Back(); | |
| 402 token_ = ch; | |
| 403 } | |
| 404 } | |
| 405 | |
| 406 bool AsmJsLexer::IsIdentifierStart(uc32 ch) { | |
| 407 return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' || | |
| 408 ch == '$'; | |
| 409 } | |
| 410 | |
| 411 bool AsmJsLexer::IsIdentifierPart(uc32 ch) { | |
| 412 return IsIdentifierStart(ch) || (ch >= '0' && ch <= '9'); | |
| 413 } | |
| 414 | |
| 415 bool AsmJsLexer::IsNumberStart(uc32 ch) { | |
| 416 return ch == '.' || (ch >= '0' && ch <= '9'); | |
| 417 } | |
| 418 | |
| 419 } // namespace internal | |
| 420 } // namespace v8 | |
| OLD | NEW |