Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2017 the V8 project authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "src/asmjs/asm-lexer.h" | |
| 6 | |
| 7 #include <stdlib.h> | |
| 8 | |
| 9 #include "src/objects-inl.h" | |
| 10 #include "src/parsing/scanner-character-streams.h" | |
| 11 #include "src/parsing/scanner.h" | |
| 12 | |
| 13 namespace v8 { | |
| 14 namespace internal { | |
| 15 | |
// Constructs a lexer over the source string of |script|. Characters are read
// through a stream obtained from ScannerStream::For(source_, start, end)
// (presumably |start| and |end| bound the range to scan; confirm against
// ScannerStream::For). All token state starts out as 0 / false.
//
// The name tables are pre-seeded before scanning begins:
//  * property_names_ maps every name listed by STDLIB_MATH_FUNCTION_LIST,
//    STDLIB_ARRAY_TYPE_LIST, STDLIB_MATH_VALUE_LIST and STDLIB_OTHER_LIST to
//    its kToken_<name> constant.
//  * global_names_ maps every name listed by KEYWORD_NAME_LIST to its
//    kToken_<name> constant.
//
// Next() is called once at the end, so a first token has already been
// scanned when the constructor returns.
| 16 AsmJsLexer::AsmJsLexer(Isolate* isolate, Handle<Script> script, int start, | |
| 17 int end) | |
| 18 : script_(script), | |
| 19 source_(String::cast(script->source()), isolate), | |
| 20 stream_(ScannerStream::For(source_, start, end)), | |
| 21 token_(0), | |
| 22 last_token_(0), | |
| 23 next_token_(0), | |
| 24 rewind_(false), | |
| 25 local_(false), | |
| 26 global_count_(0), | |
| 27 double_value_(0.0), | |
| 28 unsigned_value_(0), | |
| 29 preceeded_by_newline_(false) { | |
| 30 #define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name; | |
| 31 STDLIB_MATH_FUNCTION_LIST(V) | |
| 32 STDLIB_ARRAY_TYPE_LIST(V) | |
| 33 #undef V | |
| 34 #define V(name) property_names_[#name] = kToken_##name; | |
| 35 STDLIB_MATH_VALUE_LIST(V) | |
| 36 STDLIB_OTHER_LIST(V) | |
| 37 #undef V | |
| 38 #define V(name) global_names_[#name] = kToken_##name; | |
| 39 KEYWORD_NAME_LIST(V) | |
| 40 #undef V | |
| 41 Next(); | |
| 42 } | |
| 43 | |
// Scans the next token into token_, saving the previous one in last_token_.
//
// If a Rewind() is pending, the token parked in next_token_ is restored and
// nothing is read from the stream. Otherwise, once token_ is kEndOfInput or
// kParseError, the call does nothing. The section under #if 0 is disabled
// debug output of the raw token stream.
//
// In the normal case preceeded_by_newline_ is cleared and characters are
// consumed until one token has been recognized:
//  * Space, tab, CR and LF are skipped; an LF seen here sets
//    preceeded_by_newline_.
//  * End of stream yields kEndOfInput.
//  * Any other character below 32 or at/above 127 yields kParseError.
//  * A single or double quote must open exactly the use asm directive and be
//    closed by the same quote character, giving kToken_UseAsm; any other
//    quoted text gives kParseError.
//  * Line comments and block comments are skipped. A slash that starts
//    neither is returned as the slash character token, after putting the
//    character that followed it back.
//  * <, >, = and ! followed by = give kToken_LE, kToken_GE, kToken_EQ and
//    kToken_NE. A doubled < gives kToken_SHL, a doubled > gives kToken_SAR
//    and a tripled > gives kToken_SHR. Otherwise the single character is the
//    token and the lookahead character is put back.
//  * Identifiers (a letter, _ or $, followed by letters, digits, _ or $) are
//    collected into name_. Directly after a dot token they are looked up in
//    property_names_ only. Otherwise they are looked up in local_names_ and,
//    only while local_ is false, in global_names_. A name that is not found
//    gets a fresh token and is recorded: kGlobalsStart + global_count_++ in
//    property_names_ (after a dot) or global_names_ (local_ false), or
//    kLocalsStart - local_names_.size() in local_names_ (local_ true).
//  * Text starting with a dot or a digit is collected into name_ (digits,
//    a-f, A-F, dot, x, and a sign directly after e or E). Exactly 0 is
//    kUnsigned zero and a lone dot is the dot token. Text containing a dot
//    is decoded with strtod as kDouble. Other text is kUnsigned: a 0x prefix
//    is parsed as hex, a leading 0 as octal, anything else with strtod and
//    then cast to uint32_t. If the text was not consumed completely, a
//    leading dot is returned as the dot token after backing the stream up
//    over the remaining characters; otherwise the result is kParseError.
//  * Any other character is returned as its own token value.
//
// NOTE(review): inside a block comment the character that follows a star is
// consumed without being examined again, so a terminator written as
// star-star-slash does not appear to end the comment. The inner check for a
// star right after a slash has matched can never be true.
// NOTE(review): the newline that terminates a line comment is consumed by
// the comment loop and does not set preceeded_by_newline_.
| 44 void AsmJsLexer::Next() { | |
| 45 if (rewind_) { | |
| 46 last_token_ = token_; | |
| 47 token_ = next_token_; | |
| 48 next_token_ = 0; | |
| 49 rewind_ = false; | |
| 50 return; | |
| 51 } | |
| 52 | |
| 53 if (token_ == kEndOfInput || token_ == kParseError) { | |
| 54 return; | |
| 55 } | |
| 56 | |
| 57 #if 0 | |

vogelheim
2017/03/14 13:36:37
Please don't do this.
bradn
2017/03/15 07:53:03
Changed to a trace flag.
This ends up being useful
| |
| 58 // Uncomment for debug raw token stream. | |
| 59 if (Token() != 0) { | |
| 60 if (Token() == kDouble) { | |
| 61 fprintf(stderr, "%lf ", AsDouble()); | |
| 62 } else if (Token() == kUnsigned) { | |
| 63 fprintf(stderr, "%lu ", AsUnsigned()); | |
| 64 } else { | |
| 65 fprintf(stderr, "%s ", Name(Token())); | |
| 66 } | |
| 67 } | |
| 68 #endif | |
| 69 | |
| 70 preceeded_by_newline_ = false; | |
| 71 last_token_ = token_; | |
| 72 for (;;) { | |
| 73 token_t ch = stream_->Advance(); | |

vogelheim
2017/03/14 13:36:37
(Here & below.) Using token_t for individual chara
bradn
2017/03/15 07:53:02
Switched all of these inside to uc32.
| |
| 74 if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') { | |

Karl
2017/03/14 18:00:47
Would a switch statement be cleaner here?
bradn
2017/03/15 07:53:02
Done.
| |
| 75 // Skip whitespace. | |
| 76 if (ch == '\n') { | |
| 77 preceeded_by_newline_ = true; | |
| 78 } | |
| 79 continue; | |
| 80 } else if (ch == kEndOfInput) { | |

marja
2017/03/14 11:11:47
(general comment) The structure is getting a bit c
bradn
2017/03/15 07:53:03
Decomposed into more functions, hope that helps.
| |
| 81 token_ = kEndOfInput; | |
| 82 break; | |

marja
2017/03/14 11:11:47
Why break, why not return? (Now it's not trivial t
bradn
2017/03/15 07:53:03
Redone in more functions, avoids the break.
| |
| 83 } else if (ch < 32 || ch >= 127) { | |

Karl
2017/03/14 18:00:47
If you use a switch statement, either explicitly e
bradn
2017/03/15 07:53:02
Done.
| |
| 84 // Disallow non-ascii for now. | |
| 85 token_ = kParseError; | |
| 86 break; | |
| 87 } else if (ch == '\'' || ch == '\"') { | |
| 88 // Only string allowed is 'use asm' / "use asm". | |
| 89 const char* use_asm = "use asm"; | |

Karl
2017/03/14 18:00:47
Should this be a constexpr?
bradn
2017/03/15 07:53:03
Changed round.
| |
| 90 const char* pos = use_asm; | |
| 91 while (*pos) { | |
| 92 token_t och = stream_->Advance(); | |

vogelheim
2017/03/14 13:36:37
och ? [here & below]
bradn
2017/03/15 07:53:02
Renamed and refactored.
| |
| 93 if (och != *pos) { | |
| 94 token_ = kParseError; | |
| 95 return; | |
| 96 } | |
| 97 ++pos; | |
| 98 } | |
| 99 token_t och = stream_->Advance(); | |
| 100 if (och != ch) { | |

marja
2017/03/14 11:11:47
Lost here... what's this?
Ahh, it's checking the
bradn
2017/03/15 07:53:02
renamed variable to highlight that.
| |
| 101 token_ = kParseError; | |
| 102 break; | |
| 103 } | |
| 104 token_ = kToken_UseAsm; | |
| 105 break; | |
| 106 } else if (ch == '/') { | |
| 107 ch = stream_->Advance(); | |
| 108 if (ch == '/') { | |
| 109 for (;;) { | |
| 110 ch = stream_->Advance(); | |
| 111 if (ch == '\n' || ch == kEndOfInput) { | |
| 112 break; | |
| 113 } | |
| 114 } | |
| 115 continue; | |
| 116 } else if (ch == '*') { | |
| 117 for (;;) { | |
| 118 ch = stream_->Advance(); | |
| 119 if (ch == '*') { | |
| 120 ch = stream_->Advance(); | |
| 121 if (ch == '/') { | |

vogelheim
2017/03/14 13:36:37
+1 to Marja's comments.
Also, would this work on
bradn
2017/03/15 07:53:02
Yeah, this was wrong. Factor to function and fixed
| |
| 122 if (ch == '*') { | |

marja
2017/03/14 11:11:47
if ch == '/' on the line above, it cannot be '*' h
bradn
2017/03/15 07:53:02
Oops, fixed.
| |
| 123 stream_->Back(); | |
| 124 } | |
| 125 break; | |

marja
2017/03/14 11:11:47
I'm lost here anyway, what's this block, what are
bradn
2017/03/15 07:53:02
This was meant to back up if you saw a * inside a
| |
| 126 } | |
| 127 } else if (ch == kEndOfInput) { | |
| 128 break; | |
| 129 } | |
| 130 } | |
| 131 continue; | |
| 132 } else { | |

marja
2017/03/14 11:11:47
No idea here anymore which if this else associates
bradn
2017/03/15 07:53:03
Restructured, should be more clear now.
| |
| 133 stream_->Back(); | |
| 134 token_ = '/'; | |
| 135 break; | |
| 136 } | |
| 137 } else if (ch == '<' || ch == '>' || ch == '=' || ch == '!') { | |
| 138 token_t och = stream_->Advance(); | |
| 139 if (och == '=') { | |
| 140 if (ch == '<') { | |
| 141 token_ = kToken_LE; | |
| 142 break; | |
| 143 } else if (ch == '>') { | |
| 144 token_ = kToken_GE; | |
| 145 break; | |
| 146 } else if (ch == '=') { | |
| 147 token_ = kToken_EQ; | |
| 148 break; | |
| 149 } else if (ch == '!') { | |
| 150 token_ = kToken_NE; | |
| 151 break; | |
| 152 } else { | |
| 153 UNREACHABLE(); | |
| 154 } | |
| 155 } else if (ch == '<' && och == '<') { | |
| 156 token_ = kToken_SHL; | |
| 157 break; | |
| 158 } else if (ch == '>' && och == '>') { | |
| 159 token_t ooch = stream_->Advance(); | |

vogelheim
2017/03/14 13:36:37
ooch ?
bradn
2017/03/15 07:53:02
Hah, terrible name, sorry, dropped variable comple
| |
| 160 if (ooch == '>') { | |
| 161 token_ = kToken_SHR; | |
| 162 } else { | |
| 163 token_ = kToken_SAR; | |
| 164 stream_->Back(); | |
| 165 } | |
| 166 break; | |
| 167 } else { | |
| 168 stream_->Back(); | |
| 169 token_ = ch; | |
| 170 break; | |
| 171 } | |
| 172 } else if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || | |
| 173 ch == '_' || ch == '$') { | |

vogelheim
2017/03/14 13:36:37
Could you introduce helper functions for the chara
bradn
2017/03/15 07:53:02
Done.
| |
| 174 name_ = ch; | |
| 175 for (;;) { | |
| 176 ch = stream_->Advance(); | |
| 177 if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' || | |
| 178 ch == '$' || (ch >= '0' && ch <= '9')) { | |
| 179 name_ += ch; | |
| 180 } else { | |
| 181 break; | |

marja
2017/03/14 11:11:47
Why not
while(ch >= ...) {
name_ += ch;
ch =
bradn
2017/03/15 07:53:03
Done.
| |
| 182 } | |
| 183 } | |
| 184 stream_->Back(); | |
| 185 if (last_token_ == '.') { | |
| 186 auto i = property_names_.find(name_); | |
| 187 if (i != property_names_.end()) { | |
| 188 token_ = i->second; | |
| 189 break; | |

marja
2017/03/14 11:11:47
E.g,. here it would be less confusing to use retur
bradn
2017/03/15 07:53:03
Done.
| |
| 190 } | |
| 191 } else { | |
| 192 { | |
| 193 auto i = local_names_.find(name_); | |
| 194 if (i != local_names_.end()) { | |
| 195 token_ = i->second; | |
| 196 break; | |
| 197 } | |
| 198 } | |
| 199 if (!local_) { | |

marja
2017/03/14 11:11:47
What's local_?
bradn
2017/03/15 07:53:03
Renamed.
| |
| 200 auto i = global_names_.find(name_); | |
| 201 if (i != global_names_.end()) { | |
| 202 token_ = i->second; | |
| 203 break; | |
| 204 } | |
| 205 } | |
| 206 } | |
| 207 if (last_token_ == '.') { | |
| 208 // TODO(bradnelson): Assert no overflow. | |
| 209 token_ = kGlobalsStart + global_count_++; | |
| 210 property_names_[name_] = token_; | |
| 211 } else if (local_) { | |
| 212 // TODO(bradnelson): Assert no overflow. | |
| 213 token_ = kLocalsStart - local_names_.size(); | |
| 214 local_names_[name_] = token_; | |
| 215 } else { | |
| 216 // TODO(bradnelson): Assert no overflow. | |
| 217 token_ = kGlobalsStart + global_count_++; | |
| 218 global_names_[name_] = token_; | |
| 219 } | |
| 220 break; | |
| 221 } else if (ch == '.' || (ch >= '0' && ch <= '9')) { | |
| 222 bool has_dot = ch == '.'; | |
| 223 name_ = ch; | |
| 224 for (;;) { | |
| 225 ch = stream_->Advance(); | |
| 226 if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || | |

marja
2017/03/14 11:11:47
Would it be feasible to have a helper function for
bradn
2017/03/15 07:53:03
I've added a TODO to do this.
Might require some c
| |
| 227 (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'x' || | |
| 228 ((ch == '-' || ch == '+') && (name_[name_.size() - 1] == 'e' || | |
| 229 name_[name_.size() - 1] == 'E'))) { | |
| 230 // TODO(bradnelson): Test weird cases ending in -. | |
| 231 if (ch == '.') { | |
| 232 has_dot = true; | |
| 233 } | |
| 234 name_ += ch; | |
| 235 } else { | |
| 236 break; | |
| 237 } | |
| 238 } | |
| 239 stream_->Back(); | |
| 240 // Special case the most common number. | |
| 241 if (name_ == "0") { | |
| 242 unsigned_value_ = 0; | |
| 243 token_ = kUnsigned; | |
| 244 break; | |
| 245 } | |
| 246 // Pick out dot. | |
| 247 if (name_ == ".") { | |
| 248 token_ = '.'; | |
| 249 break; | |
| 250 } | |
| 251 // Decode numbers. | |
| 252 char* end; | |
| 253 if (has_dot) { | |
| 254 double_value_ = strtod(name_.c_str(), &end); | |

vogelheim
2017/03/14 13:36:37
strtod may depend on the current locale. Are you r
bradn
2017/03/15 07:53:03
Yeah, it's a fair point these probably aren't idea
vogelheim
2017/03/15 12:07:40
Note that this is a correctness, not a performance
vogelheim
2017/03/15 12:10:11
My gut feeling is that parser & scanner are tightl
| |
| 255 token_ = kDouble; | |
| 256 } else { | |
| 257 if (name_.size() > 2 && name_[0] == '0' && name_[1] == 'x') { | |
| 258 unsigned_value_ = strtoul(name_.c_str() + 2, &end, 16); | |
| 259 } else if (name_.size() > 1 && name_[0] == '0') { | |
| 260 unsigned_value_ = strtoul(name_.c_str() + 1, &end, 8); | |
| 261 } else { | |
| 262 double_value_ = strtod(name_.c_str(), &end); | |
| 263 unsigned_value_ = static_cast<uint32_t>(double_value_); | |

marja
2017/03/14 11:11:47
Why strtod if it's guaranteed to be an integer (no
bradn
2017/03/15 07:53:02
Asm.js uses 1e2 for 100 (as an integer :-)
Added a
| |
| 264 } | |
| 265 token_ = kUnsigned; | |
| 266 } | |
| 267 if (end != name_.c_str() + name_.size()) { | |

vogelheim
2017/03/14 13:36:37
I'm confused. When does this happen?
bradn
2017/03/15 07:53:03
When a number failed to parse, added a comment + e
| |
| 268 // Handle mistaken parse of '.' as number. | |

marja
2017/03/14 11:11:47
How does this relate to the "Pick out dot" above?
bradn
2017/03/15 07:53:02
Reworded.
The idea here is that if the number pars
| |
| 269 if (name_[0] == '.') { | |
| 270 for (size_t k = 1; k < name_.size(); ++k) { | |
| 271 stream_->Back(); | |
| 272 } | |
| 273 token_ = '.'; | |
| 274 break; | |
| 275 } | |
| 276 token_ = kParseError; | |
| 277 return; | |
| 278 } | |
| 279 break; | |
| 280 } else { | |
| 281 token_ = ch; | |
| 282 break; | |
| 283 } | |
| 284 } | |
| 285 } | |
| 286 | |
// Steps back by one token: token_ becomes last_token_ again and the current
// token is parked in next_token_, from where the following Next() call
// restores it without reading the stream. Only a single step is supported;
// the DCHECK rejects a second Rewind() while one is still pending, and
// last_token_ is reset to 0. The stream position, name_,
// preceeded_by_newline_, double_value_ and unsigned_value_ are not touched
// here.
| 287 void AsmJsLexer::Rewind() { | |
| 288 DCHECK(!rewind_); | |
| 289 next_token_ = token_; | |
| 290 token_ = last_token_; | |
| 291 last_token_ = 0; | |
| 292 rewind_ = true; | |

vogelheim
2017/03/14 13:36:37
This doesn't update name_. Is this intentional?
vogelheim
2017/03/14 13:36:37
This doesn't update preceeded_by_newline_. Is this
bradn
2017/03/15 07:53:02
Clearing it for good measure here (didn't want to
bradn
2017/03/15 07:53:03
Clobbering for good measure here, also commented a
| |
| 293 } | |
| 294 | |
// Removes every entry from local_names_. Next() numbers a new local name as
// kLocalsStart - local_names_.size(), so numbering starts over at
// kLocalsStart after this call.
| 295 void AsmJsLexer::ResetLocals() { local_names_.clear(); } | |
| 296 | |
// Returns a printable name for |token|.
//  * A token in the range [32, 127) is returned as a one-character string
//    stored in a function-local static buffer, so the result is overwritten
//    by the next such call and the function is not thread safe.
//  * Otherwise the token value is searched for linearly in local_names_,
//    global_names_ and property_names_, in that order, and the c_str() of
//    the first matching key is returned.
//  * Otherwise tokens from LONG_SYMBOL_NAME_LIST map to their raw spelling,
//    and kUnsigned, kDouble, kParseError and kEndOfInput map to fixed
//    placeholder strings.
// Any other token reaches UNREACHABLE().
// NOTE(review): chname[1] is never written; the string is terminated only
// because the static array is zero-initialized.
| 297 const char* AsmJsLexer::Name(token_t token) const { | |
| 298 // TODO(bradnelson): Make thread safe (and maybe debug only). | |
| 299 if (token >= 32 && token < 127) { | |
| 300 static char chname[2]; | |
| 301 chname[0] = static_cast<char>(token); | |

vogelheim
2017/03/14 13:36:37
chname[1] = '\0' ??
bradn
2017/03/15 07:53:02
Done.
Whoops.
| |
| 302 return chname; | |
| 303 } | |
| 304 for (auto i = local_names_.begin(); i != local_names_.end(); ++i) { | |

vogelheim
2017/03/14 13:36:37
style nitpick: I'd use the for(auto& i : local_nam
bradn
2017/03/15 07:53:03
Done.
| |
| 305 if (i->second == token) { | |
| 306 return i->first.c_str(); | |
| 307 } | |
| 308 } | |
| 309 for (auto i = global_names_.begin(); i != global_names_.end(); ++i) { | |
| 310 if (i->second == token) { | |
| 311 return i->first.c_str(); | |
| 312 } | |
| 313 } | |
| 314 for (auto i = property_names_.begin(); i != property_names_.end(); ++i) { | |
| 315 if (i->second == token) { | |
| 316 return i->first.c_str(); | |
| 317 } | |
| 318 } | |
| 319 switch (token) { | |
| 320 #define V(rawname, name) \ | |
| 321 case kToken_##name: \ | |
| 322 return rawname; | |
| 323 LONG_SYMBOL_NAME_LIST(V) | |
| 324 #undef V | |
| 325 default: | |
| 326 break; | |
| 327 } | |
| 328 if (token == kUnsigned) { | |
| 329 return "{unsigned value}"; | |
| 330 } else if (token == kDouble) { | |
| 331 return "{double value}"; | |
| 332 } else if (token == kParseError) { | |
| 333 return "{parse error}"; | |
| 334 } else if (token == kEndOfInput) { | |
| 335 return "{end of input}"; | |
| 336 } | |
| 337 UNREACHABLE(); | |
| 338 return "{unreachable}"; | |
| 339 } | |
| 340 | |
// Returns the position reported by the character stream, narrowed to int.
| 341 int AsmJsLexer::position() const { return static_cast<int>(stream_->pos()); } | |
| 342 | |
// Repositions the character stream to |pos| by forwarding to
// stream_->Seek(). The token state (token_, last_token_, next_token_,
// rewind_) is not updated here.
| 343 void AsmJsLexer::Seek(int pos) { stream_->Seek(pos); } | |
| 344 | |
| 345 } // namespace internal | |
| 346 } // namespace v8 | |
| OLD | NEW |