OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2017 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "src/asmjs/asm-lexer.h" | |
6 | |
7 #include <stdlib.h> | |
8 | |
9 #include "src/objects-inl.h" | |
10 #include "src/parsing/scanner-character-streams.h" | |
11 #include "src/parsing/scanner.h" | |
12 | |
13 namespace v8 { | |
14 namespace internal { | |
15 | |
16 AsmJsLexer::AsmJsLexer(Isolate* isolate, Handle<Script> script, int start, | |
17 int end) | |
18 : script_(script), | |
19 source_(String::cast(script->source()), isolate), | |
20 stream_(ScannerStream::For(source_, start, end)), | |
21 token_(0), | |
22 last_token_(0), | |
23 next_token_(0), | |
24 rewind_(false), | |
25 local_(false), | |
26 global_count_(0), | |
27 double_value_(0.0), | |
28 unsigned_value_(0), | |
29 preceeded_by_newline_(false) { | |
30 #define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name; | |
31 STDLIB_MATH_FUNCTION_LIST(V) | |
32 STDLIB_ARRAY_TYPE_LIST(V) | |
33 #undef V | |
34 #define V(name) property_names_[#name] = kToken_##name; | |
35 STDLIB_MATH_VALUE_LIST(V) | |
36 STDLIB_OTHER_LIST(V) | |
37 #undef V | |
38 #define V(name) global_names_[#name] = kToken_##name; | |
39 KEYWORD_NAME_LIST(V) | |
40 #undef V | |
41 Next(); | |
42 } | |
43 | |
44 void AsmJsLexer::Next() { | |
45 if (rewind_) { | |
46 last_token_ = token_; | |
47 token_ = next_token_; | |
48 next_token_ = 0; | |
49 rewind_ = false; | |
50 return; | |
51 } | |
52 | |
53 if (token_ == kEndOfInput || token_ == kParseError) { | |
54 return; | |
55 } | |
56 | |
57 #if 0 | |
vogelheim
2017/03/14 13:36:37
Please don't do this.
bradn
2017/03/15 07:53:03
Changed to a trace flag.
This ends up being useful
| |
58 // Uncomment for debug raw token stream. | |
59 if (Token() != 0) { | |
60 if (Token() == kDouble) { | |
61 fprintf(stderr, "%lf ", AsDouble()); | |
62 } else if (Token() == kUnsigned) { | |
63 fprintf(stderr, "%lu ", AsUnsigned()); | |
64 } else { | |
65 fprintf(stderr, "%s ", Name(Token())); | |
66 } | |
67 } | |
68 #endif | |
69 | |
70 preceeded_by_newline_ = false; | |
71 last_token_ = token_; | |
72 for (;;) { | |
73 token_t ch = stream_->Advance(); | |
vogelheim
2017/03/14 13:36:37
(Here & below.) Using token_t for individual characters...
bradn
2017/03/15 07:53:02
Switched all of these inside to uc32.
| |
74 if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') { | |
Karl
2017/03/14 18:00:47
Would a switch statement be cleaner here?
bradn
2017/03/15 07:53:02
Done.
| |
75 // Skip whitespace. | |
76 if (ch == '\n') { | |
77 preceeded_by_newline_ = true; | |
78 } | |
79 continue; | |
80 } else if (ch == kEndOfInput) { | |
marja
2017/03/14 11:11:47
(general comment) The structure is getting a bit c
bradn
2017/03/15 07:53:03
Decomposed into more functions, hope that helps.
| |
81 token_ = kEndOfInput; | |
82 break; | |
marja
2017/03/14 11:11:47
Why break, why not return? (Now it's not trivial t
bradn
2017/03/15 07:53:03
Redone in more functions, avoids the break.
| |
83 } else if (ch < 32 || ch >= 127) { | |
Karl
2017/03/14 18:00:47
If you use a switch statement, either explicitly e
bradn
2017/03/15 07:53:02
Done.
| |
84 // Disallow non-ascii for now. | |
85 token_ = kParseError; | |
86 break; | |
87 } else if (ch == '\'' || ch == '\"') { | |
88 // Only string allowed is 'use asm' / "use asm". | |
89 const char* use_asm = "use asm"; | |
Karl
2017/03/14 18:00:47
Should this be a constexpr?
bradn
2017/03/15 07:53:03
Changed round.
| |
90 const char* pos = use_asm; | |
91 while (*pos) { | |
92 token_t och = stream_->Advance(); | |
vogelheim
2017/03/14 13:36:37
och ? [here & below]
bradn
2017/03/15 07:53:02
Renamed and refactored.
| |
93 if (och != *pos) { | |
94 token_ = kParseError; | |
95 return; | |
96 } | |
97 ++pos; | |
98 } | |
99 token_t och = stream_->Advance(); | |
100 if (och != ch) { | |
marja
2017/03/14 11:11:47
Lost here... what's this?
Ahh, it's checking the
bradn
2017/03/15 07:53:02
renamed variable to highlight that.
| |
101 token_ = kParseError; | |
102 break; | |
103 } | |
104 token_ = kToken_UseAsm; | |
105 break; | |
106 } else if (ch == '/') { | |
107 ch = stream_->Advance(); | |
108 if (ch == '/') { | |
109 for (;;) { | |
110 ch = stream_->Advance(); | |
111 if (ch == '\n' || ch == kEndOfInput) { | |
112 break; | |
113 } | |
114 } | |
115 continue; | |
116 } else if (ch == '*') { | |
117 for (;;) { | |
118 ch = stream_->Advance(); | |
119 if (ch == '*') { | |
120 ch = stream_->Advance(); | |
121 if (ch == '/') { | |
vogelheim
2017/03/14 13:36:37
+1 to Marja's comments.
Also, would this work on
bradn
2017/03/15 07:53:02
Yeah, this was wrong. Factored into a function and fixed...
| |
122 if (ch == '*') { | |
marja
2017/03/14 11:11:47
if ch == '/' on the line above, it cannot be '*' h
bradn
2017/03/15 07:53:02
Oops, fixed.
| |
123 stream_->Back(); | |
124 } | |
125 break; | |
marja
2017/03/14 11:11:47
I'm lost here anyway, what's this block, what are
bradn
2017/03/15 07:53:02
This was meant to back up if you saw a * inside a
| |
126 } | |
127 } else if (ch == kEndOfInput) { | |
128 break; | |
129 } | |
130 } | |
131 continue; | |
132 } else { | |
marja
2017/03/14 11:11:47
No idea here anymore which if this else associates
bradn
2017/03/15 07:53:03
Restructured, should be more clear now.
| |
133 stream_->Back(); | |
134 token_ = '/'; | |
135 break; | |
136 } | |
137 } else if (ch == '<' || ch == '>' || ch == '=' || ch == '!') { | |
138 token_t och = stream_->Advance(); | |
139 if (och == '=') { | |
140 if (ch == '<') { | |
141 token_ = kToken_LE; | |
142 break; | |
143 } else if (ch == '>') { | |
144 token_ = kToken_GE; | |
145 break; | |
146 } else if (ch == '=') { | |
147 token_ = kToken_EQ; | |
148 break; | |
149 } else if (ch == '!') { | |
150 token_ = kToken_NE; | |
151 break; | |
152 } else { | |
153 UNREACHABLE(); | |
154 } | |
155 } else if (ch == '<' && och == '<') { | |
156 token_ = kToken_SHL; | |
157 break; | |
158 } else if (ch == '>' && och == '>') { | |
159 token_t ooch = stream_->Advance(); | |
vogelheim
2017/03/14 13:36:37
ooch ?
bradn
2017/03/15 07:53:02
Hah, terrible name, sorry, dropped variable completely...
| |
160 if (ooch == '>') { | |
161 token_ = kToken_SHR; | |
162 } else { | |
163 token_ = kToken_SAR; | |
164 stream_->Back(); | |
165 } | |
166 break; | |
167 } else { | |
168 stream_->Back(); | |
169 token_ = ch; | |
170 break; | |
171 } | |
172 } else if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || | |
173 ch == '_' || ch == '$') { | |
vogelheim
2017/03/14 13:36:37
Could you introduce helper functions for the chara
bradn
2017/03/15 07:53:02
Done.
| |
174 name_ = ch; | |
175 for (;;) { | |
176 ch = stream_->Advance(); | |
177 if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' || | |
178 ch == '$' || (ch >= '0' && ch <= '9')) { | |
179 name_ += ch; | |
180 } else { | |
181 break; | |
marja
2017/03/14 11:11:47
Why not
while(ch >= ...) {
name_ += ch;
ch =
bradn
2017/03/15 07:53:03
Done.
| |
182 } | |
183 } | |
184 stream_->Back(); | |
185 if (last_token_ == '.') { | |
186 auto i = property_names_.find(name_); | |
187 if (i != property_names_.end()) { | |
188 token_ = i->second; | |
189 break; | |
marja
2017/03/14 11:11:47
E.g., here it would be less confusing to use return...
bradn
2017/03/15 07:53:03
Done.
| |
190 } | |
191 } else { | |
192 { | |
193 auto i = local_names_.find(name_); | |
194 if (i != local_names_.end()) { | |
195 token_ = i->second; | |
196 break; | |
197 } | |
198 } | |
199 if (!local_) { | |
marja
2017/03/14 11:11:47
What's local_?
bradn
2017/03/15 07:53:03
Renamed.
| |
200 auto i = global_names_.find(name_); | |
201 if (i != global_names_.end()) { | |
202 token_ = i->second; | |
203 break; | |
204 } | |
205 } | |
206 } | |
207 if (last_token_ == '.') { | |
208 // TODO(bradnelson): Assert no overflow. | |
209 token_ = kGlobalsStart + global_count_++; | |
210 property_names_[name_] = token_; | |
211 } else if (local_) { | |
212 // TODO(bradnelson): Assert no overflow. | |
213 token_ = kLocalsStart - local_names_.size(); | |
214 local_names_[name_] = token_; | |
215 } else { | |
216 // TODO(bradnelson): Assert no overflow. | |
217 token_ = kGlobalsStart + global_count_++; | |
218 global_names_[name_] = token_; | |
219 } | |
220 break; | |
221 } else if (ch == '.' || (ch >= '0' && ch <= '9')) { | |
222 bool has_dot = ch == '.'; | |
223 name_ = ch; | |
224 for (;;) { | |
225 ch = stream_->Advance(); | |
226 if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || | |
marja
2017/03/14 11:11:47
Would it be feasible to have a helper function for
bradn
2017/03/15 07:53:03
I've added a TODO to do this.
Might require some c
| |
227 (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'x' || | |
228 ((ch == '-' || ch == '+') && (name_[name_.size() - 1] == 'e' || | |
229 name_[name_.size() - 1] == 'E'))) { | |
230 // TODO(bradnelson): Test weird cases ending in -. | |
231 if (ch == '.') { | |
232 has_dot = true; | |
233 } | |
234 name_ += ch; | |
235 } else { | |
236 break; | |
237 } | |
238 } | |
239 stream_->Back(); | |
240 // Special case the most common number. | |
241 if (name_ == "0") { | |
242 unsigned_value_ = 0; | |
243 token_ = kUnsigned; | |
244 break; | |
245 } | |
246 // Pick out dot. | |
247 if (name_ == ".") { | |
248 token_ = '.'; | |
249 break; | |
250 } | |
251 // Decode numbers. | |
252 char* end; | |
253 if (has_dot) { | |
254 double_value_ = strtod(name_.c_str(), &end); | |
vogelheim
2017/03/14 13:36:37
strtod may depend on the current locale. Are you r
bradn
2017/03/15 07:53:03
Yeah, it's a fair point these probably aren't ideal...
vogelheim
2017/03/15 12:07:40
Note that this is a correctness, not a performance
vogelheim
2017/03/15 12:10:11
My gut feeling is that parser & scanner are tightly...
| |
255 token_ = kDouble; | |
256 } else { | |
257 if (name_.size() > 2 && name_[0] == '0' && name_[1] == 'x') { | |
258 unsigned_value_ = strtoul(name_.c_str() + 2, &end, 16); | |
259 } else if (name_.size() > 1 && name_[0] == '0') { | |
260 unsigned_value_ = strtoul(name_.c_str() + 1, &end, 8); | |
261 } else { | |
262 double_value_ = strtod(name_.c_str(), &end); | |
263 unsigned_value_ = static_cast<uint32_t>(double_value_); | |
marja
2017/03/14 11:11:47
Why strtod if it's guaranteed to be an integer (no
bradn
2017/03/15 07:53:02
Asm.js uses 1e2 for 100 (as an integer :-)
Added a
| |
264 } | |
265 token_ = kUnsigned; | |
266 } | |
267 if (end != name_.c_str() + name_.size()) { | |
vogelheim
2017/03/14 13:36:37
I'm confused. When does this happen?
bradn
2017/03/15 07:53:03
When a number failed to parse, added a comment + e
| |
268 // Handle mistaken parse of '.' as number. | |
marja
2017/03/14 11:11:47
How does this relate to the "Pick out dot" above?
bradn
2017/03/15 07:53:02
Reworded.
The idea here is that if the number pars
| |
269 if (name_[0] == '.') { | |
270 for (size_t k = 1; k < name_.size(); ++k) { | |
271 stream_->Back(); | |
272 } | |
273 token_ = '.'; | |
274 break; | |
275 } | |
276 token_ = kParseError; | |
277 return; | |
278 } | |
279 break; | |
280 } else { | |
281 token_ = ch; | |
282 break; | |
283 } | |
284 } | |
285 } | |
286 | |
287 void AsmJsLexer::Rewind() { | |
288 DCHECK(!rewind_); | |
289 next_token_ = token_; | |
290 token_ = last_token_; | |
291 last_token_ = 0; | |
292 rewind_ = true; | |
vogelheim
2017/03/14 13:36:37
This doesn't update name_. Is this intentional?
vogelheim
2017/03/14 13:36:37
This doesn't update preceeded_by_newline_. Is this
bradn
2017/03/15 07:53:02
Clearing it for good measure here (didn't want to
bradn
2017/03/15 07:53:03
Clobbering for good measure here, also commented a
| |
293 } | |
294 | |
295 void AsmJsLexer::ResetLocals() { local_names_.clear(); } | |
296 | |
297 const char* AsmJsLexer::Name(token_t token) const { | |
298 // TODO(bradnelson): Make thread safe (and maybe debug only). | |
299 if (token >= 32 && token < 127) { | |
300 static char chname[2]; | |
301 chname[0] = static_cast<char>(token); | |
vogelheim
2017/03/14 13:36:37
chname[1] = '\0' ??
bradn
2017/03/15 07:53:02
Done.
Whoops.
| |
302 return chname; | |
303 } | |
304 for (auto i = local_names_.begin(); i != local_names_.end(); ++i) { | |
vogelheim
2017/03/14 13:36:37
style nitpick: I'd use the for(auto& i : local_names_...
bradn
2017/03/15 07:53:03
Done.
| |
305 if (i->second == token) { | |
306 return i->first.c_str(); | |
307 } | |
308 } | |
309 for (auto i = global_names_.begin(); i != global_names_.end(); ++i) { | |
310 if (i->second == token) { | |
311 return i->first.c_str(); | |
312 } | |
313 } | |
314 for (auto i = property_names_.begin(); i != property_names_.end(); ++i) { | |
315 if (i->second == token) { | |
316 return i->first.c_str(); | |
317 } | |
318 } | |
319 switch (token) { | |
320 #define V(rawname, name) \ | |
321 case kToken_##name: \ | |
322 return rawname; | |
323 LONG_SYMBOL_NAME_LIST(V) | |
324 #undef V | |
325 default: | |
326 break; | |
327 } | |
328 if (token == kUnsigned) { | |
329 return "{unsigned value}"; | |
330 } else if (token == kDouble) { | |
331 return "{double value}"; | |
332 } else if (token == kParseError) { | |
333 return "{parse error}"; | |
334 } else if (token == kEndOfInput) { | |
335 return "{end of input}"; | |
336 } | |
337 UNREACHABLE(); | |
338 return "{unreachable}"; | |
339 } | |
340 | |
341 int AsmJsLexer::position() const { return static_cast<int>(stream_->pos()); } | |
342 | |
343 void AsmJsLexer::Seek(int pos) { stream_->Seek(pos); } | |
344 | |
345 } // namespace internal | |
346 } // namespace v8 | |
OLD | NEW |