Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(233)

Side by Side Diff: src/asmjs/asm-lexer.cc

Issue 2751693002: [wasm][asm.js] Adding custom asm.js lexer. (Closed)
Patch Set: fix warning Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/asmjs/asm-lexer.h"
6
7 #include <stdlib.h>
8
9 #include "src/objects-inl.h"
10 #include "src/parsing/scanner-character-streams.h"
11 #include "src/parsing/scanner.h"
12
13 namespace v8 {
14 namespace internal {
15
// Builds a lexer over the source string of |script|.
//
// The character stream is created by ScannerStream::For(source_, start, end).
// NOTE(review): start and end are presumably character offsets bounding the
// asm.js module inside the script source -- confirm against ScannerStream.
//
// All token state starts out as 0 / false. The name tables are then seeded
// from the X-macro lists and the first token is scanned by calling Next(),
// so a freshly constructed lexer already has a current token.
16 AsmJsLexer::AsmJsLexer(Isolate* isolate, Handle<Script> script, int start,
17 int end)
18 : script_(script),
19 source_(String::cast(script->source()), isolate),
20 stream_(ScannerStream::For(source_, start, end)),
21 token_(0),
22 last_token_(0),
23 next_token_(0),
24 rewind_(false),
25 local_(false),
26 global_count_(0),
27 double_value_(0.0),
28 unsigned_value_(0),
29 preceeded_by_newline_(false) {
// Names that are only recognized after a dot: each stdlib math function and
// array type maps its stringified name to the matching kToken_ constant.
30 #define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name;
31 STDLIB_MATH_FUNCTION_LIST(V)
32 STDLIB_ARRAY_TYPE_LIST(V)
33 #undef V
// Same table, for the single-argument lists (math values and other names).
34 #define V(name) property_names_[#name] = kToken_##name;
35 STDLIB_MATH_VALUE_LIST(V)
36 STDLIB_OTHER_LIST(V)
37 #undef V
// Keywords go into the global name table.
38 #define V(name) global_names_[#name] = kToken_##name;
39 KEYWORD_NAME_LIST(V)
40 #undef V
// Prime the lexer with the first token.
41 Next();
42 }
43
// Advances the lexer by one token and leaves the result in token_.
//
// Fast paths:
//   - If a Rewind() is pending (rewind_ is set), the token saved in
//     next_token_ becomes current again and the stream is not read.
//   - If token_ is already kEndOfInput or kParseError the call is a no-op,
//     so both states are sticky.
//
// Otherwise preceeded_by_newline_ is cleared, the current token is saved in
// last_token_, and characters are read from stream_ until one token has been
// recognized. The cases are tested in this order:
//   1. Space, tab, LF and CR are skipped. LF sets preceeded_by_newline_.
//   2. kEndOfInput from the stream yields kEndOfInput.
//   3. Any other character below 32 or at/above 127 yields kParseError.
//   4. A single or double quote must be followed by exactly the text
//      use asm and then the same quote character, yielding kToken_UseAsm.
//      Anything else yields kParseError.
//   5. A slash starts a line comment (second slash) or a block comment
//      (asterisk); both are skipped and scanning continues. Otherwise the
//      lookahead character is pushed back and the slash itself is the token.
//   6. One of < > = ! followed by = yields kToken_LE / GE / EQ / NE.
//      Two < yield kToken_SHL, two > yield kToken_SAR and three > yield
//      kToken_SHR. Otherwise the lookahead is pushed back and the single
//      character is the token.
//   7. An identifier (letter, underscore or dollar sign, followed by any
//      number of those or digits) is collected in name_. If last_token_ was
//      a dot it is looked up in property_names_. Otherwise it is looked up
//      in local_names_ and then, only when local_ is false, in
//      global_names_. An unknown name is assigned a fresh token and inserted
//      in the matching table: kGlobalsStart + global_count_++ for property
//      and global names, kLocalsStart - local_names_.size() for locals.
//   8. A dot or a digit starts a number-like run collected in name_. Digits,
//      a-f, A-F, dot and x are accepted, plus a sign directly after e or E.
//      The text 0 yields kUnsigned with value 0 and a lone dot yields the
//      dot token. A run containing a dot is converted with strtod and yields
//      kDouble. Without a dot the result is kUnsigned: a 0x prefix is parsed
//      as base 16, any other leading 0 as base 8, and everything else goes
//      through strtod and is cast to uint32_t. If the conversion did not
//      consume all of name_, a run that began with a dot is un-read back to
//      just after that dot and yields the dot token; any other run yields
//      kParseError.
//   9. Any remaining character is returned as its own token.
//
// NOTE(review): in the block comment scanner the inner test for an asterisk
// sits inside the branch that already established the character is a slash,
// so it can never be true. The character read after an asterisk is also
// dropped when it is not a slash, so a comment that ends with two asterisks
// and a slash does not appear to terminate there. Reviewers flagged both.
// NOTE(review): the LF that ends a line comment is consumed inside the
// comment loop and does not set preceeded_by_newline_ -- confirm intended.
// NOTE(review): strtod can depend on the C locale, as raised in review.
44 void AsmJsLexer::Next() {
// A pending Rewind() is undone without reading from the stream.
45 if (rewind_) {
46 last_token_ = token_;
47 token_ = next_token_;
48 next_token_ = 0;
49 rewind_ = false;
50 return;
51 }
52
// End of input and parse error are terminal states.
53 if (token_ == kEndOfInput || token_ == kParseError) {
54 return;
55 }
56
57 #if 0
vogelheim 2017/03/14 13:36:37 Please don't do this.
bradn 2017/03/15 07:53:03 Changed to a trace flag. This ends up being useful
58 // Uncomment for debug raw token stream.
59 if (Token() != 0) {
60 if (Token() == kDouble) {
61 fprintf(stderr, "%lf ", AsDouble());
62 } else if (Token() == kUnsigned) {
63 fprintf(stderr, "%lu ", AsUnsigned());
64 } else {
65 fprintf(stderr, "%s ", Name(Token()));
66 }
67 }
68 #endif
69
70 preceeded_by_newline_ = false;
71 last_token_ = token_;
72 for (;;) {
73 token_t ch = stream_->Advance();
vogelheim 2017/03/14 13:36:37 (Here & below.) Using token_t for individual chara
bradn 2017/03/15 07:53:02 Switched all of these inside to uc32.
74 if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
Karl 2017/03/14 18:00:47 Would a switch statement be cleaner here?
bradn 2017/03/15 07:53:02 Done.
75 // Skip whitespace.
76 if (ch == '\n') {
77 preceeded_by_newline_ = true;
78 }
79 continue;
80 } else if (ch == kEndOfInput) {
marja 2017/03/14 11:11:47 (general comment) The structure is getting a bit c
bradn 2017/03/15 07:53:03 Decomposed into more functions, hope that helps.
81 token_ = kEndOfInput;
82 break;
marja 2017/03/14 11:11:47 Why break, why not return? (Now it's not trivial t
bradn 2017/03/15 07:53:03 Redone in more functions, avoids the break.
83 } else if (ch < 32 || ch >= 127) {
Karl 2017/03/14 18:00:47 If you use a switch statement, either explicitly e
bradn 2017/03/15 07:53:02 Done.
84 // Disallow non-ascii for now.
85 token_ = kParseError;
86 break;
87 } else if (ch == '\'' || ch == '\"') {
88 // Only string allowed is 'use asm' / "use asm".
89 const char* use_asm = "use asm";
Karl 2017/03/14 18:00:47 Should this be a constexpr?
bradn 2017/03/15 07:53:03 Changed round.
90 const char* pos = use_asm;
91 while (*pos) {
92 token_t och = stream_->Advance();
vogelheim 2017/03/14 13:36:37 och ? [here & below]
bradn 2017/03/15 07:53:02 Renamed and refactored.
93 if (och != *pos) {
94 token_ = kParseError;
95 return;
96 }
97 ++pos;
98 }
99 token_t och = stream_->Advance();
100 if (och != ch) {
marja 2017/03/14 11:11:47 Lost here... what's this? Ahh, it's checking the
bradn 2017/03/15 07:53:02 renamed variable to highlight that.
101 token_ = kParseError;
102 break;
103 }
104 token_ = kToken_UseAsm;
105 break;
106 } else if (ch == '/') {
107 ch = stream_->Advance();
108 if (ch == '/') {
109 for (;;) {
110 ch = stream_->Advance();
111 if (ch == '\n' || ch == kEndOfInput) {
112 break;
113 }
114 }
115 continue;
116 } else if (ch == '*') {
117 for (;;) {
118 ch = stream_->Advance();
119 if (ch == '*') {
120 ch = stream_->Advance();
121 if (ch == '/') {
vogelheim 2017/03/14 13:36:37 +1 to Marja's comments. Also, would this work on
bradn 2017/03/15 07:53:02 Yeah, this was wrong. Factor to function and fixed
122 if (ch == '*') {
marja 2017/03/14 11:11:47 if ch == '/' on the line above, it cannot be '*' h
bradn 2017/03/15 07:53:02 Oops, fixed.
123 stream_->Back();
124 }
125 break;
marja 2017/03/14 11:11:47 I'm lost here anyway, what's this block, what are
bradn 2017/03/15 07:53:02 This was meant to back up if you saw a * inside a
126 }
127 } else if (ch == kEndOfInput) {
128 break;
129 }
130 }
131 continue;
132 } else {
marja 2017/03/14 11:11:47 No idea here anymore which if this else associates
bradn 2017/03/15 07:53:03 Restructured, should be more clear now.
133 stream_->Back();
134 token_ = '/';
135 break;
136 }
137 } else if (ch == '<' || ch == '>' || ch == '=' || ch == '!') {
138 token_t och = stream_->Advance();
139 if (och == '=') {
140 if (ch == '<') {
141 token_ = kToken_LE;
142 break;
143 } else if (ch == '>') {
144 token_ = kToken_GE;
145 break;
146 } else if (ch == '=') {
147 token_ = kToken_EQ;
148 break;
149 } else if (ch == '!') {
150 token_ = kToken_NE;
151 break;
152 } else {
153 UNREACHABLE();
154 }
155 } else if (ch == '<' && och == '<') {
156 token_ = kToken_SHL;
157 break;
158 } else if (ch == '>' && och == '>') {
159 token_t ooch = stream_->Advance();
vogelheim 2017/03/14 13:36:37 ooch ?
bradn 2017/03/15 07:53:02 Hah, terrible name, sorry, dropped variable comple
160 if (ooch == '>') {
161 token_ = kToken_SHR;
162 } else {
163 token_ = kToken_SAR;
164 stream_->Back();
165 }
166 break;
167 } else {
168 stream_->Back();
169 token_ = ch;
170 break;
171 }
172 } else if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') ||
173 ch == '_' || ch == '$') {
vogelheim 2017/03/14 13:36:37 Could you introduce helper functions for the chara
bradn 2017/03/15 07:53:02 Done.
174 name_ = ch;
175 for (;;) {
176 ch = stream_->Advance();
177 if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' ||
178 ch == '$' || (ch >= '0' && ch <= '9')) {
179 name_ += ch;
180 } else {
181 break;
marja 2017/03/14 11:11:47 Why not while(ch >= ...) { name_ += ch; ch =
bradn 2017/03/15 07:53:03 Done.
182 }
183 }
184 stream_->Back();
185 if (last_token_ == '.') {
186 auto i = property_names_.find(name_);
187 if (i != property_names_.end()) {
188 token_ = i->second;
189 break;
marja 2017/03/14 11:11:47 E.g,. here it would be less confusing to use retur
bradn 2017/03/15 07:53:03 Done.
190 }
191 } else {
192 {
193 auto i = local_names_.find(name_);
194 if (i != local_names_.end()) {
195 token_ = i->second;
196 break;
197 }
198 }
199 if (!local_) {
marja 2017/03/14 11:11:47 What's local_?
bradn 2017/03/15 07:53:03 Renamed.
200 auto i = global_names_.find(name_);
201 if (i != global_names_.end()) {
202 token_ = i->second;
203 break;
204 }
205 }
206 }
207 if (last_token_ == '.') {
208 // TODO(bradnelson): Assert no overflow.
209 token_ = kGlobalsStart + global_count_++;
210 property_names_[name_] = token_;
211 } else if (local_) {
212 // TODO(bradnelson): Assert no overflow.
213 token_ = kLocalsStart - local_names_.size();
214 local_names_[name_] = token_;
215 } else {
216 // TODO(bradnelson): Assert no overflow.
217 token_ = kGlobalsStart + global_count_++;
218 global_names_[name_] = token_;
219 }
220 break;
221 } else if (ch == '.' || (ch >= '0' && ch <= '9')) {
222 bool has_dot = ch == '.';
223 name_ = ch;
224 for (;;) {
225 ch = stream_->Advance();
226 if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
marja 2017/03/14 11:11:47 Would it be feasible to have a helper function for
bradn 2017/03/15 07:53:03 I've added a TODO to do this. Might require some c
227 (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'x' ||
228 ((ch == '-' || ch == '+') && (name_[name_.size() - 1] == 'e' ||
229 name_[name_.size() - 1] == 'E'))) {
230 // TODO(bradnelson): Test weird cases ending in -.
231 if (ch == '.') {
232 has_dot = true;
233 }
234 name_ += ch;
235 } else {
236 break;
237 }
238 }
239 stream_->Back();
240 // Special case the most common number.
241 if (name_ == "0") {
242 unsigned_value_ = 0;
243 token_ = kUnsigned;
244 break;
245 }
246 // Pick out dot.
247 if (name_ == ".") {
248 token_ = '.';
249 break;
250 }
251 // Decode numbers.
252 char* end;
253 if (has_dot) {
254 double_value_ = strtod(name_.c_str(), &end);
vogelheim 2017/03/14 13:36:37 strtod may depend on the current locale. Are you r
bradn 2017/03/15 07:53:03 Yeah, it's a fair point these probably aren't idea
vogelheim 2017/03/15 12:07:40 Note that this is a correctness, not a performance
vogelheim 2017/03/15 12:10:11 My gut feeling is that parser & scanner are tightl
255 token_ = kDouble;
256 } else {
257 if (name_.size() > 2 && name_[0] == '0' && name_[1] == 'x') {
258 unsigned_value_ = strtoul(name_.c_str() + 2, &end, 16);
259 } else if (name_.size() > 1 && name_[0] == '0') {
260 unsigned_value_ = strtoul(name_.c_str() + 1, &end, 8);
261 } else {
262 double_value_ = strtod(name_.c_str(), &end);
263 unsigned_value_ = static_cast<uint32_t>(double_value_);
marja 2017/03/14 11:11:47 Why strtod if it's guaranteed to be an integer (no
bradn 2017/03/15 07:53:02 Asm.js uses 1e2 for 100 (as an integer :-) Added a
264 }
265 token_ = kUnsigned;
266 }
267 if (end != name_.c_str() + name_.size()) {
vogelheim 2017/03/14 13:36:37 I'm confused. When does this happen?
bradn 2017/03/15 07:53:03 When a number failed to parse, added a comment + e
268 // Handle mistaken parse of '.' as number.
marja 2017/03/14 11:11:47 How does this relate to the "Pick out dot" above?
bradn 2017/03/15 07:53:02 Reworded. The idea here is that if the number pars
269 if (name_[0] == '.') {
270 for (size_t k = 1; k < name_.size(); ++k) {
271 stream_->Back();
272 }
273 token_ = '.';
274 break;
275 }
276 token_ = kParseError;
277 return;
278 }
279 break;
280 } else {
281 token_ = ch;
282 break;
283 }
284 }
285 }
286
// Steps the lexer back by exactly one token without moving the stream.
//
// The current token is parked in next_token_, last_token_ becomes current,
// last_token_ is reset to 0 and rewind_ is set so that the following Next()
// restores the parked token instead of reading from the stream. The DCHECK
// enforces that a second Rewind() cannot be issued before that Next().
//
// Only the three token fields and rewind_ are touched here; name_,
// preceeded_by_newline_, double_value_ and unsigned_value_ keep whatever the
// last scan left in them, as the reviewers point out below.
287 void AsmJsLexer::Rewind() {
288 DCHECK(!rewind_);
289 next_token_ = token_;
290 token_ = last_token_;
291 last_token_ = 0;
292 rewind_ = true;
vogelheim 2017/03/14 13:36:37 This doesn't update name_. Is this intentional?
vogelheim 2017/03/14 13:36:37 This doesn't update preceeded_by_newline_. Is this
bradn 2017/03/15 07:53:02 Clearing it for good measure here (didn't want to
bradn 2017/03/15 07:53:03 Clobbering for good measure here, also commented a
293 }
294
// Forgets every local name. Because Next() numbers a new local as
// kLocalsStart - local_names_.size(), numbering restarts at kLocalsStart
// after this call.
295 void AsmJsLexer::ResetLocals() { local_names_.clear(); }
296
// Returns a printable name for |token|.
//
// Resolution order:
//   1. Tokens in the range [32, 127) are returned as a one-character string.
//   2. A linear search by value through local_names_, then global_names_,
//      then property_names_, returning the key of the first match.
//   3. The names listed in LONG_SYMBOL_NAME_LIST.
//   4. Fixed placeholder strings for kUnsigned, kDouble, kParseError and
//      kEndOfInput.
// Any other value hits UNREACHABLE().
//
// NOTE(review): the one-character case returns a pointer into a function
// static buffer, so the result is overwritten by the next such call and is
// not thread safe (see the TODO). Only chname[0] is ever written; chname[1]
// serves as the terminator solely because static storage starts zeroed.
// NOTE(review): pointers from the table searches point into the map keys and
// presumably stay valid only until that entry is erased (for instance by
// ResetLocals) -- confirm against the container type of the tables.
297 const char* AsmJsLexer::Name(token_t token) const {
298 // TODO(bradnelson): Make thread safe (and maybe debug only).
299 if (token >= 32 && token < 127) {
300 static char chname[2];
301 chname[0] = static_cast<char>(token);
vogelheim 2017/03/14 13:36:37 chname[1] = '\0' ??
bradn 2017/03/15 07:53:02 Done. Whoops.
302 return chname;
303 }
304 for (auto i = local_names_.begin(); i != local_names_.end(); ++i) {
vogelheim 2017/03/14 13:36:37 style nitpick: I'd use the for(auto& i : local_nam
bradn 2017/03/15 07:53:03 Done.
305 if (i->second == token) {
306 return i->first.c_str();
307 }
308 }
309 for (auto i = global_names_.begin(); i != global_names_.end(); ++i) {
310 if (i->second == token) {
311 return i->first.c_str();
312 }
313 }
314 for (auto i = property_names_.begin(); i != property_names_.end(); ++i) {
315 if (i->second == token) {
316 return i->first.c_str();
317 }
318 }
319 switch (token) {
320 #define V(rawname, name) \
321 case kToken_##name: \
322 return rawname;
323 LONG_SYMBOL_NAME_LIST(V)
324 #undef V
325 default:
326 break;
327 }
328 if (token == kUnsigned) {
329 return "{unsigned value}";
330 } else if (token == kDouble) {
331 return "{double value}";
332 } else if (token == kParseError) {
333 return "{parse error}";
334 } else if (token == kEndOfInput) {
335 return "{end of input}";
336 }
337 UNREACHABLE();
338 return "{unreachable}";
339 }
340
// Returns the current position reported by the underlying stream, narrowed
// to int. This is where the stream stands, which after a lookahead has been
// pushed back is just past the most recently scanned token.
341 int AsmJsLexer::position() const { return static_cast<int>(stream_->pos()); }
342
// Repositions the underlying stream at |pos|. Only the stream is moved:
// token_, last_token_, next_token_ and rewind_ are left as they were.
// NOTE(review): callers presumably follow this with Next() to refresh the
// current token -- confirm at the call sites.
343 void AsmJsLexer::Seek(int pos) { stream_->Seek(pos); }
344
345 } // namespace internal
346 } // namespace v8
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698