Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1076)

Side by Side Diff: src/lexer/lexer-shell.cc

Issue 194693003: Experimental parser: add ability to compare lexer shell outputs (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tools/lexer_generator/test/run_lexing_tests.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright 2013 the V8 project authors. All rights reserved. 1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
53 UTF8, 53 UTF8,
54 UTF16, 54 UTF16,
55 UTF8TO16, // Convert stream via scanner input stream 55 UTF8TO16, // Convert stream via scanner input stream
56 UTF8TO16_PRECONVERT // Convert stream during file read 56 UTF8TO16_PRECONVERT // Convert stream during file read
57 }; 57 };
58 58
59 59
60 struct LexerShellSettings { 60 struct LexerShellSettings {
61 Encoding encoding; 61 Encoding encoding;
62 bool print_tokens; 62 bool print_tokens;
63 bool print_tokens_for_compare;
63 bool break_after_illegal; 64 bool break_after_illegal;
64 bool eos_test; 65 bool eos_test;
65 int repeat; 66 int repeat;
66 bool harmony_numeric_literals; 67 bool harmony_numeric_literals;
67 bool harmony_modules; 68 bool harmony_modules;
68 bool harmony_scoping; 69 bool harmony_scoping;
69 LexerShellSettings() 70 LexerShellSettings()
70 : encoding(LATIN1), 71 : encoding(LATIN1),
71 print_tokens(false), 72 print_tokens(false),
73 print_tokens_for_compare(false),
72 break_after_illegal(false), 74 break_after_illegal(false),
73 eos_test(false), 75 eos_test(false),
74 repeat(1), 76 repeat(1),
75 harmony_numeric_literals(false), 77 harmony_numeric_literals(false),
76 harmony_modules(false), 78 harmony_modules(false),
77 harmony_scoping(false) {} 79 harmony_scoping(false) {}
78 }; 80 };
79 81
80 82
81 static uint16_t* ConvertUtf8ToUtf16(const uint16_t* const data_in, 83 static uint16_t* ConvertUtf8ToUtf16(const uint16_t* const data_in,
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
178 if (settings.repeat > 1) { 180 if (settings.repeat > 1) {
179 uint16_t* new_data = Repeat(settings.repeat, data, length); 181 uint16_t* new_data = Repeat(settings.repeat, data, length);
180 delete data; 182 delete data;
181 data = new_data; 183 data = new_data;
182 } 184 }
183 185
184 return data; 186 return data;
185 } 187 }
186 188
187 189
188 struct TokenWithLocation {
189 Token::Value value;
190 size_t beg;
191 size_t end;
192 std::vector<int> literal;
193 bool is_ascii;
194 // The location of the latest octal position when the token was seen.
195 int octal_beg;
196 int octal_end;
197 TokenWithLocation() :
198 value(Token::ILLEGAL), beg(0), end(0), is_ascii(false) { }
199 TokenWithLocation(Token::Value value, size_t beg, size_t end,
200 int octal_beg) :
201 value(value), beg(beg), end(end), is_ascii(false), octal_beg(octal_beg) {
202 }
203 bool operator==(const TokenWithLocation& other) {
204 return value == other.value && beg == other.beg && end == other.end &&
205 literal == other.literal && is_ascii == other.is_ascii &&
206 octal_beg == other.octal_beg;
207 }
208 bool operator!=(const TokenWithLocation& other) {
209 return !(*this == other);
210 }
211 void Print(const char* prefix) const {
212 printf("%s %11s at (%d, %d)",
213 prefix, Token::Name(value),
214 static_cast<int>(beg), static_cast<int>(end));
215 if (literal.size() > 0) {
216 for (size_t i = 0; i < literal.size(); i++) {
217 printf(is_ascii ? " %02x" : " %04x", literal[i]);
218 }
219 printf(" (is ascii: %d)", is_ascii);
220 }
221 printf(" (last octal start: %d)\n", octal_beg);
222 }
223 };
224
225
226 static bool HasLiteral(Token::Value token) { 190 static bool HasLiteral(Token::Value token) {
227 return token == Token::IDENTIFIER || 191 return token == Token::IDENTIFIER ||
228 token == Token::STRING || 192 token == Token::STRING ||
229 token == Token::NUMBER; 193 token == Token::NUMBER;
230 } 194 }
231 195
232 196
233 template<typename Char> 197 template<typename Char>
234 static std::vector<int> ToStdVector(const Vector<Char>& literal) { 198 static void Copy(const Vector<Char>& literal,
235 std::vector<int> result; 199 SmartArrayPointer<const uint16_t>* result,
200 int* literal_length) {
201 uint16_t* data = new uint16_t[literal.length()];
202 result->Reset(data);
236 for (int i = 0; i < literal.length(); i++) { 203 for (int i = 0; i < literal.length(); i++) {
237 result.push_back(literal[i]); 204 data[i] = literal[i];
238 } 205 }
239 return result; 206 *literal_length = literal.length();
240 } 207 }
241 208
242 209
243 template<typename Scanner> 210 class TokenWithLocation {
244 static TokenWithLocation GetTokenWithLocation( 211 public:
245 Scanner *scanner, Token::Value token) { 212 Token::Value value;
246 int beg = scanner->location().beg_pos; 213 int beg;
247 int end = scanner->location().end_pos; 214 int end;
248 TokenWithLocation result(token, beg, end, scanner->octal_position().beg_pos); 215 bool is_one_byte;
249 if (HasLiteral(token)) { 216 SmartArrayPointer<const uint16_t> literal;
250 result.is_ascii = scanner->is_literal_ascii(); 217 int literal_length;
251 if (scanner->is_literal_ascii()) { 218 // The location of the latest octal position when the token was seen.
252 result.literal = ToStdVector(scanner->literal_ascii_string()); 219 int octal_beg;
253 } else { 220 int octal_end;
254 result.literal = ToStdVector(scanner->literal_utf16_string()); 221 TokenWithLocation(Token::Value token, Scanner* scanner) : value(token) {
222 beg = scanner->location().beg_pos;
223 end = scanner->location().end_pos;
224 octal_beg = scanner->octal_position().beg_pos;
225 octal_end = scanner->octal_position().end_pos;
226 is_one_byte = false;
227 literal_length = 0;
228 if (HasLiteral(token)) {
229 is_one_byte = scanner->is_literal_ascii();
230 if (scanner->is_literal_ascii()) {
231 Copy(scanner->literal_ascii_string(), &literal, &literal_length);
232 } else {
233 Copy(scanner->literal_utf16_string(), &literal, &literal_length);
234 }
255 } 235 }
256 } 236 }
257 return result; 237 void Print(bool do_compare) const {
258 } 238 if (value == Token::ILLEGAL && do_compare) {
239 printf("%-15s (%d)\n", Token::Name(value), beg);
240 return;
241 }
242 printf("%-15s (%d, %d)", Token::Name(value), beg, end);
243 if (literal_length > 0) {
244 // TODO(dcarney): need some sort of checksum.
245 for (int i = 0; i < literal_length; i++) {
246 printf(is_one_byte ? " %02x" : " %04x", literal[i]);
247 }
248 printf(" (is_one_byte: %d)", is_one_byte);
249 }
250 if (octal_beg >= 0) {
251 printf(" (last octal start: %d)", octal_beg);
252 }
253 printf("\n");
254 }
255
256 private:
257 DISALLOW_COPY_AND_ASSIGN(TokenWithLocation);
258 };
259 259
260 260
261 static TimeDelta RunLexer(const uint16_t* source, 261 static TimeDelta RunLexer(const uint16_t* source,
262 const uint8_t* source_end, 262 const uint8_t* source_end,
263 Isolate* isolate, 263 Isolate* isolate,
264 std::vector<TokenWithLocation>* tokens,
265 const LexerShellSettings& settings) { 264 const LexerShellSettings& settings) {
266 SmartPointer<Utf16CharacterStream> stream; 265 SmartPointer<Utf16CharacterStream> stream;
267 const uint8_t* one_byte_source = reinterpret_cast<const uint8_t*>(source); 266 const uint8_t* one_byte_source = reinterpret_cast<const uint8_t*>(source);
268 int bytes = source_end - one_byte_source; 267 int bytes = source_end - one_byte_source;
269 switch (settings.encoding) { 268 switch (settings.encoding) {
270 case UTF8TO16: 269 case UTF8TO16:
271 case UTF8: 270 case UTF8:
272 stream.Reset(new Utf8ToUtf16CharacterStream(one_byte_source, bytes)); 271 stream.Reset(new Utf8ToUtf16CharacterStream(one_byte_source, bytes));
273 break; 272 break;
274 case UTF8TO16_PRECONVERT: 273 case UTF8TO16_PRECONVERT:
(...skipping 11 matching lines...) Expand all
286 stream.Reset( 285 stream.Reset(
287 new GenericStringUtf16CharacterStream(result, 0, result->length())); 286 new GenericStringUtf16CharacterStream(result, 0, result->length()));
288 break; 287 break;
289 } 288 }
290 } 289 }
291 Scanner scanner(isolate->unicode_cache()); 290 Scanner scanner(isolate->unicode_cache());
292 scanner.SetHarmonyNumericLiterals(settings.harmony_numeric_literals); 291 scanner.SetHarmonyNumericLiterals(settings.harmony_numeric_literals);
293 scanner.SetHarmonyModules(settings.harmony_modules); 292 scanner.SetHarmonyModules(settings.harmony_modules);
294 scanner.SetHarmonyScoping(settings.harmony_scoping); 293 scanner.SetHarmonyScoping(settings.harmony_scoping);
295 ElapsedTimer timer; 294 ElapsedTimer timer;
295 std::vector<TokenWithLocation*> tokens;
296 timer.Start(); 296 timer.Start();
297 scanner.Initialize(stream.get()); 297 scanner.Initialize(stream.get());
298 Token::Value token; 298 Token::Value token;
299 do { 299 do {
300 token = scanner.Next(); 300 token = scanner.Next();
301 if (settings.print_tokens) { 301 if (settings.print_tokens) {
302 tokens->push_back(GetTokenWithLocation(&scanner, token)); 302 tokens.push_back(new TokenWithLocation(token, &scanner));
303 } else if (HasLiteral(token)) { 303 } else if (HasLiteral(token)) {
304 if (scanner.is_literal_ascii()) { 304 if (scanner.is_literal_ascii()) {
305 scanner.literal_ascii_string(); 305 scanner.literal_ascii_string();
306 } else { 306 } else {
307 scanner.literal_utf16_string(); 307 scanner.literal_utf16_string();
308 } 308 }
309 } 309 }
310 if (token == Token::ILLEGAL && settings.break_after_illegal) break;
310 } while (token != Token::EOS); 311 } while (token != Token::EOS);
312 // Dump tokens.
313 if (settings.print_tokens) {
314 if (!settings.print_tokens_for_compare) {
315 printf("No of tokens:\t%d\n", static_cast<int>(tokens.size()));
316 }
317 for (size_t i = 0; i < tokens.size(); ++i) {
318 tokens[i]->Print(settings.print_tokens_for_compare);
319 }
320 }
321 for (size_t i = 0; i < tokens.size(); ++i) {
322 delete tokens[i];
323 }
311 return timer.Elapsed(); 324 return timer.Elapsed();
312 } 325 }
313 326
314 327
315 static TimeDelta ProcessFile( 328 static TimeDelta ProcessFile(
316 const char* fname, 329 const char* fname,
317 Isolate* isolate, 330 Isolate* isolate,
318 const LexerShellSettings& settings, 331 const LexerShellSettings& settings,
319 int truncate_by, 332 int truncate_by,
320 bool* can_truncate) { 333 bool* can_truncate) {
321 if (settings.print_tokens) { 334 if (settings.print_tokens && !settings.print_tokens_for_compare) {
322 printf("Processing file %s, truncating by %d bytes\n", fname, truncate_by); 335 printf("Processing file %s, truncating by %d bytes\n", fname, truncate_by);
323 } 336 }
324 HandleScope handle_scope(isolate); 337 HandleScope handle_scope(isolate);
325 std::vector<TokenWithLocation> tokens;
326 TimeDelta time; 338 TimeDelta time;
327 { 339 {
328 unsigned length_in_bytes; 340 unsigned length_in_bytes;
329 const uint16_t* buffer = ReadFile(fname, settings, &length_in_bytes); 341 const uint16_t* buffer = ReadFile(fname, settings, &length_in_bytes);
330 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(buffer); 342 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(buffer);
331 const uint8_t* buffer_end = &char_data[length_in_bytes]; 343 const uint8_t* buffer_end = &char_data[length_in_bytes];
332 if (truncate_by > buffer_end - char_data) { 344 if (truncate_by > buffer_end - char_data) {
333 *can_truncate = false; 345 *can_truncate = false;
334 } else { 346 } else {
335 buffer_end -= truncate_by; 347 buffer_end -= truncate_by;
336 time = RunLexer(buffer, buffer_end, isolate, &tokens, settings); 348 time = RunLexer(buffer, buffer_end, isolate, settings);
337 } 349 }
338 delete[] buffer; 350 delete[] buffer;
339 } 351 }
340 if (settings.print_tokens) { 352
341 printf("No of tokens:\t%d\n", static_cast<int>(tokens.size()));
342 for (size_t i = 0; i < tokens.size(); ++i) {
343 tokens[i].Print("=>");
344 if (tokens[i].value == Token::ILLEGAL) {
345 if (settings.break_after_illegal)
346 break;
347 }
348 }
349 }
350 return time; 353 return time;
351 } 354 }
352 355
353 356
354 int main(int argc, char* argv[]) { 357 int main(int argc, char* argv[]) {
355 v8::V8::InitializeICU(); 358 v8::V8::InitializeICU();
356 v8::V8::SetFlagsFromCommandLine(&argc, argv, true); 359 v8::V8::SetFlagsFromCommandLine(&argc, argv, true);
357 std::vector<std::string> fnames; 360 std::vector<std::string> fnames;
358 LexerShellSettings settings; 361 LexerShellSettings settings;
359 for (int i = 0; i < argc; ++i) { 362 for (int i = 0; i < argc; ++i) {
360 if (strcmp(argv[i], "--latin1") == 0) { 363 if (strcmp(argv[i], "--latin1") == 0) {
361 settings.encoding = LATIN1; 364 settings.encoding = LATIN1;
362 } else if (strcmp(argv[i], "--utf8") == 0) { 365 } else if (strcmp(argv[i], "--utf8") == 0) {
363 settings.encoding = UTF8; 366 settings.encoding = UTF8;
364 } else if (strcmp(argv[i], "--utf16") == 0) { 367 } else if (strcmp(argv[i], "--utf16") == 0) {
365 settings.encoding = UTF16; 368 settings.encoding = UTF16;
366 } else if (strcmp(argv[i], "--utf8to16") == 0) { 369 } else if (strcmp(argv[i], "--utf8to16") == 0) {
367 #ifdef V8_USE_GENERATED_LEXER 370 #ifdef V8_USE_GENERATED_LEXER
368 settings.encoding = UTF8TO16_PRECONVERT; 371 settings.encoding = UTF8TO16_PRECONVERT;
369 #else 372 #else
370 settings.encoding = UTF8TO16; 373 settings.encoding = UTF8TO16;
371 #endif 374 #endif
372 } else if (strcmp(argv[i], "--print-tokens") == 0) { 375 } else if (strcmp(argv[i], "--print-tokens") == 0) {
373 settings.print_tokens = true; 376 settings.print_tokens = true;
377 } else if (strcmp(argv[i], "--print-tokens-for-compare") == 0) {
378 settings.print_tokens = true;
379 settings.print_tokens_for_compare = true;
374 } else if (strcmp(argv[i], "--no-baseline") == 0) { 380 } else if (strcmp(argv[i], "--no-baseline") == 0) {
375 // Ignore. 381 // Ignore.
376 } else if (strcmp(argv[i], "--no-experimental") == 0) { 382 } else if (strcmp(argv[i], "--no-experimental") == 0) {
377 // Ignore. 383 // Ignore.
378 } else if (strcmp(argv[i], "--no-check") == 0) { 384 } else if (strcmp(argv[i], "--no-check") == 0) {
379 // Ignore. 385 // Ignore.
380 } else if (strcmp(argv[i], "--break-after-illegal") == 0) { 386 } else if (strcmp(argv[i], "--break-after-illegal") == 0) {
381 settings.break_after_illegal = true; 387 settings.break_after_illegal = true;
382 } else if (strcmp(argv[i], "--use-harmony") == 0) { 388 } else if (strcmp(argv[i], "--use-harmony") == 0) {
383 settings.harmony_numeric_literals = true; 389 settings.harmony_numeric_literals = true;
(...skipping 25 matching lines...) Expand all
409 do { 415 do {
410 TimeDelta t = ProcessFile(fnames[i].c_str(), 416 TimeDelta t = ProcessFile(fnames[i].c_str(),
411 internal_isolate, 417 internal_isolate,
412 settings, 418 settings,
413 truncate_by, 419 truncate_by,
414 &can_truncate); 420 &can_truncate);
415 total_time += t.InMillisecondsF(); 421 total_time += t.InMillisecondsF();
416 ++truncate_by; 422 ++truncate_by;
417 } while (can_truncate); 423 } while (can_truncate);
418 } 424 }
419 printf("RunTime: %.f ms\n", total_time); 425 if (!settings.print_tokens_for_compare) {
426 printf("RunTime: %.f ms\n", total_time);
427 }
420 } 428 }
421 v8::V8::Dispose(); 429 v8::V8::Dispose();
422 return 0; 430 return 0;
423 } 431 }
OLD | NEW
« no previous file with comments | « no previous file | tools/lexer_generator/test/run_lexing_tests.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698