Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1513)

Side by Side Diff: src/lexer/lexer-shell.cc

Issue 203103005: Experimental parser: only read one file in lexer-shell (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 the V8 project authors. All rights reserved. 1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
73 print_tokens_for_compare(false), 73 print_tokens_for_compare(false),
74 break_after_illegal(false), 74 break_after_illegal(false),
75 eos_test(false), 75 eos_test(false),
76 repeat(1), 76 repeat(1),
77 harmony_numeric_literals(false), 77 harmony_numeric_literals(false),
78 harmony_modules(false), 78 harmony_modules(false),
79 harmony_scoping(false) {} 79 harmony_scoping(false) {}
80 }; 80 };
81 81
82 82
83 struct FileData {
84 const char* file_name;
85 unsigned length_in_bytes;
86 Encoding encoding;
87 const uint16_t* data;
88 };
89
90
83 static uint16_t* ConvertUtf8ToUtf16(const uint16_t* const data_in, 91 static uint16_t* ConvertUtf8ToUtf16(const uint16_t* const data_in,
84 unsigned* length, 92 unsigned* length_in_bytes,
85 bool* is_one_byte) { 93 bool* is_one_byte) {
86 const unsigned file_size = *length; 94 const unsigned file_size = *length_in_bytes;
87 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(data_in); 95 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(data_in);
88 const uint32_t kMaxUtf16Character = 0xffff; 96 const uint32_t kMaxUtf16Character = 0xffff;
89 // Get utf8 length. 97 // Get utf8 length.
90 unsigned utf16_chars = 0; 98 unsigned utf16_chars = 0;
91 *is_one_byte = true; 99 *is_one_byte = true;
92 { 100 {
93 unsigned position = 0; 101 unsigned position = 0;
94 while (position < file_size) { 102 while (position < file_size) {
95 uint32_t c = char_data[position]; 103 uint32_t c = char_data[position];
96 if (c <= unibrow::Utf8::kMaxOneByteChar) { 104 if (c <= unibrow::Utf8::kMaxOneByteChar) {
(...skipping 24 matching lines...) Expand all
121 file_size - position, 129 file_size - position,
122 &position); 130 &position);
123 } 131 }
124 if (c > kMaxUtf16Character) { 132 if (c > kMaxUtf16Character) {
125 data[i++] = unibrow::Utf16::LeadSurrogate(c); 133 data[i++] = unibrow::Utf16::LeadSurrogate(c);
126 data[i++] = unibrow::Utf16::TrailSurrogate(c); 134 data[i++] = unibrow::Utf16::TrailSurrogate(c);
127 } else { 135 } else {
128 data[i++] = static_cast<uc16>(c); 136 data[i++] = static_cast<uc16>(c);
129 } 137 }
130 } 138 }
131 *length = 2 * utf16_chars; 139 *length_in_bytes = 2 * utf16_chars;
132 return data; 140 return data;
133 } 141 }
134 142
135 143
136 static uint16_t* ConvertUtf16ToLatin1(const uint16_t* const data_in, 144 static uint16_t* ConvertUtf16ToLatin1(const uint16_t* const data_in,
137 unsigned* length) { 145 unsigned* length_in_bytes) {
138 const unsigned size = *length / 2 + *length % 2; 146 const unsigned size = *length_in_bytes / 2 + *length_in_bytes % 2;
139 uint16_t* data = new uint16_t[size]; 147 uint16_t* data = new uint16_t[size];
140 uint8_t* char_data = reinterpret_cast<uint8_t*>(data); 148 uint8_t* char_data = reinterpret_cast<uint8_t*>(data);
141 CopyChars(char_data, data_in, size); 149 CopyChars(char_data, data_in, size);
142 *length = size; 150 *length_in_bytes = size;
143 return data; 151 return data;
144 } 152 }
145 153
146 154
147 static uint16_t* Repeat(int repeat, 155 static uint16_t* Repeat(int repeat,
148 const uint16_t* const data_in, 156 const uint16_t* const data_in,
149 unsigned* length) { 157 unsigned* length_in_bytes) {
150 const unsigned file_size = *length; 158 const unsigned file_size = *length_in_bytes;
151 unsigned size = file_size * repeat; 159 unsigned size = file_size * repeat;
152 uint16_t* data = new uint16_t[size / 2 + size % 2]; 160 uint16_t* data = new uint16_t[size / 2 + size % 2];
153 uint8_t* char_data = reinterpret_cast<uint8_t*>(data); 161 uint8_t* char_data = reinterpret_cast<uint8_t*>(data);
154 for (int i = 0; i < repeat; i++) { 162 for (int i = 0; i < repeat; i++) {
155 memcpy(&char_data[i * file_size], data_in, file_size); 163 memcpy(&char_data[i * file_size], data_in, file_size);
156 } 164 }
157 *length = size; 165 *length_in_bytes = size;
158 return data; 166 return data;
159 } 167 }
160 168
161 169
162 static uint16_t* ReadFile(const char* name, unsigned* length) { 170 static uint16_t* ReadFile(const char* name, unsigned* length_in_bytes) {
163 FILE* file = fopen(name, "rb"); 171 FILE* file = fopen(name, "rb");
164 CHECK(file != NULL); 172 CHECK(file != NULL);
165 // Get file size. 173 // Get file size.
166 fseek(file, 0, SEEK_END); 174 fseek(file, 0, SEEK_END);
167 unsigned file_size = ftell(file); 175 unsigned file_size = ftell(file);
168 rewind(file); 176 rewind(file);
169 // Read file contents. 177 // Read file contents.
170 uint16_t* data = new uint16_t[file_size / 2 + file_size % 2]; 178 uint16_t* data = new uint16_t[file_size / 2 + file_size % 2];
171 uint8_t* char_data = reinterpret_cast<uint8_t*>(data); 179 uint8_t* char_data = reinterpret_cast<uint8_t*>(data);
172 for (unsigned i = 0; i < file_size;) { 180 for (unsigned i = 0; i < file_size;) {
173 i += fread(&char_data[i], 1, file_size - i, file); 181 i += fread(&char_data[i], 1, file_size - i, file);
174 } 182 }
175 fclose(file); 183 fclose(file);
176 *length = file_size; 184 *length_in_bytes = file_size;
177 return data; 185 return data;
178 } 186 }
179 187
180 188
181 static uint16_t* ReadFile(const char* name, 189 static FileData ReadFile(const char* file_name,
182 const LexerShellSettings& settings, 190 const LexerShellSettings& settings) {
183 unsigned* length, 191 unsigned length_in_bytes;
184 Encoding* output_encoding) { 192 uint16_t* data = ReadFile(file_name, &length_in_bytes);
185 uint16_t* data = ReadFile(name, length); 193 CHECK_GE(length_in_bytes, 0);
186 CHECK_GE(*length, 0);
187 if (*length == 0) return data;
188 194
189 *output_encoding = settings.encoding; 195 Encoding encoding = settings.encoding;
190 196 if (encoding == UTF8TO16 || encoding == UTF8TOLATIN1) {
191 if (settings.encoding == UTF8TO16 ||
192 settings.encoding == UTF8TOLATIN1) {
193 bool is_one_byte; 197 bool is_one_byte;
194 uint16_t* new_data = ConvertUtf8ToUtf16(data, length, &is_one_byte); 198 uint16_t* new_data = ConvertUtf8ToUtf16(
195 if (settings.encoding == UTF8TOLATIN1 && is_one_byte) { 199 data, &length_in_bytes, &is_one_byte);
196 *output_encoding = LATIN1; 200 if (encoding == UTF8TOLATIN1 && is_one_byte) {
201 encoding = LATIN1;
197 } else { 202 } else {
198 *output_encoding = UTF16; 203 encoding = UTF16;
199 } 204 }
200 delete data; 205 delete data;
201 data = new_data; 206 data = new_data;
202 } 207 }
203 208
204 if (settings.encoding == UTF8TOLATIN1 && *output_encoding == LATIN1) { 209 if (settings.encoding == UTF8TOLATIN1 && encoding == LATIN1) {
205 uint16_t* new_data = ConvertUtf16ToLatin1(data, length); 210 uint16_t* new_data = ConvertUtf16ToLatin1(data, &length_in_bytes);
206 delete data; 211 delete data;
207 data = new_data; 212 data = new_data;
208 } 213 }
209 214
210 if (settings.repeat > 1) { 215 if (settings.repeat > 1) {
211 uint16_t* new_data = Repeat(settings.repeat, data, length); 216 uint16_t* new_data = Repeat(settings.repeat, data, &length_in_bytes);
212 delete data; 217 delete data;
213 data = new_data; 218 data = new_data;
214 } 219 }
215 220
216 return data; 221 FileData file_data;
222 file_data.file_name = file_name;
223 file_data.data = data;
224 file_data.length_in_bytes = length_in_bytes;
225 file_data.encoding = encoding;
226
227 return file_data;
217 } 228 }
218 229
219 230
220 static bool HasLiteral(Token::Value token) { 231 static bool HasLiteral(Token::Value token) {
221 return token == Token::IDENTIFIER || 232 return token == Token::IDENTIFIER ||
222 token == Token::STRING || 233 token == Token::STRING ||
223 token == Token::NUMBER; 234 token == Token::NUMBER;
224 } 235 }
225 236
226 237
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
329 token = scanner.Next(); 340 token = scanner.Next();
330 Handle<String> literal; 341 Handle<String> literal;
331 if (HasLiteral(token)) { 342 if (HasLiteral(token)) {
332 literal = scanner.AllocateInternalizedString(isolate); 343 literal = scanner.AllocateInternalizedString(isolate);
333 } 344 }
334 if (settings.print_tokens) { 345 if (settings.print_tokens) {
335 tokens.push_back(new TokenWithLocation(token, &scanner, literal)); 346 tokens.push_back(new TokenWithLocation(token, &scanner, literal));
336 } 347 }
337 if (token == Token::ILLEGAL && settings.break_after_illegal) break; 348 if (token == Token::ILLEGAL && settings.break_after_illegal) break;
338 } while (token != Token::EOS); 349 } while (token != Token::EOS);
350 TimeDelta elapsed = timer.Elapsed();
339 // Dump tokens. 351 // Dump tokens.
340 if (settings.print_tokens) { 352 if (settings.print_tokens) {
341 if (!settings.print_tokens_for_compare) { 353 if (!settings.print_tokens_for_compare) {
342 printf("No of tokens:\t%d\n", static_cast<int>(tokens.size())); 354 printf("No of tokens:\t%d\n", static_cast<int>(tokens.size()));
343 } 355 }
344 for (size_t i = 0; i < tokens.size(); ++i) { 356 for (size_t i = 0; i < tokens.size(); ++i) {
345 tokens[i]->Print(settings.print_tokens_for_compare); 357 tokens[i]->Print(settings.print_tokens_for_compare);
346 } 358 }
347 } 359 }
348 for (size_t i = 0; i < tokens.size(); ++i) { 360 for (size_t i = 0; i < tokens.size(); ++i) {
349 delete tokens[i]; 361 delete tokens[i];
350 } 362 }
351 return timer.Elapsed(); 363 return elapsed;
352 } 364 }
353 365
354 366
355 static TimeDelta ProcessFile( 367 static TimeDelta ProcessFile(
356 const char* fname,
357 Isolate* isolate, 368 Isolate* isolate,
358 const LexerShellSettings& settings, 369 const LexerShellSettings& settings,
370 const FileData& file_data,
359 int truncate_by, 371 int truncate_by,
360 bool* can_truncate) { 372 bool* can_truncate) {
361 if (settings.print_tokens && !settings.print_tokens_for_compare) { 373 if (settings.print_tokens && !settings.print_tokens_for_compare) {
362 printf("Processing file %s, truncating by %d bytes\n", fname, truncate_by); 374 printf("Processing file %s, truncating by %d bytes\n",
375 file_data.file_name, truncate_by);
363 } 376 }
364 HandleScope handle_scope(isolate); 377 HandleScope handle_scope(isolate);
378 const uint16_t* buffer = file_data.data;
379 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(buffer);
380 const uint8_t* buffer_end = &char_data[file_data.length_in_bytes];
365 TimeDelta time; 381 TimeDelta time;
366 { 382 if (truncate_by > buffer_end - char_data) {
367 unsigned length_in_bytes; 383 *can_truncate = false;
368 Encoding output_encoding; 384 } else {
369 const uint16_t* buffer = 385 buffer_end -= truncate_by;
370 ReadFile(fname, settings, &length_in_bytes, &output_encoding); 386 time = RunLexer(buffer, buffer_end, isolate, file_data.encoding, settings);
371 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(buffer);
372 const uint8_t* buffer_end = &char_data[length_in_bytes];
373 if (truncate_by > buffer_end - char_data) {
374 *can_truncate = false;
375 } else {
376 buffer_end -= truncate_by;
377 time = RunLexer(buffer, buffer_end, isolate, output_encoding, settings);
378 }
379 delete[] buffer;
380 } 387 }
381
382 return time; 388 return time;
383 } 389 }
384 390
385 391
386 int main(int argc, char* argv[]) { 392 int main(int argc, char* argv[]) {
387 v8::V8::InitializeICU(); 393 v8::V8::InitializeICU();
388 v8::V8::SetFlagsFromCommandLine(&argc, argv, true); 394 v8::V8::SetFlagsFromCommandLine(&argc, argv, true);
389 std::vector<std::string> fnames; 395 std::string file_name;
390 LexerShellSettings settings; 396 LexerShellSettings settings;
391 for (int i = 0; i < argc; ++i) { 397 for (int i = 0; i < argc; ++i) {
392 if (strcmp(argv[i], "--latin1") == 0) { 398 if (strcmp(argv[i], "--latin1") == 0) {
393 settings.encoding = LATIN1; 399 settings.encoding = LATIN1;
394 } else if (strcmp(argv[i], "--utf8") == 0) { 400 } else if (strcmp(argv[i], "--utf8") == 0) {
395 settings.encoding = UTF8; 401 settings.encoding = UTF8;
396 } else if (strcmp(argv[i], "--utf16") == 0) { 402 } else if (strcmp(argv[i], "--utf16") == 0) {
397 settings.encoding = UTF16; 403 settings.encoding = UTF16;
398 } else if (strcmp(argv[i], "--utf8to16") == 0) { 404 } else if (strcmp(argv[i], "--utf8to16") == 0) {
399 #ifdef V8_USE_GENERATED_LEXER 405 #ifdef V8_USE_GENERATED_LEXER
(...skipping 25 matching lines...) Expand all
425 settings.harmony_modules = true; 431 settings.harmony_modules = true;
426 settings.harmony_scoping = true; 432 settings.harmony_scoping = true;
427 } else if (strncmp(argv[i], "--benchmark=", 12) == 0) { 433 } else if (strncmp(argv[i], "--benchmark=", 12) == 0) {
428 // Ignore. 434 // Ignore.
429 } else if (strncmp(argv[i], "--repeat=", 9) == 0) { 435 } else if (strncmp(argv[i], "--repeat=", 9) == 0) {
430 std::string repeat_str = std::string(argv[i]).substr(9); 436 std::string repeat_str = std::string(argv[i]).substr(9);
431 settings.repeat = atoi(repeat_str.c_str()); 437 settings.repeat = atoi(repeat_str.c_str());
432 } else if (strcmp(argv[i], "--eos-test") == 0) { 438 } else if (strcmp(argv[i], "--eos-test") == 0) {
433 settings.eos_test = true; 439 settings.eos_test = true;
434 } else if (i > 0 && argv[i][0] != '-') { 440 } else if (i > 0 && argv[i][0] != '-') {
435 fnames.push_back(std::string(argv[i])); 441 file_name = std::string(argv[i]);
436 } 442 }
437 } 443 }
444 CHECK_NE(0, file_name.size());
445 FileData file_data = ReadFile(file_name.c_str(), settings);
438 { 446 {
439 v8::Isolate* isolate = v8::Isolate::GetCurrent(); 447 v8::Isolate* isolate = v8::Isolate::GetCurrent();
440 v8::HandleScope handle_scope(isolate); 448 v8::HandleScope handle_scope(isolate);
441 v8::Local<v8::Context> context = v8::Context::New(isolate); 449 v8::Local<v8::Context> context = v8::Context::New(isolate);
442 CHECK(!context.IsEmpty()); 450 CHECK(!context.IsEmpty());
443 v8::Context::Scope scope(context); 451 v8::Context::Scope scope(context);
444 Isolate* internal_isolate = Isolate::Current(); 452 Isolate* internal_isolate = Isolate::Current();
445 double total_time = 0; 453 double total_time = 0;
446 for (size_t i = 0; i < fnames.size(); i++) { 454 bool can_truncate = settings.eos_test;
447 std::pair<TimeDelta, TimeDelta> times; 455 int truncate_by = 0;
448 bool can_truncate = settings.eos_test; 456 do {
449 int truncate_by = 0; 457 TimeDelta t = ProcessFile(internal_isolate,
450 do { 458 settings,
451 TimeDelta t = ProcessFile(fnames[i].c_str(), 459 file_data,
452 internal_isolate, 460 truncate_by,
453 settings, 461 &can_truncate);
454 truncate_by, 462 total_time += t.InMillisecondsF();
455 &can_truncate); 463 ++truncate_by;
456 total_time += t.InMillisecondsF(); 464 } while (can_truncate);
457 ++truncate_by;
458 } while (can_truncate);
459 }
460 if (!settings.print_tokens_for_compare) { 465 if (!settings.print_tokens_for_compare) {
461 printf("RunTime: %.f ms\n", total_time); 466 printf("RunTime: %.f ms\n", total_time);
462 } 467 }
463 } 468 }
469 delete[] file_data.data;
464 v8::V8::Dispose(); 470 v8::V8::Dispose();
465 return 0; 471 return 0;
466 } 472 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698