src/lexer/lexer-shell.cc - Issue 203103005: Experimental parser: only read one file in lexer-shell

Side by Side Diff: src/lexer/lexer-shell.cc

Issue 203103005: Experimental parser: only read one file in lexer-shell (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/parser

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2013 the V8 project authors. All rights reserved.	1 // Copyright 2013 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
73 print_tokens_for_compare(false),	73 print_tokens_for_compare(false),

74 break_after_illegal(false),	74 break_after_illegal(false),

75 eos_test(false),	75 eos_test(false),

76 repeat(1),	76 repeat(1),

77 harmony_numeric_literals(false),	77 harmony_numeric_literals(false),

78 harmony_modules(false),	78 harmony_modules(false),

79 harmony_scoping(false) {}	79 harmony_scoping(false) {}

80 };	80 };

81	81

82	82

	83 struct FileData {

	84 const char* file_name;

	85 unsigned length_in_bytes;

	86 Encoding encoding;

	87 const uint16_t* data;

	88 };

	89

	90

83 static uint16_t* ConvertUtf8ToUtf16(const uint16_t* const data_in,	91 static uint16_t* ConvertUtf8ToUtf16(const uint16_t* const data_in,

84 unsigned* length,	92 unsigned* length_in_bytes,

85 bool* is_one_byte) {	93 bool* is_one_byte) {

86 const unsigned file_size = *length;	94 const unsigned file_size = *length_in_bytes;

87 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(data_in);	95 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(data_in);

88 const uint32_t kMaxUtf16Character = 0xffff;	96 const uint32_t kMaxUtf16Character = 0xffff;

89 // Get utf8 length.	97 // Get utf8 length.

90 unsigned utf16_chars = 0;	98 unsigned utf16_chars = 0;

91 *is_one_byte = true;	99 *is_one_byte = true;

92 {	100 {

93 unsigned position = 0;	101 unsigned position = 0;

94 while (position < file_size) {	102 while (position < file_size) {

95 uint32_t c = char_data[position];	103 uint32_t c = char_data[position];

96 if (c <= unibrow::Utf8::kMaxOneByteChar) {	104 if (c <= unibrow::Utf8::kMaxOneByteChar) {

(...skipping 24 matching lines...) Expand all Loading...
121 file_size - position,	129 file_size - position,

122 &position);	130 &position);

123 }	131 }

124 if (c > kMaxUtf16Character) {	132 if (c > kMaxUtf16Character) {

125 data[i++] = unibrow::Utf16::LeadSurrogate(c);	133 data[i++] = unibrow::Utf16::LeadSurrogate(c);

126 data[i++] = unibrow::Utf16::TrailSurrogate(c);	134 data[i++] = unibrow::Utf16::TrailSurrogate(c);

127 } else {	135 } else {

128 data[i++] = static_cast<uc16>(c);	136 data[i++] = static_cast<uc16>(c);

129 }	137 }

130 }	138 }

131 *length = 2 * utf16_chars;	139 *length_in_bytes = 2 * utf16_chars;

132 return data;	140 return data;

133 }	141 }

134	142

135	143

136 static uint16_t* ConvertUtf16ToLatin1(const uint16_t* const data_in,	144 static uint16_t* ConvertUtf16ToLatin1(const uint16_t* const data_in,

137 unsigned* length) {	145 unsigned* length_in_bytes) {

138 const unsigned size = *length / 2 + *length % 2;	146 const unsigned size = *length_in_bytes / 2 + *length_in_bytes % 2;

139 uint16_t* data = new uint16_t[size];	147 uint16_t* data = new uint16_t[size];

140 uint8_t* char_data = reinterpret_cast<uint8_t*>(data);	148 uint8_t* char_data = reinterpret_cast<uint8_t*>(data);

141 CopyChars(char_data, data_in, size);	149 CopyChars(char_data, data_in, size);

142 *length = size;	150 *length_in_bytes = size;

143 return data;	151 return data;

144 }	152 }

145	153

146	154

147 static uint16_t* Repeat(int repeat,	155 static uint16_t* Repeat(int repeat,

148 const uint16_t* const data_in,	156 const uint16_t* const data_in,

149 unsigned* length) {	157 unsigned* length_in_bytes) {

150 const unsigned file_size = *length;	158 const unsigned file_size = *length_in_bytes;

151 unsigned size = file_size * repeat;	159 unsigned size = file_size * repeat;

152 uint16_t* data = new uint16_t[size / 2 + size % 2];	160 uint16_t* data = new uint16_t[size / 2 + size % 2];

153 uint8_t* char_data = reinterpret_cast<uint8_t*>(data);	161 uint8_t* char_data = reinterpret_cast<uint8_t*>(data);

154 for (int i = 0; i < repeat; i++) {	162 for (int i = 0; i < repeat; i++) {

155 memcpy(&char_data[i * file_size], data_in, file_size);	163 memcpy(&char_data[i * file_size], data_in, file_size);

156 }	164 }

157 *length = size;	165 *length_in_bytes = size;

158 return data;	166 return data;

159 }	167 }

160	168

161	169

162 static uint16_t* ReadFile(const char* name, unsigned* length) {	170 static uint16_t* ReadFile(const char* name, unsigned* length_in_bytes) {

163 FILE* file = fopen(name, "rb");	171 FILE* file = fopen(name, "rb");

164 CHECK(file != NULL);	172 CHECK(file != NULL);

165 // Get file size.	173 // Get file size.

166 fseek(file, 0, SEEK_END);	174 fseek(file, 0, SEEK_END);

167 unsigned file_size = ftell(file);	175 unsigned file_size = ftell(file);

168 rewind(file);	176 rewind(file);

169 // Read file contents.	177 // Read file contents.

170 uint16_t* data = new uint16_t[file_size / 2 + file_size % 2];	178 uint16_t* data = new uint16_t[file_size / 2 + file_size % 2];

171 uint8_t* char_data = reinterpret_cast<uint8_t*>(data);	179 uint8_t* char_data = reinterpret_cast<uint8_t*>(data);

172 for (unsigned i = 0; i < file_size;) {	180 for (unsigned i = 0; i < file_size;) {

173 i += fread(&char_data[i], 1, file_size - i, file);	181 i += fread(&char_data[i], 1, file_size - i, file);

174 }	182 }

175 fclose(file);	183 fclose(file);

176 *length = file_size;	184 *length_in_bytes = file_size;

177 return data;	185 return data;

178 }	186 }

179	187

180	188

181 static uint16_t* ReadFile(const char* name,	189 static FileData ReadFile(const char* file_name,

182 const LexerShellSettings& settings,	190 const LexerShellSettings& settings) {

183 unsigned* length,	191 unsigned length_in_bytes;

184 Encoding* output_encoding) {	192 uint16_t* data = ReadFile(file_name, &length_in_bytes);

185 uint16_t* data = ReadFile(name, length);	193 CHECK_GE(length_in_bytes, 0);

186 CHECK_GE(*length, 0);

187 if (*length == 0) return data;

188	194

189 *output_encoding = settings.encoding;	195 Encoding encoding = settings.encoding;

190	196 if (encoding == UTF8TO16 || encoding == UTF8TOLATIN1) {

191 if (settings.encoding == UTF8TO16 ||

192 settings.encoding == UTF8TOLATIN1) {

193 bool is_one_byte;	197 bool is_one_byte;

194 uint16_t* new_data = ConvertUtf8ToUtf16(data, length, &is_one_byte);	198 uint16_t* new_data = ConvertUtf8ToUtf16(

195 if (settings.encoding == UTF8TOLATIN1 && is_one_byte) {	199 data, &length_in_bytes, &is_one_byte);

196 *output_encoding = LATIN1;	200 if (encoding == UTF8TOLATIN1 && is_one_byte) {

	201 encoding = LATIN1;

197 } else {	202 } else {

198 *output_encoding = UTF16;	203 encoding = UTF16;

199 }	204 }

200 delete data;	205 delete data;

201 data = new_data;	206 data = new_data;

202 }	207 }

203	208

204 if (settings.encoding == UTF8TOLATIN1 && *output_encoding == LATIN1) {	209 if (settings.encoding == UTF8TOLATIN1 && encoding == LATIN1) {

205 uint16_t* new_data = ConvertUtf16ToLatin1(data, length);	210 uint16_t* new_data = ConvertUtf16ToLatin1(data, &length_in_bytes);

206 delete data;	211 delete data;

207 data = new_data;	212 data = new_data;

208 }	213 }

209	214

210 if (settings.repeat > 1) {	215 if (settings.repeat > 1) {

211 uint16_t* new_data = Repeat(settings.repeat, data, length);	216 uint16_t* new_data = Repeat(settings.repeat, data, &length_in_bytes);

212 delete data;	217 delete data;

213 data = new_data;	218 data = new_data;

214 }	219 }

215	220

216 return data;	221 FileData file_data;

	222 file_data.file_name = file_name;

	223 file_data.data = data;

	224 file_data.length_in_bytes = length_in_bytes;

	225 file_data.encoding = encoding;

	226

	227 return file_data;

217 }	228 }

218	229

219	230

220 static bool HasLiteral(Token::Value token) {	231 static bool HasLiteral(Token::Value token) {

221 return token == Token::IDENTIFIER ||	232 return token == Token::IDENTIFIER ||

222 token == Token::STRING ||	233 token == Token::STRING ||

223 token == Token::NUMBER;	234 token == Token::NUMBER;

224 }	235 }

225	236

226	237

(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
329 token = scanner.Next();	340 token = scanner.Next();

330 Handle<String> literal;	341 Handle<String> literal;

331 if (HasLiteral(token)) {	342 if (HasLiteral(token)) {

332 literal = scanner.AllocateInternalizedString(isolate);	343 literal = scanner.AllocateInternalizedString(isolate);

333 }	344 }

334 if (settings.print_tokens) {	345 if (settings.print_tokens) {

335 tokens.push_back(new TokenWithLocation(token, &scanner, literal));	346 tokens.push_back(new TokenWithLocation(token, &scanner, literal));

336 }	347 }

337 if (token == Token::ILLEGAL && settings.break_after_illegal) break;	348 if (token == Token::ILLEGAL && settings.break_after_illegal) break;

338 } while (token != Token::EOS);	349 } while (token != Token::EOS);

	350 TimeDelta elapsed = timer.Elapsed();

339 // Dump tokens.	351 // Dump tokens.

340 if (settings.print_tokens) {	352 if (settings.print_tokens) {

341 if (!settings.print_tokens_for_compare) {	353 if (!settings.print_tokens_for_compare) {

342 printf("No of tokens:\t%d\n", static_cast<int>(tokens.size()));	354 printf("No of tokens:\t%d\n", static_cast<int>(tokens.size()));

343 }	355 }

344 for (size_t i = 0; i < tokens.size(); ++i) {	356 for (size_t i = 0; i < tokens.size(); ++i) {

345 tokens[i]->Print(settings.print_tokens_for_compare);	357 tokens[i]->Print(settings.print_tokens_for_compare);

346 }	358 }

347 }	359 }

348 for (size_t i = 0; i < tokens.size(); ++i) {	360 for (size_t i = 0; i < tokens.size(); ++i) {

349 delete tokens[i];	361 delete tokens[i];

350 }	362 }

351 return timer.Elapsed();	363 return elapsed;

352 }	364 }

353	365

354	366

355 static TimeDelta ProcessFile(	367 static TimeDelta ProcessFile(

356 const char* fname,

357 Isolate* isolate,	368 Isolate* isolate,

358 const LexerShellSettings& settings,	369 const LexerShellSettings& settings,

	370 const FileData& file_data,

359 int truncate_by,	371 int truncate_by,

360 bool* can_truncate) {	372 bool* can_truncate) {

361 if (settings.print_tokens && !settings.print_tokens_for_compare) {	373 if (settings.print_tokens && !settings.print_tokens_for_compare) {

362 printf("Processing file %s, truncating by %d bytes\n", fname, truncate_by);	374 printf("Processing file %s, truncating by %d bytes\n",

	375 file_data.file_name, truncate_by);

363 }	376 }

364 HandleScope handle_scope(isolate);	377 HandleScope handle_scope(isolate);

	378 const uint16_t* buffer = file_data.data;

	379 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(buffer);

	380 const uint8_t* buffer_end = &char_data[file_data.length_in_bytes];

365 TimeDelta time;	381 TimeDelta time;

366 {	382 if (truncate_by > buffer_end - char_data) {

367 unsigned length_in_bytes;	383 *can_truncate = false;

368 Encoding output_encoding;	384 } else {

369 const uint16_t* buffer =	385 buffer_end -= truncate_by;

370 ReadFile(fname, settings, &length_in_bytes, &output_encoding);	386 time = RunLexer(buffer, buffer_end, isolate, file_data.encoding, settings);

371 const uint8_t* char_data = reinterpret_cast<const uint8_t*>(buffer);

372 const uint8_t* buffer_end = &char_data[length_in_bytes];

373 if (truncate_by > buffer_end - char_data) {

374 *can_truncate = false;

375 } else {

376 buffer_end -= truncate_by;

377 time = RunLexer(buffer, buffer_end, isolate, output_encoding, settings);

378 }

379 delete[] buffer;

380 }	387 }

381

382 return time;	388 return time;

383 }	389 }

384	390

385	391

386 int main(int argc, char* argv[]) {	392 int main(int argc, char* argv[]) {

387 v8::V8::InitializeICU();	393 v8::V8::InitializeICU();

388 v8::V8::SetFlagsFromCommandLine(&argc, argv, true);	394 v8::V8::SetFlagsFromCommandLine(&argc, argv, true);

389 std::vector<std::string> fnames;	395 std::string file_name;

390 LexerShellSettings settings;	396 LexerShellSettings settings;

391 for (int i = 0; i < argc; ++i) {	397 for (int i = 0; i < argc; ++i) {

392 if (strcmp(argv[i], "--latin1") == 0) {	398 if (strcmp(argv[i], "--latin1") == 0) {

393 settings.encoding = LATIN1;	399 settings.encoding = LATIN1;

394 } else if (strcmp(argv[i], "--utf8") == 0) {	400 } else if (strcmp(argv[i], "--utf8") == 0) {

395 settings.encoding = UTF8;	401 settings.encoding = UTF8;

396 } else if (strcmp(argv[i], "--utf16") == 0) {	402 } else if (strcmp(argv[i], "--utf16") == 0) {

397 settings.encoding = UTF16;	403 settings.encoding = UTF16;

398 } else if (strcmp(argv[i], "--utf8to16") == 0) {	404 } else if (strcmp(argv[i], "--utf8to16") == 0) {

399 #ifdef V8_USE_GENERATED_LEXER	405 #ifdef V8_USE_GENERATED_LEXER

(...skipping 25 matching lines...) Expand all Loading...
425 settings.harmony_modules = true;	431 settings.harmony_modules = true;

426 settings.harmony_scoping = true;	432 settings.harmony_scoping = true;

427 } else if (strncmp(argv[i], "--benchmark=", 12) == 0) {	433 } else if (strncmp(argv[i], "--benchmark=", 12) == 0) {

428 // Ignore.	434 // Ignore.

429 } else if (strncmp(argv[i], "--repeat=", 9) == 0) {	435 } else if (strncmp(argv[i], "--repeat=", 9) == 0) {

430 std::string repeat_str = std::string(argv[i]).substr(9);	436 std::string repeat_str = std::string(argv[i]).substr(9);

431 settings.repeat = atoi(repeat_str.c_str());	437 settings.repeat = atoi(repeat_str.c_str());

432 } else if (strcmp(argv[i], "--eos-test") == 0) {	438 } else if (strcmp(argv[i], "--eos-test") == 0) {

433 settings.eos_test = true;	439 settings.eos_test = true;

434 } else if (i > 0 && argv[i][0] != '-') {	440 } else if (i > 0 && argv[i][0] != '-') {

435 fnames.push_back(std::string(argv[i]));	441 file_name = std::string(argv[i]);

436 }	442 }

437 }	443 }

	444 CHECK_NE(0, file_name.size());

	445 FileData file_data = ReadFile(file_name.c_str(), settings);

438 {	446 {

439 v8::Isolate* isolate = v8::Isolate::GetCurrent();	447 v8::Isolate* isolate = v8::Isolate::GetCurrent();

440 v8::HandleScope handle_scope(isolate);	448 v8::HandleScope handle_scope(isolate);

441 v8::Local<v8::Context> context = v8::Context::New(isolate);	449 v8::Local<v8::Context> context = v8::Context::New(isolate);

442 CHECK(!context.IsEmpty());	450 CHECK(!context.IsEmpty());

443 v8::Context::Scope scope(context);	451 v8::Context::Scope scope(context);

444 Isolate* internal_isolate = Isolate::Current();	452 Isolate* internal_isolate = Isolate::Current();

445 double total_time = 0;	453 double total_time = 0;

446 for (size_t i = 0; i < fnames.size(); i++) {	454 bool can_truncate = settings.eos_test;

447 std::pair<TimeDelta, TimeDelta> times;	455 int truncate_by = 0;

448 bool can_truncate = settings.eos_test;	456 do {

449 int truncate_by = 0;	457 TimeDelta t = ProcessFile(internal_isolate,

450 do {	458 settings,

451 TimeDelta t = ProcessFile(fnames[i].c_str(),	459 file_data,

452 internal_isolate,	460 truncate_by,

453 settings,	461 &can_truncate);

454 truncate_by,	462 total_time += t.InMillisecondsF();

455 &can_truncate);	463 ++truncate_by;

456 total_time += t.InMillisecondsF();	464 } while (can_truncate);

457 ++truncate_by;

458 } while (can_truncate);

459 }

460 if (!settings.print_tokens_for_compare) {	465 if (!settings.print_tokens_for_compare) {

461 printf("RunTime: %.f ms\n", total_time);	466 printf("RunTime: %.f ms\n", total_time);

462 }	467 }

463 }	468 }

	469 delete[] file_data.data;

464 v8::V8::Dispose();	470 v8::V8::Dispose();

465 return 0;	471 return 0;

466 }	472 }

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »