| OLD | NEW |
| 1 // Copyright 2013 the V8 project authors. All rights reserved. | 1 // Copyright 2013 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 29 matching lines...) Expand all Loading... |
| 40 #include "platform.h" | 40 #include "platform.h" |
| 41 #include "runtime.h" | 41 #include "runtime.h" |
| 42 #include "scanner-character-streams.h" | 42 #include "scanner-character-streams.h" |
| 43 #include "scopeinfo.h" | 43 #include "scopeinfo.h" |
| 44 #include "string-stream.h" | 44 #include "string-stream.h" |
| 45 #include "scanner.h" | 45 #include "scanner.h" |
| 46 #include "lexer/lexer.h" | 46 #include "lexer/lexer.h" |
| 47 | 47 |
| 48 using namespace v8::internal; | 48 using namespace v8::internal; |
| 49 | 49 |
| 50 static byte* ReadFile(const char* name, const byte** end, int repeat, | |
| 51 bool convert_to_utf16) { | |
| 52 FILE* file = fopen(name, "rb"); | |
| 53 if (file == NULL) return NULL; | |
| 54 | |
| 55 fseek(file, 0, SEEK_END); | |
| 56 int file_size = ftell(file); | |
| 57 rewind(file); | |
| 58 | |
| 59 int size = file_size * repeat; | |
| 60 | |
| 61 byte* chars = new byte[size]; | |
| 62 for (int i = 0; i < file_size;) { | |
| 63 int read = static_cast<int>(fread(&chars[i], 1, file_size - i, file)); | |
| 64 i += read; | |
| 65 } | |
| 66 fclose(file); | |
| 67 | |
| 68 for (int i = file_size; i < size; i++) { | |
| 69 chars[i] = chars[i - file_size]; | |
| 70 } | |
| 71 *end = &chars[size]; | |
| 72 | |
| 73 if (!convert_to_utf16) return chars; | |
| 74 | |
| 75 // Length of new_chars is not strictly accurate, but should be enough. | |
| 76 uint16_t* new_chars = new uint16_t[size]; | |
| 77 { | |
| 78 Utf8ToUtf16CharacterStream stream(chars, size); | |
| 79 uint16_t* cursor = new_chars; | |
| 80 // uc32 c; | |
| 81 // The 32-bit char type is probably only so that we can have -1 as a return | |
| 82 // value. If the char is not -1, it should fit into 16 bits. | |
| 83 CHECK(false); | |
| 84 // while ((c = stream.Advance()) != -1) { | |
| 85 // *cursor++ = c; | |
| 86 // } | |
| 87 *end = reinterpret_cast<byte*>(cursor); | |
| 88 } | |
| 89 delete[] chars; | |
| 90 return reinterpret_cast<byte*>(new_chars); | |
| 91 } | |
| 92 | |
| 93 | 50 |
// Input encodings that can be selected through LexerShellSettings::encoding.
enum Encoding {
  LATIN1,
  UTF8,
  UTF16,
  // Read as UTF8, convert to UTF16 before giving it to the lexers.
  UTF8TO16
};
| 100 | 57 |
| 101 | 58 |
| 102 struct LexerShellSettings { | 59 struct LexerShellSettings { |
| (...skipping 10 matching lines...) Expand all Loading... |
| 113 print_tokens(false), | 70 print_tokens(false), |
| 114 break_after_illegal(false), | 71 break_after_illegal(false), |
| 115 eos_test(false), | 72 eos_test(false), |
| 116 repeat(1), | 73 repeat(1), |
| 117 harmony_numeric_literals(false), | 74 harmony_numeric_literals(false), |
| 118 harmony_modules(false), | 75 harmony_modules(false), |
| 119 harmony_scoping(false) {} | 76 harmony_scoping(false) {} |
| 120 }; | 77 }; |
| 121 | 78 |
| 122 | 79 |
| 80 static uint16_t* ReadFile(const char* name, const uint8_t** end, |
| 81 const LexerShellSettings& settings) { |
| 82 FILE* file = fopen(name, "rb"); |
| 83 CHECK(file != NULL); |
| 84 |
| 85 fseek(file, 0, SEEK_END); |
| 86 unsigned file_size = ftell(file); |
| 87 rewind(file); |
| 88 |
| 89 uint16_t* two_byte_data = new uint16_t[file_size / 2 + file_size % 2]; |
| 90 |
| 91 uint8_t* char_data = reinterpret_cast<uint8_t*>(two_byte_data); |
| 92 for (unsigned i = 0; i < file_size;) { |
| 93 i += fread(&char_data[i], 1, file_size - i, file); |
| 94 } |
| 95 fclose(file); |
| 96 |
| 97 if (settings.encoding == UTF8TO16) { |
| 98 const uint32_t kMaxUtf16Character = 0xffff; |
| 99 // Get utf8 length. |
| 100 unsigned utf16_chars = 0; |
| 101 { |
| 102 unsigned position = 0; |
| 103 while (position < file_size) { |
| 104 uint32_t c = char_data[position]; |
| 105 if (c <= unibrow::Utf8::kMaxOneByteChar) { |
| 106 position++; |
| 107 } else { |
| 108 c = unibrow::Utf8::CalculateValue(char_data + position, |
| 109 file_size - position, |
| 110 &position); |
| 111 } |
| 112 if (c > kMaxUtf16Character) { |
| 113 utf16_chars += 2; |
| 114 } else { |
| 115 utf16_chars += 1; |
| 116 } |
| 117 } |
| 118 } |
| 119 // Write new buffer out. |
| 120 uint16_t* data = new uint16_t[utf16_chars]; |
| 121 unsigned position = 0; |
| 122 unsigned i = 0; |
| 123 while (position < file_size) { |
| 124 uint32_t c = char_data[position]; |
| 125 if (c <= unibrow::Utf8::kMaxOneByteChar) { |
| 126 position++; |
| 127 } else { |
| 128 c = unibrow::Utf8::CalculateValue(char_data + position, |
| 129 file_size - position, |
| 130 &position); |
| 131 } |
| 132 if (c > kMaxUtf16Character) { |
| 133 data[i++] = unibrow::Utf16::LeadSurrogate(c); |
| 134 data[i++] = unibrow::Utf16::TrailSurrogate(c); |
| 135 } else { |
| 136 data[i++] = static_cast<uc16>(c); |
| 137 } |
| 138 } |
| 139 // Swap buffers. |
| 140 delete two_byte_data; |
| 141 file_size = utf16_chars * 2; |
| 142 two_byte_data = data; |
| 143 char_data = reinterpret_cast<uint8_t*>(two_byte_data); |
| 144 } |
| 145 |
| 146 // Duplicate buffer if necessary. |
| 147 if (settings.repeat > 1) { |
| 148 unsigned size = file_size * settings.repeat; |
| 149 uint16_t* data = new uint16_t[size / 2 + size % 2]; |
| 150 char_data = reinterpret_cast<uint8_t*>(two_byte_data); |
| 151 for (int i = 0; i < settings.repeat; i++) { |
| 152 memcpy(&char_data[i * file_size], two_byte_data, file_size); |
| 153 } |
| 154 delete two_byte_data; |
| 155 file_size = size; |
| 156 two_byte_data = data; |
| 157 } |
| 158 |
| 159 *end = &char_data[file_size]; |
| 160 return two_byte_data; |
| 161 } |
| 162 |
| 163 |
| 123 struct TokenWithLocation { | 164 struct TokenWithLocation { |
| 124 Token::Value value; | 165 Token::Value value; |
| 125 size_t beg; | 166 size_t beg; |
| 126 size_t end; | 167 size_t end; |
| 127 std::vector<int> literal; | 168 std::vector<int> literal; |
| 128 bool is_ascii; | 169 bool is_ascii; |
| 129 // The location of the latest octal position when the token was seen. | 170 // The location of the latest octal position when the token was seen. |
| 130 int octal_beg; | 171 int octal_beg; |
| 131 int octal_end; | 172 int octal_end; |
| 132 TokenWithLocation() : | 173 TokenWithLocation() : |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 186 if (scanner->is_literal_ascii()) { | 227 if (scanner->is_literal_ascii()) { |
| 187 result.literal = ToStdVector(scanner->literal_ascii_string()); | 228 result.literal = ToStdVector(scanner->literal_ascii_string()); |
| 188 } else { | 229 } else { |
| 189 result.literal = ToStdVector(scanner->literal_utf16_string()); | 230 result.literal = ToStdVector(scanner->literal_utf16_string()); |
| 190 } | 231 } |
| 191 } | 232 } |
| 192 return result; | 233 return result; |
| 193 } | 234 } |
| 194 | 235 |
| 195 | 236 |
| 196 static TimeDelta RunLexer(const byte* source, | 237 static TimeDelta RunLexer(const uint16_t* source, |
| 197 const byte* source_end, | 238 const uint8_t* source_end, |
| 198 Isolate* isolate, | 239 Isolate* isolate, |
| 199 std::vector<TokenWithLocation>* tokens, | 240 std::vector<TokenWithLocation>* tokens, |
| 200 const LexerShellSettings& settings) { | 241 const LexerShellSettings& settings) { |
| 201 SmartPointer<Utf16CharacterStream> stream; | 242 SmartPointer<Utf16CharacterStream> stream; |
| 243 const uint8_t* one_byte_source = reinterpret_cast<const uint8_t*>(source); |
| 244 int bytes = source_end - one_byte_source; |
| 202 switch (settings.encoding) { | 245 switch (settings.encoding) { |
| 203 case UTF8: | 246 case UTF8: |
| 247 stream.Reset(new Utf8ToUtf16CharacterStream(one_byte_source, bytes)); |
| 248 break; |
| 204 case UTF8TO16: | 249 case UTF8TO16: |
| 205 stream.Reset(new Utf8ToUtf16CharacterStream(source, source_end - source)); | |
| 206 break; | |
| 207 case UTF16: { | 250 case UTF16: { |
| 251 CHECK_EQ(0, bytes % 2); |
| 208 Handle<String> result = isolate->factory()->NewStringFromTwoByte( | 252 Handle<String> result = isolate->factory()->NewStringFromTwoByte( |
| 209 Vector<const uint16_t>( | 253 Vector<const uint16_t>(source, bytes / 2)); |
| 210 reinterpret_cast<const uint16_t*>(source), | |
| 211 (source_end - source) / 2)); | |
| 212 stream.Reset( | 254 stream.Reset( |
| 213 new GenericStringUtf16CharacterStream(result, 0, result->length())); | 255 new GenericStringUtf16CharacterStream(result, 0, result->length())); |
| 214 break; | 256 break; |
| 215 } | 257 } |
| 216 case LATIN1: { | 258 case LATIN1: { |
| 217 Handle<String> result = isolate->factory()->NewStringFromOneByte( | 259 Handle<String> result = isolate->factory()->NewStringFromOneByte( |
| 218 Vector<const uint8_t>(source, source_end - source)); | 260 Vector<const uint8_t>(one_byte_source, bytes)); |
| 219 stream.Reset( | 261 stream.Reset( |
| 220 new GenericStringUtf16CharacterStream(result, 0, result->length())); | 262 new GenericStringUtf16CharacterStream(result, 0, result->length())); |
| 221 break; | 263 break; |
| 222 } | 264 } |
| 223 } | 265 } |
| 224 Scanner scanner(isolate->unicode_cache()); | 266 Scanner scanner(isolate->unicode_cache()); |
| 225 scanner.SetHarmonyNumericLiterals(settings.harmony_numeric_literals); | 267 scanner.SetHarmonyNumericLiterals(settings.harmony_numeric_literals); |
| 226 scanner.SetHarmonyModules(settings.harmony_modules); | 268 scanner.SetHarmonyModules(settings.harmony_modules); |
| 227 scanner.SetHarmonyScoping(settings.harmony_scoping); | 269 scanner.SetHarmonyScoping(settings.harmony_scoping); |
| 228 ElapsedTimer timer; | 270 ElapsedTimer timer; |
| (...skipping 22 matching lines...) Expand all Loading... |
| 251 const LexerShellSettings& settings, | 293 const LexerShellSettings& settings, |
| 252 int truncate_by, | 294 int truncate_by, |
| 253 bool* can_truncate) { | 295 bool* can_truncate) { |
| 254 if (settings.print_tokens) { | 296 if (settings.print_tokens) { |
| 255 printf("Processing file %s, truncating by %d bytes\n", fname, truncate_by); | 297 printf("Processing file %s, truncating by %d bytes\n", fname, truncate_by); |
| 256 } | 298 } |
| 257 HandleScope handle_scope(isolate); | 299 HandleScope handle_scope(isolate); |
| 258 std::vector<TokenWithLocation> tokens; | 300 std::vector<TokenWithLocation> tokens; |
| 259 TimeDelta time; | 301 TimeDelta time; |
| 260 { | 302 { |
| 261 const byte* buffer_end = 0; | 303 const uint8_t* buffer_end = 0; |
| 262 const byte* buffer = ReadFile(fname, &buffer_end, settings.repeat, false); | 304 const uint16_t* buffer = ReadFile(fname, &buffer_end, settings); |
| 263 if (truncate_by > buffer_end - buffer) { | 305 if (truncate_by > buffer_end - reinterpret_cast<const uint8_t*>(buffer)) { |
| 264 *can_truncate = false; | 306 *can_truncate = false; |
| 265 } else { | 307 } else { |
| 266 buffer_end -= truncate_by; | 308 buffer_end -= truncate_by; |
| 267 time = RunLexer(buffer, buffer_end, isolate, &tokens, settings); | 309 time = RunLexer(buffer, buffer_end, isolate, &tokens, settings); |
| 268 } | 310 } |
| 269 delete[] buffer; | 311 delete[] buffer; |
| 270 } | 312 } |
| 271 if (settings.print_tokens) { | 313 if (settings.print_tokens) { |
| 272 printf("No of tokens:\t%d\n", static_cast<int>(tokens.size())); | 314 printf("No of tokens:\t%d\n", static_cast<int>(tokens.size())); |
| 273 for (size_t i = 0; i < tokens.size(); ++i) { | 315 for (size_t i = 0; i < tokens.size(); ++i) { |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 341 &can_truncate); | 383 &can_truncate); |
| 342 total_time += t.InMillisecondsF(); | 384 total_time += t.InMillisecondsF(); |
| 343 ++truncate_by; | 385 ++truncate_by; |
| 344 } while (can_truncate); | 386 } while (can_truncate); |
| 345 } | 387 } |
| 346 printf("RunTime: %.f ms\n", total_time); | 388 printf("RunTime: %.f ms\n", total_time); |
| 347 } | 389 } |
| 348 v8::V8::Dispose(); | 390 v8::V8::Dispose(); |
| 349 return 0; | 391 return 0; |
| 350 } | 392 } |
| OLD | NEW |