OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/factory.h" // for i::Factory::NewExternalStringFrom*Byte | 5 #include "src/factory.h" // for i::Factory::NewExternalStringFrom*Byte |
6 #include "src/objects-inl.h" | 6 #include "src/objects-inl.h" |
7 #include "src/parsing/scanner-character-streams.h" | 7 #include "src/parsing/scanner-character-streams.h" |
8 #include "src/parsing/scanner.h" | 8 #include "src/parsing/scanner.h" |
9 #include "src/type-feedback-vector-inl.h" // for include "src/factory.h" | 9 #include "src/type-feedback-vector-inl.h" // for include "src/factory.h" |
10 #include "test/cctest/cctest.h" | 10 #include "test/cctest/cctest.h" |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
93 const uint16_t unicode_ucs2[] = {97, 98, 99, 228, 10784, 55357, | 93 const uint16_t unicode_ucs2[] = {97, 98, 99, 228, 10784, 55357, |
94 56489, 100, 101, 102, 0}; | 94 56489, 100, 101, 102, 0}; |
95 | 95 |
96 } // anonymous namespace | 96 } // anonymous namespace |
97 | 97 |
// Smoke test for a UTF-8 streamed source containing only ASCII text.
// Feeds the chunks "abc", "def", "ghi" and a final empty string through
// ScannerStream::For and advances until kEndOfInput is returned. No character
// values are checked; the test only requires that reading runs to completion.
// NOTE(review): the trailing "" presumably marks end-of-data for ChunkSource,
// which is defined outside this excerpt -- confirm.
// Side-by-side view: the left column is OLD, the right column is NEW. The only
// difference in this test is the extra trailing `nullptr` argument passed to
// ScannerStream::For; what that parameter means is not visible in this excerpt.
98 TEST(Utf8StreamAsciiOnly) { | 98 TEST(Utf8StreamAsciiOnly) { |
99 const char* chunks[] = {"abc", "def", "ghi", ""}; | 99 const char* chunks[] = {"abc", "def", "ghi", ""}; |
100 ChunkSource chunk_source(chunks); | 100 ChunkSource chunk_source(chunks); |
101 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( | 101 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( |
102 v8::internal::ScannerStream::For( | 102 v8::internal::ScannerStream::For( |
103 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | 103 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr)); |
104 | 104 |
105 // Read the data without dying. | 105 // Read the data without dying. |
106 v8::internal::uc32 c; | 106 v8::internal::uc32 c; |
// do/while: at least one Advance() call is made before the end check.
107 do { | 107 do { |
108 c = stream->Advance(); | 108 c = stream->Advance(); |
109 } while (c != v8::internal::Utf16CharacterStream::kEndOfInput); | 109 } while (c != v8::internal::Utf16CharacterStream::kEndOfInput); |
110 } | 110 } |
111 | 111 |
// Checks that a UTF-8 stream whose data starts with a byte order mark yields
// exactly the code units in unicode_ucs2 -- i.e. the BOM bytes EF BB BF do not
// show up in the output -- followed by kEndOfInput, and that Seek() to
// positions 0 and 5 makes the next Advance() return unicode_ucs2[0] and
// unicode_ucs2[5] respectively.
// Side-by-side view: left column is OLD, right column is NEW; the only change
// is the extra trailing `nullptr` argument to ScannerStream::For.
112 TEST(Utf8StreamBOM) { | 112 TEST(Utf8StreamBOM) { |
113 // Construct test string w/ UTF-8 BOM (byte order mark) | 113 // Construct test string w/ UTF-8 BOM (byte order mark) |
// `data` is sized for the 3 BOM bytes plus all of unicode_utf8; the
// initializer places the BOM in the first three bytes and the strncpy below
// fills in the payload right after it.
114 char data[3 + arraysize(unicode_utf8)] = {"\xef\xbb\xbf"}; | 114 char data[3 + arraysize(unicode_utf8)] = {"\xef\xbb\xbf"}; |
115 strncpy(data + 3, unicode_utf8, arraysize(unicode_utf8)); | 115 strncpy(data + 3, unicode_utf8, arraysize(unicode_utf8)); |
116 | 116 |
117 const char* chunks[] = {data, "\0"}; | 117 const char* chunks[] = {data, "\0"}; |
118 ChunkSource chunk_source(chunks); | 118 ChunkSource chunk_source(chunks); |
119 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( | 119 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( |
120 v8::internal::ScannerStream::For( | 120 v8::internal::ScannerStream::For( |
121 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | 121 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr)); |
122 | 122 |
123 // Read the data without tripping over the BOM. | 123 // Read the data without tripping over the BOM. |
// The loop stops at the 0 entry that terminates unicode_ucs2.
124 for (size_t i = 0; unicode_ucs2[i]; i++) { | 124 for (size_t i = 0; unicode_ucs2[i]; i++) { |
125 CHECK_EQ(unicode_ucs2[i], stream->Advance()); | 125 CHECK_EQ(unicode_ucs2[i], stream->Advance()); |
126 } | 126 } |
127 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, stream->Advance()); | 127 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, stream->Advance()); |
128 | 128 |
129 // Make sure seek works. | 129 // Make sure seek works. |
130 stream->Seek(0); | 130 stream->Seek(0); |
131 CHECK_EQ(unicode_ucs2[0], stream->Advance()); | 131 CHECK_EQ(unicode_ucs2[0], stream->Advance()); |
132 | 132 |
// Index 5 of unicode_ucs2 is 55357, the first code unit after 10784 in the
// expected array, so this seek targets a position in the middle of the data.
133 stream->Seek(5); | 133 stream->Seek(5); |
134 CHECK_EQ(unicode_ucs2[5], stream->Advance()); | 134 CHECK_EQ(unicode_ucs2[5], stream->Advance()); |
135 } | 135 } |
136 | 136 |
// Checks that the UTF-8 BOM (EF BB BF) is still skipped when it is spread over
// several chunks. Two layouts are exercised, each in its own scope with a
// fresh ChunkSource and stream:
//   1. {EF BB} | {BF + payload}
//   2. {EF} | {BB} | {BF + payload}
// In both cases the stream must produce exactly the unicode_ucs2 code units.
// Unlike Utf8StreamBOM, neither scenario checks kEndOfInput afterwards.
// Side-by-side view: left column is OLD, right column is NEW; the only change
// is the extra trailing `nullptr` argument to ScannerStream::For.
137 TEST(Utf8SplitBOM) { | 137 TEST(Utf8SplitBOM) { |
138 // Construct chunks with a BOM split into two chunks. | 138 // Construct chunks with a BOM split into two chunks. |
139 char partial_bom[] = "\xef\xbb"; | 139 char partial_bom[] = "\xef\xbb"; |
// `data` holds the last BOM byte followed by the payload copied from
// unicode_utf8; it is reused as the final data chunk in both scenarios.
140 char data[1 + arraysize(unicode_utf8)] = {"\xbf"}; | 140 char data[1 + arraysize(unicode_utf8)] = {"\xbf"}; |
141 strncpy(data + 1, unicode_utf8, arraysize(unicode_utf8)); | 141 strncpy(data + 1, unicode_utf8, arraysize(unicode_utf8)); |
142 | 142 |
143 { | 143 { |
144 const char* chunks[] = {partial_bom, data, "\0"}; | 144 const char* chunks[] = {partial_bom, data, "\0"}; |
145 ChunkSource chunk_source(chunks); | 145 ChunkSource chunk_source(chunks); |
146 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( | 146 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( |
147 v8::internal::ScannerStream::For( | 147 v8::internal::ScannerStream::For( |
148 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | 148 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr)); |
149 | 149 |
150 // Read the data without tripping over the BOM. | 150 // Read the data without tripping over the BOM. |
151 for (size_t i = 0; unicode_ucs2[i]; i++) { | 151 for (size_t i = 0; unicode_ucs2[i]; i++) { |
152 CHECK_EQ(unicode_ucs2[i], stream->Advance()); | 152 CHECK_EQ(unicode_ucs2[i], stream->Advance()); |
153 } | 153 } |
154 } | 154 } |
155 | 155 |
156 // And now with single-byte BOM chunks. | 156 // And now with single-byte BOM chunks. |
157 char bom_byte_1[] = "\xef"; | 157 char bom_byte_1[] = "\xef"; |
158 char bom_byte_2[] = "\xbb"; | 158 char bom_byte_2[] = "\xbb"; |
159 { | 159 { |
// The third BOM byte (BF) is the first byte of `data`.
160 const char* chunks[] = {bom_byte_1, bom_byte_2, data, "\0"}; | 160 const char* chunks[] = {bom_byte_1, bom_byte_2, data, "\0"}; |
161 ChunkSource chunk_source(chunks); | 161 ChunkSource chunk_source(chunks); |
162 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( | 162 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( |
163 v8::internal::ScannerStream::For( | 163 v8::internal::ScannerStream::For( |
164 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | 164 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr)); |
165 | 165 |
166 // Read the data without tripping over the BOM. | 166 // Read the data without tripping over the BOM. |
167 for (size_t i = 0; unicode_ucs2[i]; i++) { | 167 for (size_t i = 0; unicode_ucs2[i]; i++) { |
168 CHECK_EQ(unicode_ucs2[i], stream->Advance()); | 168 CHECK_EQ(unicode_ucs2[i], stream->Advance()); |
169 } | 169 } |
170 } | 170 } |
171 } | 171 } |
172 | 172 |
// Checks UTF-8 decoding when the input is cut into two chunks at every byte
// offset i in [1, len). For each split the stream must yield exactly the
// unicode_ucs2 code units followed by kEndOfInput.
//
// Buffer layout built on each iteration (len == strlen(unicode_utf8)):
//   buffer[0 .. i-1]      first i bytes of unicode_utf8
//   buffer[i]             '\0'  (ends chunk 0)
//   buffer[i+1 .. len]    remaining len - i bytes
//   buffer[len+1]         '\0'  (ends chunk 1)
//   buffer[len+2]         '\0'  (chunk 2: empty string)
// This needs len + 3 bytes; the buffer is declared with
// arraysize(unicode_utf8) + 3.
// Side-by-side view: left column is OLD, right column is NEW; the only change
// is the extra trailing `nullptr` argument to ScannerStream::For.
173 TEST(Utf8ChunkBoundaries) { | 173 TEST(Utf8ChunkBoundaries) { |
174 // Test utf-8 parsing at chunk boundaries. | 174 // Test utf-8 parsing at chunk boundaries. |
175 | 175 |
176 // Split the test string at each byte and pass it to the stream. This way, | 176 // Split the test string at each byte and pass it to the stream. This way, |
177 // we'll have a split at each possible boundary. | 177 // we'll have a split at each possible boundary. |
178 size_t len = strlen(unicode_utf8); | 178 size_t len = strlen(unicode_utf8); |
179 char buffer[arraysize(unicode_utf8) + 3]; | 179 char buffer[arraysize(unicode_utf8) + 3]; |
180 for (size_t i = 1; i < len; i++) { | 180 for (size_t i = 1; i < len; i++) { |
181 // Copy source string into buffer, splitting it at i. | 181 // Copy source string into buffer, splitting it at i. |
182 // Then add three chunks, 0..i-1, i..strlen-1, empty. | 182 // Then add three chunks, 0..i-1, i..strlen-1, empty. |
// Both strncpy calls copy fewer bytes than remain in the source, so neither
// writes a terminator; the explicit '\0' stores below provide them.
183 strncpy(buffer, unicode_utf8, i); | 183 strncpy(buffer, unicode_utf8, i); |
184 strncpy(buffer + i + 1, unicode_utf8 + i, len - i); | 184 strncpy(buffer + i + 1, unicode_utf8 + i, len - i); |
185 buffer[i] = '\0'; | 185 buffer[i] = '\0'; |
186 buffer[len + 1] = '\0'; | 186 buffer[len + 1] = '\0'; |
187 buffer[len + 2] = '\0'; | 187 buffer[len + 2] = '\0'; |
188 const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2}; | 188 const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2}; |
189 | 189 |
190 ChunkSource chunk_source(chunks); | 190 ChunkSource chunk_source(chunks); |
191 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( | 191 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( |
192 v8::internal::ScannerStream::For( | 192 v8::internal::ScannerStream::For( |
193 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | 193 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr)); |
194 | 194 |
// NOTE(review): this loop declares a new `i` that shadows the split index `i`
// of the enclosing loop. It is harmless here because the outer `i` is not used
// again inside the loop body after this point, but it trips -Wshadow and is
// easy to misread; Utf8SingleByteChunks uses `j` for the same inner loop.
195 for (size_t i = 0; unicode_ucs2[i]; i++) { | 195 for (size_t i = 0; unicode_ucs2[i]; i++) { |
196 CHECK_EQ(unicode_ucs2[i], stream->Advance()); | 196 CHECK_EQ(unicode_ucs2[i], stream->Advance()); |
197 } | 197 } |
198 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, | 198 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, |
199 stream->Advance()); | 199 stream->Advance()); |
200 } | 200 } |
201 } | 201 } |
202 | 202 |
// Checks UTF-8 decoding when one byte of the input is isolated in a chunk of
// its own. For every i in [1, len - 1) the input is delivered as four chunks:
// the bytes before i, the single byte at i, the bytes after i, and an empty
// string. The stream must yield exactly the unicode_ucs2 code units followed
// by kEndOfInput.
//
// Buffer layout built on each iteration (len == strlen(unicode_utf8)):
//   buffer[0 .. i-1]       first i bytes
//   buffer[i]              '\0'
//   buffer[i+1]            byte i on its own
//   buffer[i+2]            '\0'
//   buffer[i+3 .. len+1]   remaining len - i - 1 bytes
//   buffer[len+2]          '\0'
//   buffer[len+3]          '\0'  (final, empty chunk)
// This needs len + 4 bytes; the buffer is declared with
// arraysize(unicode_utf8) + 4.
// Side-by-side view: left column is OLD, right column is NEW; the only change
// is the extra trailing `nullptr` argument to ScannerStream::For.
203 TEST(Utf8SingleByteChunks) { | 203 TEST(Utf8SingleByteChunks) { |
204 // Have each byte as a single-byte chunk. | 204 // Have each byte as a single-byte chunk. |
205 size_t len = strlen(unicode_utf8); | 205 size_t len = strlen(unicode_utf8); |
206 char buffer[arraysize(unicode_utf8) + 4]; | 206 char buffer[arraysize(unicode_utf8) + 4]; |
// The range excludes i == 0 and i == len - 1, so the first and the last byte
// of the input are never the isolated one.
207 for (size_t i = 1; i < len - 1; i++) { | 207 for (size_t i = 1; i < len - 1; i++) { |
208 // Copy source string into buffer, make a single-byte chunk at i. | 208 // Copy source string into buffer, make a single-byte chunk at i. |
209 strncpy(buffer, unicode_utf8, i); | 209 strncpy(buffer, unicode_utf8, i); |
210 strncpy(buffer + i + 3, unicode_utf8 + i + 1, len - i - 1); | 210 strncpy(buffer + i + 3, unicode_utf8 + i + 1, len - i - 1); |
211 buffer[i] = '\0'; | 211 buffer[i] = '\0'; |
212 buffer[i + 1] = unicode_utf8[i]; | 212 buffer[i + 1] = unicode_utf8[i]; |
213 buffer[i + 2] = '\0'; | 213 buffer[i + 2] = '\0'; |
214 buffer[len + 2] = '\0'; | 214 buffer[len + 2] = '\0'; |
215 buffer[len + 3] = '\0'; | 215 buffer[len + 3] = '\0'; |
216 const char* chunks[] = {buffer, buffer + i + 1, buffer + i + 3, | 216 const char* chunks[] = {buffer, buffer + i + 1, buffer + i + 3, |
217 buffer + len + 3}; | 217 buffer + len + 3}; |
218 | 218 |
219 ChunkSource chunk_source(chunks); | 219 ChunkSource chunk_source(chunks); |
220 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( | 220 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( |
221 v8::internal::ScannerStream::For( | 221 v8::internal::ScannerStream::For( |
222 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | 222 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr)); |
223 | 223 |
224 for (size_t j = 0; unicode_ucs2[j]; j++) { | 224 for (size_t j = 0; unicode_ucs2[j]; j++) { |
225 CHECK_EQ(unicode_ucs2[j], stream->Advance()); | 225 CHECK_EQ(unicode_ucs2[j], stream->Advance()); |
226 } | 226 } |
227 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, | 227 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, |
228 stream->Advance()); | 228 stream->Advance()); |
229 } | 229 } |
230 } | 230 } |
231 | 231 |
// CHECK_EQU(v1, v2): CHECK_EQ with both operands first converted to int via
// static_cast, so values of differing integer types are compared as ints.
// Identical in the OLD (left) and NEW (right) columns.
232 #define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2)) | 232 #define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2)) |
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
344 // 1-byte streaming stream, single + many chunks. | 344 // 1-byte streaming stream, single + many chunks. |
345 { | 345 { |
346 const uint8_t* data = | 346 const uint8_t* data = |
347 reinterpret_cast<const uint8_t*>(one_byte_vector.begin()); | 347 reinterpret_cast<const uint8_t*>(one_byte_vector.begin()); |
348 const uint8_t* data_end = | 348 const uint8_t* data_end = |
349 reinterpret_cast<const uint8_t*>(one_byte_vector.end()); | 349 reinterpret_cast<const uint8_t*>(one_byte_vector.end()); |
350 | 350 |
351 ChunkSource single_chunk(data, data_end - data, false); | 351 ChunkSource single_chunk(data, data_end - data, false); |
352 std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream( | 352 std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream( |
353 i::ScannerStream::For(&single_chunk, | 353 i::ScannerStream::For(&single_chunk, |
354 v8::ScriptCompiler::StreamedSource::ONE_BYTE)); | 354 v8::ScriptCompiler::StreamedSource::ONE_BYTE, |
| 355 nullptr)); |
355 TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(), | 356 TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(), |
356 length, start, end); | 357 length, start, end); |
357 | 358 |
358 ChunkSource many_chunks(data, data_end - data, true); | 359 ChunkSource many_chunks(data, data_end - data, true); |
359 one_byte_streaming_stream.reset(i::ScannerStream::For( | 360 one_byte_streaming_stream.reset(i::ScannerStream::For( |
360 &many_chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE)); | 361 &many_chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE, nullptr)); |
361 TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(), | 362 TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(), |
362 length, start, end); | 363 length, start, end); |
363 } | 364 } |
364 | 365 |
365 // UTF-8 streaming stream, single + many chunks. | 366 // UTF-8 streaming stream, single + many chunks. |
366 { | 367 { |
367 const uint8_t* data = | 368 const uint8_t* data = |
368 reinterpret_cast<const uint8_t*>(one_byte_vector.begin()); | 369 reinterpret_cast<const uint8_t*>(one_byte_vector.begin()); |
369 const uint8_t* data_end = | 370 const uint8_t* data_end = |
370 reinterpret_cast<const uint8_t*>(one_byte_vector.end()); | 371 reinterpret_cast<const uint8_t*>(one_byte_vector.end()); |
371 ChunkSource chunks(data, data_end - data, false); | 372 ChunkSource chunks(data, data_end - data, false); |
372 std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream( | 373 std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream( |
373 i::ScannerStream::For(&chunks, | 374 i::ScannerStream::For(&chunks, v8::ScriptCompiler::StreamedSource::UTF8, |
374 v8::ScriptCompiler::StreamedSource::UTF8)); | 375 nullptr)); |
375 TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length, | 376 TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length, |
376 start, end); | 377 start, end); |
377 | 378 |
378 ChunkSource many_chunks(data, data_end - data, true); | 379 ChunkSource many_chunks(data, data_end - data, true); |
379 utf8_streaming_stream.reset(i::ScannerStream::For( | 380 utf8_streaming_stream.reset(i::ScannerStream::For( |
380 &many_chunks, v8::ScriptCompiler::StreamedSource::UTF8)); | 381 &many_chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr)); |
381 TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length, | 382 TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length, |
382 start, end); | 383 start, end); |
383 } | 384 } |
384 | 385 |
385 // 2-byte streaming stream, single + many chunks. | 386 // 2-byte streaming stream, single + many chunks. |
386 { | 387 { |
387 const uint8_t* data = | 388 const uint8_t* data = |
388 reinterpret_cast<const uint8_t*>(two_byte_vector.begin()); | 389 reinterpret_cast<const uint8_t*>(two_byte_vector.begin()); |
389 const uint8_t* data_end = | 390 const uint8_t* data_end = |
390 reinterpret_cast<const uint8_t*>(two_byte_vector.end()); | 391 reinterpret_cast<const uint8_t*>(two_byte_vector.end()); |
391 ChunkSource chunks(data, data_end - data, false); | 392 ChunkSource chunks(data, data_end - data, false); |
392 std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream( | 393 std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream( |
393 i::ScannerStream::For(&chunks, | 394 i::ScannerStream::For( |
394 v8::ScriptCompiler::StreamedSource::TWO_BYTE)); | 395 &chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr)); |
395 TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(), | 396 TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(), |
396 length, start, end); | 397 length, start, end); |
397 | 398 |
398 ChunkSource many_chunks(data, data_end - data, true); | 399 ChunkSource many_chunks(data, data_end - data, true); |
399 two_byte_streaming_stream.reset(i::ScannerStream::For( | 400 two_byte_streaming_stream.reset(i::ScannerStream::For( |
400 &many_chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE)); | 401 &many_chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr)); |
401 TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(), | 402 TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(), |
402 length, start, end); | 403 length, start, end); |
403 } | 404 } |
404 } | 405 } |
405 | 406 |
406 TEST(CharacterStreams) { | 407 TEST(CharacterStreams) { |
407 v8::Isolate* isolate = CcTest::isolate(); | 408 v8::Isolate* isolate = CcTest::isolate(); |
408 v8::HandleScope handles(isolate); | 409 v8::HandleScope handles(isolate); |
409 v8::Local<v8::Context> context = v8::Context::New(isolate); | 410 v8::Local<v8::Context> context = v8::Context::New(isolate); |
410 v8::Context::Scope context_scope(context); | 411 v8::Context::Scope context_scope(context); |
(...skipping 21 matching lines...) Expand all Loading... |
432 const uint16_t unicode[] = {65, 65533, 97, 100}; | 433 const uint16_t unicode[] = {65, 65533, 97, 100}; |
433 | 434 |
434 // Run the test for all sub-strings 0..N of bytes, to make sure we hit the | 435 // Run the test for all sub-strings 0..N of bytes, to make sure we hit the |
435 // error condition in and at chunk boundaries. | 436 // error condition in and at chunk boundaries. |
436 for (size_t len = 0; len < arraysize(bytes); len++) { | 437 for (size_t len = 0; len < arraysize(bytes); len++) { |
437 // Read len bytes from bytes, and compare against the expected unicode | 438 // Read len bytes from bytes, and compare against the expected unicode |
438 // characters. Expect kBadChar ( == Unicode replacement char == code point | 439 // characters. Expect kBadChar ( == Unicode replacement char == code point |
439 // 65533) instead of the incorrectly coded Latin1 char. | 440 // 65533) instead of the incorrectly coded Latin1 char. |
440 ChunkSource chunks(bytes, len, false); | 441 ChunkSource chunks(bytes, len, false); |
441 std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For( | 442 std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For( |
442 &chunks, v8::ScriptCompiler::StreamedSource::UTF8)); | 443 &chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr)); |
443 for (size_t i = 0; i < len; i++) { | 444 for (size_t i = 0; i < len; i++) { |
444 CHECK_EQ(unicode[i], stream->Advance()); | 445 CHECK_EQ(unicode[i], stream->Advance()); |
445 } | 446 } |
446 CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance()); | 447 CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance()); |
447 } | 448 } |
448 } | 449 } |
OLD | NEW |