Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/factory.h" // for i::Factory::NewExternalStringFrom*Byte | 5 #include "src/factory.h" // for i::Factory::NewExternalStringFrom*Byte |
| 6 #include "src/objects-inl.h" | 6 #include "src/objects-inl.h" |
| 7 #include "src/parsing/scanner-character-streams.h" | 7 #include "src/parsing/scanner-character-streams.h" |
| 8 #include "src/parsing/scanner.h" | 8 #include "src/parsing/scanner.h" |
| 9 #include "src/type-feedback-vector-inl.h" // for include "src/factory.h" | 9 #include "src/type-feedback-vector-inl.h" // for include "src/factory.h" |
| 10 #include "test/cctest/cctest.h" | 10 #include "test/cctest/cctest.h" |
| (...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 102 v8::internal::ScannerStream::For( | 102 v8::internal::ScannerStream::For( |
| 103 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | 103 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); |
| 104 | 104 |
| 105 // Read the data without dying. | 105 // Read the data without dying. |
| 106 v8::internal::uc32 c; | 106 v8::internal::uc32 c; |
| 107 do { | 107 do { |
| 108 c = stream->Advance(); | 108 c = stream->Advance(); |
| 109 } while (c != v8::internal::Utf16CharacterStream::kEndOfInput); | 109 } while (c != v8::internal::Utf16CharacterStream::kEndOfInput); |
| 110 } | 110 } |
| 111 | 111 |
| 112 TEST(Utf8StreamBOM) { | |
| 113 // Build the test input: the 3-byte UTF-8 BOM (byte order mark, EF BB BF) followed by a copy of unicode_utf8. | |
| 114 char data[3 + arraysize(unicode_utf8)] = {"\xef\xbb\xbf"}; | |
| 115 strncpy(data + 3, unicode_utf8, arraysize(unicode_utf8)); | |
| 116 | |
| 117 const char* chunks[] = {data, "\0"}; | |
| 118 ChunkSource chunk_source(chunks); | |
| 119 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( | |
| 120 v8::internal::ScannerStream::For( | |
| 121 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | |
| 122 | |
| 123 // Every character read must match unicode_ucs2 (so the BOM must not show up in the output), and the stream must then report end of input. | |
| 124 for (size_t i = 0; unicode_ucs2[i]; i++) { | |
| 125 CHECK_EQ(unicode_ucs2[i], stream->Advance()); | |
| 126 } | |
| 127 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, stream->Advance()); | |
| 128 | |
| 129 // Make sure seeking works with a BOM present: position N must yield unicode_ucs2[N], i.e. position 0 is the first character after the BOM. | |
| 130 stream->Seek(0); | |
| 131 CHECK_EQ(unicode_ucs2[0], stream->Advance()); | |
| 132 | |
| 133 stream->Seek(5); | |
| 134 CHECK_EQ(unicode_ucs2[5], stream->Advance()); | |
| 135 } | |
| 136 | |
| 137 TEST(Utf8SplitBOM) { | |
| 138 // Construct the input so that the 3-byte BOM straddles a chunk boundary: the first chunk holds EF BB, the second starts with BF followed by a copy of unicode_utf8. | |
| 139 char partial_bom[] = "\xef\xbb"; | |
| 140 char data[1 + arraysize(unicode_utf8)] = {"\xbf"}; | |

marja
2016/09/20 18:52:30
... how paranoid do we want to be, should there al
vogelheim
2016/09/21 07:50:54
Done.
| |
| 141 strncpy(data + 1, unicode_utf8, arraysize(unicode_utf8)); | |
| 142 | |
| 143 const char* chunks[] = {partial_bom, data, "\0"}; | |
| 144 ChunkSource chunk_source(chunks); | |
| 145 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( | |
| 146 v8::internal::ScannerStream::For( | |
| 147 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | |
| 148 | |
| 149 // Every character read must match unicode_ucs2, so the split BOM must not show up in the output. | |
| 150 for (size_t i = 0; unicode_ucs2[i]; i++) { | |
| 151 CHECK_EQ(unicode_ucs2[i], stream->Advance()); | |
| 152 } | |
| 153 } | |
| 154 | |
| 112 TEST(Utf8ChunkBoundaries) { | 155 TEST(Utf8ChunkBoundaries) { |
| 113 // Test utf-8 parsing at chunk boundaries. | 156 // Test utf-8 parsing at chunk boundaries. |
| 114 | 157 |
| 115 // Split the test string at each byte and pass it to the stream. This way, | 158 // Split the test string at each byte and pass it to the stream. This way, |
| 116 // we'll have a split at each possible boundary. | 159 // we'll have a split at each possible boundary. |
| 117 size_t len = strlen(unicode_utf8); | 160 size_t len = strlen(unicode_utf8); |
| 118 char buffer[arraysize(unicode_utf8) + 3]; | 161 char buffer[arraysize(unicode_utf8) + 3]; |
| 119 for (size_t i = 1; i < len; i++) { | 162 for (size_t i = 1; i < len; i++) { |
| 120 // Copy source string into buffer, splitting it at i. | 163 // Copy source string into buffer, splitting it at i. |
| 121 // Then add three chunks, 0..i-1, i..strlen-1, empty. | 164 // Then add three chunks, 0..i-1, i..strlen-1, empty. |
| (...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 355 | 398 |
| 356 // 4k large buffer. | 399 // 4k large buffer. |
| 357 char buffer[4096 + 1]; | 400 char buffer[4096 + 1]; |
| 358 for (unsigned i = 0; i < arraysize(buffer); i++) { | 401 for (unsigned i = 0; i < arraysize(buffer); i++) { |
| 359 buffer[i] = static_cast<char>(i & 0x7F); | 402 buffer[i] = static_cast<char>(i & 0x7F); |
| 360 } | 403 } |
| 361 buffer[arraysize(buffer) - 1] = '\0'; | 404 buffer[arraysize(buffer) - 1] = '\0'; |
| 362 TestCharacterStreams(buffer, arraysize(buffer) - 1); | 405 TestCharacterStreams(buffer, arraysize(buffer) - 1); |
| 363 TestCharacterStreams(buffer, arraysize(buffer) - 1, 576, 3298); | 406 TestCharacterStreams(buffer, arraysize(buffer) - 1, 576, 3298); |
| 364 } | 407 } |
| OLD | NEW |