OLD | NEW |
---|---|
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/factory.h" // for i::Factory::NewExternalStringFrom*Byte | 5 #include "src/factory.h" // for i::Factory::NewExternalStringFrom*Byte |
6 #include "src/objects-inl.h" | 6 #include "src/objects-inl.h" |
7 #include "src/parsing/scanner-character-streams.h" | 7 #include "src/parsing/scanner-character-streams.h" |
8 #include "src/parsing/scanner.h" | 8 #include "src/parsing/scanner.h" |
9 #include "src/type-feedback-vector-inl.h" // for include "src/factory.h" | 9 #include "src/type-feedback-vector-inl.h" // for include "src/factory.h" |
10 #include "test/cctest/cctest.h" | 10 #include "test/cctest/cctest.h" |
(... 91 matching lines skipped ...) | |
102 v8::internal::ScannerStream::For( | 102 v8::internal::ScannerStream::For( |
103 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | 103 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); |
104 | 104 |
105 // Read the data without dying. | 105 // Read the data without dying. |
106 v8::internal::uc32 c; | 106 v8::internal::uc32 c; |
107 do { | 107 do { |
108 c = stream->Advance(); | 108 c = stream->Advance(); |
109 } while (c != v8::internal::Utf16CharacterStream::kEndOfInput); | 109 } while (c != v8::internal::Utf16CharacterStream::kEndOfInput); |
110 } | 110 } |
111 | 111 |
112 TEST(Utf8StreamBOM) { | |
113 // Build the input: the 3-byte UTF-8 BOM (EF BB BF) followed by unicode_utf8; data is sized to hold both. | |
114 char data[3 + arraysize(unicode_utf8)] = {"\xef\xbb\xbf"}; | |
115 strncpy(data + 3, unicode_utf8, arraysize(unicode_utf8)); | |
116 | |
117 const char* chunks[] = {data, "\0"}; | |
118 ChunkSource chunk_source(chunks); | |
119 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( | |
120 v8::internal::ScannerStream::For( | |
121 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | |
122 | |
123 // The BOM must not show up in the output: expect exactly unicode_ucs2, then kEndOfInput. | |
124 for (size_t i = 0; unicode_ucs2[i]; i++) { | |
125 CHECK_EQ(unicode_ucs2[i], stream->Advance()); | |
126 } | |
127 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, stream->Advance()); | |
128 | |
129 // Seek, then re-read: positions 0 and 5 are expected to match unicode_ucs2[0] and [5], i.e. the BOM is not counted. | |
130 stream->Seek(0); | |
131 CHECK_EQ(unicode_ucs2[0], stream->Advance()); | |
132 | |
133 stream->Seek(5); | |
134 CHECK_EQ(unicode_ucs2[5], stream->Advance()); | |
135 } | |
136 | |
137 TEST(Utf8SplitBOM) { | |
138 // Split the 3-byte BOM across a chunk boundary: EF BB is the first chunk, BF starts the second (followed by unicode_utf8). | |
139 char partial_bom[] = "\xef\xbb"; | |
140 char data[1 + arraysize(unicode_utf8)] = {"\xbf"}; |
marja
2016/09/20 18:52:30
... how paranoid do we want to be, should there al
vogelheim
2016/09/21 07:50:54
Done.
| |
141 strncpy(data + 1, unicode_utf8, arraysize(unicode_utf8)); | |
142 | |
143 const char* chunks[] = {partial_bom, data, "\0"}; | |
144 ChunkSource chunk_source(chunks); | |
145 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( | |
146 v8::internal::ScannerStream::For( | |
147 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | |
148 | |
149 // The split BOM must still be skipped: expect exactly unicode_ucs2 (end of input is not checked in this test). | |
150 for (size_t i = 0; unicode_ucs2[i]; i++) { | |
151 CHECK_EQ(unicode_ucs2[i], stream->Advance()); | |
152 } | |
153 } | |
154 | |
112 TEST(Utf8ChunkBoundaries) { | 155 TEST(Utf8ChunkBoundaries) { |
113 // Test utf-8 parsing at chunk boundaries. | 156 // Test utf-8 parsing at chunk boundaries. |
114 | 157 |
115 // Split the test string at each byte and pass it to the stream. This way, | 158 // Split the test string at each byte and pass it to the stream. This way, |
116 // we'll have a split at each possible boundary. | 159 // we'll have a split at each possible boundary. |
117 size_t len = strlen(unicode_utf8); | 160 size_t len = strlen(unicode_utf8); |
118 char buffer[arraysize(unicode_utf8) + 3]; | 161 char buffer[arraysize(unicode_utf8) + 3]; |
119 for (size_t i = 1; i < len; i++) { | 162 for (size_t i = 1; i < len; i++) { |
120 // Copy source string into buffer, splitting it at i. | 163 // Copy source string into buffer, splitting it at i. |
121 // Then add three chunks, 0..i-1, i..strlen-1, empty. | 164 // Then add three chunks, 0..i-1, i..strlen-1, empty. |
(... 233 matching lines skipped ...) | |
355 | 398 |
356 // 4k large buffer. | 399 // 4k large buffer. |
357 char buffer[4096 + 1]; | 400 char buffer[4096 + 1]; |
358 for (unsigned i = 0; i < arraysize(buffer); i++) { | 401 for (unsigned i = 0; i < arraysize(buffer); i++) { |
359 buffer[i] = static_cast<char>(i & 0x7F); | 402 buffer[i] = static_cast<char>(i & 0x7F); |
360 } | 403 } |
361 buffer[arraysize(buffer) - 1] = '\0'; | 404 buffer[arraysize(buffer) - 1] = '\0'; |
362 TestCharacterStreams(buffer, arraysize(buffer) - 1); | 405 TestCharacterStreams(buffer, arraysize(buffer) - 1); |
363 TestCharacterStreams(buffer, arraysize(buffer) - 1, 576, 3298); | 406 TestCharacterStreams(buffer, arraysize(buffer) - 1, 576, 3298); |
364 } | 407 } |
OLD | NEW |