OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "src/factory.h" // for i::Factory::NewExternalStringFrom*Byte | |
6 #include "src/objects-inl.h" | |
7 #include "src/parsing/scanner-character-streams.h" | |
8 #include "src/parsing/scanner.h" | |
9 #include "src/type-feedback-vector-inl.h" // for include "src/factory.h" | |
10 #include "test/cctest/cctest.h" | |
11 | |
12 namespace { | |
13 | |
14 // Implement ExternalSourceStream based on const char**. | |
15 // This will take each string as one chunk. The last chunk must be empty. | |
16 class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream { | |
17 public: | |
18 explicit ChunkSource(const char** chunks) : current_(0) { | |
19 do { | |
20 chunks_.push_back( | |
21 {reinterpret_cast<const uint8_t*>(*chunks), strlen(*chunks)}); | |
22 chunks++; | |
23 } while (chunks_.back().len > 0); | |
24 } | |
25 ChunkSource(const uint8_t* data, size_t len, bool extra_chunky) | |
26 : current_(0) { | |
27 // If extra_chunky, we'll use increasingly large chunk sizes. | |
28 // If not, we'll have a single chunk of full length. | |
29 size_t chunk_size = extra_chunky ? 1 : len; | |
30 for (size_t i = 0; i < len; i += chunk_size, chunk_size *= 2) { | |
31 chunks_.push_back({data + i, i::Min(chunk_size, len - i)}); | |
32 } | |
33 chunks_.push_back({nullptr, 0}); | |
34 } | |
35 ~ChunkSource() {} | |
36 bool SetBookmark() override { return false; } | |
37 void ResetToBookmark() override {} | |
38 size_t GetMoreData(const uint8_t** src) override { | |
39 DCHECK_LT(current_, chunks_.size()); | |
40 Chunk& next = chunks_[current_++]; | |
41 uint8_t* chunk = new uint8_t[next.len]; | |
42 i::MemMove(chunk, next.ptr, next.len); | |
43 *src = chunk; | |
44 return next.len; | |
45 } | |
46 | |
47 private: | |
48 struct Chunk { | |
49 const uint8_t* ptr; | |
50 size_t len; | |
51 }; | |
52 std::vector<Chunk> chunks_; | |
53 size_t current_; | |
54 }; | |
55 | |
56 class TestExternalResource : public v8::String::ExternalStringResource { | |
57 public: | |
58 explicit TestExternalResource(uint16_t* data, int length) | |
59 : data_(data), length_(static_cast<size_t>(length)) {} | |
60 | |
61 ~TestExternalResource() {} | |
62 | |
63 const uint16_t* data() const { return data_; } | |
64 size_t length() const { return length_; } | |
65 | |
66 private: | |
67 uint16_t* data_; | |
68 size_t length_; | |
69 }; | |
70 | |
71 class TestExternalOneByteResource | |
72 : public v8::String::ExternalOneByteStringResource { | |
73 public: | |
74 TestExternalOneByteResource(const char* data, size_t length) | |
75 : data_(data), length_(length) {} | |
76 | |
77 const char* data() const { return data_; } | |
78 size_t length() const { return length_; } | |
79 | |
80 private: | |
81 const char* data_; | |
82 size_t length_; | |
83 }; | |
84 | |
// Sample text that exercises every UTF-8 sequence length (1 to 4 bytes).
// The pieces are kept as separate literals so that a hex escape can never
// swallow the characters that follow it.
const char unicode_utf8[] =
    "abc"               // 3x ascii (1 byte each)
    "\xc3\xa4"          // 2 bytes: a Umlaut, U+00E4 (228)
    "\xe2\xa8\xa0"      // 3 bytes: >> (math symbol), U+2A20 (10784)
    "\xf0\x9f\x92\xa9"  // 4 bytes: U+1F4A9 (128169); as utf-16
                        // surrogates: 0xD83D 0xDCA9 (55357 56489)
    "def";              // 3x ascii again
// The same text as UTF-16 code units, followed by a 0 terminator.
const uint16_t unicode_ucs2[] = {'a',    'b',    'c', 0x00E4, 0x2A20, 0xD83D,
                                 0xDCA9, 'd',    'e', 'f',    0};
95 | |
96 } // anonymous namespace | |
97 | |
98 TEST(Utf8StreamAsciiOnly) { | |
99 const char* chunks[] = {"abc", "def", "ghi", ""}; | |
100 ChunkSource chunk_source(chunks); | |
101 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( | |
102 v8::internal::ScannerStream::For( | |
103 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | |
104 | |
105 // Read the data without dying. | |
106 v8::internal::uc32 c; | |
107 do { | |
108 c = stream->Advance(); | |
109 } while (c != v8::internal::Utf16CharacterStream::kEndOfInput); | |
110 } | |
111 | |
112 TEST(Utf8ChunkBoundaries) { | |
113 // Test utf-8 parsing at chunk boundaries. | |
114 | |
115 // Split the test string at each byte and pass it to the stream. This way, | |
116 // we'll have a split at each possible boundary. | |
117 size_t len = strlen(unicode_utf8); | |
118 char buffer[arraysize(unicode_utf8) + 3]; | |
119 for (size_t i = 1; i < len; i++) { | |
120 // Copy source string into buffer, splitting it at i. | |
121 // Then add three chunks, 0..i-1, i..strlen-1, empty. | |
122 strncpy(buffer, unicode_utf8, i); | |
123 strncpy(buffer + i + 1, unicode_utf8 + i, len - i); | |
124 buffer[i] = '\0'; | |
125 buffer[len + 1] = '\0'; | |
126 buffer[len + 2] = '\0'; | |
127 const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2}; | |
128 | |
129 ChunkSource chunk_source(chunks); | |
130 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( | |
131 v8::internal::ScannerStream::For( | |
132 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | |
133 | |
134 for (size_t i = 0; unicode_ucs2[i]; i++) { | |
135 CHECK_EQ(unicode_ucs2[i], stream->Advance()); | |
136 } | |
137 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, | |
138 stream->Advance()); | |
139 } | |
140 } | |
141 | |
142 TEST(Utf8SingleByteChunks) { | |
143 // Have each byte as a single-byte chunk. | |
144 size_t len = strlen(unicode_utf8); | |
145 char buffer[arraysize(unicode_utf8) + 4]; | |
146 for (size_t i = 1; i < len - 1; i++) { | |
147 // Copy source string into buffer, make a single-byte chunk at i. | |
148 strncpy(buffer, unicode_utf8, i); | |
149 strncpy(buffer + i + 3, unicode_utf8 + i + 1, len - i - 1); | |
150 buffer[i] = '\0'; | |
151 buffer[i + 1] = unicode_utf8[i]; | |
152 buffer[i + 2] = '\0'; | |
153 buffer[len + 2] = '\0'; | |
154 buffer[len + 3] = '\0'; | |
155 const char* chunks[] = {buffer, buffer + i + 1, buffer + i + 3, | |
156 buffer + len + 3}; | |
157 | |
158 ChunkSource chunk_source(chunks); | |
159 std::unique_ptr<v8::internal::Utf16CharacterStream> stream( | |
160 v8::internal::ScannerStream::For( | |
161 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8)); | |
162 | |
163 for (size_t j = 0; unicode_ucs2[j]; j++) { | |
164 CHECK_EQ(unicode_ucs2[j], stream->Advance()); | |
165 } | |
166 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, | |
167 stream->Advance()); | |
168 } | |
169 } | |
170 | |
171 #define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2)) | |
172 | |
173 void TestCharacterStream(const char* reference, i::Utf16CharacterStream* stream, | |
174 unsigned length, unsigned start, unsigned end) { | |
175 // Read streams one char at a time | |
176 unsigned i; | |
177 for (i = start; i < end; i++) { | |
178 CHECK_EQU(i, stream->pos()); | |
179 CHECK_EQU(reference[i], stream->Advance()); | |
180 } | |
181 CHECK_EQU(end, stream->pos()); | |
182 | |
183 // Pushback, re-read, pushback again. | |
184 while (i > end / 4) { | |
185 int32_t c0 = reference[i - 1]; | |
186 CHECK_EQU(i, stream->pos()); | |
187 stream->PushBack(c0); | |
188 i--; | |
189 CHECK_EQU(i, stream->pos()); | |
190 int32_t c1 = stream->Advance(); | |
191 i++; | |
192 CHECK_EQU(i, stream->pos()); | |
193 CHECK_EQ(c0, c1); | |
194 stream->PushBack(c0); | |
195 i--; | |
196 CHECK_EQU(i, stream->pos()); | |
197 } | |
198 | |
199 // Seek + read streams one char at a time. | |
200 unsigned halfway = end / 2; | |
201 stream->SeekForward(halfway - i); | |
202 for (i = halfway; i < end; i++) { | |
203 CHECK_EQU(i, stream->pos()); | |
204 CHECK_EQU(reference[i], stream->Advance()); | |
205 } | |
206 CHECK_EQU(i, stream->pos()); | |
207 CHECK_LT(stream->Advance(), 0); | |
208 | |
209 // Seek back, then seek beyond end of stream. | |
210 stream->Seek(start); | |
211 if (start < length) { | |
212 CHECK_EQU(stream->Advance(), reference[start]); | |
213 } else { | |
214 CHECK_LT(stream->Advance(), 0); | |
215 } | |
216 stream->Seek(length + 5); | |
217 CHECK_LT(stream->Advance(), 0); | |
218 } | |
219 | |
220 #undef CHECK_EQU | |
221 | |
222 void TestCharacterStreams(const char* one_byte_source, unsigned length, | |
223 unsigned start = 0, unsigned end = 0) { | |
224 if (end == 0) end = length; | |
225 | |
226 i::Isolate* isolate = CcTest::i_isolate(); | |
227 i::Factory* factory = isolate->factory(); | |
228 | |
229 // 2-byte external string | |
230 std::unique_ptr<i::uc16[]> uc16_buffer(new i::uc16[length]); | |
231 i::Vector<const i::uc16> two_byte_vector(uc16_buffer.get(), | |
232 static_cast<int>(length)); | |
233 { | |
234 for (unsigned i = 0; i < length; i++) { | |
235 uc16_buffer[i] = static_cast<i::uc16>(one_byte_source[i]); | |
236 } | |
237 TestExternalResource resource(uc16_buffer.get(), length); | |
238 i::Handle<i::String> uc16_string( | |
239 factory->NewExternalStringFromTwoByte(&resource).ToHandleChecked()); | |
240 std::unique_ptr<i::Utf16CharacterStream> uc16_stream( | |
241 i::ScannerStream::For(uc16_string, start, end)); | |
242 TestCharacterStream(one_byte_source, uc16_stream.get(), length, start, end); | |
243 } | |
244 | |
245 // 1-byte external string | |
246 i::Vector<const char> one_byte_vector(one_byte_source, | |
247 static_cast<int>(length)); | |
248 i::Handle<i::String> one_byte_string = | |
249 factory->NewStringFromAscii(one_byte_vector).ToHandleChecked(); | |
250 { | |
251 TestExternalOneByteResource one_byte_resource(one_byte_source, length); | |
252 i::Handle<i::String> ext_one_byte_string( | |
253 factory->NewExternalStringFromOneByte(&one_byte_resource) | |
254 .ToHandleChecked()); | |
255 std::unique_ptr<i::Utf16CharacterStream> one_byte_stream( | |
256 i::ScannerStream::For(ext_one_byte_string, start, end)); | |
257 TestCharacterStream(one_byte_source, one_byte_stream.get(), length, start, | |
258 end); | |
259 } | |
260 | |
261 // 1-byte generic i::String | |
262 { | |
263 std::unique_ptr<i::Utf16CharacterStream> string_stream( | |
264 i::ScannerStream::For(one_byte_string, start, end)); | |
265 TestCharacterStream(one_byte_source, string_stream.get(), length, start, | |
266 end); | |
267 } | |
268 | |
269 // 2-byte generic i::String | |
270 { | |
271 i::Handle<i::String> two_byte_string = | |
272 factory->NewStringFromTwoByte(two_byte_vector).ToHandleChecked(); | |
273 std::unique_ptr<i::Utf16CharacterStream> two_byte_string_stream( | |
274 i::ScannerStream::For(two_byte_string, start, end)); | |
275 TestCharacterStream(one_byte_source, two_byte_string_stream.get(), length, | |
276 start, end); | |
277 } | |
278 | |
279 // Streaming has no notion of start/end, so let's skip streaming tests for | |
280 // th cases. | |
marja
2016/09/15 08:25:17
typo
vogelheim
2016/09/15 11:29:26
Done.
| |
281 if (start != 0 || end != length) return; | |
282 | |
283 // 1-byte streaming stream, single + many chunks. | |
284 { | |
285 const uint8_t* data = | |
286 reinterpret_cast<const uint8_t*>(one_byte_vector.begin()); | |
287 const uint8_t* data_end = | |
288 reinterpret_cast<const uint8_t*>(one_byte_vector.end()); | |
289 | |
290 ChunkSource single_chunk(data, data_end - data, false); | |
291 std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream( | |
292 i::ScannerStream::For(&single_chunk, | |
293 v8::ScriptCompiler::StreamedSource::ONE_BYTE)); | |
294 TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(), | |
295 length, start, end); | |
296 | |
297 ChunkSource many_chunks(data, data_end - data, true); | |
298 one_byte_streaming_stream.reset(i::ScannerStream::For( | |
299 &many_chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE)); | |
300 TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(), | |
301 length, start, end); | |
302 } | |
303 | |
304 // UTF-8 streaming stream, single + many chunks. | |
305 { | |
306 const uint8_t* data = | |
307 reinterpret_cast<const uint8_t*>(one_byte_vector.begin()); | |
308 const uint8_t* data_end = | |
309 reinterpret_cast<const uint8_t*>(one_byte_vector.end()); | |
310 ChunkSource chunks(data, data_end - data, false); | |
311 std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream( | |
312 i::ScannerStream::For(&chunks, | |
313 v8::ScriptCompiler::StreamedSource::UTF8)); | |
314 TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length, | |
315 start, end); | |
316 | |
317 ChunkSource many_chunks(data, data_end - data, true); | |
318 utf8_streaming_stream.reset(i::ScannerStream::For( | |
319 &many_chunks, v8::ScriptCompiler::StreamedSource::UTF8)); | |
320 TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length, | |
321 start, end); | |
322 } | |
323 | |
324 // 2-byte streaming stream, single + many chunks. | |
325 { | |
326 const uint8_t* data = | |
327 reinterpret_cast<const uint8_t*>(two_byte_vector.begin()); | |
328 const uint8_t* data_end = | |
329 reinterpret_cast<const uint8_t*>(two_byte_vector.end()); | |
330 ChunkSource chunks(data, data_end - data, false); | |
331 std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream( | |
332 i::ScannerStream::For(&chunks, | |
333 v8::ScriptCompiler::StreamedSource::TWO_BYTE)); | |
334 TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(), | |
335 length, start, end); | |
336 | |
337 ChunkSource many_chunks(data, data_end - data, true); | |
338 two_byte_streaming_stream.reset(i::ScannerStream::For( | |
339 &many_chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE)); | |
340 TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(), | |
341 length, start, end); | |
342 } | |
343 } | |
344 | |
345 TEST(CharacterStreams) { | |
346 v8::Isolate* isolate = CcTest::isolate(); | |
347 v8::HandleScope handles(isolate); | |
348 v8::Local<v8::Context> context = v8::Context::New(isolate); | |
349 v8::Context::Scope context_scope(context); | |
350 | |
351 TestCharacterStreams("abcdefghi", 9); | |
352 TestCharacterStreams("abc\0\n\r\x7f", 7); | |
353 TestCharacterStreams("\0", 1); | |
354 TestCharacterStreams("", 0); | |
355 | |
356 // 4k large buffer. | |
357 char buffer[4096 + 1]; | |
358 for (unsigned i = 0; i < arraysize(buffer); i++) { | |
359 buffer[i] = static_cast<char>(i & 0x7F); | |
360 } | |
361 buffer[arraysize(buffer) - 1] = '\0'; | |
362 TestCharacterStreams(buffer, arraysize(buffer) - 1); | |
363 TestCharacterStreams(buffer, arraysize(buffer) - 1, 576, 3298); | |
364 } | |
OLD | NEW |