Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(122)

Side by Side Diff: test/cctest/parsing/test-scanner-streams.cc

Issue 2314663002: Rework scanner-character-streams. (Closed)
Patch Set: Niko's feedback and fix compile even harder Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/factory.h" // for i::Factory::NewExternalStringFrom*Byte
6 #include "src/objects-inl.h"
7 #include "src/parsing/scanner-character-streams.h"
8 #include "src/parsing/scanner.h"
9 #include "src/type-feedback-vector-inl.h" // for include "src/factory.h"
10 #include "test/cctest/cctest.h"
11
12 namespace {
13
14 // Implement ExternalSourceStream based on const char**.
15 // This will take each string as one chunk. The last chunk must be empty.
16 class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream {
17 public:
18 explicit ChunkSource(const char** chunks) : current_(0) {
19 do {
20 chunks_.push_back(
21 {reinterpret_cast<const uint8_t*>(*chunks), strlen(*chunks)});
22 chunks++;
23 } while (chunks_.back().len > 0);
24 }
25 ChunkSource(const uint8_t* data, size_t len, bool extra_chunky)
26 : current_(0) {
27 // If extra_chunky, we'll use increasingly large chunk sizes.
28 // If not, we'll have a single chunk of full length.
29 size_t chunk_size = extra_chunky ? 1 : len;
30 for (size_t i = 0; i < len; i += chunk_size, chunk_size *= 2) {
31 chunks_.push_back({data + i, i::Min(chunk_size, len - i)});
32 }
33 chunks_.push_back({nullptr, 0});
34 }
35 ~ChunkSource() {}
36 bool SetBookmark() override { return false; }
37 void ResetToBookmark() override {}
38 size_t GetMoreData(const uint8_t** src) override {
39 DCHECK_LT(current_, chunks_.size());
40 Chunk& next = chunks_[current_++];
41 uint8_t* chunk = new uint8_t[next.len];
42 i::MemMove(chunk, next.ptr, next.len);
43 *src = chunk;
44 return next.len;
45 }
46
47 private:
48 struct Chunk {
49 const uint8_t* ptr;
50 size_t len;
51 };
52 std::vector<Chunk> chunks_;
53 size_t current_;
54 };
55
56 class TestExternalResource : public v8::String::ExternalStringResource {
57 public:
58 explicit TestExternalResource(uint16_t* data, int length)
59 : data_(data), length_(static_cast<size_t>(length)) {}
60
61 ~TestExternalResource() {}
62
63 const uint16_t* data() const { return data_; }
64 size_t length() const { return length_; }
65
66 private:
67 uint16_t* data_;
68 size_t length_;
69 };
70
71 class TestExternalOneByteResource
72 : public v8::String::ExternalOneByteStringResource {
73 public:
74 TestExternalOneByteResource(const char* data, size_t length)
75 : data_(data), length_(length) {}
76
77 const char* data() const { return data_; }
78 size_t length() const { return length_; }
79
80 private:
81 const char* data_;
82 size_t length_;
83 };
84
// Reference input covering every UTF-8 sequence length (1 to 4 bytes),
// together with the UTF-16 code units it is expected to decode to.
const char unicode_utf8[] =
    "abc"               // three 1-byte (ASCII) characters
    "\xc3\xa4"          // 2-byte sequence: U+00E4 (a umlaut)
    "\xe2\xa8\xa0"      // 3-byte sequence: U+2A20 (">>" math symbol)
    "\xf0\x9f\x92\xa9"  // 4-byte sequence: U+1F4A9, which needs the
                        // UTF-16 surrogate pair 0xD83D 0xDCA9
    "def";              // three more ASCII characters
// Zero-terminated so that tests can iterate until the first 0 entry.
const uint16_t unicode_ucs2[] = {'a',    'b',    'c', 0x00E4, 0x2A20, 0xD83D,
                                 0xDCA9, 'd',    'e', 'f',    0};
95
96 } // anonymous namespace
97
98 TEST(Utf8StreamAsciiOnly) {
99 const char* chunks[] = {"abc", "def", "ghi", ""};
100 ChunkSource chunk_source(chunks);
101 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
102 v8::internal::ScannerStream::For(
103 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
104
105 // Read the data without dying.
106 v8::internal::uc32 c;
107 do {
108 c = stream->Advance();
109 } while (c != v8::internal::Utf16CharacterStream::kEndOfInput);
110 }
111
112 TEST(Utf8ChunkBoundaries) {
113 // Test utf-8 parsing at chunk boundaries.
114
115 // Split the test string at each byte and pass it to the stream. This way,
116 // we'll have a split at each possible boundary.
117 size_t len = strlen(unicode_utf8);
118 char buffer[arraysize(unicode_utf8) + 3];
119 for (size_t i = 1; i < len; i++) {
120 // Copy source string into buffer, splitting it at i.
121 // Then add three chunks, 0..i-1, i..strlen-1, empty.
122 strncpy(buffer, unicode_utf8, i);
123 strncpy(buffer + i + 1, unicode_utf8 + i, len - i);
124 buffer[i] = '\0';
125 buffer[len + 1] = '\0';
126 buffer[len + 2] = '\0';
127 const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2};
128
129 ChunkSource chunk_source(chunks);
130 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
131 v8::internal::ScannerStream::For(
132 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
133
134 for (size_t i = 0; unicode_ucs2[i]; i++) {
135 CHECK_EQ(unicode_ucs2[i], stream->Advance());
136 }
137 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,
138 stream->Advance());
139 }
140 }
141
142 TEST(Utf8SingleByteChunks) {
143 // Have each byte as a single-byte chunk.
144 size_t len = strlen(unicode_utf8);
145 char buffer[arraysize(unicode_utf8) + 4];
146 for (size_t i = 1; i < len - 1; i++) {
147 // Copy source string into buffer, make a single-byte chunk at i.
148 strncpy(buffer, unicode_utf8, i);
149 strncpy(buffer + i + 3, unicode_utf8 + i + 1, len - i - 1);
150 buffer[i] = '\0';
151 buffer[i + 1] = unicode_utf8[i];
152 buffer[i + 2] = '\0';
153 buffer[len + 2] = '\0';
154 buffer[len + 3] = '\0';
155 const char* chunks[] = {buffer, buffer + i + 1, buffer + i + 3,
156 buffer + len + 3};
157
158 ChunkSource chunk_source(chunks);
159 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
160 v8::internal::ScannerStream::For(
161 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
162
163 for (size_t j = 0; unicode_ucs2[j]; j++) {
164 CHECK_EQ(unicode_ucs2[j], stream->Advance());
165 }
166 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,
167 stream->Advance());
168 }
169 }
170
171 #define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
172
173 void TestCharacterStream(const char* reference, i::Utf16CharacterStream* stream,
174 unsigned length, unsigned start, unsigned end) {
175 // Read streams one char at a time
176 unsigned i;
177 for (i = start; i < end; i++) {
178 CHECK_EQU(i, stream->pos());
179 CHECK_EQU(reference[i], stream->Advance());
180 }
181 CHECK_EQU(end, stream->pos());
182
183 // Pushback, re-read, pushback again.
184 while (i > end / 4) {
185 int32_t c0 = reference[i - 1];
186 CHECK_EQU(i, stream->pos());
187 stream->PushBack(c0);
188 i--;
189 CHECK_EQU(i, stream->pos());
190 int32_t c1 = stream->Advance();
191 i++;
192 CHECK_EQU(i, stream->pos());
193 CHECK_EQ(c0, c1);
194 stream->PushBack(c0);
195 i--;
196 CHECK_EQU(i, stream->pos());
197 }
198
199 // Seek + read streams one char at a time.
200 unsigned halfway = end / 2;
201 stream->SeekForward(halfway - i);
202 for (i = halfway; i < end; i++) {
203 CHECK_EQU(i, stream->pos());
204 CHECK_EQU(reference[i], stream->Advance());
205 }
206 CHECK_EQU(i, stream->pos());
207 CHECK_LT(stream->Advance(), 0);
208
209 // Seek back, then seek beyond end of stream.
210 stream->Seek(start);
211 if (start < length) {
212 CHECK_EQU(stream->Advance(), reference[start]);
213 } else {
214 CHECK_LT(stream->Advance(), 0);
215 }
216 stream->Seek(length + 5);
217 CHECK_LT(stream->Advance(), 0);
218 }
219
220 #undef CHECK_EQU
221
222 void TestCharacterStreams(const char* one_byte_source, unsigned length,
223 unsigned start = 0, unsigned end = 0) {
224 if (end == 0) end = length;
225
226 i::Isolate* isolate = CcTest::i_isolate();
227 i::Factory* factory = isolate->factory();
228
229 // 2-byte external string
230 std::unique_ptr<i::uc16[]> uc16_buffer(new i::uc16[length]);
231 i::Vector<const i::uc16> two_byte_vector(uc16_buffer.get(),
232 static_cast<int>(length));
233 {
234 for (unsigned i = 0; i < length; i++) {
235 uc16_buffer[i] = static_cast<i::uc16>(one_byte_source[i]);
236 }
237 TestExternalResource resource(uc16_buffer.get(), length);
238 i::Handle<i::String> uc16_string(
239 factory->NewExternalStringFromTwoByte(&resource).ToHandleChecked());
240 std::unique_ptr<i::Utf16CharacterStream> uc16_stream(
241 i::ScannerStream::For(uc16_string, start, end));
242 TestCharacterStream(one_byte_source, uc16_stream.get(), length, start, end);
243 }
244
245 // 1-byte external string
246 i::Vector<const char> one_byte_vector(one_byte_source,
247 static_cast<int>(length));
248 i::Handle<i::String> one_byte_string =
249 factory->NewStringFromAscii(one_byte_vector).ToHandleChecked();
250 {
251 TestExternalOneByteResource one_byte_resource(one_byte_source, length);
252 i::Handle<i::String> ext_one_byte_string(
253 factory->NewExternalStringFromOneByte(&one_byte_resource)
254 .ToHandleChecked());
255 std::unique_ptr<i::Utf16CharacterStream> one_byte_stream(
256 i::ScannerStream::For(ext_one_byte_string, start, end));
257 TestCharacterStream(one_byte_source, one_byte_stream.get(), length, start,
258 end);
259 }
260
261 // 1-byte generic i::String
262 {
263 std::unique_ptr<i::Utf16CharacterStream> string_stream(
264 i::ScannerStream::For(one_byte_string, start, end));
265 TestCharacterStream(one_byte_source, string_stream.get(), length, start,
266 end);
267 }
268
269 // 2-byte generic i::String
270 {
271 i::Handle<i::String> two_byte_string =
272 factory->NewStringFromTwoByte(two_byte_vector).ToHandleChecked();
273 std::unique_ptr<i::Utf16CharacterStream> two_byte_string_stream(
274 i::ScannerStream::For(two_byte_string, start, end));
275 TestCharacterStream(one_byte_source, two_byte_string_stream.get(), length,
276 start, end);
277 }
278
279 // Streaming has no notion of start/end, so let's skip streaming tests for
280 // th cases.
marja 2016/09/15 08:25:17 typo
vogelheim 2016/09/15 11:29:26 Done.
281 if (start != 0 || end != length) return;
282
283 // 1-byte streaming stream, single + many chunks.
284 {
285 const uint8_t* data =
286 reinterpret_cast<const uint8_t*>(one_byte_vector.begin());
287 const uint8_t* data_end =
288 reinterpret_cast<const uint8_t*>(one_byte_vector.end());
289
290 ChunkSource single_chunk(data, data_end - data, false);
291 std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream(
292 i::ScannerStream::For(&single_chunk,
293 v8::ScriptCompiler::StreamedSource::ONE_BYTE));
294 TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
295 length, start, end);
296
297 ChunkSource many_chunks(data, data_end - data, true);
298 one_byte_streaming_stream.reset(i::ScannerStream::For(
299 &many_chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE));
300 TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
301 length, start, end);
302 }
303
304 // UTF-8 streaming stream, single + many chunks.
305 {
306 const uint8_t* data =
307 reinterpret_cast<const uint8_t*>(one_byte_vector.begin());
308 const uint8_t* data_end =
309 reinterpret_cast<const uint8_t*>(one_byte_vector.end());
310 ChunkSource chunks(data, data_end - data, false);
311 std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream(
312 i::ScannerStream::For(&chunks,
313 v8::ScriptCompiler::StreamedSource::UTF8));
314 TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
315 start, end);
316
317 ChunkSource many_chunks(data, data_end - data, true);
318 utf8_streaming_stream.reset(i::ScannerStream::For(
319 &many_chunks, v8::ScriptCompiler::StreamedSource::UTF8));
320 TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
321 start, end);
322 }
323
324 // 2-byte streaming stream, single + many chunks.
325 {
326 const uint8_t* data =
327 reinterpret_cast<const uint8_t*>(two_byte_vector.begin());
328 const uint8_t* data_end =
329 reinterpret_cast<const uint8_t*>(two_byte_vector.end());
330 ChunkSource chunks(data, data_end - data, false);
331 std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream(
332 i::ScannerStream::For(&chunks,
333 v8::ScriptCompiler::StreamedSource::TWO_BYTE));
334 TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
335 length, start, end);
336
337 ChunkSource many_chunks(data, data_end - data, true);
338 two_byte_streaming_stream.reset(i::ScannerStream::For(
339 &many_chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE));
340 TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
341 length, start, end);
342 }
343 }
344
345 TEST(CharacterStreams) {
346 v8::Isolate* isolate = CcTest::isolate();
347 v8::HandleScope handles(isolate);
348 v8::Local<v8::Context> context = v8::Context::New(isolate);
349 v8::Context::Scope context_scope(context);
350
351 TestCharacterStreams("abcdefghi", 9);
352 TestCharacterStreams("abc\0\n\r\x7f", 7);
353 TestCharacterStreams("\0", 1);
354 TestCharacterStreams("", 0);
355
356 // 4k large buffer.
357 char buffer[4096 + 1];
358 for (unsigned i = 0; i < arraysize(buffer); i++) {
359 buffer[i] = static_cast<char>(i & 0x7F);
360 }
361 buffer[arraysize(buffer) - 1] = '\0';
362 TestCharacterStreams(buffer, arraysize(buffer) - 1);
363 TestCharacterStreams(buffer, arraysize(buffer) - 1, 576, 3298);
364 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698