test/cctest/parsing/test-scanner-streams.cc - Issue 2549083002: [counters] Move waiting for more data from background-parsing into callbacks

Side by Side Diff: test/cctest/parsing/test-scanner-streams.cc

Issue 2549083002: [counters] Move waiting for more data from background-parsing into callbacks (Closed)

Patch Set: Addressed comments. Created 4 years ago.

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « src/parsing/scanner-character-streams.cc ('k') | no next file » | no next file with comments »

OLD	NEW
1 // Copyright 2016 the V8 project authors. All rights reserved.	1 // Copyright 2016 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/factory.h" // for i::Factory::NewExternalStringFrom*Byte	5 #include "src/factory.h" // for i::Factory::NewExternalStringFrom*Byte

6 #include "src/objects-inl.h"	6 #include "src/objects-inl.h"

7 #include "src/parsing/scanner-character-streams.h"	7 #include "src/parsing/scanner-character-streams.h"

8 #include "src/parsing/scanner.h"	8 #include "src/parsing/scanner.h"

9 #include "src/type-feedback-vector-inl.h" // for include "src/factory.h"	9 #include "src/type-feedback-vector-inl.h" // for include "src/factory.h"

10 #include "test/cctest/cctest.h"	10 #include "test/cctest/cctest.h"

(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
93 const uint16_t unicode_ucs2[] = {97, 98, 99, 228, 10784, 55357,	93 const uint16_t unicode_ucs2[] = {97, 98, 99, 228, 10784, 55357,

94 56489, 100, 101, 102, 0};	94 56489, 100, 101, 102, 0};

95	95

96 } // anonymous namespace	96 } // anonymous namespace

97	97

98 TEST(Utf8StreamAsciiOnly) {	98 TEST(Utf8StreamAsciiOnly) {

99 const char* chunks[] = {"abc", "def", "ghi", ""};	99 const char* chunks[] = {"abc", "def", "ghi", ""};

100 ChunkSource chunk_source(chunks);	100 ChunkSource chunk_source(chunks);

101 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(	101 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(

102 v8::internal::ScannerStream::For(	102 v8::internal::ScannerStream::For(

103 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));	103 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));

104	104

105 // Read the data without dying.	105 // Read the data without dying.

106 v8::internal::uc32 c;	106 v8::internal::uc32 c;

107 do {	107 do {

108 c = stream->Advance();	108 c = stream->Advance();

109 } while (c != v8::internal::Utf16CharacterStream::kEndOfInput);	109 } while (c != v8::internal::Utf16CharacterStream::kEndOfInput);

110 }	110 }

111	111

112 TEST(Utf8StreamBOM) {	112 TEST(Utf8StreamBOM) {

113 // Construct test string w/ UTF-8 BOM (byte order mark)	113 // Construct test string w/ UTF-8 BOM (byte order mark)

114 char data[3 + arraysize(unicode_utf8)] = {"\xef\xbb\xbf"};	114 char data[3 + arraysize(unicode_utf8)] = {"\xef\xbb\xbf"};

115 strncpy(data + 3, unicode_utf8, arraysize(unicode_utf8));	115 strncpy(data + 3, unicode_utf8, arraysize(unicode_utf8));

116	116

117 const char* chunks[] = {data, "\0"};	117 const char* chunks[] = {data, "\0"};

118 ChunkSource chunk_source(chunks);	118 ChunkSource chunk_source(chunks);

119 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(	119 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(

120 v8::internal::ScannerStream::For(	120 v8::internal::ScannerStream::For(

121 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));	121 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));

122	122

123 // Read the data without tripping over the BOM.	123 // Read the data without tripping over the BOM.

124 for (size_t i = 0; unicode_ucs2[i]; i++) {	124 for (size_t i = 0; unicode_ucs2[i]; i++) {

125 CHECK_EQ(unicode_ucs2[i], stream->Advance());	125 CHECK_EQ(unicode_ucs2[i], stream->Advance());

126 }	126 }

127 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, stream->Advance());	127 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput, stream->Advance());

128	128

129 // Make sure seek works.	129 // Make sure seek works.

130 stream->Seek(0);	130 stream->Seek(0);

131 CHECK_EQ(unicode_ucs2[0], stream->Advance());	131 CHECK_EQ(unicode_ucs2[0], stream->Advance());

132	132

133 stream->Seek(5);	133 stream->Seek(5);

134 CHECK_EQ(unicode_ucs2[5], stream->Advance());	134 CHECK_EQ(unicode_ucs2[5], stream->Advance());

135 }	135 }

136	136

137 TEST(Utf8SplitBOM) {	137 TEST(Utf8SplitBOM) {

138 // Construct chunks with a BOM split into two chunks.	138 // Construct chunks with a BOM split into two chunks.

139 char partial_bom[] = "\xef\xbb";	139 char partial_bom[] = "\xef\xbb";

140 char data[1 + arraysize(unicode_utf8)] = {"\xbf"};	140 char data[1 + arraysize(unicode_utf8)] = {"\xbf"};

141 strncpy(data + 1, unicode_utf8, arraysize(unicode_utf8));	141 strncpy(data + 1, unicode_utf8, arraysize(unicode_utf8));

142	142

143 {	143 {

144 const char* chunks[] = {partial_bom, data, "\0"};	144 const char* chunks[] = {partial_bom, data, "\0"};

145 ChunkSource chunk_source(chunks);	145 ChunkSource chunk_source(chunks);

146 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(	146 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(

147 v8::internal::ScannerStream::For(	147 v8::internal::ScannerStream::For(

148 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));	148 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));

149	149

150 // Read the data without tripping over the BOM.	150 // Read the data without tripping over the BOM.

151 for (size_t i = 0; unicode_ucs2[i]; i++) {	151 for (size_t i = 0; unicode_ucs2[i]; i++) {

152 CHECK_EQ(unicode_ucs2[i], stream->Advance());	152 CHECK_EQ(unicode_ucs2[i], stream->Advance());

153 }	153 }

154 }	154 }

155	155

156 // And now with single-byte BOM chunks.	156 // And now with single-byte BOM chunks.

157 char bom_byte_1[] = "\xef";	157 char bom_byte_1[] = "\xef";

158 char bom_byte_2[] = "\xbb";	158 char bom_byte_2[] = "\xbb";

159 {	159 {

160 const char* chunks[] = {bom_byte_1, bom_byte_2, data, "\0"};	160 const char* chunks[] = {bom_byte_1, bom_byte_2, data, "\0"};

161 ChunkSource chunk_source(chunks);	161 ChunkSource chunk_source(chunks);

162 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(	162 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(

163 v8::internal::ScannerStream::For(	163 v8::internal::ScannerStream::For(

164 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));	164 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));

165	165

166 // Read the data without tripping over the BOM.	166 // Read the data without tripping over the BOM.

167 for (size_t i = 0; unicode_ucs2[i]; i++) {	167 for (size_t i = 0; unicode_ucs2[i]; i++) {

168 CHECK_EQ(unicode_ucs2[i], stream->Advance());	168 CHECK_EQ(unicode_ucs2[i], stream->Advance());

169 }	169 }

170 }	170 }

171 }	171 }

172	172

173 TEST(Utf8ChunkBoundaries) {	173 TEST(Utf8ChunkBoundaries) {

174 // Test utf-8 parsing at chunk boundaries.	174 // Test utf-8 parsing at chunk boundaries.

175	175

176 // Split the test string at each byte and pass it to the stream. This way,	176 // Split the test string at each byte and pass it to the stream. This way,

177 // we'll have a split at each possible boundary.	177 // we'll have a split at each possible boundary.

178 size_t len = strlen(unicode_utf8);	178 size_t len = strlen(unicode_utf8);

179 char buffer[arraysize(unicode_utf8) + 3];	179 char buffer[arraysize(unicode_utf8) + 3];

180 for (size_t i = 1; i < len; i++) {	180 for (size_t i = 1; i < len; i++) {

181 // Copy source string into buffer, splitting it at i.	181 // Copy source string into buffer, splitting it at i.

182 // Then add three chunks, 0..i-1, i..strlen-1, empty.	182 // Then add three chunks, 0..i-1, i..strlen-1, empty.

183 strncpy(buffer, unicode_utf8, i);	183 strncpy(buffer, unicode_utf8, i);

184 strncpy(buffer + i + 1, unicode_utf8 + i, len - i);	184 strncpy(buffer + i + 1, unicode_utf8 + i, len - i);

185 buffer[i] = '\0';	185 buffer[i] = '\0';

186 buffer[len + 1] = '\0';	186 buffer[len + 1] = '\0';

187 buffer[len + 2] = '\0';	187 buffer[len + 2] = '\0';

188 const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2};	188 const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2};

189	189

190 ChunkSource chunk_source(chunks);	190 ChunkSource chunk_source(chunks);

191 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(	191 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(

192 v8::internal::ScannerStream::For(	192 v8::internal::ScannerStream::For(

193 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));	193 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));

194	194

195 for (size_t i = 0; unicode_ucs2[i]; i++) {	195 for (size_t i = 0; unicode_ucs2[i]; i++) {

196 CHECK_EQ(unicode_ucs2[i], stream->Advance());	196 CHECK_EQ(unicode_ucs2[i], stream->Advance());

197 }	197 }

198 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,	198 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,

199 stream->Advance());	199 stream->Advance());

200 }	200 }

201 }	201 }

202	202

203 TEST(Utf8SingleByteChunks) {	203 TEST(Utf8SingleByteChunks) {

204 // Have each byte as a single-byte chunk.	204 // Have each byte as a single-byte chunk.

205 size_t len = strlen(unicode_utf8);	205 size_t len = strlen(unicode_utf8);

206 char buffer[arraysize(unicode_utf8) + 4];	206 char buffer[arraysize(unicode_utf8) + 4];

207 for (size_t i = 1; i < len - 1; i++) {	207 for (size_t i = 1; i < len - 1; i++) {

208 // Copy source string into buffer, make a single-byte chunk at i.	208 // Copy source string into buffer, make a single-byte chunk at i.

209 strncpy(buffer, unicode_utf8, i);	209 strncpy(buffer, unicode_utf8, i);

210 strncpy(buffer + i + 3, unicode_utf8 + i + 1, len - i - 1);	210 strncpy(buffer + i + 3, unicode_utf8 + i + 1, len - i - 1);

211 buffer[i] = '\0';	211 buffer[i] = '\0';

212 buffer[i + 1] = unicode_utf8[i];	212 buffer[i + 1] = unicode_utf8[i];

213 buffer[i + 2] = '\0';	213 buffer[i + 2] = '\0';

214 buffer[len + 2] = '\0';	214 buffer[len + 2] = '\0';

215 buffer[len + 3] = '\0';	215 buffer[len + 3] = '\0';

216 const char* chunks[] = {buffer, buffer + i + 1, buffer + i + 3,	216 const char* chunks[] = {buffer, buffer + i + 1, buffer + i + 3,

217 buffer + len + 3};	217 buffer + len + 3};

218	218

219 ChunkSource chunk_source(chunks);	219 ChunkSource chunk_source(chunks);

220 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(	220 std::unique_ptr<v8::internal::Utf16CharacterStream> stream(

221 v8::internal::ScannerStream::For(	221 v8::internal::ScannerStream::For(

222 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));	222 &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));

223	223

224 for (size_t j = 0; unicode_ucs2[j]; j++) {	224 for (size_t j = 0; unicode_ucs2[j]; j++) {

225 CHECK_EQ(unicode_ucs2[j], stream->Advance());	225 CHECK_EQ(unicode_ucs2[j], stream->Advance());

226 }	226 }

227 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,	227 CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,

228 stream->Advance());	228 stream->Advance());

229 }	229 }

230 }	230 }

231	231

232 #define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))	232 #define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))

(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
344 // 1-byte streaming stream, single + many chunks.	344 // 1-byte streaming stream, single + many chunks.

345 {	345 {

346 const uint8_t* data =	346 const uint8_t* data =

347 reinterpret_cast<const uint8_t*>(one_byte_vector.begin());	347 reinterpret_cast<const uint8_t*>(one_byte_vector.begin());

348 const uint8_t* data_end =	348 const uint8_t* data_end =

349 reinterpret_cast<const uint8_t*>(one_byte_vector.end());	349 reinterpret_cast<const uint8_t*>(one_byte_vector.end());

350	350

351 ChunkSource single_chunk(data, data_end - data, false);	351 ChunkSource single_chunk(data, data_end - data, false);

352 std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream(	352 std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream(

353 i::ScannerStream::For(&single_chunk,	353 i::ScannerStream::For(&single_chunk,

354 v8::ScriptCompiler::StreamedSource::ONE_BYTE));	354 v8::ScriptCompiler::StreamedSource::ONE_BYTE,

	355 nullptr));

355 TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),	356 TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),

356 length, start, end);	357 length, start, end);

357	358

358 ChunkSource many_chunks(data, data_end - data, true);	359 ChunkSource many_chunks(data, data_end - data, true);

359 one_byte_streaming_stream.reset(i::ScannerStream::For(	360 one_byte_streaming_stream.reset(i::ScannerStream::For(

360 &many_chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE));	361 &many_chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE, nullptr));

361 TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),	362 TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),

362 length, start, end);	363 length, start, end);

363 }	364 }

364	365

365 // UTF-8 streaming stream, single + many chunks.	366 // UTF-8 streaming stream, single + many chunks.

366 {	367 {

367 const uint8_t* data =	368 const uint8_t* data =

368 reinterpret_cast<const uint8_t*>(one_byte_vector.begin());	369 reinterpret_cast<const uint8_t*>(one_byte_vector.begin());

369 const uint8_t* data_end =	370 const uint8_t* data_end =

370 reinterpret_cast<const uint8_t*>(one_byte_vector.end());	371 reinterpret_cast<const uint8_t*>(one_byte_vector.end());

371 ChunkSource chunks(data, data_end - data, false);	372 ChunkSource chunks(data, data_end - data, false);

372 std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream(	373 std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream(

373 i::ScannerStream::For(&chunks,	374 i::ScannerStream::For(&chunks, v8::ScriptCompiler::StreamedSource::UTF8,

374 v8::ScriptCompiler::StreamedSource::UTF8));	375 nullptr));

375 TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,	376 TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,

376 start, end);	377 start, end);

377	378

378 ChunkSource many_chunks(data, data_end - data, true);	379 ChunkSource many_chunks(data, data_end - data, true);

379 utf8_streaming_stream.reset(i::ScannerStream::For(	380 utf8_streaming_stream.reset(i::ScannerStream::For(

380 &many_chunks, v8::ScriptCompiler::StreamedSource::UTF8));	381 &many_chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));

381 TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,	382 TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,

382 start, end);	383 start, end);

383 }	384 }

384	385

385 // 2-byte streaming stream, single + many chunks.	386 // 2-byte streaming stream, single + many chunks.

386 {	387 {

387 const uint8_t* data =	388 const uint8_t* data =

388 reinterpret_cast<const uint8_t*>(two_byte_vector.begin());	389 reinterpret_cast<const uint8_t*>(two_byte_vector.begin());

389 const uint8_t* data_end =	390 const uint8_t* data_end =

390 reinterpret_cast<const uint8_t*>(two_byte_vector.end());	391 reinterpret_cast<const uint8_t*>(two_byte_vector.end());

391 ChunkSource chunks(data, data_end - data, false);	392 ChunkSource chunks(data, data_end - data, false);

392 std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream(	393 std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream(

393 i::ScannerStream::For(&chunks,	394 i::ScannerStream::For(

394 v8::ScriptCompiler::StreamedSource::TWO_BYTE));	395 &chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr));

395 TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),	396 TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),

396 length, start, end);	397 length, start, end);

397	398

398 ChunkSource many_chunks(data, data_end - data, true);	399 ChunkSource many_chunks(data, data_end - data, true);

399 two_byte_streaming_stream.reset(i::ScannerStream::For(	400 two_byte_streaming_stream.reset(i::ScannerStream::For(

400 &many_chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE));	401 &many_chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr));

401 TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),	402 TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),

402 length, start, end);	403 length, start, end);

403 }	404 }

404 }	405 }

405	406

406 TEST(CharacterStreams) {	407 TEST(CharacterStreams) {

407 v8::Isolate* isolate = CcTest::isolate();	408 v8::Isolate* isolate = CcTest::isolate();

408 v8::HandleScope handles(isolate);	409 v8::HandleScope handles(isolate);

409 v8::Local<v8::Context> context = v8::Context::New(isolate);	410 v8::Local<v8::Context> context = v8::Context::New(isolate);

410 v8::Context::Scope context_scope(context);	411 v8::Context::Scope context_scope(context);

(...skipping 21 matching lines...) Expand all Loading...
432 const uint16_t unicode[] = {65, 65533, 97, 100};	433 const uint16_t unicode[] = {65, 65533, 97, 100};

433	434

434 // Run the test for all sub-strings 0..N of bytes, to make sure we hit the	435 // Run the test for all sub-strings 0..N of bytes, to make sure we hit the

435 // error condition in and at chunk boundaries.	436 // error condition in and at chunk boundaries.

436 for (size_t len = 0; len < arraysize(bytes); len++) {	437 for (size_t len = 0; len < arraysize(bytes); len++) {

437 // Read len bytes from bytes, and compare against the expected unicode	438 // Read len bytes from bytes, and compare against the expected unicode

438 // characters. Expect kBadChar ( == Unicode replacement char == code point	439 // characters. Expect kBadChar ( == Unicode replacement char == code point

439 // 65533) instead of the incorrectly coded Latin1 char.	440 // 65533) instead of the incorrectly coded Latin1 char.

440 ChunkSource chunks(bytes, len, false);	441 ChunkSource chunks(bytes, len, false);

441 std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(	442 std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(

442 &chunks, v8::ScriptCompiler::StreamedSource::UTF8));	443 &chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));

443 for (size_t i = 0; i < len; i++) {	444 for (size_t i = 0; i < len; i++) {

444 CHECK_EQ(unicode[i], stream->Advance());	445 CHECK_EQ(unicode[i], stream->Advance());

445 }	446 }

446 CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());	447 CHECK_EQ(i::Utf16CharacterStream::kEndOfInput, stream->Advance());

447 }	448 }

448 }	449 }

OLD	NEW