| Index: test/cctest/parsing/test-scanner-streams.cc
|
| diff --git a/test/cctest/parsing/test-scanner-streams.cc b/test/cctest/parsing/test-scanner-streams.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..d4ad4fb37c1844a1eeea6124cf21f64c44263e13
|
| --- /dev/null
|
| +++ b/test/cctest/parsing/test-scanner-streams.cc
|
| @@ -0,0 +1,354 @@
|
| +// Copyright 2016 the V8 project authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "src/factory.h" // for i::Factory::NewExternalStringFrom*Byte
|
| +#include "src/objects-inl.h"
|
| +#include "src/parsing/scanner-character-streams.h"
|
| +#include "src/parsing/scanner.h"
|
| +#include "src/type-feedback-vector-inl.h" // for include "src/factory.h"
|
| +#include "test/cctest/cctest.h"
|
| +
|
| +namespace {
|
| +
|
| +// Implement ExternalSourceStream based on const char**.
|
| +// This will take each string as one chunk. The last chunk must be empty.
|
| +//
|
| +// A second constructor slices a flat byte buffer into chunks instead. In both
|
| +// cases the chunk list ends with an empty chunk, which GetMoreData reports as
|
| +// a length of zero.
|
| +class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream {
|
| + public:
|
| +  // Records every string in |chunks| up to and including the first empty one.
|
| +  // The strings are referenced, not copied.
|
| +  explicit ChunkSource(const char** chunks) : current_(0) {
|
| +    do {
|
| +      chunks_.push_back(
|
| +          {reinterpret_cast<const uint8_t*>(*chunks), strlen(*chunks)});
|
| +      chunks++;
|
| +    } while (chunks_.back().len > 0);
|
| +  }
|
| +  // Slices the |len| bytes at |data| into chunks and appends an empty
|
| +  // terminator chunk. The bytes are referenced, not copied.
|
| +  ChunkSource(const uint8_t* data, size_t len, bool extra_chunky)
|
| +      : current_(0) {
|
| +    // If extra_chunky, we'll use increasingly large chunk sizes.
|
| +    // If not, we'll have a single chunk of full length.
|
| +    size_t chunk_size = extra_chunky ? 1 : len;
|
| +    for (size_t i = 0; i < len; i += chunk_size, chunk_size *= 2) {
|
| +      chunks_.push_back({data + i, i::Min(chunk_size, len - i)});
|
| +    }
|
| +    chunks_.push_back({nullptr, 0});
|
| +  }
|
| +  ~ChunkSource() {}
|
| +  // Bookmarking is not supported by this test source.
|
| +  bool SetBookmark() override { return false; }
|
| +  void ResetToBookmark() override {}
|
| +  // Stores a freshly new[]-allocated copy of the next chunk in |*src| and
|
| +  // returns its length (zero for the terminator chunk). The copy is never
|
| +  // freed here; presumably the consumer releases it.
|
| +  // NOTE(review): confirm against the ExternalSourceStream contract.
|
| +  size_t GetMoreData(const uint8_t** src) override {
|
| +    DCHECK_LT(current_, chunks_.size());
|
| +    const Chunk& next = chunks_[current_++];
|
| +    uint8_t* chunk = new uint8_t[next.len];
|
| +    // The terminator chunk built by the byte-buffer constructor has a null
|
| +    // pointer. Handing a null source to a memmove-style copy is undefined even
|
| +    // for zero bytes, so skip the copy when there is nothing to copy.
|
| +    if (next.len > 0) i::MemMove(chunk, next.ptr, next.len);
|
| +    *src = chunk;
|
| +    return next.len;
|
| +  }
|
| +
|
| + private:
|
| +  // A borrowed byte range; |ptr| may be null when |len| is zero.
|
| +  struct Chunk {
|
| +    const uint8_t* ptr;
|
| +    size_t len;
|
| +  };
|
| +  std::vector<Chunk> chunks_;
|
| +  size_t current_;  // Index of the chunk the next GetMoreData call returns.
|
| +};
|
| +
|
| +// Two-byte external string resource that wraps a caller-owned buffer.
|
| +// It neither copies nor frees the buffer.
|
| +class TestExternalResource : public v8::String::ExternalStringResource {
|
| + public:
|
| +  // |data| must stay valid for as long as the resource is in use; |length| is
|
| +  // the number of 16-bit code units.
|
| +  explicit TestExternalResource(uint16_t* data, int length)
|
| +      : data_(data), length_(static_cast<size_t>(length)) {}
|
| +
|
| +  ~TestExternalResource() {}
|
| +
|
| +  const uint16_t* data() const override { return data_; }
|
| +  size_t length() const override { return length_; }
|
| +
|
| + protected:
|
| +  // The tests in this file create this resource on the stack. According to the
|
| +  // public v8.h documentation the inherited Dispose() uses the delete
|
| +  // operator on the resource, which must not happen to a stack object when
|
| +  // V8 finalizes the external string. Lifetime stays with the creator.
|
| +  void Dispose() override {}
|
| +
|
| + private:
|
| +  uint16_t* data_;
|
| +  size_t length_;
|
| +};
|
| +
|
| +// One-byte external string resource that wraps a caller-owned character
|
| +// buffer. It neither copies nor frees the buffer.
|
| +class TestExternalOneByteResource
|
| +    : public v8::String::ExternalOneByteStringResource {
|
| + public:
|
| +  // |data| must stay valid for as long as the resource is in use; |length| is
|
| +  // the number of characters.
|
| +  TestExternalOneByteResource(const char* data, size_t length)
|
| +      : data_(data), length_(length) {}
|
| +
|
| +  const char* data() const override { return data_; }
|
| +  size_t length() const override { return length_; }
|
| +
|
| + protected:
|
| +  // The tests in this file create this resource on the stack. According to the
|
| +  // public v8.h documentation the inherited Dispose() uses the delete
|
| +  // operator on the resource, which must not happen to a stack object when
|
| +  // V8 finalizes the external string. Lifetime stays with the creator.
|
| +  void Dispose() override {}
|
| +
|
| + private:
|
| +  const char* data_;
|
| +  size_t length_;
|
| +};
|
| +
|
| +// Sample text that contains UTF-8 sequences of every length (1 to 4 bytes).
|
| +const char unicode_utf8[] =
|
| +    "abc"               // 1 byte each: three ASCII characters
|
| +    "\xc3\xa4"          // 2 bytes: U+00E4 (228), a with umlaut
|
| +    "\xe2\xa8\xa0"      // 3 bytes: U+2A20 (10784), ">>" math symbol
|
| +    "\xf0\x9f\x92\xa9"  // 4 bytes: U+1F4A9 (128169), best character
|
| +    "def";              // 1 byte each: three more ASCII characters
|
| +// The same text as zero-terminated UTF-16 code units. U+1F4A9 lies outside
|
| +// the BMP and therefore appears as the surrogate pair D83D DCA9
|
| +// (55357 56489).
|
| +const uint16_t unicode_ucs2[] = {0x0061, 0x0062, 0x0063, 0x00E4,
|
| +                                 0x2A20, 0xD83D, 0xDCA9, 0x0064,
|
| +                                 0x0065, 0x0066, 0x0000};
|
| +
|
| +} // anonymous namespace
|
| +
|
| +// Smoke test: a UTF-8 streaming source holding only ASCII can be read to the
|
| +// end.
|
| +TEST(Utf8StreamAsciiOnly) {
|
| +  // Three non-empty chunks followed by the empty terminator chunk.
|
| +  const char* ascii_chunks[] = {"abc", "def", "ghi", ""};
|
| +  ChunkSource source(ascii_chunks);
|
| +  std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
|
| +      v8::internal::ScannerStream::For(
|
| +          &source, v8::ScriptCompiler::StreamedSource::UTF8));
|
| +
|
| +  // Drain the stream; getting to the end-of-input marker without dying is
|
| +  // all this test asks for.
|
| +  while (stream->Advance() !=
|
| +         v8::internal::Utf16CharacterStream::kEndOfInput) {
|
| +  }
|
| +}
|
| +
|
| +TEST(Utf8ChunkBoundaries) {
|
| +  // Test utf-8 parsing at chunk boundaries.
|
| +
|
| +  // Split the test string at each byte and pass it to the stream. This way,
|
| +  // we'll have a split at each possible boundary.
|
| +  size_t len = strlen(unicode_utf8);
|
| +  char buffer[arraysize(unicode_utf8) + 3];
|
| +  for (size_t i = 1; i < len; i++) {
|
| +    // Copy source string into buffer, splitting it at i.
|
| +    // Then add three chunks, 0..i-1, i..strlen-1, empty.
|
| +    // Resulting buffer layout:
|
| +    //   [0, i)        first part, terminated by the NUL at index i
|
| +    //   [i + 1, len]  second part, terminated by the NUL at index len + 1
|
| +    //   [len + 2]     lone NUL that serves as the empty final chunk
|
| +    strncpy(buffer, unicode_utf8, i);
|
| +    strncpy(buffer + i + 1, unicode_utf8 + i, len - i);
|
| +    buffer[i] = '\0';
|
| +    buffer[len + 1] = '\0';
|
| +    buffer[len + 2] = '\0';
|
| +    const char* chunks[] = {buffer, buffer + i + 1, buffer + len + 2};
|
| +
|
| +    ChunkSource chunk_source(chunks);
|
| +    std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
|
| +        v8::internal::ScannerStream::For(
|
| +            &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
|
| +
|
| +    // Whatever the split position, the stream must yield the reference UTF-16
|
| +    // code units. The index is named j so that it does not shadow the split
|
| +    // position i.
|
| +    for (size_t j = 0; unicode_ucs2[j]; j++) {
|
| +      CHECK_EQ(unicode_ucs2[j], stream->Advance());
|
| +    }
|
| +    CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,
|
| +             stream->Advance());
|
| +  }
|
| +}
|
| +
|
| +TEST(Utf8SingleByteChunks) {
|
| +  // Put each interior byte of the test string, one at a time, into a chunk of
|
| +  // its own, surrounded by the bytes before it and the bytes after it.
|
| +  size_t len = strlen(unicode_utf8);
|
| +  char buffer[arraysize(unicode_utf8) + 4];
|
| +  for (size_t split = 1; split < len - 1; split++) {
|
| +    // Buffer layout, every piece NUL-terminated:
|
| +    //   head    [0, split)            bytes before the isolated byte
|
| +    //   single  [split + 1]           the isolated byte
|
| +    //   tail    [split + 3, len + 2)  bytes after the isolated byte
|
| +    //   last    [len + 3]             empty final chunk
|
| +    char* const head = buffer;
|
| +    char* const single = buffer + split + 1;
|
| +    char* const tail = buffer + split + 3;
|
| +    char* const last = buffer + len + 3;
|
| +
|
| +    strncpy(head, unicode_utf8, split);
|
| +    strncpy(tail, unicode_utf8 + split + 1, len - split - 1);
|
| +    head[split] = '\0';
|
| +    single[0] = unicode_utf8[split];
|
| +    single[1] = '\0';
|
| +    buffer[len + 2] = '\0';
|
| +    last[0] = '\0';
|
| +    const char* chunks[] = {head, single, tail, last};
|
| +
|
| +    ChunkSource chunk_source(chunks);
|
| +    std::unique_ptr<v8::internal::Utf16CharacterStream> stream(
|
| +        v8::internal::ScannerStream::For(
|
| +            &chunk_source, v8::ScriptCompiler::StreamedSource::UTF8));
|
| +
|
| +    // The decoded output must equal the reference UTF-16 code units and then
|
| +    // report end of input.
|
| +    for (const uint16_t* expected = unicode_ucs2; *expected; expected++) {
|
| +      CHECK_EQ(*expected, stream->Advance());
|
| +    }
|
| +    CHECK_EQ(v8::internal::Utf16CharacterStream::kEndOfInput,
|
| +             stream->Advance());
|
| +  }
|
| +}
|
| +
|
| +#define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
|
| +
|
| +// Exercises |stream| in three passes and checks every character and stream
|
| +// position against |reference|:
|
| +//   1. read [start, end) one character at a time;
|
| +//   2. push back, re-read and push back again, walking down to end / 4;
|
| +//   3. seek forward to end / 2 and read the rest, then expect a negative
|
| +//      value from one more Advance().
|
| +// |reference| is indexed by stream position. |length| is not used here.
|
| +void TestCharacterStream(const char* reference, i::Utf16CharacterStream* stream,
|
| +                         unsigned length, unsigned start, unsigned end) {
|
| +  // Pass 1: sequential read.
|
| +  unsigned pos = start;
|
| +  while (pos < end) {
|
| +    CHECK_EQU(pos, stream->pos());
|
| +    CHECK_EQU(reference[pos], stream->Advance());
|
| +    pos++;
|
| +  }
|
| +  CHECK_EQU(end, stream->pos());
|
| +
|
| +  // Pass 2: each round nets one step backwards (back, forward, back).
|
| +  const unsigned pushback_floor = end / 4;
|
| +  while (pos > pushback_floor) {
|
| +    const int32_t pushed = reference[pos - 1];
|
| +    CHECK_EQU(pos, stream->pos());
|
| +    stream->PushBack(pushed);
|
| +    CHECK_EQU(pos - 1, stream->pos());
|
| +    const int32_t reread = stream->Advance();
|
| +    CHECK_EQU(pos, stream->pos());
|
| +    CHECK_EQ(pushed, reread);
|
| +    stream->PushBack(pushed);
|
| +    pos--;
|
| +    CHECK_EQU(pos, stream->pos());
|
| +  }
|
| +
|
| +  // Pass 3: skip ahead to the midpoint and read through to the end.
|
| +  const unsigned midpoint = end / 2;
|
| +  stream->SeekForward(midpoint - pos);
|
| +  pos = midpoint;
|
| +  while (pos < end) {
|
| +    CHECK_EQU(pos, stream->pos());
|
| +    CHECK_EQU(reference[pos], stream->Advance());
|
| +    pos++;
|
| +  }
|
| +  CHECK_EQU(pos, stream->pos());
|
| +  CHECK_LT(stream->Advance(), 0);
|
| +}
|
| +
|
| +#undef CHECK_EQU
|
| +
|
| +// Builds a character stream over [start, end) of |string| and runs
|
| +// TestCharacterStream on it.
|
| +static void TestStringCharacterStream(const char* reference,
|
| +                                      i::Handle<i::String> string,
|
| +                                      unsigned length, unsigned start,
|
| +                                      unsigned end) {
|
| +  std::unique_ptr<i::Utf16CharacterStream> stream(
|
| +      i::ScannerStream::For(string, start, end));
|
| +  TestCharacterStream(reference, stream.get(), length, start, end);
|
| +}
|
| +
|
| +// Runs TestCharacterStream on a streaming source over the |data_length| bytes
|
| +// at |data|, first delivered as a single chunk and then as many chunks.
|
| +static void TestStreamingCharacterStream(
|
| +    const char* reference, const uint8_t* data, size_t data_length,
|
| +    v8::ScriptCompiler::StreamedSource::Encoding encoding, unsigned length,
|
| +    unsigned start, unsigned end) {
|
| +  const bool kExtraChunky[] = {false, true};
|
| +  for (bool extra_chunky : kExtraChunky) {
|
| +    // The source is declared before the stream so that it outlives the
|
| +    // stream that points at it.
|
| +    ChunkSource source(data, data_length, extra_chunky);
|
| +    std::unique_ptr<i::Utf16CharacterStream> stream(
|
| +        i::ScannerStream::For(&source, encoding));
|
| +    TestCharacterStream(reference, stream.get(), length, start, end);
|
| +  }
|
| +}
|
| +
|
| +// Runs TestCharacterStream against every stream flavour: external and generic
|
| +// strings (1-byte and 2-byte) and, when the whole input is covered, streaming
|
| +// sources in ONE_BYTE, UTF8 and TWO_BYTE encoding.
|
| +//
|
| +// |one_byte_source| holds |length| characters (embedded NULs allowed).
|
| +// |start| / |end| select a sub-range; an |end| of 0 means "up to |length|".
|
| +void TestCharacterStreams(const char* one_byte_source, unsigned length,
|
| +                          unsigned start = 0, unsigned end = 0) {
|
| +  if (end == 0) end = length;
|
| +
|
| +  i::Isolate* isolate = CcTest::i_isolate();
|
| +  i::Factory* factory = isolate->factory();
|
| +
|
| +  // Widen the input to 16-bit code units for the 2-byte cases.
|
| +  std::unique_ptr<i::uc16[]> uc16_buffer(new i::uc16[length]);
|
| +  for (unsigned i = 0; i < length; i++) {
|
| +    uc16_buffer[i] = static_cast<i::uc16>(one_byte_source[i]);
|
| +  }
|
| +  i::Vector<const i::uc16> two_byte_vector(uc16_buffer.get(),
|
| +                                           static_cast<int>(length));
|
| +  i::Vector<const char> one_byte_vector(one_byte_source,
|
| +                                        static_cast<int>(length));
|
| +
|
| +  // 2-byte external string
|
| +  {
|
| +    TestExternalResource resource(uc16_buffer.get(), length);
|
| +    i::Handle<i::String> uc16_string(
|
| +        factory->NewExternalStringFromTwoByte(&resource).ToHandleChecked());
|
| +    TestStringCharacterStream(one_byte_source, uc16_string, length, start,
|
| +                              end);
|
| +  }
|
| +
|
| +  // 1-byte generic string; created here, streamed further down.
|
| +  i::Handle<i::String> one_byte_string =
|
| +      factory->NewStringFromAscii(one_byte_vector).ToHandleChecked();
|
| +
|
| +  // 1-byte external string
|
| +  {
|
| +    TestExternalOneByteResource one_byte_resource(one_byte_source, length);
|
| +    i::Handle<i::String> ext_one_byte_string(
|
| +        factory->NewExternalStringFromOneByte(&one_byte_resource)
|
| +            .ToHandleChecked());
|
| +    TestStringCharacterStream(one_byte_source, ext_one_byte_string, length,
|
| +                              start, end);
|
| +  }
|
| +
|
| +  // 1-byte generic i::String
|
| +  TestStringCharacterStream(one_byte_source, one_byte_string, length, start,
|
| +                            end);
|
| +
|
| +  // 2-byte generic i::String
|
| +  {
|
| +    i::Handle<i::String> two_byte_string =
|
| +        factory->NewStringFromTwoByte(two_byte_vector).ToHandleChecked();
|
| +    TestStringCharacterStream(one_byte_source, two_byte_string, length, start,
|
| +                              end);
|
| +  }
|
| +
|
| +  // Streaming has no notion of start/end, so let's skip streaming tests for
|
| +  // these cases.
|
| +  if (start != 0 || end != length) return;
|
| +
|
| +  // Raw byte views of the 1-byte and 2-byte data for the streaming sources.
|
| +  const uint8_t* one_byte_data =
|
| +      reinterpret_cast<const uint8_t*>(one_byte_vector.begin());
|
| +  const size_t one_byte_size =
|
| +      reinterpret_cast<const uint8_t*>(one_byte_vector.end()) - one_byte_data;
|
| +  const uint8_t* two_byte_data =
|
| +      reinterpret_cast<const uint8_t*>(two_byte_vector.begin());
|
| +  const size_t two_byte_size =
|
| +      reinterpret_cast<const uint8_t*>(two_byte_vector.end()) - two_byte_data;
|
| +
|
| +  // 1-byte streaming stream, single + many chunks.
|
| +  TestStreamingCharacterStream(one_byte_source, one_byte_data, one_byte_size,
|
| +                               v8::ScriptCompiler::StreamedSource::ONE_BYTE,
|
| +                               length, start, end);
|
| +
|
| +  // UTF-8 streaming stream, single + many chunks.
|
| +  TestStreamingCharacterStream(one_byte_source, one_byte_data, one_byte_size,
|
| +                               v8::ScriptCompiler::StreamedSource::UTF8,
|
| +                               length, start, end);
|
| +
|
| +  // 2-byte streaming stream, single + many chunks. The many-chunk split also
|
| +  // cuts through the middle of 2-byte code units.
|
| +  TestStreamingCharacterStream(one_byte_source, two_byte_data, two_byte_size,
|
| +                               v8::ScriptCompiler::StreamedSource::TWO_BYTE,
|
| +                               length, start, end);
|
| +}
|
| +
|
| +// Runs the full stream matrix over short inputs and over a 4k buffer.
|
| +TEST(CharacterStreams) {
|
| +  v8::Isolate* isolate = CcTest::isolate();
|
| +  v8::HandleScope handles(isolate);
|
| +  v8::Local<v8::Context> context = v8::Context::New(isolate);
|
| +  v8::Context::Scope context_scope(context);
|
| +
|
| +  // Short inputs: plain text, embedded NUL and control characters, a single
|
| +  // NUL, and the empty string.
|
| +  TestCharacterStreams("abcdefghi", 9);
|
| +  TestCharacterStreams("abc\0\n\r\x7f", 7);
|
| +  TestCharacterStreams("\0", 1);
|
| +  TestCharacterStreams("", 0);
|
| +
|
| +  // 4k large buffer, filled with a repeating 0..127 pattern and terminated by
|
| +  // a NUL that is not part of the tested length.
|
| +  const unsigned kLargeLength = 4096;
|
| +  char large[kLargeLength + 1];
|
| +  for (unsigned pos = 0; pos < kLargeLength; pos++) {
|
| +    large[pos] = static_cast<char>(pos & 0x7F);
|
| +  }
|
| +  large[kLargeLength] = '\0';
|
| +
|
| +  // Whole buffer first, then only the sub-range [576, 3298).
|
| +  TestCharacterStreams(large, kLargeLength);
|
| +  TestCharacterStreams(large, kLargeLength, 576, 3298);
|
| +}
|
|
|