Source/bindings/core/v8/ScriptStreamer.cpp - Issue 708093002: Script streaming: remove byte order marks, detect encoding based on them.

Unified Diff: Source/bindings/core/v8/ScriptStreamer.cpp

Issue 708093002: Script streaming: remove byte order marks, detect encoding based on them. (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: Source/bindings/core/v8/ScriptStreamer.cpp

diff --git a/Source/bindings/core/v8/ScriptStreamer.cpp b/Source/bindings/core/v8/ScriptStreamer.cpp

index ae7a28746e76edc3fa5f412748b85450a8ec0458..8134c98bf44103567bc3d6bb91c045efc41f8c3b 100644

--- a/Source/bindings/core/v8/ScriptStreamer.cpp

+++ b/Source/bindings/core/v8/ScriptStreamer.cpp

@@ -12,6 +12,7 @@

#include "core/dom/PendingScript.h"

#include "core/fetch/ScriptResource.h"

#include "core/frame/Settings.h"

+#include "core/html/parser/TextResourceDecoder.h"

#include "platform/SharedBuffer.h"

#include "platform/TraceEvent.h"

#include "public/platform/Platform.h"

@@ -123,10 +124,10 @@ public:

m_dataQueue.finish();

}

- void didReceiveData()

+ void didReceiveData(size_t lengthOfBOM)

{

ASSERT(isMainThread());

- prepareDataOnMainThread();

+ prepareDataOnMainThread(lengthOfBOM);

}

void cancel()

@@ -145,7 +146,7 @@ public:

}

private:

- void prepareDataOnMainThread()

+ void prepareDataOnMainThread(size_t lengthOfBOM)

{

ASSERT(isMainThread());

// The Resource must still be alive; otherwise we should've cancelled

@@ -153,6 +154,9 @@ private:

// waiting).

ASSERT(m_streamer->resource());

+ // BOM can only occur at the beginning of the data.

+ ASSERT(lengthOfBOM == 0 || m_dataPosition == 0);

if (m_streamer->resource()->cachedMetadata(V8ScriptRunner::tagForCodeCache())) {

// The resource has a code cache, so it's unnecessary to stream and

// parse the code. Cancel the streaming and resume the non-streaming

@@ -169,8 +173,6 @@ private:

if (!m_resourceBuffer) {

// We don't have a buffer yet. Try to get it from the resource.

SharedBuffer* buffer = m_streamer->resource()->resourceBuffer();

- if (!buffer)

- return;

marja 2014/11/07 13:32:35 This was unnecessary since we've already accessed

m_resourceBuffer = RefPtr<SharedBuffer>(buffer);

}

@@ -190,12 +192,15 @@ private:

}

// Copy the data chunks into a new buffer, since we're going to give the

// data to a background thread.

- if (dataLength > 0) {

+ if (dataLength > lengthOfBOM) {

+ dataLength -= lengthOfBOM;

uint8_t* copiedData = new uint8_t[dataLength];

unsigned offset = 0;

for (size_t i = 0; i < chunks.size(); ++i) {

- memcpy(copiedData + offset, chunks[i], chunkLengths[i]);

- offset += chunkLengths[i];

+ memcpy(copiedData + offset, chunks[i] + lengthOfBOM, chunkLengths[i] - lengthOfBOM);

+ offset += chunkLengths[i] - lengthOfBOM;

+ // BOM is only in the first chunk

+ lengthOfBOM = 0;

}

m_dataQueue.produce(copiedData, dataLength);

}

@@ -286,21 +291,32 @@ void ScriptStreamer::notifyAppendData(ScriptResource* resource)

if (m_streamingSuppressed)

return;

}

+ size_t lengthOfBOM = 0;

if (!m_haveEnoughDataForStreaming) {

// Even if the first data chunk is small, the script can still be big

// enough - wait until the next data chunk comes before deciding whether

// to start the streaming.

- if (resource->resourceBuffer()->size() < kSmallScriptThreshold) {

+ ASSERT(resource->resourceBuffer());

+ if (resource->resourceBuffer()->size() < kSmallScriptThreshold)

return;

- }

m_haveEnoughDataForStreaming = true;

const char* histogramName = startedStreamingHistogramName(m_scriptType);

// Encoding should be detected only when we have some data. It's

// possible that resource->encoding() returns a different encoding

- // before the loading has started and after we got some data.

- WTF::TextEncoding textEncoding(resource->encoding());

- const char* encodingName = textEncoding.name();

+ // before the loading has started and after we got some data. In

+ // addition, check for byte order marks. Note that checking the byte

+ // order mark might change the encoding. We cannot decode the full text

+ // here, because it might contain incomplete UTF-8 characters.

+ const char* data = 0;

+ unsigned length = resource->resourceBuffer()->getSomeData(data, 0);

+ OwnPtr<TextResourceDecoder> decoder(TextResourceDecoder::create("application/javascript", resource->encoding()));

+ lengthOfBOM = decoder->checkForBOM(data, length);

haraken 2014/11/07 17:39:52 At this point, we're not sure how large the |data|

marja 2014/11/10 10:31:27 Added a comment here: we have at least kSmallScrip

+ // Maybe the encoding changed because we saw the BOM; get the encoding

+ // from the decoder.

+ const char* encodingName = decoder->encoding().name();

haraken 2014/11/07 17:39:52 Not directly related to this CL, isn't it possible

marja 2014/11/10 10:31:27 Afaics, the rules for script encodings are: 1) if

// Here's a list of encodings we can use for streaming. These are

// the canonical names.

@@ -313,9 +329,9 @@ void ScriptStreamer::notifyAppendData(ScriptResource* resource)

encoding = v8::ScriptCompiler::StreamedSource::UTF8;

} else {

// We don't stream other encodings; especially we don't stream two

- // byte scripts to avoid the handling of byte order marks. Most

- // scripts are Latin1 or UTF-8 anyway, so this should be enough for

- // most real world purposes.

+ // byte scripts to avoid the handling of endianness. Most scripts

+ // are Latin1 or UTF-8 anyway, so this should be enough for most

+ // real world purposes.

suppressStreaming();

blink::Platform::current()->histogramEnumeration(histogramName, 0, 2);

return;

@@ -363,7 +379,7 @@ void ScriptStreamer::notifyAppendData(ScriptResource* resource)

blink::Platform::current()->histogramEnumeration(histogramName, 1, 2);

}

if (m_stream)

- m_stream->didReceiveData();

+ m_stream->didReceiveData(lengthOfBOM);

}

void ScriptStreamer::notifyFinished(Resource* resource)

« no previous file with comments | « no previous file | Source/bindings/core/v8/ScriptStreamerTest.cpp » ('j') | Source/core/html/parser/TextResourceDecoder.h » ('J')