Chromium Code Reviews| Index: Source/bindings/core/v8/ScriptStreamer.cpp |
| diff --git a/Source/bindings/core/v8/ScriptStreamer.cpp b/Source/bindings/core/v8/ScriptStreamer.cpp |
| index ae7a28746e76edc3fa5f412748b85450a8ec0458..8134c98bf44103567bc3d6bb91c045efc41f8c3b 100644 |
| --- a/Source/bindings/core/v8/ScriptStreamer.cpp |
| +++ b/Source/bindings/core/v8/ScriptStreamer.cpp |
| @@ -12,6 +12,7 @@ |
| #include "core/dom/PendingScript.h" |
| #include "core/fetch/ScriptResource.h" |
| #include "core/frame/Settings.h" |
| +#include "core/html/parser/TextResourceDecoder.h" |
| #include "platform/SharedBuffer.h" |
| #include "platform/TraceEvent.h" |
| #include "public/platform/Platform.h" |
| @@ -123,10 +124,10 @@ public: |
| m_dataQueue.finish(); |
| } |
| - void didReceiveData() |
| + void didReceiveData(size_t lengthOfBOM) |
| { |
| ASSERT(isMainThread()); |
| - prepareDataOnMainThread(); |
| + prepareDataOnMainThread(lengthOfBOM); |
| } |
| void cancel() |
| @@ -145,7 +146,7 @@ public: |
| } |
| private: |
| - void prepareDataOnMainThread() |
| + void prepareDataOnMainThread(size_t lengthOfBOM) |
| { |
| ASSERT(isMainThread()); |
| // The Resource must still be alive; otherwise we should've cancelled |
| @@ -153,6 +154,9 @@ private: |
| // waiting). |
| ASSERT(m_streamer->resource()); |
| + // BOM can only occur at the beginning of the data. |
| + ASSERT(lengthOfBOM == 0 || m_dataPosition == 0); |
| + |
| if (m_streamer->resource()->cachedMetadata(V8ScriptRunner::tagForCodeCache())) { |
| // The resource has a code cache, so it's unnecessary to stream and |
| // parse the code. Cancel the streaming and resume the non-streaming |
| @@ -169,8 +173,6 @@ private: |
| if (!m_resourceBuffer) { |
| // We don't have a buffer yet. Try to get it from the resource. |
| SharedBuffer* buffer = m_streamer->resource()->resourceBuffer(); |
| - if (!buffer) |
| - return; |
|
marja
2014/11/07 13:32:35
This was unnecessary since we've already accessed
|
| m_resourceBuffer = RefPtr<SharedBuffer>(buffer); |
| } |
| @@ -190,12 +192,15 @@ private: |
| } |
| // Copy the data chunks into a new buffer, since we're going to give the |
| // data to a background thread. |
| - if (dataLength > 0) { |
| + if (dataLength > lengthOfBOM) { |
| + dataLength -= lengthOfBOM; |
| uint8_t* copiedData = new uint8_t[dataLength]; |
| unsigned offset = 0; |
| for (size_t i = 0; i < chunks.size(); ++i) { |
| - memcpy(copiedData + offset, chunks[i], chunkLengths[i]); |
| - offset += chunkLengths[i]; |
| + memcpy(copiedData + offset, chunks[i] + lengthOfBOM, chunkLengths[i] - lengthOfBOM); |
| + offset += chunkLengths[i] - lengthOfBOM; |
| + // BOM is only in the first chunk |
| + lengthOfBOM = 0; |
| } |
| m_dataQueue.produce(copiedData, dataLength); |
| } |
| @@ -286,21 +291,32 @@ void ScriptStreamer::notifyAppendData(ScriptResource* resource) |
| if (m_streamingSuppressed) |
| return; |
| } |
| + size_t lengthOfBOM = 0; |
| if (!m_haveEnoughDataForStreaming) { |
| // Even if the first data chunk is small, the script can still be big |
| // enough - wait until the next data chunk comes before deciding whether |
| // to start the streaming. |
| - if (resource->resourceBuffer()->size() < kSmallScriptThreshold) { |
| + ASSERT(resource->resourceBuffer()); |
| + if (resource->resourceBuffer()->size() < kSmallScriptThreshold) |
| return; |
| - } |
| m_haveEnoughDataForStreaming = true; |
| const char* histogramName = startedStreamingHistogramName(m_scriptType); |
| // Encoding should be detected only when we have some data. It's |
| // possible that resource->encoding() returns a different encoding |
| - // before the loading has started and after we got some data. |
| - WTF::TextEncoding textEncoding(resource->encoding()); |
| - const char* encodingName = textEncoding.name(); |
| + // before the loading has started and after we got some data. In |
| + // addition, check for byte order marks. Note that checking the byte |
| + // order mark might change the encoding. We cannot decode the full text |
| + // here, because it might contain incomplete UTF-8 characters. |
| + const char* data = 0; |
| + unsigned length = resource->resourceBuffer()->getSomeData(data, 0); |
| + |
| + OwnPtr<TextResourceDecoder> decoder(TextResourceDecoder::create("application/javascript", resource->encoding())); |
| + lengthOfBOM = decoder->checkForBOM(data, length); |
|
haraken
2014/11/07 17:39:52
At this point, we're not sure how large the |data|
marja
2014/11/10 10:31:27
Added a comment here: we have at least kSmallScrip
|
| + |
| + // Maybe the encoding changed because we saw the BOM; get the encoding |
| + // from the decoder. |
| + const char* encodingName = decoder->encoding().name(); |
|
haraken
2014/11/07 17:39:52
Not directly related to this CL, isn't it possible
marja
2014/11/10 10:31:27
Afaics, the rules for script encodings are: 1) if
|
| // Here's a list of encodings we can use for streaming. These are |
| // the canonical names. |
| @@ -313,9 +329,9 @@ void ScriptStreamer::notifyAppendData(ScriptResource* resource) |
| encoding = v8::ScriptCompiler::StreamedSource::UTF8; |
| } else { |
| // We don't stream other encodings; especially we don't stream two |
| - // byte scripts to avoid the handling of byte order marks. Most |
| - // scripts are Latin1 or UTF-8 anyway, so this should be enough for |
| - // most real world purposes. |
| + // byte scripts to avoid the handling of endianness. Most scripts |
| + // are Latin1 or UTF-8 anyway, so this should be enough for most |
| + // real world purposes. |
| suppressStreaming(); |
| blink::Platform::current()->histogramEnumeration(histogramName, 0, 2); |
| return; |
| @@ -363,7 +379,7 @@ void ScriptStreamer::notifyAppendData(ScriptResource* resource) |
| blink::Platform::current()->histogramEnumeration(histogramName, 1, 2); |
| } |
| if (m_stream) |
| - m_stream->didReceiveData(); |
| + m_stream->didReceiveData(lengthOfBOM); |
| } |
| void ScriptStreamer::notifyFinished(Resource* resource) |