Index: Source/bindings/core/v8/ScriptStreamer.cpp |
diff --git a/Source/bindings/core/v8/ScriptStreamer.cpp b/Source/bindings/core/v8/ScriptStreamer.cpp |
index ae7a28746e76edc3fa5f412748b85450a8ec0458..8134c98bf44103567bc3d6bb91c045efc41f8c3b 100644 |
--- a/Source/bindings/core/v8/ScriptStreamer.cpp |
+++ b/Source/bindings/core/v8/ScriptStreamer.cpp |
@@ -12,6 +12,7 @@ |
#include "core/dom/PendingScript.h" |
#include "core/fetch/ScriptResource.h" |
#include "core/frame/Settings.h" |
+#include "core/html/parser/TextResourceDecoder.h" |
#include "platform/SharedBuffer.h" |
#include "platform/TraceEvent.h" |
#include "public/platform/Platform.h" |
@@ -123,10 +124,10 @@ public: |
m_dataQueue.finish(); |
} |
- void didReceiveData() |
+ void didReceiveData(size_t lengthOfBOM) |
{ |
ASSERT(isMainThread()); |
- prepareDataOnMainThread(); |
+ prepareDataOnMainThread(lengthOfBOM); |
} |
void cancel() |
@@ -145,7 +146,7 @@ public: |
} |
private: |
- void prepareDataOnMainThread() |
+ void prepareDataOnMainThread(size_t lengthOfBOM) |
{ |
ASSERT(isMainThread()); |
// The Resource must still be alive; otherwise we should've cancelled |
@@ -153,6 +154,9 @@ private: |
// waiting). |
ASSERT(m_streamer->resource()); |
+ // BOM can only occur at the beginning of the data. |
+ ASSERT(lengthOfBOM == 0 || m_dataPosition == 0); |
+ |
if (m_streamer->resource()->cachedMetadata(V8ScriptRunner::tagForCodeCache())) { |
// The resource has a code cache, so it's unnecessary to stream and |
// parse the code. Cancel the streaming and resume the non-streaming |
@@ -169,8 +173,6 @@ private: |
if (!m_resourceBuffer) { |
// We don't have a buffer yet. Try to get it from the resource. |
SharedBuffer* buffer = m_streamer->resource()->resourceBuffer(); |
- if (!buffer) |
- return; |
marja
2014/11/07 13:32:35
This was unnecessary since we've already accessed
|
m_resourceBuffer = RefPtr<SharedBuffer>(buffer); |
} |
@@ -190,12 +192,15 @@ private: |
} |
// Copy the data chunks into a new buffer, since we're going to give the |
// data to a background thread. |
- if (dataLength > 0) { |
+ if (dataLength > lengthOfBOM) { |
+ dataLength -= lengthOfBOM; |
uint8_t* copiedData = new uint8_t[dataLength]; |
unsigned offset = 0; |
for (size_t i = 0; i < chunks.size(); ++i) { |
- memcpy(copiedData + offset, chunks[i], chunkLengths[i]); |
- offset += chunkLengths[i]; |
+ memcpy(copiedData + offset, chunks[i] + lengthOfBOM, chunkLengths[i] - lengthOfBOM); |
+ offset += chunkLengths[i] - lengthOfBOM; |
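+ // The BOM is only in the first chunk; later chunks are copied whole. |
+ // NOTE(review): this assumes the first chunk is at least lengthOfBOM |
+ // bytes long; otherwise the subtraction above underflows - confirm. |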
+ lengthOfBOM = 0; |
} |
m_dataQueue.produce(copiedData, dataLength); |
} |
@@ -286,21 +291,32 @@ void ScriptStreamer::notifyAppendData(ScriptResource* resource) |
if (m_streamingSuppressed) |
return; |
} |
+ size_t lengthOfBOM = 0; |
if (!m_haveEnoughDataForStreaming) { |
// Even if the first data chunk is small, the script can still be big |
// enough - wait until the next data chunk comes before deciding whether |
// to start the streaming. |
- if (resource->resourceBuffer()->size() < kSmallScriptThreshold) { |
+ ASSERT(resource->resourceBuffer()); |
+ if (resource->resourceBuffer()->size() < kSmallScriptThreshold) |
return; |
- } |
m_haveEnoughDataForStreaming = true; |
const char* histogramName = startedStreamingHistogramName(m_scriptType); |
// Encoding should be detected only when we have some data. It's |
// possible that resource->encoding() returns a different encoding |
- // before the loading has started and after we got some data. |
- WTF::TextEncoding textEncoding(resource->encoding()); |
- const char* encodingName = textEncoding.name(); |
+ // before the loading has started and after we got some data. In |
+ // addition, check for byte order marks. Note that checking the byte |
+ // order mark might change the encoding. We cannot decode the full text |
+ // here, because it might contain incomplete UTF-8 characters. |
+ const char* data = 0; |
+ unsigned length = resource->resourceBuffer()->getSomeData(data, 0); |
+ |
+ OwnPtr<TextResourceDecoder> decoder(TextResourceDecoder::create("application/javascript", resource->encoding())); |
+ lengthOfBOM = decoder->checkForBOM(data, length); |
haraken
2014/11/07 17:39:52
At this point, we're not sure how large the |data|
marja
2014/11/10 10:31:27
Added a comment here: we have at least kSmallScrip
|
+ |
+ // Maybe the encoding changed because we saw the BOM; get the encoding |
+ // from the decoder. |
+ const char* encodingName = decoder->encoding().name(); |
haraken
2014/11/07 17:39:52
Not directly related to this CL, isn't it possible
marja
2014/11/10 10:31:27
Afaics, the rules for script encodings are: 1) if
|
// Here's a list of encodings we can use for streaming. These are |
// the canonical names. |
@@ -313,9 +329,9 @@ void ScriptStreamer::notifyAppendData(ScriptResource* resource) |
encoding = v8::ScriptCompiler::StreamedSource::UTF8; |
} else { |
// We don't stream other encodings; especially we don't stream two |
- // byte scripts to avoid the handling of byte order marks. Most |
- // scripts are Latin1 or UTF-8 anyway, so this should be enough for |
- // most real world purposes. |
+ // byte scripts to avoid the handling of endianness. Most scripts |
+ // are Latin1 or UTF-8 anyway, so this should be enough for most |
+ // real world purposes. |
suppressStreaming(); |
blink::Platform::current()->histogramEnumeration(histogramName, 0, 2); |
return; |
@@ -363,7 +379,7 @@ void ScriptStreamer::notifyAppendData(ScriptResource* resource) |
blink::Platform::current()->histogramEnumeration(histogramName, 1, 2); |
} |
if (m_stream) |
- m_stream->didReceiveData(); |
+ m_stream->didReceiveData(lengthOfBOM); |
} |
void ScriptStreamer::notifyFinished(Resource* resource) |