| Index: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
|
| diff --git a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
|
| index 1f43e014c4e15b83460855cceb9f83b6405d5ebc..d1797998cd2de4e3ffa7a3649803aae2e71074e1 100644
|
| --- a/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
|
| +++ b/third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp
|
| @@ -1,6 +1,7 @@
|
| /*
|
| Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
|
| - Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved.
|
| + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All
|
| + rights reserved.
|
| Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com)
|
|
|
| This library is free software; you can redistribute it and/or
|
| @@ -85,9 +86,9 @@ static inline bool bytesEqual(const char* p,
|
| p[5] == b5 && p[6] == b6 && p[7] == b7 && p[8] == b8 && p[9] == b9;
|
| }
|
|
|
| -// You might think we should put these find functions elsewhere, perhaps with the
|
| -// similar functions that operate on UChar, but arguably only the decoder has
|
| -// a reason to process strings of char rather than UChar.
|
| +// You might think we should put these find functions elsewhere, perhaps with
|
| +// the similar functions that operate on UChar, but arguably only the decoder
|
| +// has a reason to process strings of char rather than UChar.
|
|
|
| static int find(const char* subject, size_t subjectLength, const char* target) {
|
| size_t targetLength = strlen(target);
|
| @@ -129,8 +130,8 @@ TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(
|
| const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(
|
| ContentType contentType,
|
| const WTF::TextEncoding& specifiedDefaultEncoding) {
|
| - // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII
|
| - // for text/xml. This matches Firefox.
|
| + // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8
|
| + // instead of US-ASCII for text/xml. This matches Firefox.
|
| if (contentType == XMLContent)
|
| return UTF8Encoding();
|
| if (!specifiedDefaultEncoding.isValid())
|
| @@ -161,12 +162,13 @@ TextResourceDecoder::~TextResourceDecoder() {}
|
|
|
| void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding,
|
| EncodingSource source) {
|
| - // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings).
|
| + // In case the encoding didn't exist, we keep the old one (helps some sites
|
| + // specifying invalid encodings).
|
| if (!encoding.isValid())
|
| return;
|
|
|
| - // When encoding comes from meta tag (i.e. it cannot be XML files sent via XHR),
|
| - // treat x-user-defined as windows-1252 (bug 18270)
|
| + // When encoding comes from meta tag (i.e. it cannot be XML files sent via
|
| + // XHR), treat x-user-defined as windows-1252 (bug 18270)
|
| if (source == EncodingFromMetaTag &&
|
| !strcasecmp(encoding.name(), "x-user-defined"))
|
| m_encoding = "windows-1252";
|
| @@ -220,8 +222,8 @@ static int findXMLEncoding(const char* str, int len, int& encodingLength) {
|
| }
|
|
|
| size_t TextResourceDecoder::checkForBOM(const char* data, size_t len) {
|
| - // Check for UTF-16/32 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding.
|
| - // We let it override even a user-chosen encoding.
|
| + // Check for UTF-16/32 or UTF-8 BOM at the beginning, which is a sure
|
| + // sign of a Unicode encoding. We let it override even a user-chosen encoding.
|
| ASSERT(!m_checkedForBOM);
|
|
|
| size_t lengthOfBOM = 0;
|
| @@ -337,20 +339,24 @@ bool TextResourceDecoder::checkForXMLCharset(const char* data,
|
| if (m_buffer.size() < minimumLengthOfXMLDeclaration)
|
| return false;
|
|
|
| - // Handle XML declaration, which can have encoding in it. This encoding is honored even for HTML documents.
|
| - // It is an error for an XML declaration not to be at the start of an XML document, and it is ignored in HTML documents in such case.
|
| + // Handle XML declaration, which can have encoding in it. This encoding is
|
| + // honored even for HTML documents. It is an error for an XML declaration not
|
| + // to be at the start of an XML document, and it is ignored in HTML documents
|
| + // in such a case.
|
| if (bytesEqual(ptr, '<', '?', 'x', 'm', 'l')) {
|
| const char* xmlDeclarationEnd = ptr;
|
| while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>')
|
| ++xmlDeclarationEnd;
|
| if (xmlDeclarationEnd == pEnd)
|
| return false;
|
| - // No need for +1, because we have an extra "?" to lose at the end of XML declaration.
|
| + // No need for +1, because we have an extra "?" to lose at the end of XML
|
| + // declaration.
|
| int len = 0;
|
| int pos = findXMLEncoding(ptr, xmlDeclarationEnd - ptr, len);
|
| if (pos != -1)
|
| setEncoding(findTextEncoding(ptr + pos, len), EncodingFromXMLHeader);
|
| - // continue looking for a charset - it may be specified in an HTTP-Equiv meta
|
| + // continue looking for a charset - it may be specified in an HTTP-Equiv
|
| + // meta
|
| } else if (bytesEqual(ptr, '<', 0, '?', 0, 'x', 0)) {
|
| setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding);
|
| } else if (bytesEqual(ptr, 0, '<', 0, '?', 0, 'x')) {
|
| @@ -421,7 +427,8 @@ String TextResourceDecoder::decode(const char* data, size_t len) {
|
| return emptyString();
|
| }
|
|
|
| - // We check XML declaration in HTML content only if there is enough data available
|
| + // We check XML declaration in HTML content only if there is enough data
|
| + // available
|
| if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) ||
|
| m_contentType == XMLContent) &&
|
| !m_checkedForXMLCharset) {
|
|
|