| OLD | NEW |
| 1 /* | 1 /* |
| 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
| 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. | 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All |
| 4 rights reserved. |
| 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) | 5 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) |
| 5 | 6 |
| 6 This library is free software; you can redistribute it and/or | 7 This library is free software; you can redistribute it and/or |
| 7 modify it under the terms of the GNU Library General Public | 8 modify it under the terms of the GNU Library General Public |
| 8 License as published by the Free Software Foundation; either | 9 License as published by the Free Software Foundation; either |
| 9 version 2 of the License, or (at your option) any later version. | 10 version 2 of the License, or (at your option) any later version. |
| 10 | 11 |
| 11 This library is distributed in the hope that it will be useful, | 12 This library is distributed in the hope that it will be useful, |
| 12 but WITHOUT ANY WARRANTY; without even the implied warranty of | 13 but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 78 char b4, | 79 char b4, |
| 79 char b5, | 80 char b5, |
| 80 char b6, | 81 char b6, |
| 81 char b7, | 82 char b7, |
| 82 char b8, | 83 char b8, |
| 83 char b9) { | 84 char b9) { |
| 84 return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 && | 85 return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 && |
| 85 p[5] == b5 && p[6] == b6 && p[7] == b7 && p[8] == b8 && p[9] == b9; | 86 p[5] == b5 && p[6] == b6 && p[7] == b7 && p[8] == b8 && p[9] == b9; |
| 86 } | 87 } |
| 87 | 88 |
| 88 // You might think we should put these find functions elsewhere, perhaps with the | 89 // You might think we should put these find functions elsewhere, perhaps with |
| 89 // similar functions that operate on UChar, but arguably only the decoder has | 90 // the similar functions that operate on UChar, but arguably only the decoder |
| 90 // a reason to process strings of char rather than UChar. | 91 // has a reason to process strings of char rather than UChar. |
| 91 | 92 |
| 92 static int find(const char* subject, size_t subjectLength, const char* target) { | 93 static int find(const char* subject, size_t subjectLength, const char* target) { |
| 93 size_t targetLength = strlen(target); | 94 size_t targetLength = strlen(target); |
| 94 if (targetLength > subjectLength) | 95 if (targetLength > subjectLength) |
| 95 return -1; | 96 return -1; |
| 96 for (size_t i = 0; i <= subjectLength - targetLength; ++i) { | 97 for (size_t i = 0; i <= subjectLength - targetLength; ++i) { |
| 97 bool match = true; | 98 bool match = true; |
| 98 for (size_t j = 0; j < targetLength; ++j) { | 99 for (size_t j = 0; j < targetLength; ++j) { |
| 99 if (subject[i + j] != target[j]) { | 100 if (subject[i + j] != target[j]) { |
| 100 match = false; | 101 match = false; |
| (...skipping 21 matching lines...) Expand all Loading... |
| 122 if (equalIgnoringCase(mimeType, "text/html")) | 123 if (equalIgnoringCase(mimeType, "text/html")) |
| 123 return HTMLContent; | 124 return HTMLContent; |
| 124 if (DOMImplementation::isXMLMIMEType(mimeType)) | 125 if (DOMImplementation::isXMLMIMEType(mimeType)) |
| 125 return XMLContent; | 126 return XMLContent; |
| 126 return PlainTextContent; | 127 return PlainTextContent; |
| 127 } | 128 } |
| 128 | 129 |
| 129 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding( | 130 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding( |
| 130 ContentType contentType, | 131 ContentType contentType, |
| 131 const WTF::TextEncoding& specifiedDefaultEncoding) { | 132 const WTF::TextEncoding& specifiedDefaultEncoding) { |
| 132 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII | 133 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 |
| 133 // for text/xml. This matches Firefox. | 134 // instead of US-ASCII for text/xml. This matches Firefox. |
| 134 if (contentType == XMLContent) | 135 if (contentType == XMLContent) |
| 135 return UTF8Encoding(); | 136 return UTF8Encoding(); |
| 136 if (!specifiedDefaultEncoding.isValid()) | 137 if (!specifiedDefaultEncoding.isValid()) |
| 137 return Latin1Encoding(); | 138 return Latin1Encoding(); |
| 138 return specifiedDefaultEncoding; | 139 return specifiedDefaultEncoding; |
| 139 } | 140 } |
| 140 | 141 |
| 141 TextResourceDecoder::TextResourceDecoder( | 142 TextResourceDecoder::TextResourceDecoder( |
| 142 const String& mimeType, | 143 const String& mimeType, |
| 143 const WTF::TextEncoding& specifiedDefaultEncoding, | 144 const WTF::TextEncoding& specifiedDefaultEncoding, |
| (...skipping 10 matching lines...) Expand all Loading... |
| 154 m_sawError(false), | 155 m_sawError(false), |
| 155 m_encodingDetectionOption(encodingDetectionOption) { | 156 m_encodingDetectionOption(encodingDetectionOption) { |
| 156 if (m_encodingDetectionOption == AlwaysUseUTF8ForText) | 157 if (m_encodingDetectionOption == AlwaysUseUTF8ForText) |
| 157 ASSERT(m_contentType == PlainTextContent && m_encoding == UTF8Encoding()); | 158 ASSERT(m_contentType == PlainTextContent && m_encoding == UTF8Encoding()); |
| 158 } | 159 } |
| 159 | 160 |
| 160 TextResourceDecoder::~TextResourceDecoder() {} | 161 TextResourceDecoder::~TextResourceDecoder() {} |
| 161 | 162 |
| 162 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, | 163 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, |
| 163 EncodingSource source) { | 164 EncodingSource source) { |
| 164 // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings). | 165 // In case the encoding didn't exist, we keep the old one (helps some sites |
| 166 // specifying invalid encodings). |
| 165 if (!encoding.isValid()) | 167 if (!encoding.isValid()) |
| 166 return; | 168 return; |
| 167 | 169 |
| 168 // When encoding comes from meta tag (i.e. it cannot be XML files sent via XHR), | 170 // When encoding comes from meta tag (i.e. it cannot be XML files sent via |
| 169 // treat x-user-defined as windows-1252 (bug 18270) | 171 // XHR), treat x-user-defined as windows-1252 (bug 18270) |
| 170 if (source == EncodingFromMetaTag && | 172 if (source == EncodingFromMetaTag && |
| 171 !strcasecmp(encoding.name(), "x-user-defined")) | 173 !strcasecmp(encoding.name(), "x-user-defined")) |
| 172 m_encoding = "windows-1252"; | 174 m_encoding = "windows-1252"; |
| 173 else if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader || | 175 else if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader || |
| 174 source == EncodingFromCSSCharset) | 176 source == EncodingFromCSSCharset) |
| 175 m_encoding = encoding.closestByteBasedEquivalent(); | 177 m_encoding = encoding.closestByteBasedEquivalent(); |
| 176 else | 178 else |
| 177 m_encoding = encoding; | 179 m_encoding = encoding; |
| 178 | 180 |
| 179 m_codec.reset(); | 181 m_codec.reset(); |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 213 while (end < len && str[end] != quoteMark) | 215 while (end < len && str[end] != quoteMark) |
| 214 ++end; | 216 ++end; |
| 215 if (end >= len) | 217 if (end >= len) |
| 216 return -1; | 218 return -1; |
| 217 | 219 |
| 218 encodingLength = end - pos; | 220 encodingLength = end - pos; |
| 219 return pos; | 221 return pos; |
| 220 } | 222 } |
| 221 | 223 |
| 222 size_t TextResourceDecoder::checkForBOM(const char* data, size_t len) { | 224 size_t TextResourceDecoder::checkForBOM(const char* data, size_t len) { |
| 223 // Check for UTF-16/32 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding. | 225 // Check for UTF-16/32 or UTF-8 BOM mark at the beginning, which is a sure |
| 224 // We let it override even a user-chosen encoding. | 226 // sign of a Unicode encoding. We let it override even a user-chosen encoding. |
| 225 ASSERT(!m_checkedForBOM); | 227 ASSERT(!m_checkedForBOM); |
| 226 | 228 |
| 227 size_t lengthOfBOM = 0; | 229 size_t lengthOfBOM = 0; |
| 228 | 230 |
| 229 size_t bufferLength = m_buffer.size(); | 231 size_t bufferLength = m_buffer.size(); |
| 230 | 232 |
| 231 size_t buf1Len = bufferLength; | 233 size_t buf1Len = bufferLength; |
| 232 size_t buf2Len = len; | 234 size_t buf2Len = len; |
| 233 const unsigned char* buf1 = | 235 const unsigned char* buf1 = |
| 234 reinterpret_cast<const unsigned char*>(m_buffer.data()); | 236 reinterpret_cast<const unsigned char*>(m_buffer.data()); |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 330 | 332 |
| 331 movedDataToBuffer = true; | 333 movedDataToBuffer = true; |
| 332 | 334 |
| 333 const char* ptr = m_buffer.data(); | 335 const char* ptr = m_buffer.data(); |
| 334 const char* pEnd = ptr + m_buffer.size(); | 336 const char* pEnd = ptr + m_buffer.size(); |
| 335 | 337 |
| 336 // Is there enough data available to check for XML declaration? | 338 // Is there enough data available to check for XML declaration? |
| 337 if (m_buffer.size() < minimumLengthOfXMLDeclaration) | 339 if (m_buffer.size() < minimumLengthOfXMLDeclaration) |
| 338 return false; | 340 return false; |
| 339 | 341 |
| 340 // Handle XML declaration, which can have encoding in it. This encoding is honored even for HTML documents. | 342 // Handle XML declaration, which can have encoding in it. This encoding is |
| 341 // It is an error for an XML declaration not to be at the start of an XML document, and it is ignored in HTML documents in such case. | 343 // honored even for HTML documents. It is an error for an XML declaration not |
| 344 // to be at the start of an XML document, and it is ignored in HTML documents |
| 345 // in such case. |
| 342 if (bytesEqual(ptr, '<', '?', 'x', 'm', 'l')) { | 346 if (bytesEqual(ptr, '<', '?', 'x', 'm', 'l')) { |
| 343 const char* xmlDeclarationEnd = ptr; | 347 const char* xmlDeclarationEnd = ptr; |
| 344 while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>') | 348 while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>') |
| 345 ++xmlDeclarationEnd; | 349 ++xmlDeclarationEnd; |
| 346 if (xmlDeclarationEnd == pEnd) | 350 if (xmlDeclarationEnd == pEnd) |
| 347 return false; | 351 return false; |
| 348 // No need for +1, because we have an extra "?" to lose at the end of XML declaration. | 352 // No need for +1, because we have an extra "?" to lose at the end of XML |
| 353 // declaration. |
| 349 int len = 0; | 354 int len = 0; |
| 350 int pos = findXMLEncoding(ptr, xmlDeclarationEnd - ptr, len); | 355 int pos = findXMLEncoding(ptr, xmlDeclarationEnd - ptr, len); |
| 351 if (pos != -1) | 356 if (pos != -1) |
| 352 setEncoding(findTextEncoding(ptr + pos, len), EncodingFromXMLHeader); | 357 setEncoding(findTextEncoding(ptr + pos, len), EncodingFromXMLHeader); |
| 353 // continue looking for a charset - it may be specified in an HTTP-Equiv meta | 358 // continue looking for a charset - it may be specified in an HTTP-Equiv |
| 359 // meta |
| 354 } else if (bytesEqual(ptr, '<', 0, '?', 0, 'x', 0)) { | 360 } else if (bytesEqual(ptr, '<', 0, '?', 0, 'x', 0)) { |
| 355 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); | 361 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); |
| 356 } else if (bytesEqual(ptr, 0, '<', 0, '?', 0, 'x')) { | 362 } else if (bytesEqual(ptr, 0, '<', 0, '?', 0, 'x')) { |
| 357 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); | 363 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); |
| 358 } else if (bytesEqual(ptr, '<', 0, 0, 0, '?', 0, 0, 0)) { | 364 } else if (bytesEqual(ptr, '<', 0, 0, 0, '?', 0, 0, 0)) { |
| 359 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); | 365 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); |
| 360 } else if (bytesEqual(ptr, 0, 0, 0, '<', 0, 0, 0, '?')) { | 366 } else if (bytesEqual(ptr, 0, 0, 0, '<', 0, 0, 0, '?')) { |
| 361 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); | 367 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); |
| 362 } | 368 } |
| 363 | 369 |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 414 } | 420 } |
| 415 DCHECK_LE(lengthOfBOM, m_buffer.size() + len); | 421 DCHECK_LE(lengthOfBOM, m_buffer.size() + len); |
| 416 | 422 |
| 417 bool movedDataToBuffer = false; | 423 bool movedDataToBuffer = false; |
| 418 | 424 |
| 419 if (m_contentType == CSSContent && !m_checkedForCSSCharset) { | 425 if (m_contentType == CSSContent && !m_checkedForCSSCharset) { |
| 420 if (!checkForCSSCharset(data, len, movedDataToBuffer)) | 426 if (!checkForCSSCharset(data, len, movedDataToBuffer)) |
| 421 return emptyString(); | 427 return emptyString(); |
| 422 } | 428 } |
| 423 | 429 |
| 424 // We check XML declaration in HTML content only if there is enough data available | 430 // We check XML declaration in HTML content only if there is enough data |
| 431 // available |
| 425 if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) || | 432 if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) || |
| 426 m_contentType == XMLContent) && | 433 m_contentType == XMLContent) && |
| 427 !m_checkedForXMLCharset) { | 434 !m_checkedForXMLCharset) { |
| 428 if (!checkForXMLCharset(data, len, movedDataToBuffer)) | 435 if (!checkForXMLCharset(data, len, movedDataToBuffer)) |
| 429 return emptyString(); | 436 return emptyString(); |
| 430 } | 437 } |
| 431 | 438 |
| 432 const char* dataForDecode = data + lengthOfBOM; | 439 const char* dataForDecode = data + lengthOfBOM; |
| 433 size_t lengthForDecode = len - lengthOfBOM; | 440 size_t lengthForDecode = len - lengthOfBOM; |
| 434 | 441 |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 485 String result = m_codec->decode( | 492 String result = m_codec->decode( |
| 486 m_buffer.data(), m_buffer.size(), FetchEOF, | 493 m_buffer.data(), m_buffer.size(), FetchEOF, |
| 487 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 494 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
| 488 m_buffer.clear(); | 495 m_buffer.clear(); |
| 489 m_codec.reset(); | 496 m_codec.reset(); |
| 490 m_checkedForBOM = false; // Skip BOM again when re-decoding. | 497 m_checkedForBOM = false; // Skip BOM again when re-decoding. |
| 491 return result; | 498 return result; |
| 492 } | 499 } |
| 493 | 500 |
| 494 } // namespace blink | 501 } // namespace blink |
| OLD | NEW |