OLD | NEW |
1 /* | 1 /* |
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All
rights reserved. | 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All |
| 4 rights reserved. |
4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) | 5 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) |
5 | 6 |
6 This library is free software; you can redistribute it and/or | 7 This library is free software; you can redistribute it and/or |
7 modify it under the terms of the GNU Library General Public | 8 modify it under the terms of the GNU Library General Public |
8 License as published by the Free Software Foundation; either | 9 License as published by the Free Software Foundation; either |
9 version 2 of the License, or (at your option) any later version. | 10 version 2 of the License, or (at your option) any later version. |
10 | 11 |
11 This library is distributed in the hope that it will be useful, | 12 This library is distributed in the hope that it will be useful, |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | 13 but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
78 char b4, | 79 char b4, |
79 char b5, | 80 char b5, |
80 char b6, | 81 char b6, |
81 char b7, | 82 char b7, |
82 char b8, | 83 char b8, |
83 char b9) { | 84 char b9) { |
84 return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 && | 85 return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 && |
85 p[5] == b5 && p[6] == b6 && p[7] == b7 && p[8] == b8 && p[9] == b9; | 86 p[5] == b5 && p[6] == b6 && p[7] == b7 && p[8] == b8 && p[9] == b9; |
86 } | 87 } |
87 | 88 |
88 // You might think we should put these find functions elsewhere, perhaps with th
e | 89 // You might think we should put these find functions elsewhere, perhaps with |
89 // similar functions that operate on UChar, but arguably only the decoder has | 90 // the similar functions that operate on UChar, but arguably only the decoder |
90 // a reason to process strings of char rather than UChar. | 91 // has a reason to process strings of char rather than UChar. |
91 | 92 |
92 static int find(const char* subject, size_t subjectLength, const char* target) { | 93 static int find(const char* subject, size_t subjectLength, const char* target) { |
93 size_t targetLength = strlen(target); | 94 size_t targetLength = strlen(target); |
94 if (targetLength > subjectLength) | 95 if (targetLength > subjectLength) |
95 return -1; | 96 return -1; |
96 for (size_t i = 0; i <= subjectLength - targetLength; ++i) { | 97 for (size_t i = 0; i <= subjectLength - targetLength; ++i) { |
97 bool match = true; | 98 bool match = true; |
98 for (size_t j = 0; j < targetLength; ++j) { | 99 for (size_t j = 0; j < targetLength; ++j) { |
99 if (subject[i + j] != target[j]) { | 100 if (subject[i + j] != target[j]) { |
100 match = false; | 101 match = false; |
(...skipping 21 matching lines...) Expand all Loading... |
122 if (equalIgnoringCase(mimeType, "text/html")) | 123 if (equalIgnoringCase(mimeType, "text/html")) |
123 return HTMLContent; | 124 return HTMLContent; |
124 if (DOMImplementation::isXMLMIMEType(mimeType)) | 125 if (DOMImplementation::isXMLMIMEType(mimeType)) |
125 return XMLContent; | 126 return XMLContent; |
126 return PlainTextContent; | 127 return PlainTextContent; |
127 } | 128 } |
128 | 129 |
129 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding( | 130 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding( |
130 ContentType contentType, | 131 ContentType contentType, |
131 const WTF::TextEncoding& specifiedDefaultEncoding) { | 132 const WTF::TextEncoding& specifiedDefaultEncoding) { |
132 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 in
stead of US-ASCII | 133 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 |
133 // for text/xml. This matches Firefox. | 134 // instead of US-ASCII for text/xml. This matches Firefox. |
134 if (contentType == XMLContent) | 135 if (contentType == XMLContent) |
135 return UTF8Encoding(); | 136 return UTF8Encoding(); |
136 if (!specifiedDefaultEncoding.isValid()) | 137 if (!specifiedDefaultEncoding.isValid()) |
137 return Latin1Encoding(); | 138 return Latin1Encoding(); |
138 return specifiedDefaultEncoding; | 139 return specifiedDefaultEncoding; |
139 } | 140 } |
140 | 141 |
141 TextResourceDecoder::TextResourceDecoder( | 142 TextResourceDecoder::TextResourceDecoder( |
142 const String& mimeType, | 143 const String& mimeType, |
143 const WTF::TextEncoding& specifiedDefaultEncoding, | 144 const WTF::TextEncoding& specifiedDefaultEncoding, |
(...skipping 10 matching lines...) Expand all Loading... |
154 m_sawError(false), | 155 m_sawError(false), |
155 m_encodingDetectionOption(encodingDetectionOption) { | 156 m_encodingDetectionOption(encodingDetectionOption) { |
156 if (m_encodingDetectionOption == AlwaysUseUTF8ForText) | 157 if (m_encodingDetectionOption == AlwaysUseUTF8ForText) |
157 ASSERT(m_contentType == PlainTextContent && m_encoding == UTF8Encoding()); | 158 ASSERT(m_contentType == PlainTextContent && m_encoding == UTF8Encoding()); |
158 } | 159 } |
159 | 160 |
160 TextResourceDecoder::~TextResourceDecoder() {} | 161 TextResourceDecoder::~TextResourceDecoder() {} |
161 | 162 |
162 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, | 163 void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, |
163 EncodingSource source) { | 164 EncodingSource source) { |
164 // In case the encoding didn't exist, we keep the old one (helps some sites sp
ecifying invalid encodings). | 165 // In case the encoding didn't exist, we keep the old one (helps some sites |
| 166 // specifying invalid encodings). |
165 if (!encoding.isValid()) | 167 if (!encoding.isValid()) |
166 return; | 168 return; |
167 | 169 |
168 // When encoding comes from meta tag (i.e. it cannot be XML files sent via XHR
), | 170 // When encoding comes from meta tag (i.e. it cannot be XML files sent via |
169 // treat x-user-defined as windows-1252 (bug 18270) | 171 // XHR), treat x-user-defined as windows-1252 (bug 18270) |
170 if (source == EncodingFromMetaTag && | 172 if (source == EncodingFromMetaTag && |
171 !strcasecmp(encoding.name(), "x-user-defined")) | 173 !strcasecmp(encoding.name(), "x-user-defined")) |
172 m_encoding = "windows-1252"; | 174 m_encoding = "windows-1252"; |
173 else if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader || | 175 else if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader || |
174 source == EncodingFromCSSCharset) | 176 source == EncodingFromCSSCharset) |
175 m_encoding = encoding.closestByteBasedEquivalent(); | 177 m_encoding = encoding.closestByteBasedEquivalent(); |
176 else | 178 else |
177 m_encoding = encoding; | 179 m_encoding = encoding; |
178 | 180 |
179 m_codec.reset(); | 181 m_codec.reset(); |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
213 while (end < len && str[end] != quoteMark) | 215 while (end < len && str[end] != quoteMark) |
214 ++end; | 216 ++end; |
215 if (end >= len) | 217 if (end >= len) |
216 return -1; | 218 return -1; |
217 | 219 |
218 encodingLength = end - pos; | 220 encodingLength = end - pos; |
219 return pos; | 221 return pos; |
220 } | 222 } |
221 | 223 |
222 size_t TextResourceDecoder::checkForBOM(const char* data, size_t len) { | 224 size_t TextResourceDecoder::checkForBOM(const char* data, size_t len) { |
223 // Check for UTF-16/32 or UTF-8 BOM mark at the beginning, which is a sure sig
n of a Unicode encoding. | 225 // Check for UTF-16/32 or UTF-8 BOM mark at the beginning, which is a sure |
224 // We let it override even a user-chosen encoding. | 226 // sign of a Unicode encoding. We let it override even a user-chosen encoding. |
225 ASSERT(!m_checkedForBOM); | 227 ASSERT(!m_checkedForBOM); |
226 | 228 |
227 size_t lengthOfBOM = 0; | 229 size_t lengthOfBOM = 0; |
228 | 230 |
229 size_t bufferLength = m_buffer.size(); | 231 size_t bufferLength = m_buffer.size(); |
230 | 232 |
231 size_t buf1Len = bufferLength; | 233 size_t buf1Len = bufferLength; |
232 size_t buf2Len = len; | 234 size_t buf2Len = len; |
233 const unsigned char* buf1 = | 235 const unsigned char* buf1 = |
234 reinterpret_cast<const unsigned char*>(m_buffer.data()); | 236 reinterpret_cast<const unsigned char*>(m_buffer.data()); |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
330 | 332 |
331 movedDataToBuffer = true; | 333 movedDataToBuffer = true; |
332 | 334 |
333 const char* ptr = m_buffer.data(); | 335 const char* ptr = m_buffer.data(); |
334 const char* pEnd = ptr + m_buffer.size(); | 336 const char* pEnd = ptr + m_buffer.size(); |
335 | 337 |
336 // Is there enough data available to check for XML declaration? | 338 // Is there enough data available to check for XML declaration? |
337 if (m_buffer.size() < minimumLengthOfXMLDeclaration) | 339 if (m_buffer.size() < minimumLengthOfXMLDeclaration) |
338 return false; | 340 return false; |
339 | 341 |
340 // Handle XML declaration, which can have encoding in it. This encoding is hon
ored even for HTML documents. | 342 // Handle XML declaration, which can have encoding in it. This encoding is |
341 // It is an error for an XML declaration not to be at the start of an XML docu
ment, and it is ignored in HTML documents in such case. | 343 // honored even for HTML documents. It is an error for an XML declaration not |
| 344 // to be at the start of an XML document, and it is ignored in HTML documents |
| 345 // in such case. |
342 if (bytesEqual(ptr, '<', '?', 'x', 'm', 'l')) { | 346 if (bytesEqual(ptr, '<', '?', 'x', 'm', 'l')) { |
343 const char* xmlDeclarationEnd = ptr; | 347 const char* xmlDeclarationEnd = ptr; |
344 while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>') | 348 while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>') |
345 ++xmlDeclarationEnd; | 349 ++xmlDeclarationEnd; |
346 if (xmlDeclarationEnd == pEnd) | 350 if (xmlDeclarationEnd == pEnd) |
347 return false; | 351 return false; |
348 // No need for +1, because we have an extra "?" to lose at the end of XML de
claration. | 352 // No need for +1, because we have an extra "?" to lose at the end of XML |
| 353 // declaration. |
349 int len = 0; | 354 int len = 0; |
350 int pos = findXMLEncoding(ptr, xmlDeclarationEnd - ptr, len); | 355 int pos = findXMLEncoding(ptr, xmlDeclarationEnd - ptr, len); |
351 if (pos != -1) | 356 if (pos != -1) |
352 setEncoding(findTextEncoding(ptr + pos, len), EncodingFromXMLHeader); | 357 setEncoding(findTextEncoding(ptr + pos, len), EncodingFromXMLHeader); |
353 // continue looking for a charset - it may be specified in an HTTP-Equiv met
a | 358 // continue looking for a charset - it may be specified in an HTTP-Equiv |
| 359 // meta |
354 } else if (bytesEqual(ptr, '<', 0, '?', 0, 'x', 0)) { | 360 } else if (bytesEqual(ptr, '<', 0, '?', 0, 'x', 0)) { |
355 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); | 361 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); |
356 } else if (bytesEqual(ptr, 0, '<', 0, '?', 0, 'x')) { | 362 } else if (bytesEqual(ptr, 0, '<', 0, '?', 0, 'x')) { |
357 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); | 363 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); |
358 } else if (bytesEqual(ptr, '<', 0, 0, 0, '?', 0, 0, 0)) { | 364 } else if (bytesEqual(ptr, '<', 0, 0, 0, '?', 0, 0, 0)) { |
359 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); | 365 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); |
360 } else if (bytesEqual(ptr, 0, 0, 0, '<', 0, 0, 0, '?')) { | 366 } else if (bytesEqual(ptr, 0, 0, 0, '<', 0, 0, 0, '?')) { |
361 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); | 367 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); |
362 } | 368 } |
363 | 369 |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
414 } | 420 } |
415 DCHECK_LE(lengthOfBOM, m_buffer.size() + len); | 421 DCHECK_LE(lengthOfBOM, m_buffer.size() + len); |
416 | 422 |
417 bool movedDataToBuffer = false; | 423 bool movedDataToBuffer = false; |
418 | 424 |
419 if (m_contentType == CSSContent && !m_checkedForCSSCharset) { | 425 if (m_contentType == CSSContent && !m_checkedForCSSCharset) { |
420 if (!checkForCSSCharset(data, len, movedDataToBuffer)) | 426 if (!checkForCSSCharset(data, len, movedDataToBuffer)) |
421 return emptyString(); | 427 return emptyString(); |
422 } | 428 } |
423 | 429 |
424 // We check XML declaration in HTML content only if there is enough data avail
able | 430 // We check XML declaration in HTML content only if there is enough data |
| 431 // available |
425 if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) || | 432 if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) || |
426 m_contentType == XMLContent) && | 433 m_contentType == XMLContent) && |
427 !m_checkedForXMLCharset) { | 434 !m_checkedForXMLCharset) { |
428 if (!checkForXMLCharset(data, len, movedDataToBuffer)) | 435 if (!checkForXMLCharset(data, len, movedDataToBuffer)) |
429 return emptyString(); | 436 return emptyString(); |
430 } | 437 } |
431 | 438 |
432 const char* dataForDecode = data + lengthOfBOM; | 439 const char* dataForDecode = data + lengthOfBOM; |
433 size_t lengthForDecode = len - lengthOfBOM; | 440 size_t lengthForDecode = len - lengthOfBOM; |
434 | 441 |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
485 String result = m_codec->decode( | 492 String result = m_codec->decode( |
486 m_buffer.data(), m_buffer.size(), FetchEOF, | 493 m_buffer.data(), m_buffer.size(), FetchEOF, |
487 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 494 m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
488 m_buffer.clear(); | 495 m_buffer.clear(); |
489 m_codec.reset(); | 496 m_codec.reset(); |
490 m_checkedForBOM = false; // Skip BOM again when re-decoding. | 497 m_checkedForBOM = false; // Skip BOM again when re-decoding. |
491 return result; | 498 return result; |
492 } | 499 } |
493 | 500 |
494 } // namespace blink | 501 } // namespace blink |
OLD | NEW |