OLD | NEW |
1 /* | 1 /* |
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. | 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. |
4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) | 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) |
5 | 5 |
6 This library is free software; you can redistribute it and/or | 6 This library is free software; you can redistribute it and/or |
7 modify it under the terms of the GNU Library General Public | 7 modify it under the terms of the GNU Library General Public |
8 License as published by the Free Software Foundation; either | 8 License as published by the Free Software Foundation; either |
9 version 2 of the License, or (at your option) any later version. | 9 version 2 of the License, or (at your option) any later version. |
10 | 10 |
11 This library is distributed in the hope that it will be useful, | 11 This library is distributed in the hope that it will be useful, |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 Library General Public License for more details. | 14 Library General Public License for more details. |
15 | 15 |
16 You should have received a copy of the GNU Library General Public License | 16 You should have received a copy of the GNU Library General Public License |
17 along with this library; see the file COPYING.LIB. If not, write to | 17 along with this library; see the file COPYING.LIB. If not, write to |
18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | 18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
19 Boston, MA 02110-1301, USA. | 19 Boston, MA 02110-1301, USA. |
20 */ | 20 */ |
21 | 21 |
22 | 22 |
23 #include "config.h" | 23 #include "config.h" |
24 #include "core/html/parser/TextResourceDecoder.h" | 24 #include "core/fetch/TextResourceDecoder.h" |
25 | 25 |
26 #include "HTMLNames.h" | 26 #include "HTMLNames.h" |
27 #include "core/dom/DOMImplementation.h" | 27 #include "core/dom/DOMImplementation.h" |
28 #include "core/html/parser/HTMLMetaCharsetParser.h" | 28 #include "core/html/parser/HTMLMetaCharsetParser.h" |
29 #include "platform/text/TextEncodingDetector.h" | 29 #include "platform/text/TextEncodingDetector.h" |
30 #include "wtf/StringExtras.h" | 30 #include "wtf/StringExtras.h" |
31 #include "wtf/text/TextCodec.h" | 31 #include "wtf/text/TextCodec.h" |
32 #include "wtf/text/TextEncodingRegistry.h" | 32 #include "wtf/text/TextEncodingRegistry.h" |
33 | 33 |
34 using namespace WTF; | 34 using namespace WTF; |
(...skipping 49 matching lines...)
84 { | 84 { |
85 Vector<char, 64> buffer(length + 1); | 85 Vector<char, 64> buffer(length + 1); |
86 memcpy(buffer.data(), encodingName, length); | 86 memcpy(buffer.data(), encodingName, length); |
87 buffer[length] = '\0'; | 87 buffer[length] = '\0'; |
88 return buffer.data(); | 88 return buffer.data(); |
89 } | 89 } |
90 | 90 |
91 TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const String& mimeType) | 91 TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const String& mimeType) |
92 { | 92 { |
93 if (equalIgnoringCase(mimeType, "text/css")) | 93 if (equalIgnoringCase(mimeType, "text/css")) |
94 return CSSContent; | 94 return CSS; |
95 if (equalIgnoringCase(mimeType, "text/html")) | 95 if (equalIgnoringCase(mimeType, "text/html")) |
96 return HTMLContent; | 96 return HTML; |
97 if (DOMImplementation::isXMLMIMEType(mimeType)) | 97 if (DOMImplementation::isXMLMIMEType(mimeType)) |
98 return XMLContent; | 98 return XML; |
99 return PlainTextContent; | 99 return PlainText; |
100 } | 100 } |
101 | 101 |
102 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType contentType, const WTF::TextEncoding& specifiedDefaultEncoding) | 102 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType contentType, const WTF::TextEncoding& specifiedDefaultEncoding) |
103 { | 103 { |
104 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII | 104 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII |
105 // for text/xml. This matches Firefox. | 105 // for text/xml. This matches Firefox. |
106 if (contentType == XMLContent) | 106 if (contentType == XML) |
107 return UTF8Encoding(); | 107 return UTF8Encoding(); |
108 if (!specifiedDefaultEncoding.isValid()) | 108 if (!specifiedDefaultEncoding.isValid()) |
109 return Latin1Encoding(); | 109 return Latin1Encoding(); |
110 return specifiedDefaultEncoding; | 110 return specifiedDefaultEncoding; |
111 } | 111 } |
112 | 112 |
113 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector) | 113 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector) |
114 : m_contentType(determineContentType(mimeType)) | 114 : m_contentType(determineContentType(mimeType)) |
115 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) | 115 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) |
116 , m_source(DefaultEncoding) | 116 , m_source(DefaultEncoding) |
(...skipping 244 matching lines...)
361 } | 361 } |
362 | 362 |
363 String TextResourceDecoder::decode(const char* data, size_t len) | 363 String TextResourceDecoder::decode(const char* data, size_t len) |
364 { | 364 { |
365 size_t lengthOfBOM = 0; | 365 size_t lengthOfBOM = 0; |
366 if (!m_checkedForBOM) | 366 if (!m_checkedForBOM) |
367 lengthOfBOM = checkForBOM(data, len); | 367 lengthOfBOM = checkForBOM(data, len); |
368 | 368 |
369 bool movedDataToBuffer = false; | 369 bool movedDataToBuffer = false; |
370 | 370 |
371 if (m_contentType == CSSContent && !m_checkedForCSSCharset) { | 371 if (m_contentType == CSS && !m_checkedForCSSCharset) { |
372 if (!checkForCSSCharset(data, len, movedDataToBuffer)) | 372 if (!checkForCSSCharset(data, len, movedDataToBuffer)) |
373 return emptyString(); | 373 return emptyString(); |
374 } | 374 } |
375 | 375 |
376 if ((m_contentType == HTMLContent || m_contentType == XMLContent) && !m_checkedForXMLCharset) { | 376 if ((m_contentType == HTML || m_contentType == XML) && !m_checkedForXMLCharset) { |
377 if (!checkForXMLCharset(data, len, movedDataToBuffer)) | 377 if (!checkForXMLCharset(data, len, movedDataToBuffer)) |
378 return emptyString(); | 378 return emptyString(); |
379 } | 379 } |
380 | 380 |
381 const char* dataForDecode = data + lengthOfBOM; | 381 const char* dataForDecode = data + lengthOfBOM; |
382 size_t lengthForDecode = len - lengthOfBOM; | 382 size_t lengthForDecode = len - lengthOfBOM; |
383 | 383 |
384 if (!m_buffer.isEmpty()) { | 384 if (!m_buffer.isEmpty()) { |
385 if (!movedDataToBuffer) { | 385 if (!movedDataToBuffer) { |
386 size_t oldSize = m_buffer.size(); | 386 size_t oldSize = m_buffer.size(); |
387 m_buffer.grow(oldSize + len); | 387 m_buffer.grow(oldSize + len); |
388 memcpy(m_buffer.data() + oldSize, data, len); | 388 memcpy(m_buffer.data() + oldSize, data, len); |
389 } | 389 } |
390 | 390 |
391 dataForDecode = m_buffer.data() + lengthOfBOM; | 391 dataForDecode = m_buffer.data() + lengthOfBOM; |
392 lengthForDecode = m_buffer.size() - lengthOfBOM; | 392 lengthForDecode = m_buffer.size() - lengthOfBOM; |
393 } | 393 } |
394 | 394 |
395 if (m_contentType == HTMLContent && !m_checkedForMetaCharset) | 395 if (m_contentType == HTML && !m_checkedForMetaCharset) |
396 checkForMetaCharset(dataForDecode, lengthForDecode); | 396 checkForMetaCharset(dataForDecode, lengthForDecode); |
397 | 397 |
398 if (shouldAutoDetect()) { | 398 if (shouldAutoDetect()) { |
399 WTF::TextEncoding detectedEncoding; | 399 WTF::TextEncoding detectedEncoding; |
400 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) | 400 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) |
401 setEncoding(detectedEncoding, EncodingFromContentSniffing); | 401 setEncoding(detectedEncoding, EncodingFromContentSniffing); |
402 } | 402 } |
403 | 403 |
404 ASSERT(m_encoding.isValid()); | 404 ASSERT(m_encoding.isValid()); |
405 | 405 |
406 if (!m_codec) | 406 if (!m_codec) |
407 m_codec = newTextCodec(m_encoding); | 407 m_codec = newTextCodec(m_encoding); |
408 | 408 |
409 String result = m_codec->decode(dataForDecode, lengthForDecode, false, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 409 String result = m_codec->decode(dataForDecode, lengthForDecode, false, m_contentType == XML && !m_useLenientXMLDecoding, m_sawError); |
410 | 410 |
411 m_buffer.clear(); | 411 m_buffer.clear(); |
412 return result; | 412 return result; |
413 } | 413 } |
414 | 414 |
415 String TextResourceDecoder::flush() | 415 String TextResourceDecoder::flush() |
416 { | 416 { |
417 // If we can not identify the encoding even after a document is completely | 417 // If we can not identify the encoding even after a document is completely |
418 // loaded, we need to detect the encoding if other conditions for | 418 // loaded, we need to detect the encoding if other conditions for |
419 // autodetection is satisfied. | 419 // autodetection is satisfied. |
420 if (m_buffer.size() && shouldAutoDetect() | 420 if (m_buffer.size() && shouldAutoDetect() |
421 && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_contentType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) { | 421 && ((!m_checkedForXMLCharset && (m_contentType == HTML || m_contentType == XML)) || (!m_checkedForCSSCharset && (m_contentType == CSS)))) { |
422 WTF::TextEncoding detectedEncoding; | 422 WTF::TextEncoding detectedEncoding; |
423 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding)) | 423 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding)) |
424 setEncoding(detectedEncoding, EncodingFromContentSniffing); | 424 setEncoding(detectedEncoding, EncodingFromContentSniffing); |
425 } | 425 } |
426 | 426 |
427 if (!m_codec) | 427 if (!m_codec) |
428 m_codec = newTextCodec(m_encoding); | 428 m_codec = newTextCodec(m_encoding); |
429 | 429 |
430 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); | 430 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_contentType == XML && !m_useLenientXMLDecoding, m_sawError); |
431 m_buffer.clear(); | 431 m_buffer.clear(); |
432 m_codec.clear(); | 432 m_codec.clear(); |
433 m_checkedForBOM = false; // Skip BOM again when re-decoding. | 433 m_checkedForBOM = false; // Skip BOM again when re-decoding. |
434 return result; | 434 return result; |
435 } | 435 } |
436 | 436 |
437 } | 437 } |
OLD | NEW |