OLD | NEW |
1 /* | 1 /* |
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) | 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) |
3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All
rights reserved. | 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All
rights reserved. |
4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) | 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) |
5 | 5 |
6 This library is free software; you can redistribute it and/or | 6 This library is free software; you can redistribute it and/or |
7 modify it under the terms of the GNU Library General Public | 7 modify it under the terms of the GNU Library General Public |
8 License as published by the Free Software Foundation; either | 8 License as published by the Free Software Foundation; either |
9 version 2 of the License, or (at your option) any later version. | 9 version 2 of the License, or (at your option) any later version. |
10 | 10 |
11 This library is distributed in the hope that it will be useful, | 11 This library is distributed in the hope that it will be useful, |
12 but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 Library General Public License for more details. | 14 Library General Public License for more details. |
15 | 15 |
16 You should have received a copy of the GNU Library General Public License | 16 You should have received a copy of the GNU Library General Public License |
17 along with this library; see the file COPYING.LIB. If not, write to | 17 along with this library; see the file COPYING.LIB. If not, write to |
18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | 18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
19 Boston, MA 02110-1301, USA. | 19 Boston, MA 02110-1301, USA. |
20 */ | 20 */ |
21 | 21 |
22 | 22 |
23 #include "config.h" | 23 #include "config.h" |
24 #include "core/fetch/TextResourceDecoder.h" | 24 #include "core/html/parser/TextResourceDecoder.h" |
25 | 25 |
26 #include "HTMLNames.h" | 26 #include "HTMLNames.h" |
27 #include "core/dom/DOMImplementation.h" | 27 #include "core/dom/DOMImplementation.h" |
28 #include "core/html/parser/HTMLMetaCharsetParser.h" | 28 #include "core/html/parser/HTMLMetaCharsetParser.h" |
29 #include "platform/text/TextEncodingDetector.h" | 29 #include "platform/text/TextEncodingDetector.h" |
30 #include "wtf/StringExtras.h" | 30 #include "wtf/StringExtras.h" |
31 #include "wtf/text/TextCodec.h" | 31 #include "wtf/text/TextCodec.h" |
32 #include "wtf/text/TextEncoding.h" | 32 #include "wtf/text/TextEncoding.h" |
33 #include "wtf/text/TextEncodingRegistry.h" | 33 #include "wtf/text/TextEncodingRegistry.h" |
34 | 34 |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
85 { | 85 { |
86 Vector<char, 64> buffer(length + 1); | 86 Vector<char, 64> buffer(length + 1); |
87 memcpy(buffer.data(), encodingName, length); | 87 memcpy(buffer.data(), encodingName, length); |
88 buffer[length] = '\0'; | 88 buffer[length] = '\0'; |
89 return buffer.data(); | 89 return buffer.data(); |
90 } | 90 } |
91 | 91 |
// Classifies a resource by its MIME type. "text/css" and "text/html" are
// matched with equalIgnoringCase(); any type accepted by
// DOMImplementation::isXMLMIMEType() is treated as XML; everything else falls
// through to plain text. The NEW column only renames the returned enumerators
// (CSS -> CSSContent, HTML -> HTMLContent, XML -> XMLContent,
// PlainText -> PlainTextContent); the mapping itself is unchanged.
92 TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const
String& mimeType) | 92 TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const
String& mimeType) |
93 { | 93 { |
94 if (equalIgnoringCase(mimeType, "text/css")) | 94 if (equalIgnoringCase(mimeType, "text/css")) |
95 return CSS; | 95 return CSSContent; |
96 if (equalIgnoringCase(mimeType, "text/html")) | 96 if (equalIgnoringCase(mimeType, "text/html")) |
97 return HTML; | 97 return HTMLContent; |
98 if (DOMImplementation::isXMLMIMEType(mimeType)) | 98 if (DOMImplementation::isXMLMIMEType(mimeType)) |
99 return XML; | 99 return XMLContent; |
100 return PlainText; | 100 return PlainTextContent; |
101 } | 101 } |
102 | 102 |
// Chooses the encoding the constructor stores in m_encoding.
// - XML content always gets UTF8Encoding(), regardless of the specified default.
// - Otherwise specifiedDefaultEncoding is returned when isValid() is true.
// - Otherwise Latin1Encoding() is the fallback.
// The NEW column differs only in the enumerator name (XML -> XMLContent).
103 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType conten
tType, const WTF::TextEncoding& specifiedDefaultEncoding) | 103 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType conten
tType, const WTF::TextEncoding& specifiedDefaultEncoding) |
104 { | 104 { |
105 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8
instead of US-ASCII | 105 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8
instead of US-ASCII |
106 // for text/xml. This matches Firefox. | 106 // for text/xml. This matches Firefox. |
107 if (contentType == XML) | 107 if (contentType == XMLContent) |
108 return UTF8Encoding(); | 108 return UTF8Encoding(); |
109 if (!specifiedDefaultEncoding.isValid()) | 109 if (!specifiedDefaultEncoding.isValid()) |
110 return Latin1Encoding(); | 110 return Latin1Encoding(); |
111 return specifiedDefaultEncoding; | 111 return specifiedDefaultEncoding; |
112 } | 112 } |
113 | 113 |
114 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::Text
Encoding& specifiedDefaultEncoding, bool usesEncodingDetector) | 114 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::Text
Encoding& specifiedDefaultEncoding, bool usesEncodingDetector) |
115 : m_contentType(determineContentType(mimeType)) | 115 : m_contentType(determineContentType(mimeType)) |
116 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) | 116 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) |
117 , m_source(DefaultEncoding) | 117 , m_source(DefaultEncoding) |
(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
362 } | 362 } |
363 | 363 |
// Decodes one chunk of raw bytes (data, len) and returns the decoded text.
// Steps, in order: BOM check (first call only), CSS @charset / XML declaration
// checks depending on m_contentType, merging with any bytes already held in
// m_buffer, HTML <meta> charset check, optional auto-detection, then the codec
// call. Returns emptyString() when a charset check helper returns false.
// The NEW column differs only in the ContentType enumerator names.
364 String TextResourceDecoder::decode(const char* data, size_t len) | 364 String TextResourceDecoder::decode(const char* data, size_t len) |
365 { | 365 { |
// lengthOfBOM stays 0 unless checkForBOM() runs; it is later used as the
// number of leading bytes to skip before decoding.
366 size_t lengthOfBOM = 0; | 366 size_t lengthOfBOM = 0; |
367 if (!m_checkedForBOM) | 367 if (!m_checkedForBOM) |
368 lengthOfBOM = checkForBOM(data, len); | 368 lengthOfBOM = checkForBOM(data, len); |
369 | 369 |
// Set by the charset check helpers (passed by reference) when they have
// already copied this chunk into m_buffer, so it is not appended twice below.
370 bool movedDataToBuffer = false; | 370 bool movedDataToBuffer = false; |
371 | 371 |
// NOTE(review): a false return from the helper presumably means "not enough
// data yet, chunk kept in m_buffer" - confirm against checkForCSSCharset().
372 if (m_contentType == CSS && !m_checkedForCSSCharset) { | 372 if (m_contentType == CSSContent && !m_checkedForCSSCharset) { |
373 if (!checkForCSSCharset(data, len, movedDataToBuffer)) | 373 if (!checkForCSSCharset(data, len, movedDataToBuffer)) |
374 return emptyString(); | 374 return emptyString(); |
375 } | 375 } |
376 | 376 |
// The XML declaration check is applied to both HTML and XML content.
377 if ((m_contentType == HTML || m_contentType == XML) && !m_checkedForXMLChars
et) { | 377 if ((m_contentType == HTMLContent || m_contentType == XMLContent) && !m_chec
kedForXMLCharset) { |
378 if (!checkForXMLCharset(data, len, movedDataToBuffer)) | 378 if (!checkForXMLCharset(data, len, movedDataToBuffer)) |
379 return emptyString(); | 379 return emptyString(); |
380 } | 380 } |
381 | 381 |
// Default: decode straight from the caller's chunk, past any BOM.
382 const char* dataForDecode = data + lengthOfBOM; | 382 const char* dataForDecode = data + lengthOfBOM; |
383 size_t lengthForDecode = len - lengthOfBOM; | 383 size_t lengthForDecode = len - lengthOfBOM; |
384 | 384 |
// If earlier bytes are pending in m_buffer, append this chunk (unless a helper
// already did) and decode from the combined buffer instead.
385 if (!m_buffer.isEmpty()) { | 385 if (!m_buffer.isEmpty()) { |
386 if (!movedDataToBuffer) { | 386 if (!movedDataToBuffer) { |
387 size_t oldSize = m_buffer.size(); | 387 size_t oldSize = m_buffer.size(); |
388 m_buffer.grow(oldSize + len); | 388 m_buffer.grow(oldSize + len); |
389 memcpy(m_buffer.data() + oldSize, data, len); | 389 memcpy(m_buffer.data() + oldSize, data, len); |
390 } | 390 } |
391 | 391 |
392 dataForDecode = m_buffer.data() + lengthOfBOM; | 392 dataForDecode = m_buffer.data() + lengthOfBOM; |
393 lengthForDecode = m_buffer.size() - lengthOfBOM; | 393 lengthForDecode = m_buffer.size() - lengthOfBOM; |
394 } | 394 } |
395 | 395 |
396 if (m_contentType == HTML && !m_checkedForMetaCharset) | 396 if (m_contentType == HTMLContent && !m_checkedForMetaCharset) |
397 checkForMetaCharset(dataForDecode, lengthForDecode); | 397 checkForMetaCharset(dataForDecode, lengthForDecode); |
398 | 398 |
// NOTE(review): detection is fed (data, len), i.e. only the current chunk,
// even when dataForDecode points at the larger m_buffer - confirm this is
// intended.
399 if (shouldAutoDetect()) { | 399 if (shouldAutoDetect()) { |
400 WTF::TextEncoding detectedEncoding; | 400 WTF::TextEncoding detectedEncoding; |
401 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) | 401 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) |
402 setEncoding(detectedEncoding, EncodingFromContentSniffing); | 402 setEncoding(detectedEncoding, EncodingFromContentSniffing); |
403 } | 403 } |
404 | 404 |
405 ASSERT(m_encoding.isValid()); | 405 ASSERT(m_encoding.isValid()); |
406 | 406 |
// The codec is created lazily from whatever m_encoding is at this point.
407 if (!m_codec) | 407 if (!m_codec) |
408 m_codec = newTextCodec(m_encoding); | 408 m_codec = newTextCodec(m_encoding); |
409 | 409 |
// Third argument is false here and true in flush() (presumably the codec's
// "flush" flag - confirm in TextCodec). The fourth argument is true only for
// XML content when m_useLenientXMLDecoding is not set.
410 String result = m_codec->decode(dataForDecode, lengthForDecode, false, m_con
tentType == XML && !m_useLenientXMLDecoding, m_sawError); | 410 String result = m_codec->decode(dataForDecode, lengthForDecode, false, m_con
tentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
411 | 411 |
// Pending bytes have been handed to the codec, so the buffer is emptied.
412 m_buffer.clear(); | 412 m_buffer.clear(); |
413 return result; | 413 return result; |
414 } | 414 } |
415 | 415 |
// Decodes whatever is still held in m_buffer, passing true as the codec's
// third argument, and returns the resulting text. Before decoding it may run
// encoding auto-detection over the buffered bytes. Afterwards it resets
// m_buffer, m_codec and m_checkedForBOM.
// The NEW column differs only in the ContentType enumerator names.
416 String TextResourceDecoder::flush() | 416 String TextResourceDecoder::flush() |
417 { | 417 { |
418 // If we can not identify the encoding even after a document is completely | 418 // If we can not identify the encoding even after a document is completely |
419 // loaded, we need to detect the encoding if other conditions for | 419 // loaded, we need to detect the encoding if other conditions for |
420 // autodetection is satisfied. | 420 // autodetection is satisfied. |
// Detection runs only when bytes are pending, shouldAutoDetect() holds, and
// the charset check relevant to this content type (XML declaration for
// HTML/XML, @charset for CSS) has not completed.
421 if (m_buffer.size() && shouldAutoDetect() | 421 if (m_buffer.size() && shouldAutoDetect() |
422 && ((!m_checkedForXMLCharset && (m_contentType == HTML || m_contentType
== XML)) || (!m_checkedForCSSCharset && (m_contentType == CSS)))) { | 422 && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_conte
ntType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSConte
nt)))) { |
423 WTF::TextEncoding detectedEncoding; | 423 WTF::TextEncoding detectedEncoding; |
424 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding,
&detectedEncoding)) | 424 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding,
&detectedEncoding)) |
425 setEncoding(detectedEncoding, EncodingFromContentSniffing); | 425 setEncoding(detectedEncoding, EncodingFromContentSniffing); |
426 } | 426 } |
427 | 427 |
// The codec is created lazily from the current m_encoding if none exists yet.
428 if (!m_codec) | 428 if (!m_codec) |
429 m_codec = newTextCodec(m_encoding); | 429 m_codec = newTextCodec(m_encoding); |
430 | 430 |
// Fourth argument is true only for XML content when m_useLenientXMLDecoding
// is not set, matching the call in decode().
431 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_co
ntentType == XML && !m_useLenientXMLDecoding, m_sawError); | 431 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_co
ntentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); |
// Drop the buffered bytes and the codec; a later decode() builds a new codec.
432 m_buffer.clear(); | 432 m_buffer.clear(); |
433 m_codec.clear(); | 433 m_codec.clear(); |
434 m_checkedForBOM = false; // Skip BOM again when re-decoding. | 434 m_checkedForBOM = false; // Skip BOM again when re-decoding. |
435 return result; | 435 return result; |
436 } | 436 } |
437 | 437 |
438 } | 438 } |
OLD | NEW |