Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(159)

Side by Side Diff: Source/core/html/parser/TextResourceDecoder.cpp

Issue 74513003: Moved text decoding to the parser thread (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@parserthread_step25
Patch Set: Removed AtomicString from HTMLMetaCharsetParser Created 7 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD NEW
1 /* 1 /*
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved.
4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com)
5 5
6 This library is free software; you can redistribute it and/or 6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public 7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version. 9 version 2 of the License, or (at your option) any later version.
10 10
11 This library is distributed in the hope that it will be useful, 11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details. 14 Library General Public License for more details.
15 15
16 You should have received a copy of the GNU Library General Public License 16 You should have received a copy of the GNU Library General Public License
17 along with this library; see the file COPYING.LIB. If not, write to 17 along with this library; see the file COPYING.LIB. If not, write to
18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 Boston, MA 02110-1301, USA. 19 Boston, MA 02110-1301, USA.
20 */ 20 */
21 21
22 22
23 #include "config.h" 23 #include "config.h"
24 #include "core/fetch/TextResourceDecoder.h" 24 #include "core/html/parser/TextResourceDecoder.h"
25 25
26 #include "HTMLNames.h" 26 #include "HTMLNames.h"
27 #include "core/dom/DOMImplementation.h" 27 #include "core/dom/DOMImplementation.h"
28 #include "core/html/parser/HTMLMetaCharsetParser.h" 28 #include "core/html/parser/HTMLMetaCharsetParser.h"
29 #include "platform/text/TextEncodingDetector.h" 29 #include "platform/text/TextEncodingDetector.h"
30 #include "wtf/StringExtras.h" 30 #include "wtf/StringExtras.h"
31 #include "wtf/text/TextCodec.h" 31 #include "wtf/text/TextCodec.h"
32 #include "wtf/text/TextEncoding.h" 32 #include "wtf/text/TextEncoding.h"
33 #include "wtf/text/TextEncodingRegistry.h" 33 #include "wtf/text/TextEncodingRegistry.h"
34 34
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
85 { 85 {
86 Vector<char, 64> buffer(length + 1); 86 Vector<char, 64> buffer(length + 1);
87 memcpy(buffer.data(), encodingName, length); 87 memcpy(buffer.data(), encodingName, length);
88 buffer[length] = '\0'; 88 buffer[length] = '\0';
89 return buffer.data(); 89 return buffer.data();
90 } 90 }
91 91
92 TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const String& mimeType) 92 TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const String& mimeType)
93 { 93 {
94 if (equalIgnoringCase(mimeType, "text/css")) 94 if (equalIgnoringCase(mimeType, "text/css"))
95 return CSS; 95 return CSSContent;
96 if (equalIgnoringCase(mimeType, "text/html")) 96 if (equalIgnoringCase(mimeType, "text/html"))
97 return HTML; 97 return HTMLContent;
98 if (DOMImplementation::isXMLMIMEType(mimeType)) 98 if (DOMImplementation::isXMLMIMEType(mimeType))
99 return XML; 99 return XMLContent;
100 return PlainText; 100 return PlainTextContent;
101 } 101 }
102 102
103 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType contentType, const WTF::TextEncoding& specifiedDefaultEncoding) 103 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType contentType, const WTF::TextEncoding& specifiedDefaultEncoding)
104 { 104 {
105 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII 105 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII
106 // for text/xml. This matches Firefox. 106 // for text/xml. This matches Firefox.
107 if (contentType == XML) 107 if (contentType == XMLContent)
108 return UTF8Encoding(); 108 return UTF8Encoding();
109 if (!specifiedDefaultEncoding.isValid()) 109 if (!specifiedDefaultEncoding.isValid())
110 return Latin1Encoding(); 110 return Latin1Encoding();
111 return specifiedDefaultEncoding; 111 return specifiedDefaultEncoding;
112 } 112 }
113 113
114 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector) 114 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector)
115 : m_contentType(determineContentType(mimeType)) 115 : m_contentType(determineContentType(mimeType))
116 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) 116 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding))
117 , m_source(DefaultEncoding) 117 , m_source(DefaultEncoding)
(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after
362 } 362 }
363 363
364 String TextResourceDecoder::decode(const char* data, size_t len) 364 String TextResourceDecoder::decode(const char* data, size_t len)
365 { 365 {
366 size_t lengthOfBOM = 0; 366 size_t lengthOfBOM = 0;
367 if (!m_checkedForBOM) 367 if (!m_checkedForBOM)
368 lengthOfBOM = checkForBOM(data, len); 368 lengthOfBOM = checkForBOM(data, len);
369 369
370 bool movedDataToBuffer = false; 370 bool movedDataToBuffer = false;
371 371
372 if (m_contentType == CSS && !m_checkedForCSSCharset) { 372 if (m_contentType == CSSContent && !m_checkedForCSSCharset) {
373 if (!checkForCSSCharset(data, len, movedDataToBuffer)) 373 if (!checkForCSSCharset(data, len, movedDataToBuffer))
374 return emptyString(); 374 return emptyString();
375 } 375 }
376 376
377 if ((m_contentType == HTML || m_contentType == XML) && !m_checkedForXMLCharset) { 377 if ((m_contentType == HTMLContent || m_contentType == XMLContent) && !m_checkedForXMLCharset) {
378 if (!checkForXMLCharset(data, len, movedDataToBuffer)) 378 if (!checkForXMLCharset(data, len, movedDataToBuffer))
379 return emptyString(); 379 return emptyString();
380 } 380 }
381 381
382 const char* dataForDecode = data + lengthOfBOM; 382 const char* dataForDecode = data + lengthOfBOM;
383 size_t lengthForDecode = len - lengthOfBOM; 383 size_t lengthForDecode = len - lengthOfBOM;
384 384
385 if (!m_buffer.isEmpty()) { 385 if (!m_buffer.isEmpty()) {
386 if (!movedDataToBuffer) { 386 if (!movedDataToBuffer) {
387 size_t oldSize = m_buffer.size(); 387 size_t oldSize = m_buffer.size();
388 m_buffer.grow(oldSize + len); 388 m_buffer.grow(oldSize + len);
389 memcpy(m_buffer.data() + oldSize, data, len); 389 memcpy(m_buffer.data() + oldSize, data, len);
390 } 390 }
391 391
392 dataForDecode = m_buffer.data() + lengthOfBOM; 392 dataForDecode = m_buffer.data() + lengthOfBOM;
393 lengthForDecode = m_buffer.size() - lengthOfBOM; 393 lengthForDecode = m_buffer.size() - lengthOfBOM;
394 } 394 }
395 395
396 if (m_contentType == HTML && !m_checkedForMetaCharset) 396 if (m_contentType == HTMLContent && !m_checkedForMetaCharset)
397 checkForMetaCharset(dataForDecode, lengthForDecode); 397 checkForMetaCharset(dataForDecode, lengthForDecode);
398 398
399 if (shouldAutoDetect()) { 399 if (shouldAutoDetect()) {
400 WTF::TextEncoding detectedEncoding; 400 WTF::TextEncoding detectedEncoding;
401 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) 401 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding))
402 setEncoding(detectedEncoding, EncodingFromContentSniffing); 402 setEncoding(detectedEncoding, EncodingFromContentSniffing);
403 } 403 }
404 404
405 ASSERT(m_encoding.isValid()); 405 ASSERT(m_encoding.isValid());
406 406
407 if (!m_codec) 407 if (!m_codec)
408 m_codec = newTextCodec(m_encoding); 408 m_codec = newTextCodec(m_encoding);
409 409
410 String result = m_codec->decode(dataForDecode, lengthForDecode, false, m_contentType == XML && !m_useLenientXMLDecoding, m_sawError); 410 String result = m_codec->decode(dataForDecode, lengthForDecode, false, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);
411 411
412 m_buffer.clear(); 412 m_buffer.clear();
413 return result; 413 return result;
414 } 414 }
415 415
416 String TextResourceDecoder::flush() 416 String TextResourceDecoder::flush()
417 { 417 {
418 // If we can not identify the encoding even after a document is completely 418 // If we can not identify the encoding even after a document is completely
419 // loaded, we need to detect the encoding if other conditions for 419 // loaded, we need to detect the encoding if other conditions for
420 // autodetection is satisfied. 420 // autodetection is satisfied.
421 if (m_buffer.size() && shouldAutoDetect() 421 if (m_buffer.size() && shouldAutoDetect()
422 && ((!m_checkedForXMLCharset && (m_contentType == HTML || m_contentType == XML)) || (!m_checkedForCSSCharset && (m_contentType == CSS)))) { 422 && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_contentType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) {
423 WTF::TextEncoding detectedEncoding; 423 WTF::TextEncoding detectedEncoding;
424 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding)) 424 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding))
425 setEncoding(detectedEncoding, EncodingFromContentSniffing); 425 setEncoding(detectedEncoding, EncodingFromContentSniffing);
426 } 426 }
427 427
428 if (!m_codec) 428 if (!m_codec)
429 m_codec = newTextCodec(m_encoding); 429 m_codec = newTextCodec(m_encoding);
430 430
431 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_contentType == XML && !m_useLenientXMLDecoding, m_sawError); 431 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);
432 m_buffer.clear(); 432 m_buffer.clear();
433 m_codec.clear(); 433 m_codec.clear();
434 m_checkedForBOM = false; // Skip BOM again when re-decoding. 434 m_checkedForBOM = false; // Skip BOM again when re-decoding.
435 return result; 435 return result;
436 } 436 }
437 437
438 } 438 }
OLD NEW

Powered by Google App Engine
This is Rietveld 408576698