third_party/WebKit/WebCore/loader/TextResourceDecoder.cpp - Issue 174528: japanese encoding webkit fixes for 3.0 branch

Side by Side Diff: third_party/WebKit/WebCore/loader/TextResourceDecoder.cpp

Issue 174528: japanese encoding webkit fixes for 3.0 branch (Closed) Base URL: svn://chrome-svn/chrome/branches/195/src/

Patch Set: Created 11 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)	2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)

3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.	3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.

4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com)	4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com)

5	5

6 This library is free software; you can redistribute it and/or	6 This library is free software; you can redistribute it and/or

7 modify it under the terms of the GNU Library General Public	7 modify it under the terms of the GNU Library General Public

8 License as published by the Free Software Foundation; either	8 License as published by the Free Software Foundation; either

9 version 2 of the License, or (at your option) any later version.	9 version 2 of the License, or (at your option) any later version.

10	10

(...skipping 74 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
85 }	85 }

86	86

87 static TextEncoding findTextEncoding(const char* encodingName, int length)	87 static TextEncoding findTextEncoding(const char* encodingName, int length)

88 {	88 {

89 Vector<char, 64> buffer(length + 1);	89 Vector<char, 64> buffer(length + 1);

90 memcpy(buffer.data(), encodingName, length);	90 memcpy(buffer.data(), encodingName, length);

91 buffer[length] = '\0';	91 buffer[length] = '\0';

92 return buffer.data();	92 return buffer.data();

93 }	93 }

94	94

	95 #if !PLATFORM(CHROMIUM)

95 class KanjiCode {	96 class KanjiCode {

96 public:	97 public:

97 enum Type { ASCII, JIS, EUC, SJIS, UTF16, UTF8 };	98 enum Type { ASCII, JIS, EUC, SJIS, UTF16, UTF8 };

98 static enum Type judge(const char* str, int length);	99 static enum Type judge(const char* str, int length);

99 static const int ESC = 0x1b;	100 static const int ESC = 0x1b;

100 static const unsigned char sjisMap[256];	101 static const unsigned char sjisMap[256];

101 static int ISkanji(int code)	102 static int ISkanji(int code)

102 {	103 {

103 if (code >= 0x100)	104 if (code >= 0x100)

104 return 0;	105 return 0;

(...skipping 188 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
293 if (code == ASCII) {	294 if (code == ASCII) {

294 if (sjis > euc) {	295 if (sjis > euc) {

295 code = SJIS;	296 code = SJIS;

296 } else if (sjis < euc) {	297 } else if (sjis < euc) {

297 code = EUC;	298 code = EUC;

298 }	299 }

299 }	300 }

300 breakBreak:	301 breakBreak:

301 return (code);	302 return (code);

302 }	303 }

	304 #endif

303	305

304 TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const String& mimeType)	306 TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const String& mimeType)

305 {	307 {

306 if (equalIgnoringCase(mimeType, "text/css"))	308 if (equalIgnoringCase(mimeType, "text/css"))

307 return CSS;	309 return CSS;

308 if (equalIgnoringCase(mimeType, "text/html"))	310 if (equalIgnoringCase(mimeType, "text/html"))

309 return HTML;	311 return HTML;

310 if (DOMImplementation::isXMLMIMEType(mimeType))	312 if (DOMImplementation::isXMLMIMEType(mimeType))

311 return XML;	313 return XML;

312 return PlainText;	314 return PlainText;

(...skipping 426 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
739 m_checkedForHeadCharset = true;	741 m_checkedForHeadCharset = true;

740 return true;	742 return true;

741 }	743 }

742 }	744 }

743 } else	745 } else

744 ++ptr;	746 ++ptr;

745 }	747 }

746 return false;	748 return false;

747 }	749 }

748	750

	751 #if !PLATFORM(CHROMIUM)

749 void TextResourceDecoder::detectJapaneseEncoding(const char* data, size_t len)	752 void TextResourceDecoder::detectJapaneseEncoding(const char* data, size_t len)

750 {	753 {

751 switch (KanjiCode::judge(data, len)) {	754 switch (KanjiCode::judge(data, len)) {

752 case KanjiCode::JIS:	755 case KanjiCode::JIS:

753 setEncoding("ISO-2022-JP", AutoDetectedEncoding);	756 setEncoding("ISO-2022-JP", AutoDetectedEncoding);

754 break;	757 break;

755 case KanjiCode::EUC:	758 case KanjiCode::EUC:

756 setEncoding("EUC-JP", AutoDetectedEncoding);	759 setEncoding("EUC-JP", AutoDetectedEncoding);

757 break;	760 break;

758 case KanjiCode::SJIS:	761 case KanjiCode::SJIS:

759 setEncoding("Shift_JIS", AutoDetectedEncoding);	762 setEncoding("Shift_JIS", AutoDetectedEncoding);

760 break;	763 break;

761 case KanjiCode::ASCII:	764 case KanjiCode::ASCII:

762 case KanjiCode::UTF16:	765 case KanjiCode::UTF16:

763 case KanjiCode::UTF8:	766 case KanjiCode::UTF8:

764 break;	767 break;

765 }	768 }

766 }	769 }

	770 #endif

767	771

768 // We use the encoding detector in two cases:	772 // We use the encoding detector in two cases:

769 // 1. Encoding detector is turned ON and no other encoding source is	773 // 1. Encoding detector is turned ON and no other encoding source is

770 // available (that is, it's DefaultEncoding).	774 // available (that is, it's DefaultEncoding).

771 // 2. Encoding detector is turned ON and the encoding is set to	775 // 2. Encoding detector is turned ON and the encoding is set to

772 // the encoding of the parent frame, which is also auto-detected.	776 // the encoding of the parent frame, which is also auto-detected.

773 // Note that condition #2 is NOT satisfied unless parent-child frame	777 // Note that condition #2 is NOT satisfied unless parent-child frame

774 // relationship is compliant to the same-origin policy. If they're from	778 // relationship is compliant to the same-origin policy. If they're from

775 // different domains, |m_source| would not be set to EncodingFromParentFrame	779 // different domains, |m_source| would not be set to EncodingFromParentFrame

776 // in the first place.	780 // in the first place.

(...skipping 25 matching lines...) Expand all Loading...
802 // we have already done some decoding. However, it's not possible	806 // we have already done some decoding. However, it's not possible

803 // to avoid in a sense in two cases below because triggering conditions	807 // to avoid in a sense in two cases below because triggering conditions

804 // for both cases depend on the information that won't be available	808 // for both cases depend on the information that won't be available

805 // until we do partial read.	809 // until we do partial read.

806 // The first case had better be removed altogether (see bug 21990)	810 // The first case had better be removed altogether (see bug 21990)

807 // or at least be made to be invoked only when the encoding detection	811 // or at least be made to be invoked only when the encoding detection

808 // is turned on.	812 // is turned on.

809 // Do the auto-detect 1) using Japanese detector if our default encoding is	813 // Do the auto-detect 1) using Japanese detector if our default encoding is

810 // one of the Japanese encodings or 2) using detectTextEncoding if encoding	814 // one of the Japanese encodings or 2) using detectTextEncoding if encoding

811 // detection is turned on.	815 // detection is turned on.

	816 #if !PLATFORM(CHROMIUM)

812 if (m_source != UserChosenEncoding && m_source != AutoDetectedEncoding && m_encoding.isJapanese())	817 if (m_source != UserChosenEncoding && m_source != AutoDetectedEncoding && m_encoding.isJapanese())

813 detectJapaneseEncoding(data, len);	818 detectJapaneseEncoding(data, len);

814 else if (shouldAutoDetect()) {	819 else if (shouldAutoDetect()) {

	820 #else

	821 if (shouldAutoDetect()) {

	822 #endif

815 TextEncoding detectedEncoding;	823 TextEncoding detectedEncoding;

816 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding))	824 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding))

817 setEncoding(detectedEncoding, AutoDetectedEncoding);	825 setEncoding(detectedEncoding, AutoDetectedEncoding);

818 }	826 }

819	827

820 ASSERT(m_encoding.isValid());	828 ASSERT(m_encoding.isValid());

821	829

822 if (!m_codec)	830 if (!m_codec)

823 m_codec.set(newTextCodec(m_encoding).release());	831 m_codec.set(newTextCodec(m_encoding).release());

824	832

(...skipping 28 matching lines...) Expand all Loading...
853 m_codec.set(newTextCodec(m_encoding).release());	861 m_codec.set(newTextCodec(m_encoding).release());

854	862

855 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_contentType == XML && !m_useLenientXMLDecoding, m_sawError);	863 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_contentType == XML && !m_useLenientXMLDecoding, m_sawError);

856 m_buffer.clear();	864 m_buffer.clear();

857 m_codec.clear();	865 m_codec.clear();

858 m_checkedForBOM = false; // Skip BOM again when re-decoding.	866 m_checkedForBOM = false; // Skip BOM again when re-decoding.

859 return result;	867 return result;

860 }	868 }

861	869

862 }	870 }

OLD	NEW

« no previous file with comments | « third_party/WebKit/WebCore/loader/TextResourceDecoder.h ('k') | third_party/WebKit/WebCore/platform/text/TextEncoding.h » ('j') | no next file with comments »