Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (C) 2004, 2006, 2008, 2010 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2008, 2010 Apple Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
| 6 * are met: | 6 * are met: |
| 7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
| 8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
| 9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
| 10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 66 registrar("UTF-16LE", newStreamingTextDecoderUTF16LE, 0); | 66 registrar("UTF-16LE", newStreamingTextDecoderUTF16LE, 0); |
| 67 registrar("UTF-16BE", newStreamingTextDecoderUTF16BE, 0); | 67 registrar("UTF-16BE", newStreamingTextDecoderUTF16BE, 0); |
| 68 } | 68 } |
| 69 | 69 |
| 70 String TextCodecUTF16::decode(const char* bytes, size_t length, FlushBehavior flush, bool, bool& sawError) | 70 String TextCodecUTF16::decode(const char* bytes, size_t length, FlushBehavior flush, bool, bool& sawError) |
| 71 { | 71 { |
| 72 // For compatibility reasons, ignore flush from fetch EOF. | 72 // For compatibility reasons, ignore flush from fetch EOF. |
| 73 const bool reallyFlush = flush != DoNotFlush && flush != FetchEOF; | 73 const bool reallyFlush = flush != DoNotFlush && flush != FetchEOF; |
| 74 | 74 |
| 75 if (!length) { | 75 if (!length) { |
| 76 if (!reallyFlush || !m_haveBufferedByte) | 76 if (reallyFlush && (m_haveLeadByte || m_haveLeadSurrogate)) { |
| 77 return String(); | 77 m_haveLeadByte = m_haveLeadSurrogate = false; |
| 78 sawError = true; | 78 sawError = true; |
| 79 return String(&replacementCharacter, 1); | 79 return String(&replacementCharacter, 1); |
| 80 } | |
| 81 return String(); | |
| 80 } | 82 } |
| 81 | 83 |
| 82 // FIXME: This should generate an error if there is an unpaired surrogate. | 84 const unsigned char* p = reinterpret_cast<const unsigned char*>(bytes); |
| 85 const size_t numBytes = length + m_haveLeadByte; | |
| 86 const bool willHaveExtraByte = numBytes & 1; | |
| 87 const size_t numCharsIn = numBytes / 2; | |
| 88 const size_t maxCharsOut = numCharsIn + (m_haveLeadSurrogate ? 1 : 0) + (reallyFlush && willHaveExtraByte ? 1 : 0); | |
| 83 | 89 |
| 84 const unsigned char* p = reinterpret_cast<const unsigned char*>(bytes); | 90 StringBuffer<UChar> buffer(maxCharsOut); |
| 85 size_t numBytes = length + m_haveBufferedByte; | |
| 86 size_t numCharsIn = numBytes / 2; | |
| 87 size_t numCharsOut = ((numBytes & 1) && reallyFlush) ? numCharsIn + 1 : numCharsIn; | |
| 88 | |
| 89 StringBuffer<UChar> buffer(numCharsOut); | |
| 90 UChar* q = buffer.characters(); | 91 UChar* q = buffer.characters(); |
| 91 | 92 |
| 92 if (m_haveBufferedByte) { | 93 for (size_t i = 0; i < numCharsIn; ++i) { |
| 93 UChar c; | 94 UChar c; |
| 94 if (m_littleEndian) | 95 if (m_haveLeadByte) { |
| 95 c = m_bufferedByte | (p[0] << 8); | 96 c = m_littleEndian ? (m_leadByte | (p[0] << 8)) : ((m_leadByte << 8) | p[0]); |
| 96 else | 97 m_haveLeadByte = false; |
| 97 c = (m_bufferedByte << 8) | p[0]; | 98 ++p; |
| 98 *q++ = c; | 99 } else { |
| 99 m_haveBufferedByte = false; | 100 c = m_littleEndian ? (p[0] | (p[1] << 8)) : ((p[0] << 8) | p[1]); |
| 100 p += 1; | 101 p += 2; |
| 101 numCharsIn -= 1; | 102 } |
| 102 } | |
| 103 | 103 |
| 104 if (m_littleEndian) { | 104 // TODO(jsbell): If necessary for performance, m_haveLeadByte handling |
| 105 for (size_t i = 0; i < numCharsIn; ++i) { | 105 // can be pulled out and this loop split into distinct cases for |
| 106 UChar c = p[0] | (p[1] << 8); | 106 // big/little endian. The logic from here to the end of the loop is |
| 107 p += 2; | 107 // constant with respect to m_haveLeadByte and m_littleEndian. |
| 108 | |
| 109 if (m_haveLeadSurrogate && U_IS_TRAIL(c)) { | |
| 110 *q++ = m_leadSurrogate; | |
| 111 m_haveLeadSurrogate = false; | |
| 108 *q++ = c; | 112 *q++ = c; |
| 109 } | 113 } else { |
| 110 } else { | 114 if (m_haveLeadSurrogate) { |
| 111 for (size_t i = 0; i < numCharsIn; ++i) { | 115 m_haveLeadSurrogate = false; |
| 112 UChar c = (p[0] << 8) | p[1]; | 116 sawError = true; |
| 113 p += 2; | 117 *q++ = replacementCharacter; |
| 114 *q++ = c; | 118 } |
| 119 | |
| 120 if (U_IS_LEAD(c)) { | |
| 121 m_haveLeadSurrogate = true; | |
| 122 m_leadSurrogate = c; | |
| 123 } else if (U_IS_TRAIL(c)) { | |
| 124 sawError = true; | |
| 125 *q++ = replacementCharacter; | |
| 126 } else { | |
| 127 *q++ = c; | |
| 128 } | |
| 115 } | 129 } |
| 116 } | 130 } |
| 117 | 131 |
| 118 if (numBytes & 1) { | 132 if (willHaveExtraByte) { |
| 119 ASSERT(!m_haveBufferedByte); | 133 DCHECK(!m_haveLeadByte); |
|
foolip
2016/09/30 22:59:41
I think it's the m_haveLeadByte=false in the loop
jsbell
2016/09/30 23:52:13
Yes.
| |
| 134 m_haveLeadByte = true; | |
| 135 m_leadByte = p[0]; | |
| 136 } | |
| 120 | 137 |
| 121 if (reallyFlush) { | 138 if (reallyFlush && (m_haveLeadByte || m_haveLeadSurrogate)) { |
| 122 sawError = true; | 139 m_haveLeadByte = m_haveLeadSurrogate = false; |
| 123 *q++ = replacementCharacter; | 140 sawError = true; |
| 124 } else { | 141 *q++ = replacementCharacter; |
| 125 m_haveBufferedByte = true; | |
| 126 m_bufferedByte = p[0]; | |
| 127 } | |
| 128 } | 142 } |
| 129 | 143 |
| 130 buffer.shrink(q - buffer.characters()); | 144 buffer.shrink(q - buffer.characters()); |
| 131 | 145 |
| 132 return String::adopt(buffer); | 146 return String::adopt(buffer); |
| 133 } | 147 } |
| 134 | 148 |
| 135 CString TextCodecUTF16::encode(const UChar* characters, size_t length, UnencodableHandling) | 149 CString TextCodecUTF16::encode(const UChar* characters, size_t length, UnencodableHandling) |
| 136 { | 150 { |
| 137 // We need to be sure we can double the length without overflowing. | 151 // We need to be sure we can double the length without overflowing. |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 181 for (size_t i = 0; i < length; ++i) { | 195 for (size_t i = 0; i < length; ++i) { |
| 182 bytes[i * 2] = 0; | 196 bytes[i * 2] = 0; |
| 183 bytes[i * 2 + 1] = characters[i]; | 197 bytes[i * 2 + 1] = characters[i]; |
| 184 } | 198 } |
| 185 } | 199 } |
| 186 | 200 |
| 187 return result; | 201 return result; |
| 188 } | 202 } |
| 189 | 203 |
| 190 } // namespace WTF | 204 } // namespace WTF |
| OLD | NEW |