OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (C) 2004, 2006, 2008, 2010 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2008, 2010 Apple Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
6 * are met: | 6 * are met: |
7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
66 registrar("UTF-16LE", newStreamingTextDecoderUTF16LE, 0); | 66 registrar("UTF-16LE", newStreamingTextDecoderUTF16LE, 0); |
67 registrar("UTF-16BE", newStreamingTextDecoderUTF16BE, 0); | 67 registrar("UTF-16BE", newStreamingTextDecoderUTF16BE, 0); |
68 } | 68 } |
69 | 69 |
70 String TextCodecUTF16::decode(const char* bytes, size_t length, FlushBehavior flush, bool, bool& sawError) | 70 String TextCodecUTF16::decode(const char* bytes, size_t length, FlushBehavior flush, bool, bool& sawError) |
71 { | 71 { |
72 // For compatibility reasons, ignore flush from fetch EOF. | 72 // For compatibility reasons, ignore flush from fetch EOF. |
73 const bool reallyFlush = flush != DoNotFlush && flush != FetchEOF; | 73 const bool reallyFlush = flush != DoNotFlush && flush != FetchEOF; |
74 | 74 |
75 if (!length) { | 75 if (!length) { |
76 if (!reallyFlush || !m_haveBufferedByte) | 76 if (reallyFlush && (m_haveLeadByte || m_haveLeadSurrogate)) { |
77 return String(); | 77 m_haveLeadByte = m_haveLeadSurrogate = false; |
78 sawError = true; | 78 sawError = true; |
79 return String(&replacementCharacter, 1); | 79 return String(&replacementCharacter, 1); |
80 } | |
81 return String(); | |
80 } | 82 } |
81 | 83 |
82 // FIXME: This should generate an error if there is an unpaired surrogate. | 84 const unsigned char* p = reinterpret_cast<const unsigned char*>(bytes); |
85 const size_t numBytes = length + m_haveLeadByte; | |
86 const bool willHaveExtraByte = numBytes & 1; | |
87 const size_t numCharsIn = numBytes / 2; | |
88 const size_t maxCharsOut = numCharsIn + (m_haveLeadSurrogate ? 1 : 0) + (reallyFlush && willHaveExtraByte ? 1 : 0); | |
83 | 89 |
84 const unsigned char* p = reinterpret_cast<const unsigned char*>(bytes); | 90 StringBuffer<UChar> buffer(maxCharsOut); |
85 size_t numBytes = length + m_haveBufferedByte; | |
86 size_t numCharsIn = numBytes / 2; | |
87 size_t numCharsOut = ((numBytes & 1) && reallyFlush) ? numCharsIn + 1 : numCharsIn; | |
88 | |
89 StringBuffer<UChar> buffer(numCharsOut); | |
90 UChar* q = buffer.characters(); | 91 UChar* q = buffer.characters(); |
91 | 92 |
92 if (m_haveBufferedByte) { | 93 for (size_t i = 0; i < numCharsIn; ++i) { |
93 UChar c; | 94 UChar c; |
94 if (m_littleEndian) | 95 if (m_haveLeadByte) { |
95 c = m_bufferedByte | (p[0] << 8); | 96 c = m_littleEndian ? (m_leadByte | (p[0] << 8)) : ((m_leadByte << 8) | p[0]); |
96 else | 97 m_haveLeadByte = false; |
97 c = (m_bufferedByte << 8) | p[0]; | 98 ++p; |
98 *q++ = c; | 99 } else { |
99 m_haveBufferedByte = false; | 100 c = m_littleEndian ? (p[0] | (p[1] << 8)) : ((p[0] << 8) | p[1]); |
100 p += 1; | 101 p += 2; |
101 numCharsIn -= 1; | 102 } |
102 } | |
103 | 103 |
104 if (m_littleEndian) { | 104 // TODO(jsbell): If necessary for performance, m_haveLeadByte handling |
105 for (size_t i = 0; i < numCharsIn; ++i) { | 105 // can be pulled out and this loop split into distinct cases for |
106 UChar c = p[0] | (p[1] << 8); | 106 // big/little endian. The logic from here to the end of the loop is |
107 p += 2; | 107 // constant with respect to m_haveLeadByte and m_littleEndian. |
108 | |
109 if (m_haveLeadSurrogate && U_IS_TRAIL(c)) { | |
110 *q++ = m_leadSurrogate; | |
111 m_haveLeadSurrogate = false; | |
108 *q++ = c; | 112 *q++ = c; |
109 } | 113 } else { |
110 } else { | 114 if (m_haveLeadSurrogate) { |
111 for (size_t i = 0; i < numCharsIn; ++i) { | 115 m_haveLeadSurrogate = false; |
112 UChar c = (p[0] << 8) | p[1]; | 116 sawError = true; |
113 p += 2; | 117 *q++ = replacementCharacter; |
114 *q++ = c; | 118 } |
119 | |
120 if (U_IS_LEAD(c)) { | |
121 m_haveLeadSurrogate = true; | |
122 m_leadSurrogate = c; | |
123 } else if (U_IS_TRAIL(c)) { | |
124 sawError = true; | |
125 *q++ = replacementCharacter; | |
126 } else { | |
127 *q++ = c; | |
128 } | |
115 } | 129 } |
116 } | 130 } |
117 | 131 |
118 if (numBytes & 1) { | 132 if (willHaveExtraByte) { |
119 ASSERT(!m_haveBufferedByte); | 133 DCHECK(!m_haveLeadByte); |
foolip
2016/09/30 22:59:41
I think it's the m_haveLeadByte=false in the loop
jsbell
2016/09/30 23:52:13
Yes.
| |
134 m_haveLeadByte = true; | |
135 m_leadByte = p[0]; | |
136 } | |
120 | 137 |
121 if (reallyFlush) { | 138 if (reallyFlush && (m_haveLeadByte || m_haveLeadSurrogate)) { |
122 sawError = true; | 139 m_haveLeadByte = m_haveLeadSurrogate = false; |
123 *q++ = replacementCharacter; | 140 sawError = true; |
124 } else { | 141 *q++ = replacementCharacter; |
125 m_haveBufferedByte = true; | |
126 m_bufferedByte = p[0]; | |
127 } | |
128 } | 142 } |
129 | 143 |
130 buffer.shrink(q - buffer.characters()); | 144 buffer.shrink(q - buffer.characters()); |
131 | 145 |
132 return String::adopt(buffer); | 146 return String::adopt(buffer); |
133 } | 147 } |
134 | 148 |
135 CString TextCodecUTF16::encode(const UChar* characters, size_t length, UnencodableHandling) | 149 CString TextCodecUTF16::encode(const UChar* characters, size_t length, UnencodableHandling) |
136 { | 150 { |
137 // We need to be sure we can double the length without overflowing. | 151 // We need to be sure we can double the length without overflowing. |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
181 for (size_t i = 0; i < length; ++i) { | 195 for (size_t i = 0; i < length; ++i) { |
182 bytes[i * 2] = 0; | 196 bytes[i * 2] = 0; |
183 bytes[i * 2 + 1] = characters[i]; | 197 bytes[i * 2 + 1] = characters[i]; |
184 } | 198 } |
185 } | 199 } |
186 | 200 |
187 return result; | 201 return result; |
188 } | 202 } |
189 | 203 |
190 } // namespace WTF | 204 } // namespace WTF |
OLD | NEW |