Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Side by Side Diff: third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp

Issue 2764283002: Move files in wtf/ to platform/wtf/ (Part 10). (Closed)
Patch Set: Rebase. Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2004, 2006, 2008, 2010 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "wtf/text/TextCodecUTF16.h"
27
28 #include "wtf/PtrUtil.h"
29 #include "wtf/text/CString.h"
30 #include "wtf/text/CharacterNames.h"
31 #include "wtf/text/StringBuffer.h"
32 #include "wtf/text/WTFString.h"
33 #include <memory>
34
35 using namespace std;
36
37 namespace WTF {
38
39 void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar) {
40 registrar("UTF-16LE", "UTF-16LE");
41 registrar("UTF-16BE", "UTF-16BE");
42
43 registrar("ISO-10646-UCS-2", "UTF-16LE");
44 registrar("UCS-2", "UTF-16LE");
45 registrar("UTF-16", "UTF-16LE");
46 registrar("Unicode", "UTF-16LE");
47 registrar("csUnicode", "UTF-16LE");
48 registrar("unicodeFEFF", "UTF-16LE");
49
50 registrar("unicodeFFFE", "UTF-16BE");
51 }
52
53 static std::unique_ptr<TextCodec> newStreamingTextDecoderUTF16LE(
54 const TextEncoding&,
55 const void*) {
56 return WTF::makeUnique<TextCodecUTF16>(true);
57 }
58
59 static std::unique_ptr<TextCodec> newStreamingTextDecoderUTF16BE(
60 const TextEncoding&,
61 const void*) {
62 return WTF::makeUnique<TextCodecUTF16>(false);
63 }
64
65 void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar) {
66 registrar("UTF-16LE", newStreamingTextDecoderUTF16LE, 0);
67 registrar("UTF-16BE", newStreamingTextDecoderUTF16BE, 0);
68 }
69
70 String TextCodecUTF16::decode(const char* bytes,
71 size_t length,
72 FlushBehavior flush,
73 bool,
74 bool& sawError) {
75 // For compatibility reasons, ignore flush from fetch EOF.
76 const bool reallyFlush = flush != DoNotFlush && flush != FetchEOF;
77
78 if (!length) {
79 if (reallyFlush && (m_haveLeadByte || m_haveLeadSurrogate)) {
80 m_haveLeadByte = m_haveLeadSurrogate = false;
81 sawError = true;
82 return String(&replacementCharacter, 1);
83 }
84 return String();
85 }
86
87 const unsigned char* p = reinterpret_cast<const unsigned char*>(bytes);
88 const size_t numBytes = length + m_haveLeadByte;
89 const bool willHaveExtraByte = numBytes & 1;
90 const size_t numCharsIn = numBytes / 2;
91 const size_t maxCharsOut = numCharsIn + (m_haveLeadSurrogate ? 1 : 0) +
92 (reallyFlush && willHaveExtraByte ? 1 : 0);
93
94 StringBuffer<UChar> buffer(maxCharsOut);
95 UChar* q = buffer.characters();
96
97 for (size_t i = 0; i < numCharsIn; ++i) {
98 UChar c;
99 if (m_haveLeadByte) {
100 c = m_littleEndian ? (m_leadByte | (p[0] << 8))
101 : ((m_leadByte << 8) | p[0]);
102 m_haveLeadByte = false;
103 ++p;
104 } else {
105 c = m_littleEndian ? (p[0] | (p[1] << 8)) : ((p[0] << 8) | p[1]);
106 p += 2;
107 }
108
109 // TODO(jsbell): If necessary for performance, m_haveLeadByte handling
110 // can be pulled out and this loop split into distinct cases for
111 // big/little endian. The logic from here to the end of the loop is
112 // constant with respect to m_haveLeadByte and m_littleEndian.
113
114 if (m_haveLeadSurrogate && U_IS_TRAIL(c)) {
115 *q++ = m_leadSurrogate;
116 m_haveLeadSurrogate = false;
117 *q++ = c;
118 } else {
119 if (m_haveLeadSurrogate) {
120 m_haveLeadSurrogate = false;
121 sawError = true;
122 *q++ = replacementCharacter;
123 }
124
125 if (U_IS_LEAD(c)) {
126 m_haveLeadSurrogate = true;
127 m_leadSurrogate = c;
128 } else if (U_IS_TRAIL(c)) {
129 sawError = true;
130 *q++ = replacementCharacter;
131 } else {
132 *q++ = c;
133 }
134 }
135 }
136
137 DCHECK(!m_haveLeadByte);
138 if (willHaveExtraByte) {
139 m_haveLeadByte = true;
140 m_leadByte = p[0];
141 }
142
143 if (reallyFlush && (m_haveLeadByte || m_haveLeadSurrogate)) {
144 m_haveLeadByte = m_haveLeadSurrogate = false;
145 sawError = true;
146 *q++ = replacementCharacter;
147 }
148
149 buffer.shrink(q - buffer.characters());
150
151 return String::adopt(buffer);
152 }
153
154 CString TextCodecUTF16::encode(const UChar* characters,
155 size_t length,
156 UnencodableHandling) {
157 // We need to be sure we can double the length without overflowing.
158 // Since the passed-in length is the length of an actual existing
159 // character buffer, each character is two bytes, and we know
160 // the buffer doesn't occupy the entire address space, we can
161 // assert here that doubling the length does not overflow size_t
162 // and there's no need for a runtime check.
163 DCHECK_LE(length, numeric_limits<size_t>::max() / 2);
164
165 char* bytes;
166 CString result = CString::createUninitialized(length * 2, bytes);
167
168 // FIXME: CString is not a reasonable data structure for encoded UTF-16, which
169 // will have null characters inside it. Perhaps the result of encode should
170 // not be a CString.
171 if (m_littleEndian) {
172 for (size_t i = 0; i < length; ++i) {
173 UChar c = characters[i];
174 bytes[i * 2] = static_cast<char>(c);
175 bytes[i * 2 + 1] = c >> 8;
176 }
177 } else {
178 for (size_t i = 0; i < length; ++i) {
179 UChar c = characters[i];
180 bytes[i * 2] = c >> 8;
181 bytes[i * 2 + 1] = static_cast<char>(c);
182 }
183 }
184
185 return result;
186 }
187
188 CString TextCodecUTF16::encode(const LChar* characters,
189 size_t length,
190 UnencodableHandling) {
191 // In the LChar case, we do actually need to perform this check in release. :)
192 RELEASE_ASSERT(length <= numeric_limits<size_t>::max() / 2);
193
194 char* bytes;
195 CString result = CString::createUninitialized(length * 2, bytes);
196
197 if (m_littleEndian) {
198 for (size_t i = 0; i < length; ++i) {
199 bytes[i * 2] = characters[i];
200 bytes[i * 2 + 1] = 0;
201 }
202 } else {
203 for (size_t i = 0; i < length; ++i) {
204 bytes[i * 2] = 0;
205 bytes[i * 2 + 1] = characters[i];
206 }
207 }
208
209 return result;
210 }
211
212 } // namespace WTF
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/wtf/text/TextCodecUTF16.h ('k') | third_party/WebKit/Source/wtf/text/TextCodecUTF8.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698