third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp - Issue 2764283002: Move files in wtf/ to platform/wtf/ (Part 10).

Side by Side Diff: third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp

Issue 2764283002: Move files in wtf/ to platform/wtf/ (Part 10). (Closed)

Patch Set: Rebase. Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 /*

2 * Copyright (C) 2004, 2006, 2008, 2010 Apple Inc. All rights reserved.

3 *

4 * Redistribution and use in source and binary forms, with or without

5 * modification, are permitted provided that the following conditions

6 * are met:

7 * 1. Redistributions of source code must retain the above copyright

8 * notice, this list of conditions and the following disclaimer.

9 * 2. Redistributions in binary form must reproduce the above copyright

10 * notice, this list of conditions and the following disclaimer in the

11 * documentation and/or other materials provided with the distribution.

12 *

13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY

14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR

16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR

17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY

21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

24 */

25

26 #include "wtf/text/TextCodecUTF16.h"

27

28 #include "wtf/PtrUtil.h"

29 #include "wtf/text/CString.h"

30 #include "wtf/text/CharacterNames.h"

31 #include "wtf/text/StringBuffer.h"

32 #include "wtf/text/WTFString.h"

33 #include <memory>

34

35 using namespace std;

36

37 namespace WTF {

38

39 void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar) {

40 registrar("UTF-16LE", "UTF-16LE");

41 registrar("UTF-16BE", "UTF-16BE");

42

43 registrar("ISO-10646-UCS-2", "UTF-16LE");

44 registrar("UCS-2", "UTF-16LE");

45 registrar("UTF-16", "UTF-16LE");

46 registrar("Unicode", "UTF-16LE");

47 registrar("csUnicode", "UTF-16LE");

48 registrar("unicodeFEFF", "UTF-16LE");

49

50 registrar("unicodeFFFE", "UTF-16BE");

51 }

52

53 static std::unique_ptr<TextCodec> newStreamingTextDecoderUTF16LE(

54 const TextEncoding&,

55 const void*) {

56 return WTF::makeUnique<TextCodecUTF16>(true);

57 }

58

59 static std::unique_ptr<TextCodec> newStreamingTextDecoderUTF16BE(

60 const TextEncoding&,

61 const void*) {

62 return WTF::makeUnique<TextCodecUTF16>(false);

63 }

64

65 void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar) {

66 registrar("UTF-16LE", newStreamingTextDecoderUTF16LE, 0);

67 registrar("UTF-16BE", newStreamingTextDecoderUTF16BE, 0);

68 }

69

70 String TextCodecUTF16::decode(const char* bytes,

71 size_t length,

72 FlushBehavior flush,

73 bool,

74 bool& sawError) {

75 // For compatibility reasons, ignore flush from fetch EOF.

76 const bool reallyFlush = flush != DoNotFlush && flush != FetchEOF;

77

78 if (!length) {

79 if (reallyFlush && (m_haveLeadByte \|\| m_haveLeadSurrogate)) {

80 m_haveLeadByte = m_haveLeadSurrogate = false;

81 sawError = true;

82 return String(&replacementCharacter, 1);

83 }

84 return String();

85 }

86

87 const unsigned char* p = reinterpret_cast<const unsigned char*>(bytes);

88 const size_t numBytes = length + m_haveLeadByte;

89 const bool willHaveExtraByte = numBytes & 1;

90 const size_t numCharsIn = numBytes / 2;

91 const size_t maxCharsOut = numCharsIn + (m_haveLeadSurrogate ? 1 : 0) +

92 (reallyFlush && willHaveExtraByte ? 1 : 0);

93

94 StringBuffer<UChar> buffer(maxCharsOut);

95 UChar* q = buffer.characters();

96

97 for (size_t i = 0; i < numCharsIn; ++i) {

98 UChar c;

99 if (m_haveLeadByte) {

100 c = m_littleEndian ? (m_leadByte \| (p[0] << 8))

101 : ((m_leadByte << 8) \| p[0]);

102 m_haveLeadByte = false;

103 ++p;

104 } else {

105 c = m_littleEndian ? (p[0] \| (p[1] << 8)) : ((p[0] << 8) \| p[1]);

106 p += 2;

107 }

108

109 // TODO(jsbell): If necessary for performance, m_haveLeadByte handling

110 // can be pulled out and this loop split into distinct cases for

111 // big/little endian. The logic from here to the end of the loop is

112 // constant with respect to m_haveLeadByte and m_littleEndian.

113

114 if (m_haveLeadSurrogate && U_IS_TRAIL(c)) {

115 *q++ = m_leadSurrogate;

116 m_haveLeadSurrogate = false;

117 *q++ = c;

118 } else {

119 if (m_haveLeadSurrogate) {

120 m_haveLeadSurrogate = false;

121 sawError = true;

122 *q++ = replacementCharacter;

123 }

124

125 if (U_IS_LEAD(c)) {

126 m_haveLeadSurrogate = true;

127 m_leadSurrogate = c;

128 } else if (U_IS_TRAIL(c)) {

129 sawError = true;

130 *q++ = replacementCharacter;

131 } else {

132 *q++ = c;

133 }

134 }

135 }

136

137 DCHECK(!m_haveLeadByte);

138 if (willHaveExtraByte) {

139 m_haveLeadByte = true;

140 m_leadByte = p[0];

141 }

142

143 if (reallyFlush && (m_haveLeadByte \|\| m_haveLeadSurrogate)) {

144 m_haveLeadByte = m_haveLeadSurrogate = false;

145 sawError = true;

146 *q++ = replacementCharacter;

147 }

148

149 buffer.shrink(q - buffer.characters());

150

151 return String::adopt(buffer);

152 }

153

154 CString TextCodecUTF16::encode(const UChar* characters,

155 size_t length,

156 UnencodableHandling) {

157 // We need to be sure we can double the length without overflowing.

158 // Since the passed-in length is the length of an actual existing

159 // character buffer, each character is two bytes, and we know

160 // the buffer doesn't occupy the entire address space, we can

161 // assert here that doubling the length does not overflow size_t

162 // and there's no need for a runtime check.

163 DCHECK_LE(length, numeric_limits<size_t>::max() / 2);

164

165 char* bytes;

166 CString result = CString::createUninitialized(length * 2, bytes);

167

168 // FIXME: CString is not a reasonable data structure for encoded UTF-16, which

169 // will have null characters inside it. Perhaps the result of encode should

170 // not be a CString.

171 if (m_littleEndian) {

172 for (size_t i = 0; i < length; ++i) {

173 UChar c = characters[i];

174 bytes[i * 2] = static_cast<char>(c);

175 bytes[i * 2 + 1] = c >> 8;

176 }

177 } else {

178 for (size_t i = 0; i < length; ++i) {

179 UChar c = characters[i];

180 bytes[i * 2] = c >> 8;

181 bytes[i * 2 + 1] = static_cast<char>(c);

182 }

183 }

184

185 return result;

186 }

187

188 CString TextCodecUTF16::encode(const LChar* characters,

189 size_t length,

190 UnencodableHandling) {

191 // In the LChar case, we do actually need to perform this check in release. :)

192 RELEASE_ASSERT(length <= numeric_limits<size_t>::max() / 2);

193

194 char* bytes;

195 CString result = CString::createUninitialized(length * 2, bytes);

196

197 if (m_littleEndian) {

198 for (size_t i = 0; i < length; ++i) {

199 bytes[i * 2] = characters[i];

200 bytes[i * 2 + 1] = 0;

201 }

202 } else {

203 for (size_t i = 0; i < length; ++i) {

204 bytes[i * 2] = 0;

205 bytes[i * 2 + 1] = characters[i];

206 }

207 }

208

209 return result;

210 }

211

212 } // namespace WTF

OLD	NEW

« no previous file with comments | « third_party/WebKit/Source/wtf/text/TextCodecUTF16.h ('k') | third_party/WebKit/Source/wtf/text/TextCodecUTF8.h » ('j') | no next file with comments »