OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (C) 2007, 2008 Apple, Inc. All rights reserved. | |
3 * | |
4 * Redistribution and use in source and binary forms, with or without | |
5 * modification, are permitted provided that the following conditions | |
6 * are met: | |
7 * 1. Redistributions of source code must retain the above copyright | |
8 * notice, this list of conditions and the following disclaimer. | |
9 * 2. Redistributions in binary form must reproduce the above copyright | |
10 * notice, this list of conditions and the following disclaimer in the | |
11 * documentation and/or other materials provided with the distribution. | |
12 * | |
13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY | |
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR | |
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
24 */ | |
25 | |
26 #include "wtf/text/TextCodecUserDefined.h" | |
27 | |
28 #include "wtf/PtrUtil.h" | |
29 #include "wtf/text/CString.h" | |
30 #include "wtf/text/StringBuffer.h" | |
31 #include "wtf/text/StringBuilder.h" | |
32 #include "wtf/text/WTFString.h" | |
33 #include <memory> | |
34 | |
35 namespace WTF { | |
36 | |
37 void TextCodecUserDefined::registerEncodingNames( | |
38 EncodingNameRegistrar registrar) { | |
39 registrar("x-user-defined", "x-user-defined"); | |
40 } | |
41 | |
42 static std::unique_ptr<TextCodec> newStreamingTextDecoderUserDefined( | |
43 const TextEncoding&, | |
44 const void*) { | |
45 return WTF::wrapUnique(new TextCodecUserDefined); | |
46 } | |
47 | |
48 void TextCodecUserDefined::registerCodecs(TextCodecRegistrar registrar) { | |
49 registrar("x-user-defined", newStreamingTextDecoderUserDefined, 0); | |
50 } | |
51 | |
52 String TextCodecUserDefined::decode(const char* bytes, | |
53 size_t length, | |
54 FlushBehavior, | |
55 bool, | |
56 bool&) { | |
57 StringBuilder result; | |
58 result.reserveCapacity(length); | |
59 | |
60 for (size_t i = 0; i < length; ++i) { | |
61 signed char c = bytes[i]; | |
62 result.append(static_cast<UChar>(c & 0xF7FF)); | |
63 } | |
64 | |
65 return result.toString(); | |
66 } | |
67 | |
68 template <typename CharType> | |
69 static CString encodeComplexUserDefined(const CharType* characters, | |
70 size_t length, | |
71 UnencodableHandling handling) { | |
72 size_t targetLength = length; | |
73 Vector<char> result(targetLength); | |
74 char* bytes = result.data(); | |
75 | |
76 size_t resultLength = 0; | |
77 for (size_t i = 0; i < length;) { | |
78 UChar32 c; | |
79 // TODO(jsbell): Will the input for x-user-defined ever be LChars? | |
80 U16_NEXT(characters, i, length, c); | |
81 // If the input was a surrogate pair (non-BMP character) then we | |
82 // overestimated the length. | |
83 if (c > 0xffff) | |
84 --targetLength; | |
85 signed char signedByte = static_cast<signed char>(c); | |
86 if ((signedByte & 0xF7FF) == c) { | |
87 bytes[resultLength++] = signedByte; | |
88 } else { | |
89 // No way to encode this character with x-user-defined. | |
90 UnencodableReplacementArray replacement; | |
91 int replacementLength = | |
92 TextCodec::getUnencodableReplacement(c, handling, replacement); | |
93 DCHECK_GT(replacementLength, 0); | |
94 // Only one char was initially reserved per input character, so grow if | |
95 // necessary. Note that in the case of surrogate pairs and | |
96 // QuestionMarksForUnencodables the result length may be shorter than | |
97 // the input length. | |
98 targetLength += replacementLength - 1; | |
99 if (targetLength > result.size()) { | |
100 result.grow(targetLength); | |
101 bytes = result.data(); | |
102 } | |
103 memcpy(bytes + resultLength, replacement, replacementLength); | |
104 resultLength += replacementLength; | |
105 } | |
106 } | |
107 | |
108 return CString(bytes, resultLength); | |
109 } | |
110 | |
111 template <typename CharType> | |
112 CString TextCodecUserDefined::encodeCommon(const CharType* characters, | |
113 size_t length, | |
114 UnencodableHandling handling) { | |
115 char* bytes; | |
116 CString result = CString::createUninitialized(length, bytes); | |
117 | |
118 // Convert the string a fast way and simultaneously do an efficient check to | |
119 // see if it's all ASCII. | |
120 UChar ored = 0; | |
121 for (size_t i = 0; i < length; ++i) { | |
122 UChar c = characters[i]; | |
123 bytes[i] = static_cast<char>(c); | |
124 ored |= c; | |
125 } | |
126 | |
127 if (!(ored & 0xFF80)) | |
128 return result; | |
129 | |
130 // If it wasn't all ASCII, call the function that handles more-complex cases. | |
131 return encodeComplexUserDefined(characters, length, handling); | |
132 } | |
133 | |
134 CString TextCodecUserDefined::encode(const UChar* characters, | |
135 size_t length, | |
136 UnencodableHandling handling) { | |
137 return encodeCommon(characters, length, handling); | |
138 } | |
139 | |
140 CString TextCodecUserDefined::encode(const LChar* characters, | |
141 size_t length, | |
142 UnencodableHandling handling) { | |
143 return encodeCommon(characters, length, handling); | |
144 } | |
145 | |
146 } // namespace WTF | |
OLD | NEW |