OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (C) 2006, 2007, 2011 Apple Inc. All rights reserved. | |
3 * Copyright (C) 2007-2009 Torch Mobile, Inc. | |
4 * | |
5 * Redistribution and use in source and binary forms, with or without | |
6 * modification, are permitted provided that the following conditions | |
7 * are met: | |
8 * 1. Redistributions of source code must retain the above copyright | |
9 * notice, this list of conditions and the following disclaimer. | |
10 * 2. Redistributions in binary form must reproduce the above copyright | |
11 * notice, this list of conditions and the following disclaimer in the | |
12 * documentation and/or other materials provided with the distribution. | |
13 * | |
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY | |
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR | |
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
25 */ | |
26 | |
27 #include "wtf/text/TextEncodingRegistry.h" | |
28 | |
29 #include "wtf/ASCIICType.h" | |
30 #include "wtf/Atomics.h" | |
31 #include "wtf/CurrentTime.h" | |
32 #include "wtf/HashMap.h" | |
33 #include "wtf/HashSet.h" | |
34 #include "wtf/StdLibExtras.h" | |
35 #include "wtf/StringExtras.h" | |
36 #include "wtf/ThreadingPrimitives.h" | |
37 #include "wtf/text/CString.h" | |
38 #include "wtf/text/TextCodecICU.h" | |
39 #include "wtf/text/TextCodecLatin1.h" | |
40 #include "wtf/text/TextCodecReplacement.h" | |
41 #include "wtf/text/TextCodecUTF16.h" | |
42 #include "wtf/text/TextCodecUTF8.h" | |
43 #include "wtf/text/TextCodecUserDefined.h" | |
44 #include "wtf/text/TextEncoding.h" | |
45 #include <memory> | |
46 | |
47 namespace WTF { | |
48 | |
// Longest encoding name/alias, in characters and excluding the terminating
// NUL, that the registry accepts.
static constexpr size_t maxEncodingNameLength = 63;
50 | |
51 // Hash for all-ASCII strings that does case folding. | |
52 struct TextEncodingNameHash { | |
53 static bool equal(const char* s1, const char* s2) { | |
54 char c1; | |
55 char c2; | |
56 do { | |
57 c1 = *s1++; | |
58 c2 = *s2++; | |
59 if (toASCIILower(c1) != toASCIILower(c2)) | |
60 return false; | |
61 } while (c1 && c2); | |
62 return !c1 && !c2; | |
63 } | |
64 | |
65 // This algorithm is the one-at-a-time hash from: | |
66 // http://burtleburtle.net/bob/hash/hashfaq.html | |
67 // http://burtleburtle.net/bob/hash/doobs.html | |
68 static unsigned hash(const char* s) { | |
69 unsigned h = WTF::stringHashingStartValue; | |
70 for (;;) { | |
71 char c = *s++; | |
72 if (!c) { | |
73 h += (h << 3); | |
74 h ^= (h >> 11); | |
75 h += (h << 15); | |
76 return h; | |
77 } | |
78 h += toASCIILower(c); | |
79 h += (h << 10); | |
80 h ^= (h >> 6); | |
81 } | |
82 } | |
83 | |
84 static const bool safeToCompareToEmptyOrDeleted = false; | |
85 }; | |
86 | |
87 struct TextCodecFactory { | |
88 NewTextCodecFunction function; | |
89 const void* additionalData; | |
90 TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) | |
91 : function(f), additionalData(d) {} | |
92 }; | |
93 | |
94 typedef HashMap<const char*, const char*, TextEncodingNameHash> | |
95 TextEncodingNameMap; | |
96 typedef HashMap<const char*, TextCodecFactory> TextCodecMap; | |
97 | |
98 static Mutex& encodingRegistryMutex() { | |
99 // We don't have to use AtomicallyInitializedStatic here because | |
100 // this function is called on the main thread for any page before | |
101 // it is used in worker threads. | |
102 DEFINE_STATIC_LOCAL(Mutex, mutex, ()); | |
103 return mutex; | |
104 } | |
105 | |
106 static TextEncodingNameMap* textEncodingNameMap; | |
107 static TextCodecMap* textCodecMap; | |
108 | |
109 namespace { | |
110 static unsigned didExtendTextCodecMaps = 0; | |
111 | |
112 ALWAYS_INLINE unsigned atomicDidExtendTextCodecMaps() { | |
113 return acquireLoad(&didExtendTextCodecMaps); | |
114 } | |
115 | |
116 ALWAYS_INLINE void atomicSetDidExtendTextCodecMaps() { | |
117 releaseStore(&didExtendTextCodecMaps, 1); | |
118 } | |
119 } // namespace | |
120 | |
// Encoding names whose aliases and codec are removed again by
// pruneBlacklistedCodecs(). Each entry holds up to five characters plus the
// terminating NUL.
static constexpr char textEncodingNameBlacklist[][6] = {"UTF-7"};
122 | |
123 #if ERROR_DISABLED | |
124 | |
125 static inline void checkExistingName(const char*, const char*) {} | |
126 | |
127 #else | |
128 | |
129 static void checkExistingName(const char* alias, const char* atomicName) { | |
130 const char* oldAtomicName = textEncodingNameMap->at(alias); | |
131 if (!oldAtomicName) | |
132 return; | |
133 if (oldAtomicName == atomicName) | |
134 return; | |
135 // Keep the warning silent about one case where we know this will happen. | |
136 if (strcmp(alias, "ISO-8859-8-I") == 0 && | |
137 strcmp(oldAtomicName, "ISO-8859-8-I") == 0 && | |
138 strcasecmp(atomicName, "iso-8859-8") == 0) | |
139 return; | |
140 LOG(ERROR) << "alias " << alias << " maps to " << oldAtomicName | |
141 << " already, but someone is trying to make it map to " | |
142 << atomicName; | |
143 } | |
144 | |
145 #endif | |
146 | |
// Returns true for aliases that must not be added to the name map.
// |alias| must be a non-null, NUL-terminated string.
static bool isUndesiredAlias(const char* alias) {
  // Reject aliases with version numbers that are supported by some back-ends
  // (such as "ISO_2022,locale=ja,version=0" in ICU): any alias containing a
  // comma is refused.
  if (strchr(alias, ','))
    return true;
  // 8859_1 is known to (at least) ICU, but other browsers don't support this
  // name - and having it caused a compatibility problem, see bug 43554.
  return strcmp(alias, "8859_1") == 0;
}
161 | |
162 static void addToTextEncodingNameMap(const char* alias, const char* name) { | |
163 DCHECK_LE(strlen(alias), maxEncodingNameLength); | |
164 if (isUndesiredAlias(alias)) | |
165 return; | |
166 const char* atomicName = textEncodingNameMap->at(name); | |
167 DCHECK(strcmp(alias, name) == 0 || atomicName); | |
168 if (!atomicName) | |
169 atomicName = name; | |
170 checkExistingName(alias, atomicName); | |
171 textEncodingNameMap->insert(alias, atomicName); | |
172 } | |
173 | |
174 static void addToTextCodecMap(const char* name, | |
175 NewTextCodecFunction function, | |
176 const void* additionalData) { | |
177 const char* atomicName = textEncodingNameMap->at(name); | |
178 DCHECK(atomicName); | |
179 textCodecMap->insert(atomicName, TextCodecFactory(function, additionalData)); | |
180 } | |
181 | |
182 static void pruneBlacklistedCodecs() { | |
183 for (size_t i = 0; i < WTF_ARRAY_LENGTH(textEncodingNameBlacklist); ++i) { | |
184 const char* atomicName = | |
185 textEncodingNameMap->at(textEncodingNameBlacklist[i]); | |
186 if (!atomicName) | |
187 continue; | |
188 | |
189 Vector<const char*> names; | |
190 TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin(); | |
191 TextEncodingNameMap::const_iterator end = textEncodingNameMap->end(); | |
192 for (; it != end; ++it) { | |
193 if (it->value == atomicName) | |
194 names.push_back(it->key); | |
195 } | |
196 | |
197 textEncodingNameMap->removeAll(names); | |
198 | |
199 textCodecMap->erase(atomicName); | |
200 } | |
201 } | |
202 | |
203 static void buildBaseTextCodecMaps() { | |
204 DCHECK(isMainThread()); | |
205 DCHECK(!textCodecMap); | |
206 DCHECK(!textEncodingNameMap); | |
207 | |
208 textCodecMap = new TextCodecMap; | |
209 textEncodingNameMap = new TextEncodingNameMap; | |
210 | |
211 TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap); | |
212 TextCodecLatin1::registerCodecs(addToTextCodecMap); | |
213 | |
214 TextCodecUTF8::registerEncodingNames(addToTextEncodingNameMap); | |
215 TextCodecUTF8::registerCodecs(addToTextCodecMap); | |
216 | |
217 TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap); | |
218 TextCodecUTF16::registerCodecs(addToTextCodecMap); | |
219 | |
220 TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap); | |
221 TextCodecUserDefined::registerCodecs(addToTextCodecMap); | |
222 } | |
223 | |
224 bool isReplacementEncoding(const char* alias) { | |
225 return alias && !strcasecmp(alias, "replacement"); | |
226 } | |
227 | |
228 bool isReplacementEncoding(const String& alias) { | |
229 return alias == "replacement"; | |
230 } | |
231 | |
232 static void extendTextCodecMaps() { | |
233 TextCodecReplacement::registerEncodingNames(addToTextEncodingNameMap); | |
234 TextCodecReplacement::registerCodecs(addToTextCodecMap); | |
235 | |
236 TextCodecICU::registerEncodingNames(addToTextEncodingNameMap); | |
237 TextCodecICU::registerCodecs(addToTextCodecMap); | |
238 | |
239 pruneBlacklistedCodecs(); | |
240 } | |
241 | |
242 std::unique_ptr<TextCodec> newTextCodec(const TextEncoding& encoding) { | |
243 MutexLocker lock(encodingRegistryMutex()); | |
244 | |
245 DCHECK(textCodecMap); | |
246 TextCodecFactory factory = textCodecMap->at(encoding.name()); | |
247 DCHECK(factory.function); | |
248 return factory.function(encoding, factory.additionalData); | |
249 } | |
250 | |
251 const char* atomicCanonicalTextEncodingName(const char* name) { | |
252 if (!name || !name[0]) | |
253 return 0; | |
254 if (!textEncodingNameMap) | |
255 buildBaseTextCodecMaps(); | |
256 | |
257 MutexLocker lock(encodingRegistryMutex()); | |
258 | |
259 if (const char* atomicName = textEncodingNameMap->at(name)) | |
260 return atomicName; | |
261 if (atomicDidExtendTextCodecMaps()) | |
262 return 0; | |
263 extendTextCodecMaps(); | |
264 atomicSetDidExtendTextCodecMaps(); | |
265 return textEncodingNameMap->at(name); | |
266 } | |
267 | |
268 template <typename CharacterType> | |
269 const char* atomicCanonicalTextEncodingName(const CharacterType* characters, | |
270 size_t length) { | |
271 char buffer[maxEncodingNameLength + 1]; | |
272 size_t j = 0; | |
273 for (size_t i = 0; i < length; ++i) { | |
274 char c = static_cast<char>(characters[i]); | |
275 if (j == maxEncodingNameLength || c != characters[i]) | |
276 return 0; | |
277 buffer[j++] = c; | |
278 } | |
279 buffer[j] = 0; | |
280 return atomicCanonicalTextEncodingName(buffer); | |
281 } | |
282 | |
283 const char* atomicCanonicalTextEncodingName(const String& alias) { | |
284 if (!alias.length()) | |
285 return 0; | |
286 | |
287 if (alias.contains('\0')) | |
288 return 0; | |
289 | |
290 if (alias.is8Bit()) | |
291 return atomicCanonicalTextEncodingName<LChar>(alias.characters8(), | |
292 alias.length()); | |
293 | |
294 return atomicCanonicalTextEncodingName<UChar>(alias.characters16(), | |
295 alias.length()); | |
296 } | |
297 | |
298 bool noExtendedTextEncodingNameUsed() { | |
299 return !atomicDidExtendTextCodecMaps(); | |
300 } | |
301 | |
302 #ifndef NDEBUG | |
303 void dumpTextEncodingNameMap() { | |
304 unsigned size = textEncodingNameMap->size(); | |
305 fprintf(stderr, "Dumping %u entries in WTF::TextEncodingNameMap...\n", size); | |
306 | |
307 MutexLocker lock(encodingRegistryMutex()); | |
308 | |
309 TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin(); | |
310 TextEncodingNameMap::const_iterator end = textEncodingNameMap->end(); | |
311 for (; it != end; ++it) | |
312 fprintf(stderr, "'%s' => '%s'\n", it->key, it->value); | |
313 } | |
314 #endif | |
315 | |
316 } // namespace WTF | |
OLD | NEW |