Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(257)

Side by Side Diff: third_party/WebKit/Source/wtf/text/TextEncodingRegistry.cpp

Issue 2764283002: Move files in wtf/ to platform/wtf/ (Part 10). (Closed)
Patch Set: Rebase. Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2006, 2007, 2011 Apple Inc. All rights reserved.
3 * Copyright (C) 2007-2009 Torch Mobile, Inc.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "wtf/text/TextEncodingRegistry.h"
28
29 #include "wtf/ASCIICType.h"
30 #include "wtf/Atomics.h"
31 #include "wtf/CurrentTime.h"
32 #include "wtf/HashMap.h"
33 #include "wtf/HashSet.h"
34 #include "wtf/StdLibExtras.h"
35 #include "wtf/StringExtras.h"
36 #include "wtf/ThreadingPrimitives.h"
37 #include "wtf/text/CString.h"
38 #include "wtf/text/TextCodecICU.h"
39 #include "wtf/text/TextCodecLatin1.h"
40 #include "wtf/text/TextCodecReplacement.h"
41 #include "wtf/text/TextCodecUTF16.h"
42 #include "wtf/text/TextCodecUTF8.h"
43 #include "wtf/text/TextCodecUserDefined.h"
44 #include "wtf/text/TextEncoding.h"
45 #include <memory>
46
47 namespace WTF {
48
// Longest encoding name or alias, in characters and excluding the terminating
// NUL, that the registry accepts. Lookup buffers are sized from this value.
constexpr size_t maxEncodingNameLength = 63;
50
51 // Hash for all-ASCII strings that does case folding.
52 struct TextEncodingNameHash {
53 static bool equal(const char* s1, const char* s2) {
54 char c1;
55 char c2;
56 do {
57 c1 = *s1++;
58 c2 = *s2++;
59 if (toASCIILower(c1) != toASCIILower(c2))
60 return false;
61 } while (c1 && c2);
62 return !c1 && !c2;
63 }
64
65 // This algorithm is the one-at-a-time hash from:
66 // http://burtleburtle.net/bob/hash/hashfaq.html
67 // http://burtleburtle.net/bob/hash/doobs.html
68 static unsigned hash(const char* s) {
69 unsigned h = WTF::stringHashingStartValue;
70 for (;;) {
71 char c = *s++;
72 if (!c) {
73 h += (h << 3);
74 h ^= (h >> 11);
75 h += (h << 15);
76 return h;
77 }
78 h += toASCIILower(c);
79 h += (h << 10);
80 h ^= (h >> 6);
81 }
82 }
83
84 static const bool safeToCompareToEmptyOrDeleted = false;
85 };
86
87 struct TextCodecFactory {
88 NewTextCodecFunction function;
89 const void* additionalData;
90 TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0)
91 : function(f), additionalData(d) {}
92 };
93
94 typedef HashMap<const char*, const char*, TextEncodingNameHash>
95 TextEncodingNameMap;
96 typedef HashMap<const char*, TextCodecFactory> TextCodecMap;
97
98 static Mutex& encodingRegistryMutex() {
99 // We don't have to use AtomicallyInitializedStatic here because
100 // this function is called on the main thread for any page before
101 // it is used in worker threads.
102 DEFINE_STATIC_LOCAL(Mutex, mutex, ());
103 return mutex;
104 }
105
106 static TextEncodingNameMap* textEncodingNameMap;
107 static TextCodecMap* textCodecMap;
108
109 namespace {
110 static unsigned didExtendTextCodecMaps = 0;
111
112 ALWAYS_INLINE unsigned atomicDidExtendTextCodecMaps() {
113 return acquireLoad(&didExtendTextCodecMaps);
114 }
115
116 ALWAYS_INLINE void atomicSetDidExtendTextCodecMaps() {
117 releaseStore(&didExtendTextCodecMaps, 1);
118 }
119 } // namespace
120
// Encodings that must not be exposed: pruneBlacklistedCodecs() removes each
// listed encoding together with all of its aliases. Every entry lives in a
// fixed 6-byte slot, which is exactly "UTF-7" plus its terminating NUL.
static const char textEncodingNameBlacklist[][6] = {"UTF-7"};
122
123 #if ERROR_DISABLED
124
125 static inline void checkExistingName(const char*, const char*) {}
126
127 #else
128
129 static void checkExistingName(const char* alias, const char* atomicName) {
130 const char* oldAtomicName = textEncodingNameMap->at(alias);
131 if (!oldAtomicName)
132 return;
133 if (oldAtomicName == atomicName)
134 return;
135 // Keep the warning silent about one case where we know this will happen.
136 if (strcmp(alias, "ISO-8859-8-I") == 0 &&
137 strcmp(oldAtomicName, "ISO-8859-8-I") == 0 &&
138 strcasecmp(atomicName, "iso-8859-8") == 0)
139 return;
140 LOG(ERROR) << "alias " << alias << " maps to " << oldAtomicName
141 << " already, but someone is trying to make it map to "
142 << atomicName;
143 }
144
145 #endif
146
// Returns true for aliases that must never be added to the name map.
static bool isUndesiredAlias(const char* alias) {
  // Reject aliases with version numbers that are supported by some back-ends
  // (such as "ISO_2022,locale=ja,version=0" in ICU); they all contain a comma.
  if (strchr(alias, ','))
    return true;

  // 8859_1 is known to (at least) ICU, but other browsers don't support this
  // name - and having it caused a compatibility problem, see bug 43554.
  return strcmp(alias, "8859_1") == 0;
}
161
162 static void addToTextEncodingNameMap(const char* alias, const char* name) {
163 DCHECK_LE(strlen(alias), maxEncodingNameLength);
164 if (isUndesiredAlias(alias))
165 return;
166 const char* atomicName = textEncodingNameMap->at(name);
167 DCHECK(strcmp(alias, name) == 0 || atomicName);
168 if (!atomicName)
169 atomicName = name;
170 checkExistingName(alias, atomicName);
171 textEncodingNameMap->insert(alias, atomicName);
172 }
173
174 static void addToTextCodecMap(const char* name,
175 NewTextCodecFunction function,
176 const void* additionalData) {
177 const char* atomicName = textEncodingNameMap->at(name);
178 DCHECK(atomicName);
179 textCodecMap->insert(atomicName, TextCodecFactory(function, additionalData));
180 }
181
182 static void pruneBlacklistedCodecs() {
183 for (size_t i = 0; i < WTF_ARRAY_LENGTH(textEncodingNameBlacklist); ++i) {
184 const char* atomicName =
185 textEncodingNameMap->at(textEncodingNameBlacklist[i]);
186 if (!atomicName)
187 continue;
188
189 Vector<const char*> names;
190 TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
191 TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
192 for (; it != end; ++it) {
193 if (it->value == atomicName)
194 names.push_back(it->key);
195 }
196
197 textEncodingNameMap->removeAll(names);
198
199 textCodecMap->erase(atomicName);
200 }
201 }
202
203 static void buildBaseTextCodecMaps() {
204 DCHECK(isMainThread());
205 DCHECK(!textCodecMap);
206 DCHECK(!textEncodingNameMap);
207
208 textCodecMap = new TextCodecMap;
209 textEncodingNameMap = new TextEncodingNameMap;
210
211 TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap);
212 TextCodecLatin1::registerCodecs(addToTextCodecMap);
213
214 TextCodecUTF8::registerEncodingNames(addToTextEncodingNameMap);
215 TextCodecUTF8::registerCodecs(addToTextCodecMap);
216
217 TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap);
218 TextCodecUTF16::registerCodecs(addToTextCodecMap);
219
220 TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap);
221 TextCodecUserDefined::registerCodecs(addToTextCodecMap);
222 }
223
// Returns true when |alias| is non-null and equals "replacement", ignoring
// case.
bool isReplacementEncoding(const char* alias) {
  if (!alias)
    return false;
  return strcasecmp(alias, "replacement") == 0;
}
227
228 bool isReplacementEncoding(const String& alias) {
229 return alias == "replacement";
230 }
231
232 static void extendTextCodecMaps() {
233 TextCodecReplacement::registerEncodingNames(addToTextEncodingNameMap);
234 TextCodecReplacement::registerCodecs(addToTextCodecMap);
235
236 TextCodecICU::registerEncodingNames(addToTextEncodingNameMap);
237 TextCodecICU::registerCodecs(addToTextCodecMap);
238
239 pruneBlacklistedCodecs();
240 }
241
242 std::unique_ptr<TextCodec> newTextCodec(const TextEncoding& encoding) {
243 MutexLocker lock(encodingRegistryMutex());
244
245 DCHECK(textCodecMap);
246 TextCodecFactory factory = textCodecMap->at(encoding.name());
247 DCHECK(factory.function);
248 return factory.function(encoding, factory.additionalData);
249 }
250
251 const char* atomicCanonicalTextEncodingName(const char* name) {
252 if (!name || !name[0])
253 return 0;
254 if (!textEncodingNameMap)
255 buildBaseTextCodecMaps();
256
257 MutexLocker lock(encodingRegistryMutex());
258
259 if (const char* atomicName = textEncodingNameMap->at(name))
260 return atomicName;
261 if (atomicDidExtendTextCodecMaps())
262 return 0;
263 extendTextCodecMaps();
264 atomicSetDidExtendTextCodecMaps();
265 return textEncodingNameMap->at(name);
266 }
267
268 template <typename CharacterType>
269 const char* atomicCanonicalTextEncodingName(const CharacterType* characters,
270 size_t length) {
271 char buffer[maxEncodingNameLength + 1];
272 size_t j = 0;
273 for (size_t i = 0; i < length; ++i) {
274 char c = static_cast<char>(characters[i]);
275 if (j == maxEncodingNameLength || c != characters[i])
276 return 0;
277 buffer[j++] = c;
278 }
279 buffer[j] = 0;
280 return atomicCanonicalTextEncodingName(buffer);
281 }
282
283 const char* atomicCanonicalTextEncodingName(const String& alias) {
284 if (!alias.length())
285 return 0;
286
287 if (alias.contains('\0'))
288 return 0;
289
290 if (alias.is8Bit())
291 return atomicCanonicalTextEncodingName<LChar>(alias.characters8(),
292 alias.length());
293
294 return atomicCanonicalTextEncodingName<UChar>(alias.characters16(),
295 alias.length());
296 }
297
298 bool noExtendedTextEncodingNameUsed() {
299 return !atomicDidExtendTextCodecMaps();
300 }
301
302 #ifndef NDEBUG
303 void dumpTextEncodingNameMap() {
304 unsigned size = textEncodingNameMap->size();
305 fprintf(stderr, "Dumping %u entries in WTF::TextEncodingNameMap...\n", size);
306
307 MutexLocker lock(encodingRegistryMutex());
308
309 TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
310 TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
311 for (; it != end; ++it)
312 fprintf(stderr, "'%s' => '%s'\n", it->key, it->value);
313 }
314 #endif
315
316 } // namespace WTF
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/wtf/text/TextEncodingRegistry.h ('k') | third_party/WebKit/Source/wtf/text/TextPosition.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698