Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(339)

Side by Side Diff: third_party/WebKit/Source/wtf/text/TextEncodingRegistry.cpp

Issue 1611343002: wtf reformat test Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: pydent Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2006, 2007, 2011 Apple Inc. All rights reserved. 2 * Copyright (C) 2006, 2007, 2011 Apple Inc. All rights reserved.
3 * Copyright (C) 2007-2009 Torch Mobile, Inc. 3 * Copyright (C) 2007-2009 Torch Mobile, Inc.
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
7 * are met: 7 * are met:
8 * 1. Redistributions of source code must retain the above copyright 8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright 10 * 2. Redistributions in binary form must reproduce the above copyright
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
43 #include "wtf/text/TextCodecUTF8.h" 43 #include "wtf/text/TextCodecUTF8.h"
44 #include "wtf/text/TextCodecUserDefined.h" 44 #include "wtf/text/TextCodecUserDefined.h"
45 #include "wtf/text/TextEncoding.h" 45 #include "wtf/text/TextEncoding.h"
46 46
47 namespace WTF { 47 namespace WTF {
48 48
49 const size_t maxEncodingNameLength = 63; 49 const size_t maxEncodingNameLength = 63;
50 50
51 // Hash for all-ASCII strings that does case folding. 51 // Hash for all-ASCII strings that does case folding.
52 struct TextEncodingNameHash { 52 struct TextEncodingNameHash {
53 static bool equal(const char* s1, const char* s2) 53 static bool equal(const char* s1, const char* s2) {
54 { 54 char c1;
55 char c1; 55 char c2;
56 char c2; 56 do {
57 do {
58 #if defined(_MSC_FULL_VER) && _MSC_FULL_VER == 170051106 57 #if defined(_MSC_FULL_VER) && _MSC_FULL_VER == 170051106
59 // Workaround for a bug in the VS2012 Update 1 optimizer, remove once the fix is released. 58 // Workaround for a bug in the VS2012 Update 1 optimizer, remove once the fix is released.
60 // https://connect.microsoft.com/VisualStudio/feedback/details/777533/vs2012-c-optimizing-bug-when-using-inline-and-char-return-type-x86-target-only 59 // https://connect.microsoft.com/VisualStudio/feedback/details/777533/vs2012-c-optimizing-bug-when-using-inline-and-char-return-type-x86-target-only
61 c1 = toASCIILower(*s1++); 60 c1 = toASCIILower(*s1++);
62 c2 = toASCIILower(*s2++); 61 c2 = toASCIILower(*s2++);
63 if (c1 != c2) 62 if (c1 != c2)
64 return false; 63 return false;
65 #else 64 #else
66 c1 = *s1++; 65 c1 = *s1++;
67 c2 = *s2++; 66 c2 = *s2++;
68 if (toASCIILower(c1) != toASCIILower(c2)) 67 if (toASCIILower(c1) != toASCIILower(c2))
69 return false; 68 return false;
70 #endif 69 #endif
71 } while (c1 && c2); 70 } while (c1 && c2);
72 return !c1 && !c2; 71 return !c1 && !c2;
72 }
73
74 // This algorithm is the one-at-a-time hash from:
75 // http://burtleburtle.net/bob/hash/hashfaq.html
76 // http://burtleburtle.net/bob/hash/doobs.html
77 static unsigned hash(const char* s) {
78 unsigned h = WTF::stringHashingStartValue;
79 for (;;) {
80 char c = *s++;
81 if (!c) {
82 h += (h << 3);
83 h ^= (h >> 11);
84 h += (h << 15);
85 return h;
86 }
87 h += toASCIILower(c);
88 h += (h << 10);
89 h ^= (h >> 6);
73 } 90 }
91 }
74 92
75 // This algorithm is the one-at-a-time hash from: 93 static const bool safeToCompareToEmptyOrDeleted = false;
76 // http://burtleburtle.net/bob/hash/hashfaq.html
77 // http://burtleburtle.net/bob/hash/doobs.html
78 static unsigned hash(const char* s)
79 {
80 unsigned h = WTF::stringHashingStartValue;
81 for (;;) {
82 char c = *s++;
83 if (!c) {
84 h += (h << 3);
85 h ^= (h >> 11);
86 h += (h << 15);
87 return h;
88 }
89 h += toASCIILower(c);
90 h += (h << 10);
91 h ^= (h >> 6);
92 }
93 }
94
95 static const bool safeToCompareToEmptyOrDeleted = false;
96 }; 94 };
97 95
98 struct TextCodecFactory { 96 struct TextCodecFactory {
99 NewTextCodecFunction function; 97 NewTextCodecFunction function;
100 const void* additionalData; 98 const void* additionalData;
101 TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) : function(f ), additionalData(d) { } 99 TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0)
100 : function(f), additionalData(d) {}
102 }; 101 };
103 102
104 typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingName Map; 103 typedef HashMap<const char*, const char*, TextEncodingNameHash>
104 TextEncodingNameMap;
105 typedef HashMap<const char*, TextCodecFactory> TextCodecMap; 105 typedef HashMap<const char*, TextCodecFactory> TextCodecMap;
106 106
107 static Mutex& encodingRegistryMutex() 107 static Mutex& encodingRegistryMutex() {
108 { 108 // We don't have to use AtomicallyInitializedStatic here because
109 // We don't have to use AtomicallyInitializedStatic here because 109 // this function is called on the main thread for any page before
110 // this function is called on the main thread for any page before 110 // it is used in worker threads.
111 // it is used in worker threads. 111 DEFINE_STATIC_LOCAL(Mutex, mutex, ());
112 DEFINE_STATIC_LOCAL(Mutex, mutex, ()); 112 return mutex;
113 return mutex;
114 } 113 }
115 114
116 static TextEncodingNameMap* textEncodingNameMap; 115 static TextEncodingNameMap* textEncodingNameMap;
117 static TextCodecMap* textCodecMap; 116 static TextCodecMap* textCodecMap;
118 117
119 namespace { 118 namespace {
120 static unsigned didExtendTextCodecMaps = 0; 119 static unsigned didExtendTextCodecMaps = 0;
121 120
122 ALWAYS_INLINE unsigned atomicDidExtendTextCodecMaps() 121 ALWAYS_INLINE unsigned atomicDidExtendTextCodecMaps() {
123 { 122 return acquireLoad(&didExtendTextCodecMaps);
124 return acquireLoad(&didExtendTextCodecMaps); 123 }
125 } 124
126 125 ALWAYS_INLINE void atomicSetDidExtendTextCodemMaps() {
127 ALWAYS_INLINE void atomicSetDidExtendTextCodemMaps() 126 releaseStore(&didExtendTextCodecMaps, 1);
128 { 127 }
129 releaseStore(&didExtendTextCodecMaps, 1); 128 } // namespace
130 } 129
131 } // namespace 130 static const char textEncodingNameBlacklist[][6] = {"UTF-7"};
132
133 static const char textEncodingNameBlacklist[][6] = { "UTF-7" };
134 131
135 #if ERROR_DISABLED 132 #if ERROR_DISABLED
136 133
137 static inline void checkExistingName(const char*, const char*) { } 134 static inline void checkExistingName(const char*, const char*) {}
138 135
139 #else 136 #else
140 137
141 static void checkExistingName(const char* alias, const char* atomicName) 138 static void checkExistingName(const char* alias, const char* atomicName) {
142 { 139 const char* oldAtomicName = textEncodingNameMap->get(alias);
143 const char* oldAtomicName = textEncodingNameMap->get(alias); 140 if (!oldAtomicName)
144 if (!oldAtomicName) 141 return;
145 return; 142 if (oldAtomicName == atomicName)
146 if (oldAtomicName == atomicName) 143 return;
147 return; 144 // Keep the warning silent about one case where we know this will happen.
148 // Keep the warning silent about one case where we know this will happen. 145 if (strcmp(alias, "ISO-8859-8-I") == 0 &&
149 if (strcmp(alias, "ISO-8859-8-I") == 0 146 strcmp(oldAtomicName, "ISO-8859-8-I") == 0 &&
150 && strcmp(oldAtomicName, "ISO-8859-8-I") == 0 147 strcasecmp(atomicName, "iso-8859-8") == 0)
151 && strcasecmp(atomicName, "iso-8859-8") == 0) 148 return;
152 return; 149 WTF_LOG_ERROR(
153 WTF_LOG_ERROR("alias %s maps to %s already, but someone is trying to make it map to %s", alias, oldAtomicName, atomicName); 150 "alias %s maps to %s already, but someone is trying to make it map to %s",
151 alias, oldAtomicName, atomicName);
154 } 152 }
155 153
156 #endif 154 #endif
157 155
158 static bool isUndesiredAlias(const char* alias) 156 static bool isUndesiredAlias(const char* alias) {
159 { 157 // Reject aliases with version numbers that are supported by some back-ends (such as "ISO_2022,locale=ja,version=0" in ICU).
160 // Reject aliases with version numbers that are supported by some back-ends (such as "ISO_2022,locale=ja,version=0" in ICU). 158 for (const char* p = alias; *p; ++p) {
161 for (const char* p = alias; *p; ++p) { 159 if (*p == ',')
162 if (*p == ',') 160 return true;
163 return true; 161 }
164 } 162 // 8859_1 is known to (at least) ICU, but other browsers don't support this name - and having it caused a compatibility
165 // 8859_1 is known to (at least) ICU, but other browsers don't support this name - and having it caused a compatibility 163 // problem, see bug 43554.
166 // problem, see bug 43554. 164 if (0 == strcmp(alias, "8859_1"))
167 if (0 == strcmp(alias, "8859_1")) 165 return true;
168 return true; 166 return false;
169 return false; 167 }
170 } 168
171 169 static void addToTextEncodingNameMap(const char* alias, const char* name) {
172 static void addToTextEncodingNameMap(const char* alias, const char* name) 170 ASSERT(strlen(alias) <= maxEncodingNameLength);
173 { 171 if (isUndesiredAlias(alias))
174 ASSERT(strlen(alias) <= maxEncodingNameLength); 172 return;
175 if (isUndesiredAlias(alias)) 173 const char* atomicName = textEncodingNameMap->get(name);
176 return; 174 ASSERT(strcmp(alias, name) == 0 || atomicName);
177 const char* atomicName = textEncodingNameMap->get(name); 175 if (!atomicName)
178 ASSERT(strcmp(alias, name) == 0 || atomicName); 176 atomicName = name;
177 checkExistingName(alias, atomicName);
178 textEncodingNameMap->add(alias, atomicName);
179 }
180
181 static void addToTextCodecMap(const char* name,
182 NewTextCodecFunction function,
183 const void* additionalData) {
184 const char* atomicName = textEncodingNameMap->get(name);
185 ASSERT(atomicName);
186 textCodecMap->add(atomicName, TextCodecFactory(function, additionalData));
187 }
188
189 static void pruneBlacklistedCodecs() {
190 for (size_t i = 0; i < WTF_ARRAY_LENGTH(textEncodingNameBlacklist); ++i) {
191 const char* atomicName =
192 textEncodingNameMap->get(textEncodingNameBlacklist[i]);
179 if (!atomicName) 193 if (!atomicName)
180 atomicName = name; 194 continue;
181 checkExistingName(alias, atomicName); 195
182 textEncodingNameMap->add(alias, atomicName); 196 Vector<const char*> names;
183 }
184
185 static void addToTextCodecMap(const char* name, NewTextCodecFunction function, c onst void* additionalData)
186 {
187 const char* atomicName = textEncodingNameMap->get(name);
188 ASSERT(atomicName);
189 textCodecMap->add(atomicName, TextCodecFactory(function, additionalData));
190 }
191
192 static void pruneBlacklistedCodecs()
193 {
194 for (size_t i = 0; i < WTF_ARRAY_LENGTH(textEncodingNameBlacklist); ++i) {
195 const char* atomicName = textEncodingNameMap->get(textEncodingNameBlackl ist[i]);
196 if (!atomicName)
197 continue;
198
199 Vector<const char*> names;
200 TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
201 TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
202 for (; it != end; ++it) {
203 if (it->value == atomicName)
204 names.append(it->key);
205 }
206
207 textEncodingNameMap->removeAll(names);
208
209 textCodecMap->remove(atomicName);
210 }
211 }
212
213 static void buildBaseTextCodecMaps()
214 {
215 ASSERT(isMainThread());
216 ASSERT(!textCodecMap);
217 ASSERT(!textEncodingNameMap);
218
219 textCodecMap = new TextCodecMap;
220 textEncodingNameMap = new TextEncodingNameMap;
221
222 TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap);
223 TextCodecLatin1::registerCodecs(addToTextCodecMap);
224
225 TextCodecUTF8::registerEncodingNames(addToTextEncodingNameMap);
226 TextCodecUTF8::registerCodecs(addToTextCodecMap);
227
228 TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap);
229 TextCodecUTF16::registerCodecs(addToTextCodecMap);
230
231 TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap);
232 TextCodecUserDefined::registerCodecs(addToTextCodecMap);
233 }
234
235 bool isReplacementEncoding(const char* alias)
236 {
237 return alias && !strcasecmp(alias, "replacement");
238 }
239
240 bool isReplacementEncoding(const String& alias)
241 {
242 return alias == "replacement";
243 }
244
245 static void extendTextCodecMaps()
246 {
247 TextCodecReplacement::registerEncodingNames(addToTextEncodingNameMap);
248 TextCodecReplacement::registerCodecs(addToTextCodecMap);
249
250 TextCodecICU::registerEncodingNames(addToTextEncodingNameMap);
251 TextCodecICU::registerCodecs(addToTextCodecMap);
252
253 pruneBlacklistedCodecs();
254 }
255
256 PassOwnPtr<TextCodec> newTextCodec(const TextEncoding& encoding)
257 {
258 MutexLocker lock(encodingRegistryMutex());
259
260
261 ASSERT(textCodecMap);
262 TextCodecFactory factory = textCodecMap->get(encoding.name());
263 ASSERT(factory.function);
264 return factory.function(encoding, factory.additionalData);
265 }
266
267 const char* atomicCanonicalTextEncodingName(const char* name)
268 {
269 if (!name || !name[0])
270 return 0;
271 if (!textEncodingNameMap)
272 buildBaseTextCodecMaps();
273
274 MutexLocker lock(encodingRegistryMutex());
275
276 if (const char* atomicName = textEncodingNameMap->get(name))
277 return atomicName;
278 if (atomicDidExtendTextCodecMaps())
279 return 0;
280 extendTextCodecMaps();
281 atomicSetDidExtendTextCodemMaps();
282 return textEncodingNameMap->get(name);
283 }
284
285 template <typename CharacterType>
286 const char* atomicCanonicalTextEncodingName(const CharacterType* characters, siz e_t length)
287 {
288 char buffer[maxEncodingNameLength + 1];
289 size_t j = 0;
290 for (size_t i = 0; i < length; ++i) {
291 char c = static_cast<char>(characters[i]);
292 if (j == maxEncodingNameLength || c != characters[i])
293 return 0;
294 buffer[j++] = c;
295 }
296 buffer[j] = 0;
297 return atomicCanonicalTextEncodingName(buffer);
298 }
299
300 const char* atomicCanonicalTextEncodingName(const String& alias)
301 {
302 if (!alias.length())
303 return 0;
304
305 if (alias.contains(static_cast<UChar>('\0')))
306 return 0;
307
308 if (alias.is8Bit())
309 return atomicCanonicalTextEncodingName<LChar>(alias.characters8(), alias .length());
310
311 return atomicCanonicalTextEncodingName<UChar>(alias.characters16(), alias.le ngth());
312 }
313
314 bool noExtendedTextEncodingNameUsed()
315 {
316 return !atomicDidExtendTextCodecMaps();
317 }
318
319 #ifndef NDEBUG
320 void dumpTextEncodingNameMap()
321 {
322 unsigned size = textEncodingNameMap->size();
323 fprintf(stderr, "Dumping %u entries in WTF::TextEncodingNameMap...\n", size) ;
324
325 MutexLocker lock(encodingRegistryMutex());
326
327 TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin(); 197 TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
328 TextEncodingNameMap::const_iterator end = textEncodingNameMap->end(); 198 TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
329 for (; it != end; ++it) 199 for (; it != end; ++it) {
330 fprintf(stderr, "'%s' => '%s'\n", it->key, it->value); 200 if (it->value == atomicName)
201 names.append(it->key);
202 }
203
204 textEncodingNameMap->removeAll(names);
205
206 textCodecMap->remove(atomicName);
207 }
208 }
209
210 static void buildBaseTextCodecMaps() {
211 ASSERT(isMainThread());
212 ASSERT(!textCodecMap);
213 ASSERT(!textEncodingNameMap);
214
215 textCodecMap = new TextCodecMap;
216 textEncodingNameMap = new TextEncodingNameMap;
217
218 TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap);
219 TextCodecLatin1::registerCodecs(addToTextCodecMap);
220
221 TextCodecUTF8::registerEncodingNames(addToTextEncodingNameMap);
222 TextCodecUTF8::registerCodecs(addToTextCodecMap);
223
224 TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap);
225 TextCodecUTF16::registerCodecs(addToTextCodecMap);
226
227 TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap);
228 TextCodecUserDefined::registerCodecs(addToTextCodecMap);
229 }
230
231 bool isReplacementEncoding(const char* alias) {
232 return alias && !strcasecmp(alias, "replacement");
233 }
234
235 bool isReplacementEncoding(const String& alias) {
236 return alias == "replacement";
237 }
238
239 static void extendTextCodecMaps() {
240 TextCodecReplacement::registerEncodingNames(addToTextEncodingNameMap);
241 TextCodecReplacement::registerCodecs(addToTextCodecMap);
242
243 TextCodecICU::registerEncodingNames(addToTextEncodingNameMap);
244 TextCodecICU::registerCodecs(addToTextCodecMap);
245
246 pruneBlacklistedCodecs();
247 }
248
249 PassOwnPtr<TextCodec> newTextCodec(const TextEncoding& encoding) {
250 MutexLocker lock(encodingRegistryMutex());
251
252 ASSERT(textCodecMap);
253 TextCodecFactory factory = textCodecMap->get(encoding.name());
254 ASSERT(factory.function);
255 return factory.function(encoding, factory.additionalData);
256 }
257
258 const char* atomicCanonicalTextEncodingName(const char* name) {
259 if (!name || !name[0])
260 return 0;
261 if (!textEncodingNameMap)
262 buildBaseTextCodecMaps();
263
264 MutexLocker lock(encodingRegistryMutex());
265
266 if (const char* atomicName = textEncodingNameMap->get(name))
267 return atomicName;
268 if (atomicDidExtendTextCodecMaps())
269 return 0;
270 extendTextCodecMaps();
271 atomicSetDidExtendTextCodemMaps();
272 return textEncodingNameMap->get(name);
273 }
274
275 template <typename CharacterType>
276 const char* atomicCanonicalTextEncodingName(const CharacterType* characters,
277 size_t length) {
278 char buffer[maxEncodingNameLength + 1];
279 size_t j = 0;
280 for (size_t i = 0; i < length; ++i) {
281 char c = static_cast<char>(characters[i]);
282 if (j == maxEncodingNameLength || c != characters[i])
283 return 0;
284 buffer[j++] = c;
285 }
286 buffer[j] = 0;
287 return atomicCanonicalTextEncodingName(buffer);
288 }
289
290 const char* atomicCanonicalTextEncodingName(const String& alias) {
291 if (!alias.length())
292 return 0;
293
294 if (alias.contains(static_cast<UChar>('\0')))
295 return 0;
296
297 if (alias.is8Bit())
298 return atomicCanonicalTextEncodingName<LChar>(alias.characters8(),
299 alias.length());
300
301 return atomicCanonicalTextEncodingName<UChar>(alias.characters16(),
302 alias.length());
303 }
304
305 bool noExtendedTextEncodingNameUsed() {
306 return !atomicDidExtendTextCodecMaps();
307 }
308
309 #ifndef NDEBUG
310 void dumpTextEncodingNameMap() {
311 unsigned size = textEncodingNameMap->size();
312 fprintf(stderr, "Dumping %u entries in WTF::TextEncodingNameMap...\n", size);
313
314 MutexLocker lock(encodingRegistryMutex());
315
316 TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
317 TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
318 for (; it != end; ++it)
319 fprintf(stderr, "'%s' => '%s'\n", it->key, it->value);
331 } 320 }
332 #endif 321 #endif
333 322
334 } // namespace WTF 323 } // namespace WTF
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/wtf/text/TextEncodingRegistry.h ('k') | third_party/WebKit/Source/wtf/text/TextPosition.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698