| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 2006, 2007, 2011 Apple Inc. All rights reserved. | |
| 3 * Copyright (C) 2007-2009 Torch Mobile, Inc. | |
| 4 * | |
| 5 * Redistribution and use in source and binary forms, with or without | |
| 6 * modification, are permitted provided that the following conditions | |
| 7 * are met: | |
| 8 * 1. Redistributions of source code must retain the above copyright | |
| 9 * notice, this list of conditions and the following disclaimer. | |
| 10 * 2. Redistributions in binary form must reproduce the above copyright | |
| 11 * notice, this list of conditions and the following disclaimer in the | |
| 12 * documentation and/or other materials provided with the distribution. | |
| 13 * | |
| 14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY | |
| 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR | |
| 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 */ | |
| 26 | |
| 27 #include "wtf/text/TextEncodingRegistry.h" | |
| 28 | |
| 29 #include "wtf/ASCIICType.h" | |
| 30 #include "wtf/Atomics.h" | |
| 31 #include "wtf/CurrentTime.h" | |
| 32 #include "wtf/HashMap.h" | |
| 33 #include "wtf/HashSet.h" | |
| 34 #include "wtf/StdLibExtras.h" | |
| 35 #include "wtf/StringExtras.h" | |
| 36 #include "wtf/ThreadingPrimitives.h" | |
| 37 #include "wtf/text/CString.h" | |
| 38 #include "wtf/text/TextCodecICU.h" | |
| 39 #include "wtf/text/TextCodecLatin1.h" | |
| 40 #include "wtf/text/TextCodecReplacement.h" | |
| 41 #include "wtf/text/TextCodecUTF16.h" | |
| 42 #include "wtf/text/TextCodecUTF8.h" | |
| 43 #include "wtf/text/TextCodecUserDefined.h" | |
| 44 #include "wtf/text/TextEncoding.h" | |
| 45 #include <memory> | |
| 46 | |
| 47 namespace WTF { | |
| 48 | |
// Longest encoding name/alias, in characters, that the registry accepts.
// Names are copied into a stack buffer of maxEncodingNameLength + 1 bytes
// before lookup, and registered aliases are DCHECKed against this limit.
const size_t maxEncodingNameLength = 63;
| 50 | |
| 51 // Hash for all-ASCII strings that does case folding. | |
| 52 struct TextEncodingNameHash { | |
| 53 static bool equal(const char* s1, const char* s2) { | |
| 54 char c1; | |
| 55 char c2; | |
| 56 do { | |
| 57 c1 = *s1++; | |
| 58 c2 = *s2++; | |
| 59 if (toASCIILower(c1) != toASCIILower(c2)) | |
| 60 return false; | |
| 61 } while (c1 && c2); | |
| 62 return !c1 && !c2; | |
| 63 } | |
| 64 | |
| 65 // This algorithm is the one-at-a-time hash from: | |
| 66 // http://burtleburtle.net/bob/hash/hashfaq.html | |
| 67 // http://burtleburtle.net/bob/hash/doobs.html | |
| 68 static unsigned hash(const char* s) { | |
| 69 unsigned h = WTF::stringHashingStartValue; | |
| 70 for (;;) { | |
| 71 char c = *s++; | |
| 72 if (!c) { | |
| 73 h += (h << 3); | |
| 74 h ^= (h >> 11); | |
| 75 h += (h << 15); | |
| 76 return h; | |
| 77 } | |
| 78 h += toASCIILower(c); | |
| 79 h += (h << 10); | |
| 80 h ^= (h >> 6); | |
| 81 } | |
| 82 } | |
| 83 | |
| 84 static const bool safeToCompareToEmptyOrDeleted = false; | |
| 85 }; | |
| 86 | |
| 87 struct TextCodecFactory { | |
| 88 NewTextCodecFunction function; | |
| 89 const void* additionalData; | |
| 90 TextCodecFactory(NewTextCodecFunction f = 0, const void* d = 0) | |
| 91 : function(f), additionalData(d) {} | |
| 92 }; | |
| 93 | |
| 94 typedef HashMap<const char*, const char*, TextEncodingNameHash> | |
| 95 TextEncodingNameMap; | |
| 96 typedef HashMap<const char*, TextCodecFactory> TextCodecMap; | |
| 97 | |
// Returns the single mutex that the registry functions in this file lock
// before reading or extending the encoding maps.
static Mutex& encodingRegistryMutex() {
  // We don't have to use AtomicallyInitializedStatic here because
  // this function is called on the main thread for any page before
  // it is used in worker threads.
  DEFINE_STATIC_LOCAL(Mutex, mutex, ());
  return mutex;
}
| 105 | |
| 106 static TextEncodingNameMap* textEncodingNameMap; | |
| 107 static TextCodecMap* textCodecMap; | |
| 108 | |
| 109 namespace { | |
| 110 static unsigned didExtendTextCodecMaps = 0; | |
| 111 | |
| 112 ALWAYS_INLINE unsigned atomicDidExtendTextCodecMaps() { | |
| 113 return acquireLoad(&didExtendTextCodecMaps); | |
| 114 } | |
| 115 | |
| 116 ALWAYS_INLINE void atomicSetDidExtendTextCodecMaps() { | |
| 117 releaseStore(&didExtendTextCodecMaps, 1); | |
| 118 } | |
| 119 } // namespace | |
| 120 | |
// Encodings that are removed from both maps by pruneBlacklistedCodecs().
// Each entry is a fixed 6-byte slot, which exactly fits "UTF-7" plus its
// terminating NUL; the slot size must grow before a longer name is added.
static const char textEncodingNameBlacklist[][6] = {"UTF-7"};
| 122 | |
| 123 #if ERROR_DISABLED | |
| 124 | |
| 125 static inline void checkExistingName(const char*, const char*) {} | |
| 126 | |
| 127 #else | |
| 128 | |
| 129 static void checkExistingName(const char* alias, const char* atomicName) { | |
| 130 const char* oldAtomicName = textEncodingNameMap->at(alias); | |
| 131 if (!oldAtomicName) | |
| 132 return; | |
| 133 if (oldAtomicName == atomicName) | |
| 134 return; | |
| 135 // Keep the warning silent about one case where we know this will happen. | |
| 136 if (strcmp(alias, "ISO-8859-8-I") == 0 && | |
| 137 strcmp(oldAtomicName, "ISO-8859-8-I") == 0 && | |
| 138 strcasecmp(atomicName, "iso-8859-8") == 0) | |
| 139 return; | |
| 140 LOG(ERROR) << "alias " << alias << " maps to " << oldAtomicName | |
| 141 << " already, but someone is trying to make it map to " | |
| 142 << atomicName; | |
| 143 } | |
| 144 | |
| 145 #endif | |
| 146 | |
// Returns true for aliases that must not be registered:
//  - any alias containing a comma, which rejects the versioned names some
//    back-ends expose (such as "ISO_2022,locale=ja,version=0" in ICU);
//  - exactly "8859_1", which is known to (at least) ICU but is not supported
//    by other browsers and caused a compatibility problem, see bug 43554.
static bool isUndesiredAlias(const char* alias) {
  if (strchr(alias, ','))
    return true;
  return strcmp(alias, "8859_1") == 0;
}
| 161 | |
| 162 static void addToTextEncodingNameMap(const char* alias, const char* name) { | |
| 163 DCHECK_LE(strlen(alias), maxEncodingNameLength); | |
| 164 if (isUndesiredAlias(alias)) | |
| 165 return; | |
| 166 const char* atomicName = textEncodingNameMap->at(name); | |
| 167 DCHECK(strcmp(alias, name) == 0 || atomicName); | |
| 168 if (!atomicName) | |
| 169 atomicName = name; | |
| 170 checkExistingName(alias, atomicName); | |
| 171 textEncodingNameMap->insert(alias, atomicName); | |
| 172 } | |
| 173 | |
| 174 static void addToTextCodecMap(const char* name, | |
| 175 NewTextCodecFunction function, | |
| 176 const void* additionalData) { | |
| 177 const char* atomicName = textEncodingNameMap->at(name); | |
| 178 DCHECK(atomicName); | |
| 179 textCodecMap->insert(atomicName, TextCodecFactory(function, additionalData)); | |
| 180 } | |
| 181 | |
| 182 static void pruneBlacklistedCodecs() { | |
| 183 for (size_t i = 0; i < WTF_ARRAY_LENGTH(textEncodingNameBlacklist); ++i) { | |
| 184 const char* atomicName = | |
| 185 textEncodingNameMap->at(textEncodingNameBlacklist[i]); | |
| 186 if (!atomicName) | |
| 187 continue; | |
| 188 | |
| 189 Vector<const char*> names; | |
| 190 TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin(); | |
| 191 TextEncodingNameMap::const_iterator end = textEncodingNameMap->end(); | |
| 192 for (; it != end; ++it) { | |
| 193 if (it->value == atomicName) | |
| 194 names.push_back(it->key); | |
| 195 } | |
| 196 | |
| 197 textEncodingNameMap->removeAll(names); | |
| 198 | |
| 199 textCodecMap->erase(atomicName); | |
| 200 } | |
| 201 } | |
| 202 | |
// Allocates both registry maps and registers the base codecs: Latin-1,
// UTF-8, UTF-16 and user-defined. Must run on the main thread and only once;
// both conditions are DCHECKed.
static void buildBaseTextCodecMaps() {
  DCHECK(isMainThread());
  DCHECK(!textCodecMap);
  DCHECK(!textEncodingNameMap);

  textCodecMap = new TextCodecMap;
  textEncodingNameMap = new TextEncodingNameMap;

  // For each codec the names are registered before the factory, because
  // addToTextCodecMap() looks the canonical name up in textEncodingNameMap.
  TextCodecLatin1::registerEncodingNames(addToTextEncodingNameMap);
  TextCodecLatin1::registerCodecs(addToTextCodecMap);

  TextCodecUTF8::registerEncodingNames(addToTextEncodingNameMap);
  TextCodecUTF8::registerCodecs(addToTextCodecMap);

  TextCodecUTF16::registerEncodingNames(addToTextEncodingNameMap);
  TextCodecUTF16::registerCodecs(addToTextCodecMap);

  TextCodecUserDefined::registerEncodingNames(addToTextEncodingNameMap);
  TextCodecUserDefined::registerCodecs(addToTextCodecMap);
}
| 223 | |
// Returns true when |alias| is non-null and equals "replacement", ignoring
// case (compared with strcasecmp).
bool isReplacementEncoding(const char* alias) {
  if (!alias)
    return false;
  return strcasecmp(alias, "replacement") == 0;
}
| 227 | |
// String overload: returns the result of comparing |alias| with
// "replacement" using String's operator==.
// NOTE(review): the const char* overload compares case-insensitively via
// strcasecmp; this comparison looks exact -- confirm callers only pass
// canonical names here.
bool isReplacementEncoding(const String& alias) {
  return alias == "replacement";
}
| 231 | |
// Registers the replacement and ICU codecs on top of the base maps, then
// removes anything listed in textEncodingNameBlacklist. Called from
// atomicCanonicalTextEncodingName() while the registry mutex is held.
static void extendTextCodecMaps() {
  TextCodecReplacement::registerEncodingNames(addToTextEncodingNameMap);
  TextCodecReplacement::registerCodecs(addToTextCodecMap);

  TextCodecICU::registerEncodingNames(addToTextEncodingNameMap);
  TextCodecICU::registerCodecs(addToTextCodecMap);

  // Prune last so that blacklisted names registered above are removed too.
  pruneBlacklistedCodecs();
}
| 241 | |
| 242 std::unique_ptr<TextCodec> newTextCodec(const TextEncoding& encoding) { | |
| 243 MutexLocker lock(encodingRegistryMutex()); | |
| 244 | |
| 245 DCHECK(textCodecMap); | |
| 246 TextCodecFactory factory = textCodecMap->at(encoding.name()); | |
| 247 DCHECK(factory.function); | |
| 248 return factory.function(encoding, factory.additionalData); | |
| 249 } | |
| 250 | |
| 251 const char* atomicCanonicalTextEncodingName(const char* name) { | |
| 252 if (!name || !name[0]) | |
| 253 return 0; | |
| 254 if (!textEncodingNameMap) | |
| 255 buildBaseTextCodecMaps(); | |
| 256 | |
| 257 MutexLocker lock(encodingRegistryMutex()); | |
| 258 | |
| 259 if (const char* atomicName = textEncodingNameMap->at(name)) | |
| 260 return atomicName; | |
| 261 if (atomicDidExtendTextCodecMaps()) | |
| 262 return 0; | |
| 263 extendTextCodecMaps(); | |
| 264 atomicSetDidExtendTextCodecMaps(); | |
| 265 return textEncodingNameMap->at(name); | |
| 266 } | |
| 267 | |
| 268 template <typename CharacterType> | |
| 269 const char* atomicCanonicalTextEncodingName(const CharacterType* characters, | |
| 270 size_t length) { | |
| 271 char buffer[maxEncodingNameLength + 1]; | |
| 272 size_t j = 0; | |
| 273 for (size_t i = 0; i < length; ++i) { | |
| 274 char c = static_cast<char>(characters[i]); | |
| 275 if (j == maxEncodingNameLength || c != characters[i]) | |
| 276 return 0; | |
| 277 buffer[j++] = c; | |
| 278 } | |
| 279 buffer[j] = 0; | |
| 280 return atomicCanonicalTextEncodingName(buffer); | |
| 281 } | |
| 282 | |
| 283 const char* atomicCanonicalTextEncodingName(const String& alias) { | |
| 284 if (!alias.length()) | |
| 285 return 0; | |
| 286 | |
| 287 if (alias.contains('\0')) | |
| 288 return 0; | |
| 289 | |
| 290 if (alias.is8Bit()) | |
| 291 return atomicCanonicalTextEncodingName<LChar>(alias.characters8(), | |
| 292 alias.length()); | |
| 293 | |
| 294 return atomicCanonicalTextEncodingName<UChar>(alias.characters16(), | |
| 295 alias.length()); | |
| 296 } | |
| 297 | |
| 298 bool noExtendedTextEncodingNameUsed() { | |
| 299 return !atomicDidExtendTextCodecMaps(); | |
| 300 } | |
| 301 | |
| 302 #ifndef NDEBUG | |
| 303 void dumpTextEncodingNameMap() { | |
| 304 unsigned size = textEncodingNameMap->size(); | |
| 305 fprintf(stderr, "Dumping %u entries in WTF::TextEncodingNameMap...\n", size); | |
| 306 | |
| 307 MutexLocker lock(encodingRegistryMutex()); | |
| 308 | |
| 309 TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin(); | |
| 310 TextEncodingNameMap::const_iterator end = textEncodingNameMap->end(); | |
| 311 for (; it != end; ++it) | |
| 312 fprintf(stderr, "'%s' => '%s'\n", it->key, it->value); | |
| 313 } | |
| 314 #endif | |
| 315 | |
| 316 } // namespace WTF | |
| OLD | NEW |