| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved. | |
| 3 * Copyright (C) 2010-2012 Patrick Gansterer <paroga@paroga.com> | |
| 4 * | |
| 5 * Redistribution and use in source and binary forms, with or without | |
| 6 * modification, are permitted provided that the following conditions | |
| 7 * are met: | |
| 8 * 1. Redistributions of source code must retain the above copyright | |
| 9 * notice, this list of conditions and the following disclaimer. | |
| 10 * 2. Redistributions in binary form must reproduce the above copyright | |
| 11 * notice, this list of conditions and the following disclaimer in the | |
| 12 * documentation and/or other materials provided with the distribution. | |
| 13 * | |
| 14 * This library is distributed in the hope that it will be useful, | |
| 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 17 * Library General Public License for more details. | |
| 18 * | |
| 19 * You should have received a copy of the GNU Library General Public License | |
| 20 * along with this library; see the file COPYING.LIB. If not, write to | |
| 21 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
| 22 * Boston, MA 02110-1301, USA. | |
| 23 */ | |
| 24 | |
| 25 #include "config.h" | |
| 26 #include "TextCodecWin.h" | |
| 27 | |
| 28 #include "COMPtr.h" | |
| 29 #include <mlang.h> | |
| 30 #include <windows.h> | |
| 31 #include <wtf/HashMap.h> | |
| 32 #include <wtf/HashSet.h> | |
| 33 #include <wtf/text/CString.h> | |
| 34 #include <wtf/text/StringHash.h> | |
| 35 #include <wtf/text/WTFString.h> | |
| 36 | |
| 37 namespace WebCore { | |
| 38 | |
| 39 struct CharsetInfo { | |
| 40 CString m_name; | |
| 41 String m_friendlyName; | |
| 42 UINT m_codePage; | |
| 43 Vector<CString> m_aliases; | |
| 44 }; | |
| 45 | |
// Holder whose constructor fills the charset lookup tables in this file.
// The constructor is private; languageManager() is the only function
// allowed to create an instance.
class LanguageManager {
    friend LanguageManager& languageManager();

private:
    LanguageManager();
};
| 52 | |
| 53 // Usage: a lookup table used to get CharsetInfo with code page ID. | |
| 54 // Key: code page ID. Value: charset information. | |
| 55 static HashMap<UINT, CString>& codePageCharsets() | |
| 56 { | |
| 57 static HashMap<UINT, CString> cc; | |
| 58 return cc; | |
| 59 } | |
| 60 | |
| 61 static HashMap<String, CharsetInfo>& knownCharsets() | |
| 62 { | |
| 63 static HashMap<String, CharsetInfo> kc; | |
| 64 return kc; | |
| 65 } | |
| 66 | |
| 67 // Usage: a map that stores charsets that are supported by system. Sorted by nam
e. | |
| 68 // Key: charset. Value: code page ID. | |
| 69 typedef HashSet<String> CharsetSet; | |
| 70 static CharsetSet& supportedCharsets() | |
| 71 { | |
| 72 static CharsetSet sl; | |
| 73 return sl; | |
| 74 } | |
| 75 | |
| 76 static LanguageManager& languageManager() | |
| 77 { | |
| 78 static LanguageManager lm; | |
| 79 return lm; | |
| 80 } | |
| 81 | |
| 82 LanguageManager::LanguageManager() | |
| 83 { | |
| 84 COMPtr<IMultiLanguage> multiLanguage; | |
| 85 if (FAILED(::CoCreateInstance(CLSID_CMultiLanguage, 0, CLSCTX_INPROC_SERVER,
IID_IMultiLanguage, reinterpret_cast<LPVOID*>(&multiLanguage)))) | |
| 86 return; | |
| 87 | |
| 88 COMPtr<IEnumCodePage> enumInterface; | |
| 89 if (FAILED(multiLanguage->EnumCodePages(MIMECONTF_BROWSER, &enumInterface))) | |
| 90 return; | |
| 91 | |
| 92 MIMECPINFO cpInfo; | |
| 93 ULONG ccpInfo; | |
| 94 while (SUCCEEDED(enumInterface->Next(1, &cpInfo, &ccpInfo)) && ccpInfo) { | |
| 95 if (!IsValidCodePage(cpInfo.uiCodePage)) | |
| 96 continue; | |
| 97 | |
| 98 HashMap<UINT, CString>::iterator i = codePageCharsets().find(cpInfo.uiCo
dePage); | |
| 99 | |
| 100 CString name(String(cpInfo.wszWebCharset).latin1()); | |
| 101 if (i == codePageCharsets().end()) { | |
| 102 CharsetInfo info; | |
| 103 info.m_codePage = cpInfo.uiCodePage; | |
| 104 knownCharsets().set(name.data(), info); | |
| 105 i = codePageCharsets().set(cpInfo.uiCodePage, name).iterator; | |
| 106 } | |
| 107 if (i != codePageCharsets().end()) { | |
| 108 HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(Stri
ng(i->value.data(), i->value.length())); | |
| 109 ASSERT(j != knownCharsets().end()); | |
| 110 CharsetInfo& info = j->value; | |
| 111 info.m_name = i->value.data(); | |
| 112 info.m_friendlyName = cpInfo.wszDescription; | |
| 113 info.m_aliases.append(name); | |
| 114 info.m_aliases.append(String(cpInfo.wszHeaderCharset).latin1()); | |
| 115 info.m_aliases.append(String(cpInfo.wszBodyCharset).latin1()); | |
| 116 String cpName = "cp" + String::number(cpInfo.uiCodePage); | |
| 117 info.m_aliases.append(cpName.latin1()); | |
| 118 supportedCharsets().add(i->value.data()); | |
| 119 } | |
| 120 } | |
| 121 } | |
| 122 | |
| 123 static UINT getCodePage(const char* name) | |
| 124 { | |
| 125 // Explicitly use a "const" reference to fix the silly VS build error | |
| 126 // saying "==" is not found for const_iterator and iterator | |
| 127 const HashMap<String, CharsetInfo>& charsets = knownCharsets(); | |
| 128 HashMap<String, CharsetInfo>::const_iterator i = charsets.find(name); | |
| 129 return i == charsets.end() ? CP_ACP : i->value.m_codePage; | |
| 130 } | |
| 131 | |
| 132 static PassOwnPtr<TextCodec> newTextCodecWin(const TextEncoding& encoding, const
void*) | |
| 133 { | |
| 134 return adoptPtr(new TextCodecWin(getCodePage(encoding.name()))); | |
| 135 } | |
| 136 | |
| 137 TextCodecWin::TextCodecWin(UINT codePage) | |
| 138 : m_codePage(codePage) | |
| 139 { | |
| 140 } | |
| 141 | |
| 142 TextCodecWin::~TextCodecWin() | |
| 143 { | |
| 144 } | |
| 145 | |
| 146 void TextCodecWin::registerExtendedEncodingNames(EncodingNameRegistrar registrar
) | |
| 147 { | |
| 148 languageManager(); | |
| 149 for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCha
rsets().end(); ++i) { | |
| 150 HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i); | |
| 151 if (j != knownCharsets().end()) { | |
| 152 registrar(j->value.m_name.data(), j->value.m_name.data()); | |
| 153 for (Vector<CString>::const_iterator alias = j->value.m_aliases.begi
n(); alias != j->value.m_aliases.end(); ++alias) | |
| 154 registrar(alias->data(), j->value.m_name.data()); | |
| 155 } | |
| 156 } | |
| 157 } | |
| 158 | |
| 159 void TextCodecWin::registerExtendedCodecs(TextCodecRegistrar registrar) | |
| 160 { | |
| 161 languageManager(); | |
| 162 for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCha
rsets().end(); ++i) { | |
| 163 HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i); | |
| 164 if (j != knownCharsets().end()) | |
| 165 registrar(j->value.m_name.data(), newTextCodecWin, 0); | |
| 166 } | |
| 167 } | |
| 168 | |
| 169 static DWORD getCodePageFlags(UINT codePage) | |
| 170 { | |
| 171 if (codePage == 42) // Symbol | |
| 172 return 0; | |
| 173 | |
| 174 // Microsoft says the flag must be 0 for the following code pages | |
| 175 if (codePage > 50000) { | |
| 176 if ((codePage >= 50220 && codePage <= 50222) | |
| 177 || codePage == 50225 | |
| 178 || codePage == 50227 | |
| 179 || codePage == 50229 | |
| 180 || codePage == 52936 | |
| 181 || codePage == 54936 | |
| 182 || (codePage >= 57002 && codePage <= 57001) | |
| 183 || codePage == 65000 // UTF-7 | |
| 184 ) | |
| 185 return 0; | |
| 186 } | |
| 187 | |
| 188 return MB_PRECOMPOSED | MB_ERR_INVALID_CHARS; | |
| 189 } | |
| 190 | |
// Returns a pointer to the first byte in [bytes, bytes + length) whose high bit
// is set, or bytes + length when every byte is 7-bit ASCII.
static inline const char* findFirstNonAsciiCharacter(const char* bytes, size_t length)
{
    const char* const end = bytes + length;
    const char* cursor = bytes;
    while (cursor < end && !(*cursor & 0x80))
        ++cursor;
    return cursor;
}
| 199 | |
| 200 static void decodeInternal(Vector<UChar, 8192>& result, UINT codePage, const cha
r* bytes, size_t length, size_t* left) | |
| 201 { | |
| 202 *left = length; | |
| 203 if (!bytes || !length) | |
| 204 return; | |
| 205 | |
| 206 DWORD flags = getCodePageFlags(codePage); | |
| 207 | |
| 208 int testLength = length; | |
| 209 int untestedLength = length; | |
| 210 for (;;) { | |
| 211 int resultLength = MultiByteToWideChar(codePage, flags, bytes, testLengt
h, 0, 0); | |
| 212 | |
| 213 if (resultLength > 0) { | |
| 214 int oldSize = result.size(); | |
| 215 result.resize(oldSize + resultLength); | |
| 216 | |
| 217 MultiByteToWideChar(codePage, flags, bytes, testLength, result.data(
) + oldSize, resultLength); | |
| 218 | |
| 219 if (testLength == untestedLength) { | |
| 220 *left = length - testLength; | |
| 221 break; | |
| 222 } | |
| 223 untestedLength -= testLength; | |
| 224 length -= testLength; | |
| 225 bytes += testLength; | |
| 226 } else { | |
| 227 untestedLength = testLength - 1; | |
| 228 if (!untestedLength) { | |
| 229 *left = length; | |
| 230 break; | |
| 231 } | |
| 232 } | |
| 233 testLength = (untestedLength + 1) / 2; | |
| 234 } | |
| 235 } | |
| 236 | |
| 237 String TextCodecWin::decode(const char* bytes, size_t length, bool flush, bool s
topOnError, bool& sawError) | |
| 238 { | |
| 239 if (!m_decodeBuffer.isEmpty()) { | |
| 240 m_decodeBuffer.append(bytes, length); | |
| 241 bytes = m_decodeBuffer.data(); | |
| 242 length = m_decodeBuffer.size(); | |
| 243 } | |
| 244 | |
| 245 size_t left; | |
| 246 Vector<UChar, 8192> result; | |
| 247 for (;;) { | |
| 248 decodeInternal(result, m_codePage, bytes, length, &left); | |
| 249 if (!left) | |
| 250 break; | |
| 251 | |
| 252 if (!flush && left < 16) | |
| 253 break; | |
| 254 | |
| 255 result.append(L'?'); | |
| 256 sawError = true; | |
| 257 if (stopOnError) | |
| 258 return String::adopt(result); | |
| 259 | |
| 260 if (left == 1) | |
| 261 break; | |
| 262 | |
| 263 bytes += length - left + 1; | |
| 264 length = left - 1; | |
| 265 } | |
| 266 if (left && !flush) { | |
| 267 if (m_decodeBuffer.isEmpty()) | |
| 268 m_decodeBuffer.append(bytes + length - left, left); | |
| 269 else { | |
| 270 memmove(m_decodeBuffer.data(), bytes + length - left, left); | |
| 271 m_decodeBuffer.resize(left); | |
| 272 } | |
| 273 } else | |
| 274 m_decodeBuffer.clear(); | |
| 275 | |
| 276 return String::adopt(result); | |
| 277 } | |
| 278 | |
| 279 CString TextCodecWin::encode(const UChar* characters, size_t length, Unencodable
Handling) | |
| 280 { | |
| 281 if (!characters || !length) | |
| 282 return CString(); | |
| 283 | |
| 284 int resultLength = WideCharToMultiByte(m_codePage, WC_COMPOSITECHECK, charac
ters, length, 0, 0, 0, 0); | |
| 285 | |
| 286 // FIXME: We need to implement UnencodableHandling: QuestionMarksForUnencoda
bles, EntitiesForUnencodables, and URLEncodedEntitiesForUnencodables. | |
| 287 | |
| 288 if (resultLength <= 0) | |
| 289 return "?"; | |
| 290 | |
| 291 char* characterBuffer; | |
| 292 CString result = CString::newUninitialized(resultLength, characterBuffer); | |
| 293 | |
| 294 WideCharToMultiByte(m_codePage, WC_COMPOSITECHECK, characters, length, chara
cterBuffer, resultLength, 0, 0); | |
| 295 | |
| 296 return result; | |
| 297 } | |
| 298 | |
| 299 void TextCodecWin::enumerateSupportedEncodings(EncodingReceiver& receiver) | |
| 300 { | |
| 301 languageManager(); | |
| 302 for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCha
rsets().end(); ++i) { | |
| 303 HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i); | |
| 304 if (j != knownCharsets().end() && !receiver.receive(j->value.m_name.data
(), j->value.m_friendlyName.charactersWithNullTermination(), j->value.m_codePage
)) | |
| 305 break; | |
| 306 } | |
| 307 } | |
| 308 | |
| 309 } // namespace WebCore | |
| OLD | NEW |