| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2004, 2006, 2008, 2011 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2008, 2011 Apple Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
| 6 * are met: | 6 * are met: |
| 7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
| 8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
| 9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
| 10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
| (...skipping 13 matching lines...) Expand all Loading... |
| 24 */ | 24 */ |
| 25 | 25 |
| 26 #include "config.h" | 26 #include "config.h" |
| 27 #include "wtf/text/TextCodecUTF8.h" | 27 #include "wtf/text/TextCodecUTF8.h" |
| 28 | 28 |
| 29 #include "wtf/text/CString.h" | 29 #include "wtf/text/CString.h" |
| 30 #include "wtf/text/CharacterNames.h" | 30 #include "wtf/text/CharacterNames.h" |
| 31 #include "wtf/text/StringBuffer.h" | 31 #include "wtf/text/StringBuffer.h" |
| 32 #include "wtf/text/TextCodecASCIIFastPath.h" | 32 #include "wtf/text/TextCodecASCIIFastPath.h" |
| 33 | 33 |
| 34 using namespace WTF; | 34 namespace WTF { |
| 35 |
| 35 using namespace WTF::Unicode; | 36 using namespace WTF::Unicode; |
| 36 using namespace std; | |
| 37 | |
| 38 namespace WTF { | |
| 39 | 37 |
| 40 const int nonCharacter = -1; | 38 const int nonCharacter = -1; |
| 41 | 39 |
| 42 PassOwnPtr<TextCodec> TextCodecUTF8::create(const TextEncoding&, const void*) | 40 PassOwnPtr<TextCodec> TextCodecUTF8::create(const TextEncoding&, const void*) |
| 43 { | 41 { |
| 44 return adoptPtr(new TextCodecUTF8); | 42 return adoptPtr(new TextCodecUTF8); |
| 45 } | 43 } |
| 46 | 44 |
| 47 void TextCodecUTF8::registerEncodingNames(EncodingNameRegistrar registrar) | 45 void TextCodecUTF8::registerEncodingNames(EncodingNameRegistrar registrar) |
| 48 { | 46 { |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 139 return nonCharacter; | 137 return nonCharacter; |
| 140 if (sequence[3] < 0x80 || sequence[3] > 0xBF) | 138 if (sequence[3] < 0x80 || sequence[3] > 0xBF) |
| 141 return nonCharacter; | 139 return nonCharacter; |
| 142 return ((sequence[0] << 18) + (sequence[1] << 12) + (sequence[2] << 6) + seq
uence[3]) - 0x03C82080; | 140 return ((sequence[0] << 18) + (sequence[1] << 12) + (sequence[2] << 6) + seq
uence[3]) - 0x03C82080; |
| 143 } | 141 } |
| 144 | 142 |
| 145 static inline UChar* appendCharacter(UChar* destination, int character) | 143 static inline UChar* appendCharacter(UChar* destination, int character) |
| 146 { | 144 { |
| 147 ASSERT(character != nonCharacter); | 145 ASSERT(character != nonCharacter); |
| 148 ASSERT(!U_IS_SURROGATE(character)); | 146 ASSERT(!U_IS_SURROGATE(character)); |
| 149 if (U_IS_BMP(character)) | 147 if (U_IS_BMP(character)) { |
| 150 *destination++ = static_cast<UChar>(character); | 148 *destination++ = static_cast<UChar>(character); |
| 151 else { | 149 } else { |
| 152 *destination++ = U16_LEAD(character); | 150 *destination++ = U16_LEAD(character); |
| 153 *destination++ = U16_TRAIL(character); | 151 *destination++ = U16_TRAIL(character); |
| 154 } | 152 } |
| 155 return destination; | 153 return destination; |
| 156 } | 154 } |
| 157 | 155 |
| 158 void TextCodecUTF8::consumePartialSequenceByte() | 156 void TextCodecUTF8::consumePartialSequenceByte() |
| 159 { | 157 { |
| 160 --m_partialSequenceSize; | 158 --m_partialSequenceSize; |
| 161 memmove(m_partialSequence, m_partialSequence + 1, m_partialSequenceSize); | 159 memmove(m_partialSequence, m_partialSequence + 1, m_partialSequenceSize); |
| (...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 309 if (source == end) | 307 if (source == end) |
| 310 break; | 308 break; |
| 311 if (!isASCII(*source)) | 309 if (!isASCII(*source)) |
| 312 continue; | 310 continue; |
| 313 } | 311 } |
| 314 *destination++ = *source++; | 312 *destination++ = *source++; |
| 315 continue; | 313 continue; |
| 316 } | 314 } |
| 317 int count = nonASCIISequenceLength(*source); | 315 int count = nonASCIISequenceLength(*source); |
| 318 int character; | 316 int character; |
| 319 if (!count) | 317 if (count == 0) { |
| 320 character = nonCharacter; | 318 character = nonCharacter; |
| 321 else { | 319 } else { |
| 322 if (count > end - source) { | 320 if (count > end - source) { |
| 323 ASSERT_WITH_SECURITY_IMPLICATION(end - source < static_cast<
ptrdiff_t>(sizeof(m_partialSequence))); | 321 ASSERT_WITH_SECURITY_IMPLICATION(end - source < static_cast<
ptrdiff_t>(sizeof(m_partialSequence))); |
| 324 ASSERT(!m_partialSequenceSize); | 322 ASSERT(!m_partialSequenceSize); |
| 325 m_partialSequenceSize = end - source; | 323 m_partialSequenceSize = end - source; |
| 326 memcpy(m_partialSequence, source, m_partialSequenceSize); | 324 memcpy(m_partialSequence, source, m_partialSequenceSize); |
| 327 source = end; | 325 source = end; |
| 328 break; | 326 break; |
| 329 } | 327 } |
| 330 character = decodeNonASCIISequence(source, count); | 328 character = decodeNonASCIISequence(source, count); |
| 331 } | 329 } |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 386 if (source == end) | 384 if (source == end) |
| 387 break; | 385 break; |
| 388 if (!isASCII(*source)) | 386 if (!isASCII(*source)) |
| 389 continue; | 387 continue; |
| 390 } | 388 } |
| 391 *destination16++ = *source++; | 389 *destination16++ = *source++; |
| 392 continue; | 390 continue; |
| 393 } | 391 } |
| 394 int count = nonASCIISequenceLength(*source); | 392 int count = nonASCIISequenceLength(*source); |
| 395 int character; | 393 int character; |
| 396 if (!count) | 394 if (count == 0) { |
| 397 character = nonCharacter; | 395 character = nonCharacter; |
| 398 else { | 396 } else { |
| 399 if (count > end - source) { | 397 if (count > end - source) { |
| 400 ASSERT_WITH_SECURITY_IMPLICATION(end - source < static_cast<
ptrdiff_t>(sizeof(m_partialSequence))); | 398 ASSERT_WITH_SECURITY_IMPLICATION(end - source < static_cast<
ptrdiff_t>(sizeof(m_partialSequence))); |
| 401 ASSERT(!m_partialSequenceSize); | 399 ASSERT(!m_partialSequenceSize); |
| 402 m_partialSequenceSize = end - source; | 400 m_partialSequenceSize = end - source; |
| 403 memcpy(m_partialSequence, source, m_partialSequenceSize); | 401 memcpy(m_partialSequence, source, m_partialSequenceSize); |
| 404 source = end; | 402 source = end; |
| 405 break; | 403 break; |
| 406 } | 404 } |
| 407 character = decodeNonASCIISequence(source, count); | 405 character = decodeNonASCIISequence(source, count); |
| 408 } | 406 } |
| (...skipping 15 matching lines...) Expand all Loading... |
| 424 | 422 |
| 425 return String::adopt(buffer16); | 423 return String::adopt(buffer16); |
| 426 } | 424 } |
| 427 | 425 |
| 428 template<typename CharType> | 426 template<typename CharType> |
| 429 CString TextCodecUTF8::encodeCommon(const CharType* characters, size_t length) | 427 CString TextCodecUTF8::encodeCommon(const CharType* characters, size_t length) |
| 430 { | 428 { |
| 431 // The maximum number of UTF-8 bytes needed per UTF-16 code unit is 3. | 429 // The maximum number of UTF-8 bytes needed per UTF-16 code unit is 3. |
| 432 // BMP characters take only one UTF-16 code unit and can take up to 3 bytes
(3x). | 430 // BMP characters take only one UTF-16 code unit and can take up to 3 bytes
(3x). |
| 433 // Non-BMP characters take two UTF-16 code units and can take up to 4 bytes
(2x). | 431 // Non-BMP characters take two UTF-16 code units and can take up to 4 bytes
(2x). |
| 434 if (length > numeric_limits<size_t>::max() / 3) | 432 if (length > std::numeric_limits<size_t>::max() / 3) |
| 435 CRASH(); | 433 CRASH(); |
| 436 Vector<uint8_t> bytes(length * 3); | 434 Vector<uint8_t> bytes(length * 3); |
| 437 | 435 |
| 438 size_t i = 0; | 436 size_t i = 0; |
| 439 size_t bytesWritten = 0; | 437 size_t bytesWritten = 0; |
| 440 while (i < length) { | 438 while (i < length) { |
| 441 UChar32 character; | 439 UChar32 character; |
| 442 U16_NEXT(characters, i, length, character); | 440 U16_NEXT(characters, i, length, character); |
| 443 // U16_NEXT will simply emit a surrogate code point if an unmatched surr
ogate | 441 // U16_NEXT will simply emit a surrogate code point if an unmatched surr
ogate |
| 444 // is encountered; we must convert it to a U+FFFD (REPLACEMENT CHARACTER
) here. | 442 // is encountered; we must convert it to a U+FFFD (REPLACEMENT CHARACTER
) here. |
| 445 if (0xD800 <= character && character <= 0xDFFF) | 443 if (0xD800 <= character && character <= 0xDFFF) |
| 446 character = replacementCharacter; | 444 character = replacementCharacter; |
| 447 U8_APPEND_UNSAFE(bytes.data(), bytesWritten, character); | 445 U8_APPEND_UNSAFE(bytes.data(), bytesWritten, character); |
| 448 } | 446 } |
| 449 | 447 |
| 450 return CString(reinterpret_cast<char*>(bytes.data()), bytesWritten); | 448 return CString(reinterpret_cast<char*>(bytes.data()), bytesWritten); |
| 451 } | 449 } |
| 452 | 450 |
| 453 CString TextCodecUTF8::encode(const UChar* characters, size_t length, Unencodabl
eHandling) | 451 CString TextCodecUTF8::encode(const UChar* characters, size_t length, Unencodabl
eHandling) |
| 454 { | 452 { |
| 455 return encodeCommon(characters, length); | 453 return encodeCommon(characters, length); |
| 456 } | 454 } |
| 457 | 455 |
| 458 CString TextCodecUTF8::encode(const LChar* characters, size_t length, Unencodabl
eHandling) | 456 CString TextCodecUTF8::encode(const LChar* characters, size_t length, Unencodabl
eHandling) |
| 459 { | 457 { |
| 460 return encodeCommon(characters, length); | 458 return encodeCommon(characters, length); |
| 461 } | 459 } |
| 462 | 460 |
| 463 } // namespace WTF | 461 } // namespace WTF |
| OLD | NEW |