OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2004, 2006, 2008, 2011 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2008, 2011 Apple Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
6 * are met: | 6 * are met: |
7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
(...skipping 13 matching lines...) Expand all Loading... |
24 */ | 24 */ |
25 | 25 |
26 #include "config.h" | 26 #include "config.h" |
27 #include "wtf/text/TextCodecUTF8.h" | 27 #include "wtf/text/TextCodecUTF8.h" |
28 | 28 |
29 #include "wtf/text/CString.h" | 29 #include "wtf/text/CString.h" |
30 #include "wtf/text/CharacterNames.h" | 30 #include "wtf/text/CharacterNames.h" |
31 #include "wtf/text/StringBuffer.h" | 31 #include "wtf/text/StringBuffer.h" |
32 #include "wtf/text/TextCodecASCIIFastPath.h" | 32 #include "wtf/text/TextCodecASCIIFastPath.h" |
33 | 33 |
34 using namespace WTF; | 34 namespace WTF { |
| 35 |
35 using namespace WTF::Unicode; | 36 using namespace WTF::Unicode; |
36 using namespace std; | |
37 | |
38 namespace WTF { | |
39 | 37 |
40 const int nonCharacter = -1; | 38 const int nonCharacter = -1; |
41 | 39 |
42 PassOwnPtr<TextCodec> TextCodecUTF8::create(const TextEncoding&, const void*) | 40 PassOwnPtr<TextCodec> TextCodecUTF8::create(const TextEncoding&, const void*) |
43 { | 41 { |
44 return adoptPtr(new TextCodecUTF8); | 42 return adoptPtr(new TextCodecUTF8); |
45 } | 43 } |
46 | 44 |
47 void TextCodecUTF8::registerEncodingNames(EncodingNameRegistrar registrar) | 45 void TextCodecUTF8::registerEncodingNames(EncodingNameRegistrar registrar) |
48 { | 46 { |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
139 return nonCharacter; | 137 return nonCharacter; |
140 if (sequence[3] < 0x80 || sequence[3] > 0xBF) | 138 if (sequence[3] < 0x80 || sequence[3] > 0xBF) |
141 return nonCharacter; | 139 return nonCharacter; |
142 return ((sequence[0] << 18) + (sequence[1] << 12) + (sequence[2] << 6) + sequence[3]) - 0x03C82080; | 140 return ((sequence[0] << 18) + (sequence[1] << 12) + (sequence[2] << 6) + sequence[3]) - 0x03C82080; |
143 } | 141 } |
144 | 142 |
145 static inline UChar* appendCharacter(UChar* destination, int character) | 143 static inline UChar* appendCharacter(UChar* destination, int character) |
146 { | 144 { |
147 ASSERT(character != nonCharacter); | 145 ASSERT(character != nonCharacter); |
148 ASSERT(!U_IS_SURROGATE(character)); | 146 ASSERT(!U_IS_SURROGATE(character)); |
149 if (U_IS_BMP(character)) | 147 if (U_IS_BMP(character)) { |
150 *destination++ = static_cast<UChar>(character); | 148 *destination++ = static_cast<UChar>(character); |
151 else { | 149 } else { |
152 *destination++ = U16_LEAD(character); | 150 *destination++ = U16_LEAD(character); |
153 *destination++ = U16_TRAIL(character); | 151 *destination++ = U16_TRAIL(character); |
154 } | 152 } |
155 return destination; | 153 return destination; |
156 } | 154 } |
157 | 155 |
158 void TextCodecUTF8::consumePartialSequenceByte() | 156 void TextCodecUTF8::consumePartialSequenceByte() |
159 { | 157 { |
160 --m_partialSequenceSize; | 158 --m_partialSequenceSize; |
161 memmove(m_partialSequence, m_partialSequence + 1, m_partialSequenceSize); | 159 memmove(m_partialSequence, m_partialSequence + 1, m_partialSequenceSize); |
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
309 if (source == end) | 307 if (source == end) |
310 break; | 308 break; |
311 if (!isASCII(*source)) | 309 if (!isASCII(*source)) |
312 continue; | 310 continue; |
313 } | 311 } |
314 *destination++ = *source++; | 312 *destination++ = *source++; |
315 continue; | 313 continue; |
316 } | 314 } |
317 int count = nonASCIISequenceLength(*source); | 315 int count = nonASCIISequenceLength(*source); |
318 int character; | 316 int character; |
319 if (!count) | 317 if (count == 0) { |
320 character = nonCharacter; | 318 character = nonCharacter; |
321 else { | 319 } else { |
322 if (count > end - source) { | 320 if (count > end - source) { |
323 ASSERT_WITH_SECURITY_IMPLICATION(end - source < static_cast<ptrdiff_t>(sizeof(m_partialSequence))); | 321 ASSERT_WITH_SECURITY_IMPLICATION(end - source < static_cast<ptrdiff_t>(sizeof(m_partialSequence))); |
324 ASSERT(!m_partialSequenceSize); | 322 ASSERT(!m_partialSequenceSize); |
325 m_partialSequenceSize = end - source; | 323 m_partialSequenceSize = end - source; |
326 memcpy(m_partialSequence, source, m_partialSequenceSize); | 324 memcpy(m_partialSequence, source, m_partialSequenceSize); |
327 source = end; | 325 source = end; |
328 break; | 326 break; |
329 } | 327 } |
330 character = decodeNonASCIISequence(source, count); | 328 character = decodeNonASCIISequence(source, count); |
331 } | 329 } |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
386 if (source == end) | 384 if (source == end) |
387 break; | 385 break; |
388 if (!isASCII(*source)) | 386 if (!isASCII(*source)) |
389 continue; | 387 continue; |
390 } | 388 } |
391 *destination16++ = *source++; | 389 *destination16++ = *source++; |
392 continue; | 390 continue; |
393 } | 391 } |
394 int count = nonASCIISequenceLength(*source); | 392 int count = nonASCIISequenceLength(*source); |
395 int character; | 393 int character; |
396 if (!count) | 394 if (count == 0) { |
397 character = nonCharacter; | 395 character = nonCharacter; |
398 else { | 396 } else { |
399 if (count > end - source) { | 397 if (count > end - source) { |
400 ASSERT_WITH_SECURITY_IMPLICATION(end - source < static_cast<ptrdiff_t>(sizeof(m_partialSequence))); | 398 ASSERT_WITH_SECURITY_IMPLICATION(end - source < static_cast<ptrdiff_t>(sizeof(m_partialSequence))); |
401 ASSERT(!m_partialSequenceSize); | 399 ASSERT(!m_partialSequenceSize); |
402 m_partialSequenceSize = end - source; | 400 m_partialSequenceSize = end - source; |
403 memcpy(m_partialSequence, source, m_partialSequenceSize); | 401 memcpy(m_partialSequence, source, m_partialSequenceSize); |
404 source = end; | 402 source = end; |
405 break; | 403 break; |
406 } | 404 } |
407 character = decodeNonASCIISequence(source, count); | 405 character = decodeNonASCIISequence(source, count); |
408 } | 406 } |
(...skipping 15 matching lines...) Expand all Loading... |
424 | 422 |
425 return String::adopt(buffer16); | 423 return String::adopt(buffer16); |
426 } | 424 } |
427 | 425 |
428 template<typename CharType> | 426 template<typename CharType> |
429 CString TextCodecUTF8::encodeCommon(const CharType* characters, size_t length) | 427 CString TextCodecUTF8::encodeCommon(const CharType* characters, size_t length) |
430 { | 428 { |
431 // The maximum number of UTF-8 bytes needed per UTF-16 code unit is 3. | 429 // The maximum number of UTF-8 bytes needed per UTF-16 code unit is 3. |
432 // BMP characters take only one UTF-16 code unit and can take up to 3 bytes (3x). | 430 // BMP characters take only one UTF-16 code unit and can take up to 3 bytes (3x). |
433 // Non-BMP characters take two UTF-16 code units and can take up to 4 bytes (2x). | 431 // Non-BMP characters take two UTF-16 code units and can take up to 4 bytes (2x). |
434 if (length > numeric_limits<size_t>::max() / 3) | 432 if (length > std::numeric_limits<size_t>::max() / 3) |
435 CRASH(); | 433 CRASH(); |
436 Vector<uint8_t> bytes(length * 3); | 434 Vector<uint8_t> bytes(length * 3); |
437 | 435 |
438 size_t i = 0; | 436 size_t i = 0; |
439 size_t bytesWritten = 0; | 437 size_t bytesWritten = 0; |
440 while (i < length) { | 438 while (i < length) { |
441 UChar32 character; | 439 UChar32 character; |
442 U16_NEXT(characters, i, length, character); | 440 U16_NEXT(characters, i, length, character); |
443 // U16_NEXT will simply emit a surrogate code point if an unmatched surrogate | 441 // U16_NEXT will simply emit a surrogate code point if an unmatched surrogate |
444 // is encountered; we must convert it to a U+FFFD (REPLACEMENT CHARACTER) here. | 442 // is encountered; we must convert it to a U+FFFD (REPLACEMENT CHARACTER) here. |
445 if (0xD800 <= character && character <= 0xDFFF) | 443 if (0xD800 <= character && character <= 0xDFFF) |
446 character = replacementCharacter; | 444 character = replacementCharacter; |
447 U8_APPEND_UNSAFE(bytes.data(), bytesWritten, character); | 445 U8_APPEND_UNSAFE(bytes.data(), bytesWritten, character); |
448 } | 446 } |
449 | 447 |
450 return CString(reinterpret_cast<char*>(bytes.data()), bytesWritten); | 448 return CString(reinterpret_cast<char*>(bytes.data()), bytesWritten); |
451 } | 449 } |
452 | 450 |
453 CString TextCodecUTF8::encode(const UChar* characters, size_t length, UnencodableHandling) | 451 CString TextCodecUTF8::encode(const UChar* characters, size_t length, UnencodableHandling) |
454 { | 452 { |
455 return encodeCommon(characters, length); | 453 return encodeCommon(characters, length); |
456 } | 454 } |
457 | 455 |
458 CString TextCodecUTF8::encode(const LChar* characters, size_t length, UnencodableHandling) | 456 CString TextCodecUTF8::encode(const LChar* characters, size_t length, UnencodableHandling) |
459 { | 457 { |
460 return encodeCommon(characters, length); | 458 return encodeCommon(characters, length); |
461 } | 459 } |
462 | 460 |
463 } // namespace WTF | 461 } // namespace WTF |
OLD | NEW |