| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2004, 2006, 2008, 2011 Apple Inc. All rights reserved. | 2 * Copyright (C) 2004, 2006, 2008, 2011 Apple Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
| 6 * are met: | 6 * are met: |
| 7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
| 8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
| 9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
| 10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
| 11 * documentation and/or other materials provided with the distribution. | 11 * documentation and/or other materials provided with the distribution. |
| 12 * | 12 * |
| 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY | 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY |
| 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR | 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR |
| 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 24 */ | 24 */ |
| 25 | 25 |
| 26 #include "config.h" | 26 #include "config.h" |
| 27 #include "wtf/text/TextCodecUTF8.h" | 27 #include "wtf/text/TextCodecUTF8.h" |
| 28 | 28 |
| 29 #include "wtf/text/TextCodecASCIIFastPath.h" | 29 #include "wtf/text/TextCodecASCIIFastPath.h" |
| 30 #include "wtf/text/CString.h" | 30 #include "wtf/text/CString.h" |
| 31 #include "wtf/text/StringBuffer.h" | 31 #include "wtf/text/StringBuffer.h" |
| 32 #include "wtf/unicode/CharacterNames.h" | 32 #include "wtf/unicode/CharacterNames.h" |
| 33 | 33 |
| (...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 253 return false; | 253 return false; |
| 254 continue; | 254 continue; |
| 255 } | 255 } |
| 256 | 256 |
| 257 m_partialSequenceSize -= count; | 257 m_partialSequenceSize -= count; |
| 258 destination = appendCharacter(destination, character); | 258 destination = appendCharacter(destination, character); |
| 259 } while (m_partialSequenceSize); | 259 } while (m_partialSequenceSize); |
| 260 | 260 |
| 261 return false; | 261 return false; |
| 262 } | 262 } |
| 263 | 263 |
| 264 String TextCodecUTF8::decode(const char* bytes, size_t length, bool flush, bool
stopOnError, bool& sawError) | 264 String TextCodecUTF8::decode(const char* bytes, size_t length, bool flush, bool
stopOnError, bool& sawError) |
| 265 { | 265 { |
| 266 // Each input byte might turn into a character. | 266 // Each input byte might turn into a character. |
| 267 // That includes all bytes in the partial-sequence buffer because | 267 // That includes all bytes in the partial-sequence buffer because |
| 268 // each byte in an invalid sequence will turn into a replacement character. | 268 // each byte in an invalid sequence will turn into a replacement character. |
| 269 StringBuffer<LChar> buffer(m_partialSequenceSize + length); | 269 StringBuffer<LChar> buffer(m_partialSequenceSize + length); |
| 270 | 270 |
| 271 const uint8_t* source = reinterpret_cast<const uint8_t*>(bytes); | 271 const uint8_t* source = reinterpret_cast<const uint8_t*>(bytes); |
| 272 const uint8_t* end = source + length; | 272 const uint8_t* end = source + length; |
| 273 const uint8_t* alignedEnd = alignToMachineWord(end); | 273 const uint8_t* alignedEnd = alignToMachineWord(end); |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 322 memcpy(m_partialSequence, source, m_partialSequenceSize); | 322 memcpy(m_partialSequence, source, m_partialSequenceSize); |
| 323 source = end; | 323 source = end; |
| 324 break; | 324 break; |
| 325 } | 325 } |
| 326 character = decodeNonASCIISequence(source, count); | 326 character = decodeNonASCIISequence(source, count); |
| 327 } | 327 } |
| 328 if (character == nonCharacter) { | 328 if (character == nonCharacter) { |
| 329 sawError = true; | 329 sawError = true; |
| 330 if (stopOnError) | 330 if (stopOnError) |
| 331 break; | 331 break; |
| 332 | 332 |
| 333 goto upConvertTo16Bit; | 333 goto upConvertTo16Bit; |
| 334 } | 334 } |
| 335 if (character > 0xff) | 335 if (character > 0xff) |
| 336 goto upConvertTo16Bit; | 336 goto upConvertTo16Bit; |
| 337 | 337 |
| 338 source += count; | 338 source += count; |
| 339 *destination++ = character; | 339 *destination++ = character; |
| 340 } | 340 } |
| 341 } while (flush && m_partialSequenceSize); | 341 } while (flush && m_partialSequenceSize); |
| 342 | 342 |
| (...skipping 16 matching lines...) Expand all Loading... |
| 359 // local variables, which may harm code generation by disabling some
optimizations | 359 // local variables, which may harm code generation by disabling some
optimizations |
| 360 // in some compilers. | 360 // in some compilers. |
| 361 UChar* destinationForHandlePartialSequence = destination16; | 361 UChar* destinationForHandlePartialSequence = destination16; |
| 362 const uint8_t* sourceForHandlePartialSequence = source; | 362 const uint8_t* sourceForHandlePartialSequence = source; |
| 363 handlePartialSequence(destinationForHandlePartialSequence, sourceFor
HandlePartialSequence, end, flush, stopOnError, sawError); | 363 handlePartialSequence(destinationForHandlePartialSequence, sourceFor
HandlePartialSequence, end, flush, stopOnError, sawError); |
| 364 destination16 = destinationForHandlePartialSequence; | 364 destination16 = destinationForHandlePartialSequence; |
| 365 source = sourceForHandlePartialSequence; | 365 source = sourceForHandlePartialSequence; |
| 366 if (m_partialSequenceSize) | 366 if (m_partialSequenceSize) |
| 367 break; | 367 break; |
| 368 } | 368 } |
| 369 | 369 |
| 370 while (source < end) { | 370 while (source < end) { |
| 371 if (isASCII(*source)) { | 371 if (isASCII(*source)) { |
| 372 // Fast path for ASCII. Most UTF-8 text will be ASCII. | 372 // Fast path for ASCII. Most UTF-8 text will be ASCII. |
| 373 if (isAlignedToMachineWord(source)) { | 373 if (isAlignedToMachineWord(source)) { |
| 374 while (source < alignedEnd) { | 374 while (source < alignedEnd) { |
| 375 MachineWord chunk = *reinterpret_cast_ptr<const MachineW
ord*>(source); | 375 MachineWord chunk = *reinterpret_cast_ptr<const MachineW
ord*>(source); |
| 376 if (!isAllASCII<LChar>(chunk)) | 376 if (!isAllASCII<LChar>(chunk)) |
| 377 break; | 377 break; |
| 378 copyASCIIMachineWord(destination16, source); | 378 copyASCIIMachineWord(destination16, source); |
| 379 source += sizeof(MachineWord); | 379 source += sizeof(MachineWord); |
| (...skipping 28 matching lines...) Expand all Loading... |
| 408 break; | 408 break; |
| 409 // Each error generates a replacement character and consumes one
byte. | 409 // Each error generates a replacement character and consumes one
byte. |
| 410 *destination16++ = replacementCharacter; | 410 *destination16++ = replacementCharacter; |
| 411 ++source; | 411 ++source; |
| 412 continue; | 412 continue; |
| 413 } | 413 } |
| 414 source += count; | 414 source += count; |
| 415 destination16 = appendCharacter(destination16, character); | 415 destination16 = appendCharacter(destination16, character); |
| 416 } | 416 } |
| 417 } while (flush && m_partialSequenceSize); | 417 } while (flush && m_partialSequenceSize); |
| 418 | 418 |
| 419 buffer16.shrink(destination16 - buffer16.characters()); | 419 buffer16.shrink(destination16 - buffer16.characters()); |
| 420 | 420 |
| 421 return String::adopt(buffer16); | 421 return String::adopt(buffer16); |
| 422 } | 422 } |
| 423 | 423 |
| 424 template<typename CharType> | 424 template<typename CharType> |
| 425 CString TextCodecUTF8::encodeCommon(const CharType* characters, size_t length) | 425 CString TextCodecUTF8::encodeCommon(const CharType* characters, size_t length) |
| 426 { | 426 { |
| 427 // The maximum number of UTF-8 bytes needed per UTF-16 code unit is 3. | 427 // The maximum number of UTF-8 bytes needed per UTF-16 code unit is 3. |
| 428 // BMP characters take only one UTF-16 code unit and can take up to 3 bytes
(3x). | 428 // BMP characters take only one UTF-16 code unit and can take up to 3 bytes
(3x). |
| 429 // Non-BMP characters take two UTF-16 code units and can take up to 4 bytes
(2x). | 429 // Non-BMP characters take two UTF-16 code units and can take up to 4 bytes
(2x). |
| 430 if (length > numeric_limits<size_t>::max() / 3) | 430 if (length > numeric_limits<size_t>::max() / 3) |
| (...skipping 15 matching lines...) Expand all Loading... |
| 446 { | 446 { |
| 447 return encodeCommon(characters, length); | 447 return encodeCommon(characters, length); |
| 448 } | 448 } |
| 449 | 449 |
| 450 /* 8-bit (LChar) overload of encode(): forwards characters and length unchanged to encodeCommon() and returns its CString result. The UnencodableHandling parameter is unnamed and is not used here. */ CString TextCodecUTF8::encode(const LChar* characters, size_t length, Unencodabl
eHandling) | 450 /* 8-bit (LChar) overload of encode(): forwards characters and length unchanged to encodeCommon() and returns its CString result. The UnencodableHandling parameter is unnamed and is not used here. */ CString TextCodecUTF8::encode(const LChar* characters, size_t length, Unencodabl
eHandling) |
| 451 { | 451 { |
| 452 return encodeCommon(characters, length); | 452 return encodeCommon(characters, length); |
| 453 } | 453 } |
| 454 | 454 |
| 455 } // namespace WTF | 455 } // namespace WTF |
| OLD | NEW |