Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(101)

Side by Side Diff: third_party/WebKit/Source/wtf/text/TextCodecUTF8.cpp

Issue 2373983006: reflow comments in wtf/text (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2004, 2006, 2008, 2011 Apple Inc. All rights reserved. 2 * Copyright (C) 2004, 2006, 2008, 2011 Apple Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions 5 * modification, are permitted provided that the following conditions
6 * are met: 6 * are met:
7 * 1. Redistributions of source code must retain the above copyright 7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer. 8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright 9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the 10 * notice, this list of conditions and the following disclaimer in the
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
48 48
49 // Additional aliases that originally were present in the encoding 49 // Additional aliases that originally were present in the encoding
50 // table in WebKit on Macintosh, and subsequently added by 50 // table in WebKit on Macintosh, and subsequently added by
51 // TextCodecICU. Perhaps we can prove some are not used on the web 51 // TextCodecICU. Perhaps we can prove some are not used on the web
52 // and remove them. 52 // and remove them.
53 registrar("unicode11utf8", "UTF-8"); 53 registrar("unicode11utf8", "UTF-8");
54 registrar("unicode20utf8", "UTF-8"); 54 registrar("unicode20utf8", "UTF-8");
55 registrar("utf8", "UTF-8"); 55 registrar("utf8", "UTF-8");
56 registrar("x-unicode20utf8", "UTF-8"); 56 registrar("x-unicode20utf8", "UTF-8");
57 57
58 // Additional aliases present in the WHATWG Encoding Standard (http://encoding.spec.whatwg.org/) 58 // Additional aliases present in the WHATWG Encoding Standard
59 // (http://encoding.spec.whatwg.org/)
59 // and Firefox (24), but not in ICU 4.6. 60 // and Firefox (24), but not in ICU 4.6.
60 registrar("unicode-1-1-utf-8", "UTF-8"); 61 registrar("unicode-1-1-utf-8", "UTF-8");
61 } 62 }
62 63
63 void TextCodecUTF8::registerCodecs(TextCodecRegistrar registrar) { 64 void TextCodecUTF8::registerCodecs(TextCodecRegistrar registrar) {
64 registrar("UTF-8", create, 0); 65 registrar("UTF-8", create, 0);
65 } 66 }
66 67
67 static inline int nonASCIISequenceLength(uint8_t firstByte) { 68 static inline int nonASCIISequenceLength(uint8_t firstByte) {
68 static const uint8_t lengths[256] = { 69 static const uint8_t lengths[256] = {
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after
184 if (count > m_partialSequenceSize) { 185 if (count > m_partialSequenceSize) {
185 if (count - m_partialSequenceSize > end - source) { 186 if (count - m_partialSequenceSize > end - source) {
186 if (!flush) { 187 if (!flush) {
187 // The new data is not enough to complete the sequence, so 188 // The new data is not enough to complete the sequence, so
188 // add it to the existing partial sequence. 189 // add it to the existing partial sequence.
189 memcpy(m_partialSequence + m_partialSequenceSize, source, 190 memcpy(m_partialSequence + m_partialSequenceSize, source,
190 end - source); 191 end - source);
191 m_partialSequenceSize += end - source; 192 m_partialSequenceSize += end - source;
192 return false; 193 return false;
193 } 194 }
194 // An incomplete partial sequence at the end is an error, but it will create 195 // An incomplete partial sequence at the end is an error, but it will
195 // a 16 bit string due to the replacementCharacter. Let the 16 bit path handle 196 // create a 16 bit string due to the replacementCharacter. Let the 16
196 // the error. 197 // bit path handle the error.
197 return true; 198 return true;
198 } 199 }
199 memcpy(m_partialSequence + m_partialSequenceSize, source, 200 memcpy(m_partialSequence + m_partialSequenceSize, source,
200 count - m_partialSequenceSize); 201 count - m_partialSequenceSize);
201 source += count - m_partialSequenceSize; 202 source += count - m_partialSequenceSize;
202 m_partialSequenceSize = count; 203 m_partialSequenceSize = count;
203 } 204 }
204 int character = decodeNonASCIISequence(m_partialSequence, count); 205 int character = decodeNonASCIISequence(m_partialSequence, count);
205 if (character & ~0xff) 206 if (character & ~0xff)
206 return true; 207 return true;
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
279 // each byte in an invalid sequence will turn into a replacement character. 280 // each byte in an invalid sequence will turn into a replacement character.
280 StringBuffer<LChar> buffer(m_partialSequenceSize + length); 281 StringBuffer<LChar> buffer(m_partialSequenceSize + length);
281 282
282 const uint8_t* source = reinterpret_cast<const uint8_t*>(bytes); 283 const uint8_t* source = reinterpret_cast<const uint8_t*>(bytes);
283 const uint8_t* end = source + length; 284 const uint8_t* end = source + length;
284 const uint8_t* alignedEnd = alignToMachineWord(end); 285 const uint8_t* alignedEnd = alignToMachineWord(end);
285 LChar* destination = buffer.characters(); 286 LChar* destination = buffer.characters();
286 287
287 do { 288 do {
288 if (m_partialSequenceSize) { 289 if (m_partialSequenceSize) {
289 // Explicitly copy destination and source pointers to avoid taking pointers to the 290 // Explicitly copy destination and source pointers to avoid taking
290 // local variables, which may harm code generation by disabling some optimizations 291 // pointers to the local variables, which may harm code generation by
291 // in some compilers. 292 // disabling some optimizations in some compilers.
292 LChar* destinationForHandlePartialSequence = destination; 293 LChar* destinationForHandlePartialSequence = destination;
293 const uint8_t* sourceForHandlePartialSequence = source; 294 const uint8_t* sourceForHandlePartialSequence = source;
294 if (handlePartialSequence(destinationForHandlePartialSequence, 295 if (handlePartialSequence(destinationForHandlePartialSequence,
295 sourceForHandlePartialSequence, end, flush, 296 sourceForHandlePartialSequence, end, flush,
296 stopOnError, sawError)) { 297 stopOnError, sawError)) {
297 source = sourceForHandlePartialSequence; 298 source = sourceForHandlePartialSequence;
298 goto upConvertTo16Bit; 299 goto upConvertTo16Bit;
299 } 300 }
300 destination = destinationForHandlePartialSequence; 301 destination = destinationForHandlePartialSequence;
301 source = sourceForHandlePartialSequence; 302 source = sourceForHandlePartialSequence;
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
363 StringBuffer<UChar> buffer16(m_partialSequenceSize + length); 364 StringBuffer<UChar> buffer16(m_partialSequenceSize + length);
364 365
365 UChar* destination16 = buffer16.characters(); 366 UChar* destination16 = buffer16.characters();
366 367
367 // Copy the already converted characters 368 // Copy the already converted characters
368 for (LChar* converted8 = buffer.characters(); converted8 < destination;) 369 for (LChar* converted8 = buffer.characters(); converted8 < destination;)
369 *destination16++ = *converted8++; 370 *destination16++ = *converted8++;
370 371
371 do { 372 do {
372 if (m_partialSequenceSize) { 373 if (m_partialSequenceSize) {
373 // Explicitly copy destination and source pointers to avoid taking pointers to the 374 // Explicitly copy destination and source pointers to avoid taking
374 // local variables, which may harm code generation by disabling some optimizations 375 // pointers to the local variables, which may harm code generation by
375 // in some compilers. 376 // disabling some optimizations in some compilers.
376 UChar* destinationForHandlePartialSequence = destination16; 377 UChar* destinationForHandlePartialSequence = destination16;
377 const uint8_t* sourceForHandlePartialSequence = source; 378 const uint8_t* sourceForHandlePartialSequence = source;
378 handlePartialSequence(destinationForHandlePartialSequence, 379 handlePartialSequence(destinationForHandlePartialSequence,
379 sourceForHandlePartialSequence, end, flush, 380 sourceForHandlePartialSequence, end, flush,
380 stopOnError, sawError); 381 stopOnError, sawError);
381 destination16 = destinationForHandlePartialSequence; 382 destination16 = destinationForHandlePartialSequence;
382 source = sourceForHandlePartialSequence; 383 source = sourceForHandlePartialSequence;
383 if (m_partialSequenceSize) 384 if (m_partialSequenceSize)
384 break; 385 break;
385 } 386 }
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
436 } while (flush && m_partialSequenceSize); 437 } while (flush && m_partialSequenceSize);
437 438
438 buffer16.shrink(destination16 - buffer16.characters()); 439 buffer16.shrink(destination16 - buffer16.characters());
439 440
440 return String::adopt(buffer16); 441 return String::adopt(buffer16);
441 } 442 }
442 443
443 template <typename CharType> 444 template <typename CharType>
444 CString TextCodecUTF8::encodeCommon(const CharType* characters, size_t length) { 445 CString TextCodecUTF8::encodeCommon(const CharType* characters, size_t length) {
445 // The maximum number of UTF-8 bytes needed per UTF-16 code unit is 3. 446 // The maximum number of UTF-8 bytes needed per UTF-16 code unit is 3.
446 // BMP characters take only one UTF-16 code unit and can take up to 3 bytes (3x). 447 // BMP characters take only one UTF-16 code unit and can take up to 3 bytes
447 // Non-BMP characters take two UTF-16 code units and can take up to 4 bytes (2x). 448 // (3x).
449 // Non-BMP characters take two UTF-16 code units and can take up to 4 bytes
450 // (2x).
448 if (length > std::numeric_limits<size_t>::max() / 3) 451 if (length > std::numeric_limits<size_t>::max() / 3)
449 CRASH(); 452 CRASH();
450 Vector<uint8_t> bytes(length * 3); 453 Vector<uint8_t> bytes(length * 3);
451 454
452 size_t i = 0; 455 size_t i = 0;
453 size_t bytesWritten = 0; 456 size_t bytesWritten = 0;
454 while (i < length) { 457 while (i < length) {
455 UChar32 character; 458 UChar32 character;
456 U16_NEXT(characters, i, length, character); 459 U16_NEXT(characters, i, length, character);
457 // U16_NEXT will simply emit a surrogate code point if an unmatched surrogate 460 // U16_NEXT will simply emit a surrogate code point if an unmatched
458 // is encountered; we must convert it to a U+FFFD (REPLACEMENT CHARACTER) here. 461 // surrogate is encountered; we must convert it to a
462 // U+FFFD (REPLACEMENT CHARACTER) here.
459 if (0xD800 <= character && character <= 0xDFFF) 463 if (0xD800 <= character && character <= 0xDFFF)
460 character = replacementCharacter; 464 character = replacementCharacter;
461 U8_APPEND_UNSAFE(bytes.data(), bytesWritten, character); 465 U8_APPEND_UNSAFE(bytes.data(), bytesWritten, character);
462 } 466 }
463 467
464 return CString(reinterpret_cast<char*>(bytes.data()), bytesWritten); 468 return CString(reinterpret_cast<char*>(bytes.data()), bytesWritten);
465 } 469 }
466 470
467 CString TextCodecUTF8::encode(const UChar* characters, 471 CString TextCodecUTF8::encode(const UChar* characters,
468 size_t length, 472 size_t length,
469 UnencodableHandling) { 473 UnencodableHandling) {
470 return encodeCommon(characters, length); 474 return encodeCommon(characters, length);
471 } 475 }
472 476
473 CString TextCodecUTF8::encode(const LChar* characters, 477 CString TextCodecUTF8::encode(const LChar* characters,
474 size_t length, 478 size_t length,
475 UnencodableHandling) { 479 UnencodableHandling) {
476 return encodeCommon(characters, length); 480 return encodeCommon(characters, length);
477 } 481 }
478 482
479 } // namespace WTF 483 } // namespace WTF
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698