third_party/WebKit/Source/wtf/text/TextCodecUTF8.cpp - Issue 2373983006: reflow comments in wtf/text

Side by Side Diff: third_party/WebKit/Source/wtf/text/TextCodecUTF8.cpp

Issue 2373983006: reflow comments in wtf/text (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp ('K') | « third_party/WebKit/Source/wtf/text/TextCodecUTF16.cpp ('k') | third_party/WebKit/Source/wtf/text/TextCodecUserDefined.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2004, 2006, 2008, 2011 Apple Inc. All rights reserved.	2 * Copyright (C) 2004, 2006, 2008, 2011 Apple Inc. All rights reserved.

3 *	3 *

4 * Redistribution and use in source and binary forms, with or without	4 * Redistribution and use in source and binary forms, with or without

5 * modification, are permitted provided that the following conditions	5 * modification, are permitted provided that the following conditions

6 * are met:	6 * are met:

7 * 1. Redistributions of source code must retain the above copyright	7 * 1. Redistributions of source code must retain the above copyright

8 * notice, this list of conditions and the following disclaimer.	8 * notice, this list of conditions and the following disclaimer.

9 * 2. Redistributions in binary form must reproduce the above copyright	9 * 2. Redistributions in binary form must reproduce the above copyright

10 * notice, this list of conditions and the following disclaimer in the	10 * notice, this list of conditions and the following disclaimer in the

(...skipping 37 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
48	48

49 // Additional aliases that originally were present in the encoding	49 // Additional aliases that originally were present in the encoding

50 // table in WebKit on Macintosh, and subsequently added by	50 // table in WebKit on Macintosh, and subsequently added by

51 // TextCodecICU. Perhaps we can prove some are not used on the web	51 // TextCodecICU. Perhaps we can prove some are not used on the web

52 // and remove them.	52 // and remove them.

53 registrar("unicode11utf8", "UTF-8");	53 registrar("unicode11utf8", "UTF-8");

54 registrar("unicode20utf8", "UTF-8");	54 registrar("unicode20utf8", "UTF-8");

55 registrar("utf8", "UTF-8");	55 registrar("utf8", "UTF-8");

56 registrar("x-unicode20utf8", "UTF-8");	56 registrar("x-unicode20utf8", "UTF-8");

57	57

58 // Additional aliases present in the WHATWG Encoding Standard (http://encoding.spec.whatwg.org/)	58 // Additional aliases present in the WHATWG Encoding Standard

	59 // (http://encoding.spec.whatwg.org/)

59 // and Firefox (24), but not in ICU 4.6.	60 // and Firefox (24), but not in ICU 4.6.

60 registrar("unicode-1-1-utf-8", "UTF-8");	61 registrar("unicode-1-1-utf-8", "UTF-8");

61 }	62 }

62	63

63 void TextCodecUTF8::registerCodecs(TextCodecRegistrar registrar) {	64 void TextCodecUTF8::registerCodecs(TextCodecRegistrar registrar) {

64 registrar("UTF-8", create, 0);	65 registrar("UTF-8", create, 0);

65 }	66 }

66	67

67 static inline int nonASCIISequenceLength(uint8_t firstByte) {	68 static inline int nonASCIISequenceLength(uint8_t firstByte) {

68 static const uint8_t lengths[256] = {	69 static const uint8_t lengths[256] = {

(...skipping 115 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
184 if (count > m_partialSequenceSize) {	185 if (count > m_partialSequenceSize) {

185 if (count - m_partialSequenceSize > end - source) {	186 if (count - m_partialSequenceSize > end - source) {

186 if (!flush) {	187 if (!flush) {

187 // The new data is not enough to complete the sequence, so	188 // The new data is not enough to complete the sequence, so

188 // add it to the existing partial sequence.	189 // add it to the existing partial sequence.

189 memcpy(m_partialSequence + m_partialSequenceSize, source,	190 memcpy(m_partialSequence + m_partialSequenceSize, source,

190 end - source);	191 end - source);

191 m_partialSequenceSize += end - source;	192 m_partialSequenceSize += end - source;

192 return false;	193 return false;

193 }	194 }

194 // An incomplete partial sequence at the end is an error, but it will create	195 // An incomplete partial sequence at the end is an error, but it will

195 // a 16 bit string due to the replacementCharacter. Let the 16 bit path handle	196 // create a 16 bit string due to the replacementCharacter. Let the 16

196 // the error.	197 // bit path handle the error.

197 return true;	198 return true;

198 }	199 }

199 memcpy(m_partialSequence + m_partialSequenceSize, source,	200 memcpy(m_partialSequence + m_partialSequenceSize, source,

200 count - m_partialSequenceSize);	201 count - m_partialSequenceSize);

201 source += count - m_partialSequenceSize;	202 source += count - m_partialSequenceSize;

202 m_partialSequenceSize = count;	203 m_partialSequenceSize = count;

203 }	204 }

204 int character = decodeNonASCIISequence(m_partialSequence, count);	205 int character = decodeNonASCIISequence(m_partialSequence, count);

205 if (character & ~0xff)	206 if (character & ~0xff)

206 return true;	207 return true;

(...skipping 72 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
279 // each byte in an invalid sequence will turn into a replacement character.	280 // each byte in an invalid sequence will turn into a replacement character.

280 StringBuffer<LChar> buffer(m_partialSequenceSize + length);	281 StringBuffer<LChar> buffer(m_partialSequenceSize + length);

281	282

282 const uint8_t* source = reinterpret_cast<const uint8_t*>(bytes);	283 const uint8_t* source = reinterpret_cast<const uint8_t*>(bytes);

283 const uint8_t* end = source + length;	284 const uint8_t* end = source + length;

284 const uint8_t* alignedEnd = alignToMachineWord(end);	285 const uint8_t* alignedEnd = alignToMachineWord(end);

285 LChar* destination = buffer.characters();	286 LChar* destination = buffer.characters();

286	287

287 do {	288 do {

288 if (m_partialSequenceSize) {	289 if (m_partialSequenceSize) {

289 // Explicitly copy destination and source pointers to avoid taking pointers to the	290 // Explicitly copy destination and source pointers to avoid taking

290 // local variables, which may harm code generation by disabling some optimizations	291 // pointers to the local variables, which may harm code generation by

291 // in some compilers.	292 // disabling some optimizations in some compilers.

292 LChar* destinationForHandlePartialSequence = destination;	293 LChar* destinationForHandlePartialSequence = destination;

293 const uint8_t* sourceForHandlePartialSequence = source;	294 const uint8_t* sourceForHandlePartialSequence = source;

294 if (handlePartialSequence(destinationForHandlePartialSequence,	295 if (handlePartialSequence(destinationForHandlePartialSequence,

295 sourceForHandlePartialSequence, end, flush,	296 sourceForHandlePartialSequence, end, flush,

296 stopOnError, sawError)) {	297 stopOnError, sawError)) {

297 source = sourceForHandlePartialSequence;	298 source = sourceForHandlePartialSequence;

298 goto upConvertTo16Bit;	299 goto upConvertTo16Bit;

299 }	300 }

300 destination = destinationForHandlePartialSequence;	301 destination = destinationForHandlePartialSequence;

301 source = sourceForHandlePartialSequence;	302 source = sourceForHandlePartialSequence;

(...skipping 61 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
363 StringBuffer<UChar> buffer16(m_partialSequenceSize + length);	364 StringBuffer<UChar> buffer16(m_partialSequenceSize + length);

364	365

365 UChar* destination16 = buffer16.characters();	366 UChar* destination16 = buffer16.characters();

366	367

367 // Copy the already converted characters	368 // Copy the already converted characters

368 for (LChar* converted8 = buffer.characters(); converted8 < destination;)	369 for (LChar* converted8 = buffer.characters(); converted8 < destination;)

369 *destination16++ = *converted8++;	370 *destination16++ = *converted8++;

370	371

371 do {	372 do {

372 if (m_partialSequenceSize) {	373 if (m_partialSequenceSize) {

373 // Explicitly copy destination and source pointers to avoid taking pointers to the	374 // Explicitly copy destination and source pointers to avoid taking

374 // local variables, which may harm code generation by disabling some optimizations	375 // pointers to the local variables, which may harm code generation by

375 // in some compilers.	376 // disabling some optimizations in some compilers.

376 UChar* destinationForHandlePartialSequence = destination16;	377 UChar* destinationForHandlePartialSequence = destination16;

377 const uint8_t* sourceForHandlePartialSequence = source;	378 const uint8_t* sourceForHandlePartialSequence = source;

378 handlePartialSequence(destinationForHandlePartialSequence,	379 handlePartialSequence(destinationForHandlePartialSequence,

379 sourceForHandlePartialSequence, end, flush,	380 sourceForHandlePartialSequence, end, flush,

380 stopOnError, sawError);	381 stopOnError, sawError);

381 destination16 = destinationForHandlePartialSequence;	382 destination16 = destinationForHandlePartialSequence;

382 source = sourceForHandlePartialSequence;	383 source = sourceForHandlePartialSequence;

383 if (m_partialSequenceSize)	384 if (m_partialSequenceSize)

384 break;	385 break;

385 }	386 }

(...skipping 50 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
436 } while (flush && m_partialSequenceSize);	437 } while (flush && m_partialSequenceSize);

437	438

438 buffer16.shrink(destination16 - buffer16.characters());	439 buffer16.shrink(destination16 - buffer16.characters());

439	440

440 return String::adopt(buffer16);	441 return String::adopt(buffer16);

441 }	442 }

442	443

443 template <typename CharType>	444 template <typename CharType>

444 CString TextCodecUTF8::encodeCommon(const CharType* characters, size_t length) {	445 CString TextCodecUTF8::encodeCommon(const CharType* characters, size_t length) {

445 // The maximum number of UTF-8 bytes needed per UTF-16 code unit is 3.	446 // The maximum number of UTF-8 bytes needed per UTF-16 code unit is 3.

446 // BMP characters take only one UTF-16 code unit and can take up to 3 bytes (3x).	447 // BMP characters take only one UTF-16 code unit and can take up to 3 bytes

447 // Non-BMP characters take two UTF-16 code units and can take up to 4 bytes (2x).	448 // (3x).

	449 // Non-BMP characters take two UTF-16 code units and can take up to 4 bytes

	450 // (2x).

448 if (length > std::numeric_limits<size_t>::max() / 3)	451 if (length > std::numeric_limits<size_t>::max() / 3)

449 CRASH();	452 CRASH();

450 Vector<uint8_t> bytes(length * 3);	453 Vector<uint8_t> bytes(length * 3);

451	454

452 size_t i = 0;	455 size_t i = 0;

453 size_t bytesWritten = 0;	456 size_t bytesWritten = 0;

454 while (i < length) {	457 while (i < length) {

455 UChar32 character;	458 UChar32 character;

456 U16_NEXT(characters, i, length, character);	459 U16_NEXT(characters, i, length, character);

457 // U16_NEXT will simply emit a surrogate code point if an unmatched surrogate	460 // U16_NEXT will simply emit a surrogate code point if an unmatched

458 // is encountered; we must convert it to a U+FFFD (REPLACEMENT CHARACTER) here.	461 // surrogate is encountered; we must convert it to a

	462 // U+FFFD (REPLACEMENT CHARACTER) here.

459 if (0xD800 <= character && character <= 0xDFFF)	463 if (0xD800 <= character && character <= 0xDFFF)

460 character = replacementCharacter;	464 character = replacementCharacter;

461 U8_APPEND_UNSAFE(bytes.data(), bytesWritten, character);	465 U8_APPEND_UNSAFE(bytes.data(), bytesWritten, character);

462 }	466 }

463	467

464 return CString(reinterpret_cast<char*>(bytes.data()), bytesWritten);	468 return CString(reinterpret_cast<char*>(bytes.data()), bytesWritten);

465 }	469 }

466	470

467 CString TextCodecUTF8::encode(const UChar* characters,	471 CString TextCodecUTF8::encode(const UChar* characters,

468 size_t length,	472 size_t length,

469 UnencodableHandling) {	473 UnencodableHandling) {

470 return encodeCommon(characters, length);	474 return encodeCommon(characters, length);

471 }	475 }

472	476

473 CString TextCodecUTF8::encode(const LChar* characters,	477 CString TextCodecUTF8::encode(const LChar* characters,

474 size_t length,	478 size_t length,

475 UnencodableHandling) {	479 UnencodableHandling) {

476 return encodeCommon(characters, length);	480 return encodeCommon(characters, length);

477 }	481 }

478	482

479 } // namespace WTF	483 } // namespace WTF

OLD	NEW