third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp - Issue 2044343002: DevTools: update V8Inspector to work with the new v8_inspector API.

Side by Side Diff: third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp

Issue 2044343002: DevTools: update V8Inspector to work with the new v8_inspector API. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « third_party/WebKit/Source/platform/inspector_protocol/String16STL.h ('k') | third_party/WebKit/Source/platform/inspector_protocol/TypeBuilder_h.template » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved.	1 // Copyright 2016 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "platform/inspector_protocol/String16STL.h"	5 #include "platform/inspector_protocol/String16STL.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <cctype>	8 #include <cctype>

	9 #include <cstdio>

9 #include <functional>	10 #include <functional>

10 #include <locale>	11 #include <locale>

11	12

	13 #define DCHECK(k)
	dgozman 2016/06/08 15:56:56 Not needed, as you have it in Platform.h
	14

12 namespace blink {	15 namespace blink {

13 namespace protocol {	16 namespace protocol {

14	17

15 const UChar replacementCharacter = 0xFFFD;	18 const UChar replacementCharacter = 0xFFFD;

16	19

17 template<typename CharType> inline bool isASCII(CharType c)	20 template<typename CharType> inline bool isASCII(CharType c)

18 {	21 {

19 return !(c & ~0x7F);	22 return !(c & ~0x7F);

20 }	23 }

21	24

(...skipping 245 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
267 case 1:	270 case 1:

268 *--target = (char)(ch \| firstByteMark[bytesToWrite]);	271 *--target = (char)(ch \| firstByteMark[bytesToWrite]);

269 }	272 }

270 target += bytesToWrite;	273 target += bytesToWrite;

271 }	274 }

272 *sourceStart = source;	275 *sourceStart = source;

273 *targetStart = target;	276 *targetStart = target;

274 return result;	277 return result;

275 }	278 }

276	279

	280 /**

	281 * Is this code point a BMP code point (U+0000..U+ffff)?

	282 * @param c 32-bit code point

	283 * @return TRUE or FALSE

	284 * @stable ICU 2.8

	285 */

	286 #define U_IS_BMP(c) ((uint32_t)(c) <= 0xffff)

	287

	288 /**

	289 * Is this code point a supplementary code point (U+10000..U+10ffff)?

	290 * @param c 32-bit code point

	291 * @return TRUE or FALSE

	292 * @stable ICU 2.8

	293 */

	294 #define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c) - 0x10000) <= 0xfffff)

	295

	296 /**

	297 * Is this code point a surrogate (U+d800..U+dfff)?

	298 * @param c 32-bit code point

	299 * @return TRUE or FALSE

	300 * @stable ICU 2.4

	301 */

	302 #define U_IS_SURROGATE(c) (((c) & 0xfffff800) == 0xd800)

	303

	304 /**

	305 * Get the lead surrogate (0xd800..0xdbff) for a

	306 * supplementary code point (0x10000..0x10ffff).

	307 * @param supplementary 32-bit code point (U+10000..U+10ffff)

	308 * @return lead surrogate (U+d800..U+dbff) for supplementary

	309 * @stable ICU 2.4

	310 */

	311 #define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xd7c0)

	312

	313 /**

	314 * Get the trail surrogate (0xdc00..0xdfff) for a

	315 * supplementary code point (0x10000..0x10ffff).

	316 * @param supplementary 32-bit code point (U+10000..U+10ffff)

	317 * @return trail surrogate (U+dc00..U+dfff) for supplementary

	318 * @stable ICU 2.4

	319 */

	320 #define U16_TRAIL(supplementary) (UChar)(((supplementary) & 0x3ff) \| 0xdc00)

	321

	322 // This must be called with the length pre-determined by the first byte.

	323 // If presented with a length > 4, this returns false. The Unicode

	324 // definition of UTF-8 goes up to 4-byte sequences.

	325 static bool isLegalUTF8(const unsigned char* source, int length)

	326 {

	327 unsigned char a;

	328 const unsigned char* srcptr = source + length;

	329 switch (length) {

	330 default:

	331 return false;

	332 // Everything else falls through when "true"...

	333 case 4:

	334 if ((a = (*--srcptr)) < 0x80 \|\| a > 0xBF)

	335 return false;

	336 case 3:

	337 if ((a = (*--srcptr)) < 0x80 \|\| a > 0xBF)

	338 return false;

	339 case 2:

	340 if ((a = (*--srcptr)) > 0xBF)

	341 return false;

	342

	343 // no fall-through in this inner switch

	344 switch (*source) {

	345 case 0xE0:

	346 if (a < 0xA0)

	347 return false;

	348 break;

	349 case 0xED:

	350 if (a > 0x9F)

	351 return false;

	352 break;

	353 case 0xF0:

	354 if (a < 0x90)

	355 return false;

	356 break;

	357 case 0xF4:

	358 if (a > 0x8F)

	359 return false;

	360 break;

	361 default:

	362 if (a < 0x80)

	363 return false;

	364 }

	365

	366 case 1:

	367 if (*source >= 0x80 && *source < 0xC2)

	368 return false;

	369 }

	370 if (*source > 0xF4)

	371 return false;

	372 return true;

	373 }

	374

	375 // Magic values subtracted from a buffer value during UTF8 conversion.

	376 // This table contains as many values as there might be trailing bytes

	377 // in a UTF-8 sequence.

	378 static const UChar32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 0x03C82080UL, static_cast<UChar32>(0xFA082080UL), static_cast<UChar32>(0x82082080UL) };

	379

	380 static inline UChar32 readUTF8Sequence(const char*& sequence, unsigned length)

	381 {

	382 UChar32 character = 0;

	383

	384 // The cases all fall through.

	385 switch (length) {

	386 case 6:

	387 character += static_cast<unsigned char>(*sequence++);

	388 character <<= 6;

	389 case 5:

	390 character += static_cast<unsigned char>(*sequence++);

	391 character <<= 6;

	392 case 4:

	393 character += static_cast<unsigned char>(*sequence++);

	394 character <<= 6;

	395 case 3:

	396 character += static_cast<unsigned char>(*sequence++);

	397 character <<= 6;

	398 case 2:

	399 character += static_cast<unsigned char>(*sequence++);

	400 character <<= 6;

	401 case 1:

	402 character += static_cast<unsigned char>(*sequence++);

	403 }

	404

	405 return character - offsetsFromUTF8[length - 1];

	406 }

	407

	408 ConversionResult convertUTF8ToUTF16(

	409 const char** sourceStart, const char* sourceEnd,

	410 UChar** targetStart, UChar* targetEnd, bool* sourceAllASCII, bool strict)

	411 {

	412 ConversionResult result = conversionOK;

	413 const char* source = *sourceStart;

	414 UChar* target = *targetStart;

	415 UChar orAllData = 0;

	416 while (source < sourceEnd) {

	417 int utf8SequenceLength = inlineUTF8SequenceLength(*source);

	418 if (sourceEnd - source < utf8SequenceLength) {

	419 result = sourceExhausted;

	420 break;

	421 }

	422 // Do this check whether lenient or strict

	423 if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source), utf8SequenceLength)) {

	424 result = sourceIllegal;

	425 break;

	426 }

	427

	428 UChar32 character = readUTF8Sequence(source, utf8SequenceLength);

	429

	430 if (target >= targetEnd) {

	431 source -= utf8SequenceLength; // Back up source pointer!

	432 result = targetExhausted;

	433 break;

	434 }

	435

	436 if (U_IS_BMP(character)) {

	437 // UTF-16 surrogate values are illegal in UTF-32

	438 if (U_IS_SURROGATE(character)) {

	439 if (strict) {

	440 source -= utf8SequenceLength; // return to the illegal value itself

	441 result = sourceIllegal;

	442 break;

	443 }

	444 *target++ = replacementCharacter;

	445 orAllData \|= replacementCharacter;

	446 } else {

	447 *target++ = static_cast<UChar>(character); // normal case

	448 orAllData \|= character;

	449 }

	450 } else if (U_IS_SUPPLEMENTARY(character)) {

	451 // target is a character in range 0xFFFF - 0x10FFFF

	452 if (target + 1 >= targetEnd) {

	453 source -= utf8SequenceLength; // Back up source pointer!

	454 result = targetExhausted;

	455 break;

	456 }

	457 *target++ = U16_LEAD(character);

	458 *target++ = U16_TRAIL(character);

	459 orAllData = 0xffff;

	460 } else {

	461 if (strict) {

	462 source -= utf8SequenceLength; // return to the start

	463 result = sourceIllegal;

	464 break; // Bail out; shouldn't continue

	465 } else {

	466 *target++ = replacementCharacter;

	467 orAllData \|= replacementCharacter;

	468 }

	469 }

	470 }

	471 *sourceStart = source;

	472 *targetStart = target;

	473

	474 if (sourceAllASCII)

	475 *sourceAllASCII = !(orAllData & ~0x7f);

	476

	477 return result;

	478 }

	479

277 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.	480 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.

278 static inline void putUTF8Triple(char*& buffer, UChar ch)	481 static inline void putUTF8Triple(char*& buffer, UChar ch)

279 {	482 {

280 DCHECK_GE(ch, 0x0800);	483 DCHECK_GE(ch, 0x0800);

281 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) \| 0xE0);	484 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) \| 0xE0);

282 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) \| 0x80);	485 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) \| 0x80);

283 *buffer++ = static_cast<char>((ch & 0x3F) \| 0x80);	486 *buffer++ = static_cast<char>((ch & 0x3F) \| 0x80);

284 }	487 }

285	488

	489 String16 String16::fromUTF8(const char* stringStart, size_t length)

	490 {

	491 if (!stringStart \|\| !length)

	492 return String16();

	493

	494 std::vector<UChar> buffer(length);

	495 UChar* bufferStart = buffer.data();

	496

	497 UChar* bufferCurrent = bufferStart;

	498 const char* stringCurrent = stringStart;

	499 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent, bufferCurrent + buffer.size(), 0, true) != conversionOK)

	500 return String16();

	501

	502 unsigned utf16Length = bufferCurrent - bufferStart;

	503 return String16(bufferStart, utf16Length);

	504 }

	505

286 // trim from start	506 // trim from start

287 static inline wstring &ltrim(wstring &s)	507 static inline wstring &ltrim(wstring &s)

288 {	508 {

289 s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<int, int>(std::isspace))));	509 s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<int, int>(std::isspace))));

290 return s;	510 return s;

291 }	511 }

292	512

293 // trim from end	513 // trim from end

294 static inline wstring &rtrim(wstring &s)	514 static inline wstring &rtrim(wstring &s)

295 {	515 {

296 s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int, int>(std::isspace))).base(), s.end());	516 s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int, int>(std::isspace))).base(), s.end());

297 return s;	517 return s;

298 }	518 }

299	519

300 // trim from both ends	520 // trim from both ends

301 static inline wstring &trim(wstring &s)	521 static inline wstring &trim(wstring &s)

302 {	522 {

303 return ltrim(rtrim(s));	523 return ltrim(rtrim(s));

304 }	524 }

305	525

	526 // static

	527 std::string String16::intToString(int i)

	528 {

	529 char buffer[50];

	530 std::sprintf(buffer, "%d", i);

	531 return std::string(buffer);

	532 }

	533

	534 // static

	535 std::string String16::doubleToString(double d)

	536 {

	537 char buffer[100];

	538 std::sprintf(buffer, "%f", d);

	539 return std::string(buffer);

	540 }

	541

306 std::string String16::utf8() const	542 std::string String16::utf8() const

307 {	543 {

308 unsigned length = this->length();	544 unsigned length = this->length();

309	545

310 if (!length)	546 if (!length)

311 return std::string("");	547 return std::string("");

312	548

313 // Allocate a buffer big enough to hold all the characters	549 // Allocate a buffer big enough to hold all the characters

314 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).	550 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).

315 // Optimization ideas, if we find this function is hot:	551 // Optimization ideas, if we find this function is hot:

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
357	593

358 String16 String16::stripWhiteSpace() const	594 String16 String16::stripWhiteSpace() const

359 {	595 {

360 wstring result(m_impl);	596 wstring result(m_impl);

361 trim(result);	597 trim(result);

362 return result;	598 return result;

363 }	599 }

364	600

365 } // namespace protocol	601 } // namespace protocol

366 } // namespace blink	602 } // namespace blink

OLD	NEW