| OLD | NEW |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "platform/inspector_protocol/String16STL.h" | 5 #include "platform/inspector_protocol/String16STL.h" |
| 6 | 6 |
| 7 #include "platform/inspector_protocol/Platform.h" |
| 8 |
| 7 #include <algorithm> | 9 #include <algorithm> |
| 8 #include <cctype> | 10 #include <cctype> |
| 11 #include <cstdio> |
| 9 #include <functional> | 12 #include <functional> |
| 10 #include <locale> | 13 #include <locale> |
| 11 | 14 |
| 12 namespace blink { | 15 namespace blink { |
| 13 namespace protocol { | 16 namespace protocol { |
| 14 | 17 |
| 15 const UChar replacementCharacter = 0xFFFD; | 18 const UChar replacementCharacter = 0xFFFD; |
| 16 | 19 |
| 17 template<typename CharType> inline bool isASCII(CharType c) | 20 template<typename CharType> inline bool isASCII(CharType c) |
| 18 { | 21 { |
| (...skipping 248 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 267 case 1: | 270 case 1: |
| 268 *--target = (char)(ch | firstByteMark[bytesToWrite]); | 271 *--target = (char)(ch | firstByteMark[bytesToWrite]); |
| 269 } | 272 } |
| 270 target += bytesToWrite; | 273 target += bytesToWrite; |
| 271 } | 274 } |
| 272 *sourceStart = source; | 275 *sourceStart = source; |
| 273 *targetStart = target; | 276 *targetStart = target; |
| 274 return result; | 277 return result; |
| 275 } | 278 } |
| 276 | 279 |
/**
 * Tests whether a code point lies in the Basic Multilingual Plane
 * (U+0000..U+ffff), i.e. fits in a single UTF-16 code unit.
 * @param c 32-bit code point
 * @return nonzero if c is a BMP code point, zero otherwise
 * @stable ICU 2.8
 */
#define U_IS_BMP(c) ((uint32_t)(c) <= 0xffff)

/**
 * Tests whether a code point lies in one of the supplementary planes
 * (U+10000..U+10ffff). The subtraction is evaluated before the unsigned
 * cast so that values below 0x10000 wrap to a large number and fail the
 * comparison.
 * @param c 32-bit code point
 * @return nonzero if c is a supplementary code point, zero otherwise
 * @stable ICU 2.8
 */
#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c) - 0x10000) <= 0xfffff)

/**
 * Tests whether a code point falls in the surrogate range (U+d800..U+dfff).
 * Masking off the low 11 bits leaves exactly 0xd800 for every value in
 * that range.
 * @param c 32-bit code point
 * @return nonzero if c is a surrogate, zero otherwise
 * @stable ICU 2.4
 */
#define U_IS_SURROGATE(c) (((c) & 0xfffff800) == 0xd800)

/**
 * Computes the lead (high) surrogate, in 0xd800..0xdbff, of the UTF-16
 * pair that encodes a supplementary code point.
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xd7c0)

/**
 * Computes the trail (low) surrogate, in 0xdc00..0xdfff, of the UTF-16
 * pair that encodes a supplementary code point.
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) (UChar)(((supplementary) & 0x3ff) | 0xdc00)
| 321 |
// Checks that the |length| bytes starting at |source| form one well-formed
// UTF-8 sequence.
//
// This must be called with the length pre-determined by the first byte; the
// function does not verify that the lead byte actually announces |length|.
// Any length outside 1..4 yields false, because the Unicode definition of
// UTF-8 goes up to 4-byte sequences only.
//
// Rejected inputs:
//  - trailing bytes outside 0x80..0xBF,
//  - overlong forms (lead 0xC0/0xC1, 0xE0 followed by < 0xA0,
//    0xF0 followed by < 0x90),
//  - encoded surrogates (0xED followed by > 0x9F),
//  - values above U+10FFFF (0xF4 followed by > 0x8F, or lead > 0xF4),
//  - a bare continuation byte used as lead (0x80..0xBF).
static bool isLegalUTF8(const unsigned char* source, int length)
{
    if (length < 1 || length > 4)
        return false;

    const unsigned char lead = source[0];

    // Third and fourth bytes, when present, are plain continuation bytes.
    for (int index = length - 1; index >= 2; --index) {
        const unsigned char trail = source[index];
        if (trail < 0x80 || trail > 0xBF)
            return false;
    }

    if (length >= 2) {
        const unsigned char second = source[1];

        // The second byte must be a continuation byte for every lead byte.
        // Checking the lower bound here (and not only in the fallback branch
        // of the lead-specific switch) keeps sequences such as ED 41 80 or
        // F4 20 80 80 from being accepted.
        if (second < 0x80 || second > 0xBF)
            return false;

        // Some lead bytes narrow the permitted range of the second byte.
        switch (lead) {
        case 0xE0: // below 0xA0 would be an overlong 3-byte form
            if (second < 0xA0)
                return false;
            break;
        case 0xED: // above 0x9F would encode a surrogate (U+D800..U+DFFF)
            if (second > 0x9F)
                return false;
            break;
        case 0xF0: // below 0x90 would be an overlong 4-byte form
            if (second < 0x90)
                return false;
            break;
        case 0xF4: // above 0x8F would exceed U+10FFFF
            if (second > 0x8F)
                return false;
            break;
        default:
            break;
        }
    }

    // 0x80..0xBF are continuation bytes; 0xC0 and 0xC1 only start overlong forms.
    if (lead >= 0x80 && lead < 0xC2)
        return false;

    // Lead bytes above 0xF4 would encode values past U+10FFFF.
    if (lead > 0xF4)
        return false;

    return true;
}
| 374 |
| 375 // Magic values subtracted from a buffer value during UTF8 conversion. |
| 376 // This table contains as many values as there might be trailing bytes |
| 377 // in a UTF-8 sequence. |
| 378 static const UChar32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E20
80UL, 0x03C82080UL, static_cast<UChar32>(0xFA082080UL), static_cast<UChar32>(0x8
2082080UL) }; |
| 379 |
| 380 static inline UChar32 readUTF8Sequence(const char*& sequence, unsigned length) |
| 381 { |
| 382 UChar32 character = 0; |
| 383 |
| 384 // The cases all fall through. |
| 385 switch (length) { |
| 386 case 6: |
| 387 character += static_cast<unsigned char>(*sequence++); |
| 388 character <<= 6; |
| 389 case 5: |
| 390 character += static_cast<unsigned char>(*sequence++); |
| 391 character <<= 6; |
| 392 case 4: |
| 393 character += static_cast<unsigned char>(*sequence++); |
| 394 character <<= 6; |
| 395 case 3: |
| 396 character += static_cast<unsigned char>(*sequence++); |
| 397 character <<= 6; |
| 398 case 2: |
| 399 character += static_cast<unsigned char>(*sequence++); |
| 400 character <<= 6; |
| 401 case 1: |
| 402 character += static_cast<unsigned char>(*sequence++); |
| 403 } |
| 404 |
| 405 return character - offsetsFromUTF8[length - 1]; |
| 406 } |
| 407 |
| 408 ConversionResult convertUTF8ToUTF16( |
| 409 const char** sourceStart, const char* sourceEnd, |
| 410 UChar** targetStart, UChar* targetEnd, bool* sourceAllASCII, bool strict) |
| 411 { |
| 412 ConversionResult result = conversionOK; |
| 413 const char* source = *sourceStart; |
| 414 UChar* target = *targetStart; |
| 415 UChar orAllData = 0; |
| 416 while (source < sourceEnd) { |
| 417 int utf8SequenceLength = inlineUTF8SequenceLength(*source); |
| 418 if (sourceEnd - source < utf8SequenceLength) { |
| 419 result = sourceExhausted; |
| 420 break; |
| 421 } |
| 422 // Do this check whether lenient or strict |
| 423 if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source), utf8Seq
uenceLength)) { |
| 424 result = sourceIllegal; |
| 425 break; |
| 426 } |
| 427 |
| 428 UChar32 character = readUTF8Sequence(source, utf8SequenceLength); |
| 429 |
| 430 if (target >= targetEnd) { |
| 431 source -= utf8SequenceLength; // Back up source pointer! |
| 432 result = targetExhausted; |
| 433 break; |
| 434 } |
| 435 |
| 436 if (U_IS_BMP(character)) { |
| 437 // UTF-16 surrogate values are illegal in UTF-32 |
| 438 if (U_IS_SURROGATE(character)) { |
| 439 if (strict) { |
| 440 source -= utf8SequenceLength; // return to the illegal value
itself |
| 441 result = sourceIllegal; |
| 442 break; |
| 443 } |
| 444 *target++ = replacementCharacter; |
| 445 orAllData |= replacementCharacter; |
| 446 } else { |
| 447 *target++ = static_cast<UChar>(character); // normal case |
| 448 orAllData |= character; |
| 449 } |
| 450 } else if (U_IS_SUPPLEMENTARY(character)) { |
| 451 // target is a character in range 0xFFFF - 0x10FFFF |
| 452 if (target + 1 >= targetEnd) { |
| 453 source -= utf8SequenceLength; // Back up source pointer! |
| 454 result = targetExhausted; |
| 455 break; |
| 456 } |
| 457 *target++ = U16_LEAD(character); |
| 458 *target++ = U16_TRAIL(character); |
| 459 orAllData = 0xffff; |
| 460 } else { |
| 461 if (strict) { |
| 462 source -= utf8SequenceLength; // return to the start |
| 463 result = sourceIllegal; |
| 464 break; // Bail out; shouldn't continue |
| 465 } else { |
| 466 *target++ = replacementCharacter; |
| 467 orAllData |= replacementCharacter; |
| 468 } |
| 469 } |
| 470 } |
| 471 *sourceStart = source; |
| 472 *targetStart = target; |
| 473 |
| 474 if (sourceAllASCII) |
| 475 *sourceAllASCII = !(orAllData & ~0x7f); |
| 476 |
| 477 return result; |
| 478 } |
| 479 |
| 277 // Helper to write a three-byte UTF-8 code point to the buffer, caller must chec
k room is available. | 480 // Helper to write a three-byte UTF-8 code point to the buffer, caller must chec
k room is available. |
| 278 static inline void putUTF8Triple(char*& buffer, UChar ch) | 481 static inline void putUTF8Triple(char*& buffer, UChar ch) |
| 279 { | 482 { |
| 280 DCHECK_GE(ch, 0x0800); | 483 DCHECK_GE(ch, 0x0800); |
| 281 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); | 484 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); |
| 282 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); | 485 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); |
| 283 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); | 486 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); |
| 284 } | 487 } |
| 285 | 488 |
| 489 String16 String16::fromUTF8(const char* stringStart, size_t length) |
| 490 { |
| 491 if (!stringStart || !length) |
| 492 return String16(); |
| 493 |
| 494 std::vector<UChar> buffer(length); |
| 495 UChar* bufferStart = buffer.data(); |
| 496 |
| 497 UChar* bufferCurrent = bufferStart; |
| 498 const char* stringCurrent = stringStart; |
| 499 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent,
bufferCurrent + buffer.size(), 0, true) != conversionOK) |
| 500 return String16(); |
| 501 |
| 502 unsigned utf16Length = bufferCurrent - bufferStart; |
| 503 return String16(bufferStart, utf16Length); |
| 504 } |
| 505 |
// Removes leading whitespace from |s| in place and returns |s|.
//
// Whitespace is whatever std::isspace(int) reports for the current C locale.
// That function is only defined for values representable as unsigned char,
// so code units above 0xFF are treated as non-whitespace instead of being
// passed through (which would be undefined behaviour).
static inline wstring &ltrim(wstring &s)
{
    auto firstKept = std::find_if(s.begin(), s.end(), [](wstring::value_type c) {
        const unsigned long code = static_cast<unsigned long>(c);
        return code > 0xFF || !std::isspace(static_cast<int>(code));
    });
    s.erase(s.begin(), firstKept);
    return s;
}
| 292 | 512 |
// Removes trailing whitespace from |s| in place and returns |s|.
//
// Whitespace is whatever std::isspace(int) reports for the current C locale.
// That function is only defined for values representable as unsigned char,
// so code units above 0xFF are treated as non-whitespace instead of being
// passed through (which would be undefined behaviour).
static inline wstring &rtrim(wstring &s)
{
    auto lastKept = std::find_if(s.rbegin(), s.rend(), [](wstring::value_type c) {
        const unsigned long code = static_cast<unsigned long>(c);
        return code > 0xFF || !std::isspace(static_cast<int>(code));
    });
    // base() of the reverse iterator points one past the last kept character.
    s.erase(lastKept.base(), s.end());
    return s;
}
| 299 | 519 |
| 300 // trim from both ends | 520 // trim from both ends |
| 301 static inline wstring &trim(wstring &s) | 521 static inline wstring &trim(wstring &s) |
| 302 { | 522 { |
| 303 return ltrim(rtrim(s)); | 523 return ltrim(rtrim(s)); |
| 304 } | 524 } |
| 305 | 525 |
| 526 // static |
| 527 std::string String16::intToString(int i) |
| 528 { |
| 529 char buffer[50]; |
| 530 std::sprintf(buffer, "%d", i); |
| 531 return std::string(buffer); |
| 532 } |
| 533 |
| 534 // static |
| 535 std::string String16::doubleToString(double d) |
| 536 { |
| 537 char buffer[100]; |
| 538 std::sprintf(buffer, "%f", d); |
| 539 return std::string(buffer); |
| 540 } |
| 541 |
| 306 std::string String16::utf8() const | 542 std::string String16::utf8() const |
| 307 { | 543 { |
| 308 unsigned length = this->length(); | 544 unsigned length = this->length(); |
| 309 | 545 |
| 310 if (!length) | 546 if (!length) |
| 311 return std::string(""); | 547 return std::string(""); |
| 312 | 548 |
| 313 // Allocate a buffer big enough to hold all the characters | 549 // Allocate a buffer big enough to hold all the characters |
| 314 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). | 550 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). |
| 315 // Optimization ideas, if we find this function is hot: | 551 // Optimization ideas, if we find this function is hot: |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 357 | 593 |
| 358 String16 String16::stripWhiteSpace() const | 594 String16 String16::stripWhiteSpace() const |
| 359 { | 595 { |
| 360 wstring result(m_impl); | 596 wstring result(m_impl); |
| 361 trim(result); | 597 trim(result); |
| 362 return result; | 598 return result; |
| 363 } | 599 } |
| 364 | 600 |
| 365 } // namespace protocol | 601 } // namespace protocol |
| 366 } // namespace blink | 602 } // namespace blink |
| OLD | NEW |