| OLD | NEW |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "platform/inspector_protocol/String16STL.h" | 5 #include "platform/inspector_protocol/String16.h" |
| 6 | |
| 7 #include "platform/inspector_protocol/Platform.h" | |
| 8 | 6 |
| 9 #include <algorithm> | 7 #include <algorithm> |
| 10 #include <cctype> | 8 #include <cctype> |
| 11 #include <cstdio> | 9 #include <cstdio> |
| 12 #include <functional> | |
| 13 #include <locale> | 10 #include <locale> |
| 14 | 11 |
| 15 namespace blink { | 12 namespace blink { |
| 16 namespace protocol { | 13 namespace protocol { |
| 17 | 14 |
| 18 const UChar replacementCharacter = 0xFFFD; | 15 const UChar replacementCharacter = 0xFFFD; |
| 19 | 16 using UChar32 = uint32_t; |
| 20 template<typename CharType> inline bool isASCII(CharType c) | |
| 21 { | |
| 22 return !(c & ~0x7F); | |
| 23 } | |
| 24 | |
| 25 template<typename CharType> inline bool isASCIIAlpha(CharType c) | |
| 26 { | |
| 27 return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; | |
| 28 } | |
| 29 | |
| 30 template<typename CharType> inline bool isASCIIDigit(CharType c) | |
| 31 { | |
| 32 return c >= '0' && c <= '9'; | |
| 33 } | |
| 34 | |
| 35 template<typename CharType> inline bool isASCIIAlphanumeric(CharType c) | |
| 36 { | |
| 37 return isASCIIDigit(c) || isASCIIAlpha(c); | |
| 38 } | |
| 39 | |
| 40 template<typename CharType> inline bool isASCIIHexDigit(CharType c) | |
| 41 { | |
| 42 return isASCIIDigit(c) || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); | |
| 43 } | |
| 44 | |
| 45 template<typename CharType> inline bool isASCIIOctalDigit(CharType c) | |
| 46 { | |
| 47 return (c >= '0') & (c <= '7'); | |
| 48 } | |
| 49 | |
| 50 template<typename CharType> inline bool isASCIIPrintable(CharType c) | |
| 51 { | |
| 52 return c >= ' ' && c <= '~'; | |
| 53 } | |
| 54 | |
| 55 /* | |
| 56 Statistics from a run of Apple's page load test for callers of isASCIISpace: | |
| 57 | |
| 58 character count | |
| 59 --------- ----- | |
| 60 non-spaces 689383 | |
| 61 20 space 294720 | |
| 62 0A \n 89059 | |
| 63 09 \t 28320 | |
| 64 0D \r 0 | |
| 65 0C \f 0 | |
| 66 0B \v 0 | |
| 67 */ | |
| 68 template<typename CharType> inline bool isASCIISpace(CharType c) | |
| 69 { | |
| 70 return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); | |
| 71 } | |
| 72 | |
| 73 extern const LChar ASCIICaseFoldTable[256] = { | |
| 74 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, | |
| 75 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, | |
| 76 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, | |
| 77 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, | |
| 78 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, | |
| 79 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, | |
| 80 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, | |
| 81 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, | |
| 82 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, | |
| 83 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, | |
| 84 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, | |
| 85 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, | |
| 86 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, | |
| 87 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, | |
| 88 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, | |
| 89 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff | |
| 90 }; | |
| 91 | |
| 92 template<typename CharType> inline int toASCIIHexValue(CharType c) | |
| 93 { | |
| 94 DCHECK(isASCIIHexDigit(c)); | |
| 95 return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; | |
| 96 } | |
| 97 | |
| 98 template<typename CharType> inline int toASCIIHexValue(CharType upperValue, CharType lowerValue) | |
| 99 { | |
| 100 DCHECK(isASCIIHexDigit(upperValue) && isASCIIHexDigit(lowerValue)); | |
| 101 return ((toASCIIHexValue(upperValue) << 4) & 0xF0) | toASCIIHexValue(lowerValue); | |
| 102 } | |
| 103 | |
| 104 inline char lowerNibbleToASCIIHexDigit(char c) | |
| 105 { | |
| 106 char nibble = c & 0xF; | |
| 107 return nibble < 10 ? '0' + nibble : 'A' + nibble - 10; | |
| 108 } | |
| 109 | |
| 110 inline char upperNibbleToASCIIHexDigit(char c) | |
| 111 { | |
| 112 char nibble = (c >> 4) & 0xF; | |
| 113 return nibble < 10 ? '0' + nibble : 'A' + nibble - 10; | |
| 114 } | |
| 115 | 17 |
| 116 inline int inlineUTF8SequenceLengthNonASCII(char b0) | 18 inline int inlineUTF8SequenceLengthNonASCII(char b0) |
| 117 { | 19 { |
| 118 if ((b0 & 0xC0) != 0xC0) | 20 if ((b0 & 0xC0) != 0xC0) |
| 119 return 0; | 21 return 0; |
| 120 if ((b0 & 0xE0) == 0xC0) | 22 if ((b0 & 0xE0) == 0xC0) |
| 121 return 2; | 23 return 2; |
| 122 if ((b0 & 0xF0) == 0xE0) | 24 if ((b0 & 0xF0) == 0xE0) |
| 123 return 3; | 25 return 3; |
| 124 if ((b0 & 0xF8) == 0xF0) | 26 if ((b0 & 0xF8) == 0xF0) |
| 125 return 4; | 27 return 4; |
| 126 return 0; | 28 return 0; |
| 127 } | 29 } |
| 128 | 30 |
| 129 inline int inlineUTF8SequenceLength(char b0) | 31 inline int inlineUTF8SequenceLength(char b0) |
| 130 { | 32 { |
| 131 return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0); | 33 return String16::isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0); |
| 132 } | 34 } |
| 133 | 35 |
| 134 // Once the bits are split out into bytes of UTF-8, this is a mask OR-ed | 36 // Once the bits are split out into bytes of UTF-8, this is a mask OR-ed |
| 135 // into the first byte, depending on how many bytes follow. There are | 37 // into the first byte, depending on how many bytes follow. There are |
| 136 // as many entries in this table as there are UTF-8 sequence types. | 38 // as many entries in this table as there are UTF-8 sequence types. |
| 137 // (I.e., one byte sequence, two byte... etc.). Remember that sequences | 39 // (I.e., one byte sequence, two byte... etc.). Remember that sequences |
| 138 // for *legal* UTF-8 will be 4 or fewer bytes total. | 40 // for *legal* UTF-8 will be 4 or fewer bytes total. |
| 139 static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; | 41 static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; |
| 140 | 42 |
| 141 typedef enum { | 43 typedef enum { |
| 142 conversionOK, // conversion successful | 44 conversionOK, // conversion successful |
| 143 sourceExhausted, // partial character in source, but hit end | 45 sourceExhausted, // partial character in source, but hit end |
| 144 targetExhausted, // insuff. room in target for conversion | 46 targetExhausted, // insuff. room in target for conversion |
| 145 sourceIllegal // source sequence is illegal/malformed | 47 sourceIllegal // source sequence is illegal/malformed |
| 146 } ConversionResult; | 48 } ConversionResult; |
| 147 | 49 |
| 148 ConversionResult convertLatin1ToUTF8( | |
| 149 const LChar** sourceStart, const LChar* sourceEnd, | |
| 150 char** targetStart, char* targetEnd) | |
| 151 { | |
| 152 ConversionResult result = conversionOK; | |
| 153 const LChar* source = *sourceStart; | |
| 154 char* target = *targetStart; | |
| 155 while (source < sourceEnd) { | |
| 156 UChar32 ch; | |
| 157 unsigned short bytesToWrite = 0; | |
| 158 const UChar32 byteMask = 0xBF; | |
| 159 const UChar32 byteMark = 0x80; | |
| 160 const LChar* oldSource = source; // In case we have to back up because of target overflow. | |
| 161 ch = static_cast<unsigned short>(*source++); | |
| 162 | |
| 163 // Figure out how many bytes the result will require | |
| 164 if (ch < (UChar32)0x80) | |
| 165 bytesToWrite = 1; | |
| 166 else | |
| 167 bytesToWrite = 2; | |
| 168 | |
| 169 target += bytesToWrite; | |
| 170 if (target > targetEnd) { | |
| 171 source = oldSource; // Back up source pointer! | |
| 172 target -= bytesToWrite; | |
| 173 result = targetExhausted; | |
| 174 break; | |
| 175 } | |
| 176 switch (bytesToWrite) { // note: everything falls through. | |
| 177 case 2: | |
| 178 *--target = (char)((ch | byteMark) & byteMask); | |
| 179 ch >>= 6; | |
| 180 case 1: | |
| 181 *--target = (char)(ch | firstByteMark[bytesToWrite]); | |
| 182 } | |
| 183 target += bytesToWrite; | |
| 184 } | |
| 185 *sourceStart = source; | |
| 186 *targetStart = target; | |
| 187 return result; | |
| 188 } | |
| 189 | |
| 190 ConversionResult convertUTF16ToUTF8( | 50 ConversionResult convertUTF16ToUTF8( |
| 191 const UChar** sourceStart, const UChar* sourceEnd, | 51 const UChar** sourceStart, const UChar* sourceEnd, |
| 192 char** targetStart, char* targetEnd, bool strict) | 52 char** targetStart, char* targetEnd, bool strict) |
| 193 { | 53 { |
| 194 ConversionResult result = conversionOK; | 54 ConversionResult result = conversionOK; |
| 195 const UChar* source = *sourceStart; | 55 const UChar* source = *sourceStart; |
| 196 char* target = *targetStart; | 56 char* target = *targetStart; |
| 197 while (source < sourceEnd) { | 57 while (source < sourceEnd) { |
| 198 UChar32 ch; | 58 UChar32 ch; |
| 199 unsigned short bytesToWrite = 0; | 59 unsigned short bytesToWrite = 0; |
| (...skipping 288 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 488 | 348 |
| 489 UChar* bufferCurrent = bufferStart; | 349 UChar* bufferCurrent = bufferStart; |
| 490 const char* stringCurrent = stringStart; | 350 const char* stringCurrent = stringStart; |
| 491 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent, bufferCurrent + buffer.size(), 0, true) != conversionOK) | 351 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent, bufferCurrent + buffer.size(), 0, true) != conversionOK) |
| 492 return String16(); | 352 return String16(); |
| 493 | 353 |
| 494 unsigned utf16Length = bufferCurrent - bufferStart; | 354 unsigned utf16Length = bufferCurrent - bufferStart; |
| 495 return String16(bufferStart, utf16Length); | 355 return String16(bufferStart, utf16Length); |
| 496 } | 356 } |
| 497 | 357 |
| 498 // trim from start | |
| 499 static inline wstring &ltrim(wstring &s) | |
| 500 { | |
| 501 s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<int, int>(std::isspace)))); | |
| 502 return s; | |
| 503 } | |
| 504 | |
| 505 // trim from end | |
| 506 static inline wstring &rtrim(wstring &s) | |
| 507 { | |
| 508 s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int, int>(std::isspace))).base(), s.end()); | |
| 509 return s; | |
| 510 } | |
| 511 | |
| 512 // trim from both ends | |
| 513 static inline wstring &trim(wstring &s) | |
| 514 { | |
| 515 return ltrim(rtrim(s)); | |
| 516 } | |
| 517 | |
| 518 // static | |
| 519 std::string String16::intToString(int i) | |
| 520 { | |
| 521 char buffer[50]; | |
| 522 std::sprintf(buffer, "%d", i); | |
| 523 return std::string(buffer); | |
| 524 } | |
| 525 | |
| 526 // static | |
| 527 std::string String16::doubleToString(double d) | |
| 528 { | |
| 529 char buffer[100]; | |
| 530 std::sprintf(buffer, "%f", d); | |
| 531 return std::string(buffer); | |
| 532 } | |
| 533 | |
| 534 std::string String16::utf8() const | 358 std::string String16::utf8() const |
| 535 { | 359 { |
| 536 unsigned length = this->length(); | 360 unsigned length = this->length(); |
| 537 | 361 |
| 538 if (!length) | 362 if (!length) |
| 539 return std::string(""); | 363 return std::string(""); |
| 540 | 364 |
| 541 // Allocate a buffer big enough to hold all the characters | 365 // Allocate a buffer big enough to hold all the characters |
| 542 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). | 366 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). |
| 543 // Optimization ideas, if we find this function is hot: | 367 // Optimization ideas, if we find this function is hot: |
| (...skipping 26 matching lines...) Expand all Loading... |
| 570 DCHECK((*characters >= 0xD800) && (*characters <= 0xDBFF)); | 394 DCHECK((*characters >= 0xD800) && (*characters <= 0xDBFF)); |
| 571 // There should be room left, since one UChar hasn't been | 395 // There should be room left, since one UChar hasn't been |
| 572 // converted. | 396 // converted. |
| 573 DCHECK((buffer + 3) <= (buffer + bufferVector.size())); | 397 DCHECK((buffer + 3) <= (buffer + bufferVector.size())); |
| 574 putUTF8Triple(buffer, *characters); | 398 putUTF8Triple(buffer, *characters); |
| 575 } | 399 } |
| 576 | 400 |
| 577 return std::string(bufferVector.data(), buffer - bufferVector.data()); | 401 return std::string(bufferVector.data(), buffer - bufferVector.data()); |
| 578 } | 402 } |
| 579 | 403 |
| 580 String16 String16::stripWhiteSpace() const | |
| 581 { | |
| 582 wstring result(m_impl); | |
| 583 trim(result); | |
| 584 return result; | |
| 585 } | |
| 586 | |
| 587 } // namespace protocol | 404 } // namespace protocol |
| 588 } // namespace blink | 405 } // namespace blink |
| OLD | NEW |