third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp - Issue 2226863003: [DevTools] Reduce API surface of String16.

Side by Side Diff: third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp

Issue 2226863003: [DevTools] Reduce API surface of String16. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: wrong vector usage Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« third_party/WebKit/Source/platform/inspector_protocol/String16STL.h ('K') | « third_party/WebKit/Source/platform/inspector_protocol/String16STL.h ('k') | third_party/WebKit/Source/platform/inspector_protocol/String16WTF.h » ('j') | third_party/WebKit/Source/platform/inspector_protocol/String16WTF.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved.	1 // Copyright 2016 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "platform/inspector_protocol/String16STL.h"	5 #include "platform/inspector_protocol/String16.h"

6

7 #include "platform/inspector_protocol/Platform.h"

8	6

9 #include <algorithm>	7 #include <algorithm>

10 #include <cctype>	8 #include <cctype>

11 #include <cstdio>	9 #include <cstdio>

12 #include <functional>

13 #include <locale>	10 #include <locale>

14	11

15 namespace blink {	12 namespace blink {

16 namespace protocol {	13 namespace protocol {

17	14

18 const UChar replacementCharacter = 0xFFFD;	15 const UChar replacementCharacter = 0xFFFD;

19	16 using UChar32 = uint32_t;

20 template<typename CharType> inline bool isASCII(CharType c)

21 {

22 return !(c & ~0x7F);

23 }

24

25 template<typename CharType> inline bool isASCIIAlpha(CharType c)

26 {

27 return (c \| 0x20) >= 'a' && (c \| 0x20) <= 'z';

28 }

29

30 template<typename CharType> inline bool isASCIIDigit(CharType c)

31 {

32 return c >= '0' && c <= '9';

33 }

34

35 template<typename CharType> inline bool isASCIIAlphanumeric(CharType c)

36 {

37 return isASCIIDigit(c) \|\| isASCIIAlpha(c);

38 }

39

40 template<typename CharType> inline bool isASCIIHexDigit(CharType c)

41 {

42 return isASCIIDigit(c) \|\| ((c \| 0x20) >= 'a' && (c \| 0x20) <= 'f');

43 }

44

45 template<typename CharType> inline bool isASCIIOctalDigit(CharType c)

46 {

47 return (c >= '0') & (c <= '7');

48 }

49

50 template<typename CharType> inline bool isASCIIPrintable(CharType c)

51 {

52 return c >= ' ' && c <= '~';

53 }

54

55 /*

56 Statistics from a run of Apple's page load test for callers of isASCIISpace:

57

58 character count

59 --------- -----

60 non-spaces 689383

61 20 space 294720

62 0A \n 89059

63 09 \t 28320

64 0D \r 0

65 0C \f 0

66 0B \v 0

67 */

68 template<typename CharType> inline bool isASCIISpace(CharType c)

69 {

70 return c <= ' ' && (c == ' ' \|\| (c <= 0xD && c >= 0x9));

71 }

72

73 extern const LChar ASCIICaseFoldTable[256] = {

74 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,

75 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,

76 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,

77 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,

78 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,

79 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,

80 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,

81 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,

82 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,

83 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,

84 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,

85 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,

86 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,

87 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,

88 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,

89 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff

90 };

91

92 template<typename CharType> inline int toASCIIHexValue(CharType c)

93 {

94 DCHECK(isASCIIHexDigit(c));

95 return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF;

96 }

97

98 template<typename CharType> inline int toASCIIHexValue(CharType upperValue, CharType lowerValue)

99 {

100 DCHECK(isASCIIHexDigit(upperValue) && isASCIIHexDigit(lowerValue));

101 return ((toASCIIHexValue(upperValue) << 4) & 0xF0) \| toASCIIHexValue(lowerValue);

102 }

103

104 inline char lowerNibbleToASCIIHexDigit(char c)

105 {

106 char nibble = c & 0xF;

107 return nibble < 10 ? '0' + nibble : 'A' + nibble - 10;

108 }

109

110 inline char upperNibbleToASCIIHexDigit(char c)

111 {

112 char nibble = (c >> 4) & 0xF;

113 return nibble < 10 ? '0' + nibble : 'A' + nibble - 10;

114 }

115	17

116 inline int inlineUTF8SequenceLengthNonASCII(char b0)	18 inline int inlineUTF8SequenceLengthNonASCII(char b0)

117 {	19 {

118 if ((b0 & 0xC0) != 0xC0)	20 if ((b0 & 0xC0) != 0xC0)

119 return 0;	21 return 0;

120 if ((b0 & 0xE0) == 0xC0)	22 if ((b0 & 0xE0) == 0xC0)

121 return 2;	23 return 2;

122 if ((b0 & 0xF0) == 0xE0)	24 if ((b0 & 0xF0) == 0xE0)

123 return 3;	25 return 3;

124 if ((b0 & 0xF8) == 0xF0)	26 if ((b0 & 0xF8) == 0xF0)

125 return 4;	27 return 4;

126 return 0;	28 return 0;

127 }	29 }

128	30

129 inline int inlineUTF8SequenceLength(char b0)	31 inline int inlineUTF8SequenceLength(char b0)

130 {	32 {

131 return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0);	33 return String16::isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0);

132 }	34 }

133	35

134 // Once the bits are split out into bytes of UTF-8, this is a mask OR-ed	36 // Once the bits are split out into bytes of UTF-8, this is a mask OR-ed

135 // into the first byte, depending on how many bytes follow. There are	37 // into the first byte, depending on how many bytes follow. There are

136 // as many entries in this table as there are UTF-8 sequence types.	38 // as many entries in this table as there are UTF-8 sequence types.

137 // (I.e., one byte sequence, two byte... etc.). Remember that sequences	39 // (I.e., one byte sequence, two byte... etc.). Remember that sequences

138 // for legal UTF-8 will be 4 or fewer bytes total.	40 // for legal UTF-8 will be 4 or fewer bytes total.

139 static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };	41 static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

140	42

141 typedef enum {	43 typedef enum {

142 conversionOK, // conversion successful	44 conversionOK, // conversion successful

143 sourceExhausted, // partial character in source, but hit end	45 sourceExhausted, // partial character in source, but hit end

144 targetExhausted, // insuff. room in target for conversion	46 targetExhausted, // insuff. room in target for conversion

145 sourceIllegal // source sequence is illegal/malformed	47 sourceIllegal // source sequence is illegal/malformed

146 } ConversionResult;	48 } ConversionResult;

147	49

148 ConversionResult convertLatin1ToUTF8(

149 const LChar** sourceStart, const LChar* sourceEnd,

150 char** targetStart, char* targetEnd)

151 {

152 ConversionResult result = conversionOK;

153 const LChar* source = *sourceStart;

154 char* target = *targetStart;

155 while (source < sourceEnd) {

156 UChar32 ch;

157 unsigned short bytesToWrite = 0;

158 const UChar32 byteMask = 0xBF;

159 const UChar32 byteMark = 0x80;

160 const LChar* oldSource = source; // In case we have to back up because of target overflow.

161 ch = static_cast<unsigned short>(*source++);

162

163 // Figure out how many bytes the result will require

164 if (ch < (UChar32)0x80)

165 bytesToWrite = 1;

166 else

167 bytesToWrite = 2;

168

169 target += bytesToWrite;

170 if (target > targetEnd) {

171 source = oldSource; // Back up source pointer!

172 target -= bytesToWrite;

173 result = targetExhausted;

174 break;

175 }

176 switch (bytesToWrite) { // note: everything falls through.

177 case 2:

178 *--target = (char)((ch \| byteMark) & byteMask);

179 ch >>= 6;

180 case 1:

181 *--target = (char)(ch \| firstByteMark[bytesToWrite]);

182 }

183 target += bytesToWrite;

184 }

185 *sourceStart = source;

186 *targetStart = target;

187 return result;

188 }

189

190 ConversionResult convertUTF16ToUTF8(	50 ConversionResult convertUTF16ToUTF8(

191 const UChar** sourceStart, const UChar* sourceEnd,	51 const UChar** sourceStart, const UChar* sourceEnd,

192 char** targetStart, char* targetEnd, bool strict)	52 char** targetStart, char* targetEnd, bool strict)

193 {	53 {

194 ConversionResult result = conversionOK;	54 ConversionResult result = conversionOK;

195 const UChar* source = *sourceStart;	55 const UChar* source = *sourceStart;

196 char* target = *targetStart;	56 char* target = *targetStart;

197 while (source < sourceEnd) {	57 while (source < sourceEnd) {

198 UChar32 ch;	58 UChar32 ch;

199 unsigned short bytesToWrite = 0;	59 unsigned short bytesToWrite = 0;

(...skipping 288 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
488	348

489 UChar* bufferCurrent = bufferStart;	349 UChar* bufferCurrent = bufferStart;

490 const char* stringCurrent = stringStart;	350 const char* stringCurrent = stringStart;

491 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent, bufferCurrent + buffer.size(), 0, true) != conversionOK)	351 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent, bufferCurrent + buffer.size(), 0, true) != conversionOK)

492 return String16();	352 return String16();

493	353

494 unsigned utf16Length = bufferCurrent - bufferStart;	354 unsigned utf16Length = bufferCurrent - bufferStart;

495 return String16(bufferStart, utf16Length);	355 return String16(bufferStart, utf16Length);

496 }	356 }

497	357

498 // trim from start

499 static inline wstring &ltrim(wstring &s)

500 {

501 s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<int, int>(std::isspace))));

502 return s;

503 }

504

505 // trim from end

506 static inline wstring &rtrim(wstring &s)

507 {

508 s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int, int>(std::isspace))).base(), s.end());

509 return s;

510 }

511

512 // trim from both ends

513 static inline wstring &trim(wstring &s)

514 {

515 return ltrim(rtrim(s));

516 }

517

518 // static

519 std::string String16::intToString(int i)

520 {

521 char buffer[50];

522 std::sprintf(buffer, "%d", i);

523 return std::string(buffer);

524 }

525

526 // static

527 std::string String16::doubleToString(double d)

528 {

529 char buffer[100];

530 std::sprintf(buffer, "%f", d);

531 return std::string(buffer);

532 }

533

534 std::string String16::utf8() const	358 std::string String16::utf8() const

535 {	359 {

536 unsigned length = this->length();	360 unsigned length = this->length();

537	361

538 if (!length)	362 if (!length)

539 return std::string("");	363 return std::string("");

540	364

541 // Allocate a buffer big enough to hold all the characters	365 // Allocate a buffer big enough to hold all the characters

542 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).	366 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).

543 // Optimization ideas, if we find this function is hot:	367 // Optimization ideas, if we find this function is hot:

(...skipping 26 matching lines...) Expand all Loading...
570 DCHECK((characters >= 0xD800) && (characters <= 0xDBFF));	394 DCHECK((characters >= 0xD800) && (characters <= 0xDBFF));

571 // There should be room left, since one UChar hasn't been	395 // There should be room left, since one UChar hasn't been

572 // converted.	396 // converted.

573 DCHECK((buffer + 3) <= (buffer + bufferVector.size()));	397 DCHECK((buffer + 3) <= (buffer + bufferVector.size()));

574 putUTF8Triple(buffer, *characters);	398 putUTF8Triple(buffer, *characters);

575 }	399 }

576	400

577 return std::string(bufferVector.data(), buffer - bufferVector.data());	401 return std::string(bufferVector.data(), buffer - bufferVector.data());

578 }	402 }

579	403

580 String16 String16::stripWhiteSpace() const

581 {

582 wstring result(m_impl);

583 trim(result);

584 return result;

585 }

586

587 } // namespace protocol	404 } // namespace protocol

588 } // namespace blink	405 } // namespace blink

OLD	NEW