OLD | NEW |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "platform/inspector_protocol/String16STL.h" | 5 #include "platform/inspector_protocol/String16.h" |
6 | |
7 #include "platform/inspector_protocol/Platform.h" | |
8 | 6 |
9 #include <algorithm> | 7 #include <algorithm> |
10 #include <cctype> | 8 #include <cctype> |
11 #include <cstdio> | 9 #include <cstdio> |
12 #include <functional> | |
13 #include <locale> | 10 #include <locale> |
14 | 11 |
15 namespace blink { | 12 namespace blink { |
16 namespace protocol { | 13 namespace protocol { |
17 | 14 |
18 const UChar replacementCharacter = 0xFFFD; | 15 const UChar replacementCharacter = 0xFFFD; |
19 | 16 using UChar32 = uint32_t; |
20 template<typename CharType> inline bool isASCII(CharType c) | |
21 { | |
22 return !(c & ~0x7F); | |
23 } | |
24 | |
25 template<typename CharType> inline bool isASCIIAlpha(CharType c) | |
26 { | |
27 return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; | |
28 } | |
29 | |
30 template<typename CharType> inline bool isASCIIDigit(CharType c) | |
31 { | |
32 return c >= '0' && c <= '9'; | |
33 } | |
34 | |
35 template<typename CharType> inline bool isASCIIAlphanumeric(CharType c) | |
36 { | |
37 return isASCIIDigit(c) || isASCIIAlpha(c); | |
38 } | |
39 | |
40 template<typename CharType> inline bool isASCIIHexDigit(CharType c) | |
41 { | |
42 return isASCIIDigit(c) || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); | |
43 } | |
44 | |
45 template<typename CharType> inline bool isASCIIOctalDigit(CharType c) | |
46 { | |
47 return (c >= '0') & (c <= '7'); | |
48 } | |
49 | |
50 template<typename CharType> inline bool isASCIIPrintable(CharType c) | |
51 { | |
52 return c >= ' ' && c <= '~'; | |
53 } | |
54 | |
55 /* | |
56 Statistics from a run of Apple's page load test for callers of isASCIISpace: | |
57 | |
58 character count | |
59 --------- ----- | |
60 non-spaces 689383 | |
61 20 space 294720 | |
62 0A \n 89059 | |
63 09 \t 28320 | |
64 0D \r 0 | |
65 0C \f 0 | |
66 0B \v 0 | |
67 */ | |
68 template<typename CharType> inline bool isASCIISpace(CharType c) | |
69 { | |
70 return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9)); | |
71 } | |
72 | |
73 extern const LChar ASCIICaseFoldTable[256] = { | |
74 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c
, 0x0d, 0x0e, 0x0f, | |
75 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c
, 0x1d, 0x1e, 0x1f, | |
76 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c
, 0x2d, 0x2e, 0x2f, | |
77 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c
, 0x3d, 0x3e, 0x3f, | |
78 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c
, 0x6d, 0x6e, 0x6f, | |
79 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c
, 0x5d, 0x5e, 0x5f, | |
80 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c
, 0x6d, 0x6e, 0x6f, | |
81 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c
, 0x7d, 0x7e, 0x7f, | |
82 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c
, 0x8d, 0x8e, 0x8f, | |
83 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c
, 0x9d, 0x9e, 0x9f, | |
84 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac
, 0xad, 0xae, 0xaf, | |
85 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc
, 0xbd, 0xbe, 0xbf, | |
86 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc
, 0xcd, 0xce, 0xcf, | |
87 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc
, 0xdd, 0xde, 0xdf, | |
88 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec
, 0xed, 0xee, 0xef, | |
89 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc
, 0xfd, 0xfe, 0xff | |
90 }; | |
91 | |
92 template<typename CharType> inline int toASCIIHexValue(CharType c) | |
93 { | |
94 DCHECK(isASCIIHexDigit(c)); | |
95 return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; | |
96 } | |
97 | |
98 template<typename CharType> inline int toASCIIHexValue(CharType upperValue, Char
Type lowerValue) | |
99 { | |
100 DCHECK(isASCIIHexDigit(upperValue) && isASCIIHexDigit(lowerValue)); | |
101 return ((toASCIIHexValue(upperValue) << 4) & 0xF0) | toASCIIHexValue(lowerVa
lue); | |
102 } | |
103 | |
104 inline char lowerNibbleToASCIIHexDigit(char c) | |
105 { | |
106 char nibble = c & 0xF; | |
107 return nibble < 10 ? '0' + nibble : 'A' + nibble - 10; | |
108 } | |
109 | |
110 inline char upperNibbleToASCIIHexDigit(char c) | |
111 { | |
112 char nibble = (c >> 4) & 0xF; | |
113 return nibble < 10 ? '0' + nibble : 'A' + nibble - 10; | |
114 } | |
115 | 17 |
116 inline int inlineUTF8SequenceLengthNonASCII(char b0) | 18 inline int inlineUTF8SequenceLengthNonASCII(char b0) |
117 { | 19 { |
118 if ((b0 & 0xC0) != 0xC0) | 20 if ((b0 & 0xC0) != 0xC0) |
119 return 0; | 21 return 0; |
120 if ((b0 & 0xE0) == 0xC0) | 22 if ((b0 & 0xE0) == 0xC0) |
121 return 2; | 23 return 2; |
122 if ((b0 & 0xF0) == 0xE0) | 24 if ((b0 & 0xF0) == 0xE0) |
123 return 3; | 25 return 3; |
124 if ((b0 & 0xF8) == 0xF0) | 26 if ((b0 & 0xF8) == 0xF0) |
125 return 4; | 27 return 4; |
126 return 0; | 28 return 0; |
127 } | 29 } |
128 | 30 |
129 inline int inlineUTF8SequenceLength(char b0) | 31 inline int inlineUTF8SequenceLength(char b0) |
130 { | 32 { |
131 return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0); | 33 return String16::isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0); |
132 } | 34 } |
133 | 35 |
134 // Once the bits are split out into bytes of UTF-8, this is a mask OR-ed | 36 // Once the bits are split out into bytes of UTF-8, this is a mask OR-ed |
135 // into the first byte, depending on how many bytes follow. There are | 37 // into the first byte, depending on how many bytes follow. There are |
136 // as many entries in this table as there are UTF-8 sequence types. | 38 // as many entries in this table as there are UTF-8 sequence types. |
137 // (I.e., one byte sequence, two byte... etc.). Remember that sequences | 39 // (I.e., one byte sequence, two byte... etc.). Remember that sequences |
138 // for *legal* UTF-8 will be 4 or fewer bytes total. | 40 // for *legal* UTF-8 will be 4 or fewer bytes total. |
139 static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0x
F8, 0xFC }; | 41 static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0x
F8, 0xFC }; |
140 | 42 |
141 typedef enum { | 43 typedef enum { |
142 conversionOK, // conversion successful | 44 conversionOK, // conversion successful |
143 sourceExhausted, // partial character in source, but hit end | 45 sourceExhausted, // partial character in source, but hit end |
144 targetExhausted, // insuff. room in target for conversion | 46 targetExhausted, // insuff. room in target for conversion |
145 sourceIllegal // source sequence is illegal/malformed | 47 sourceIllegal // source sequence is illegal/malformed |
146 } ConversionResult; | 48 } ConversionResult; |
147 | 49 |
148 ConversionResult convertLatin1ToUTF8( | |
149 const LChar** sourceStart, const LChar* sourceEnd, | |
150 char** targetStart, char* targetEnd) | |
151 { | |
152 ConversionResult result = conversionOK; | |
153 const LChar* source = *sourceStart; | |
154 char* target = *targetStart; | |
155 while (source < sourceEnd) { | |
156 UChar32 ch; | |
157 unsigned short bytesToWrite = 0; | |
158 const UChar32 byteMask = 0xBF; | |
159 const UChar32 byteMark = 0x80; | |
160 const LChar* oldSource = source; // In case we have to back up because o
f target overflow. | |
161 ch = static_cast<unsigned short>(*source++); | |
162 | |
163 // Figure out how many bytes the result will require | |
164 if (ch < (UChar32)0x80) | |
165 bytesToWrite = 1; | |
166 else | |
167 bytesToWrite = 2; | |
168 | |
169 target += bytesToWrite; | |
170 if (target > targetEnd) { | |
171 source = oldSource; // Back up source pointer! | |
172 target -= bytesToWrite; | |
173 result = targetExhausted; | |
174 break; | |
175 } | |
176 switch (bytesToWrite) { // note: everything falls through. | |
177 case 2: | |
178 *--target = (char)((ch | byteMark) & byteMask); | |
179 ch >>= 6; | |
180 case 1: | |
181 *--target = (char)(ch | firstByteMark[bytesToWrite]); | |
182 } | |
183 target += bytesToWrite; | |
184 } | |
185 *sourceStart = source; | |
186 *targetStart = target; | |
187 return result; | |
188 } | |
189 | |
190 ConversionResult convertUTF16ToUTF8( | 50 ConversionResult convertUTF16ToUTF8( |
191 const UChar** sourceStart, const UChar* sourceEnd, | 51 const UChar** sourceStart, const UChar* sourceEnd, |
192 char** targetStart, char* targetEnd, bool strict) | 52 char** targetStart, char* targetEnd, bool strict) |
193 { | 53 { |
194 ConversionResult result = conversionOK; | 54 ConversionResult result = conversionOK; |
195 const UChar* source = *sourceStart; | 55 const UChar* source = *sourceStart; |
196 char* target = *targetStart; | 56 char* target = *targetStart; |
197 while (source < sourceEnd) { | 57 while (source < sourceEnd) { |
198 UChar32 ch; | 58 UChar32 ch; |
199 unsigned short bytesToWrite = 0; | 59 unsigned short bytesToWrite = 0; |
(...skipping 288 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
488 | 348 |
489 UChar* bufferCurrent = bufferStart; | 349 UChar* bufferCurrent = bufferStart; |
490 const char* stringCurrent = stringStart; | 350 const char* stringCurrent = stringStart; |
491 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent,
bufferCurrent + buffer.size(), 0, true) != conversionOK) | 351 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent,
bufferCurrent + buffer.size(), 0, true) != conversionOK) |
492 return String16(); | 352 return String16(); |
493 | 353 |
494 unsigned utf16Length = bufferCurrent - bufferStart; | 354 unsigned utf16Length = bufferCurrent - bufferStart; |
495 return String16(bufferStart, utf16Length); | 355 return String16(bufferStart, utf16Length); |
496 } | 356 } |
497 | 357 |
498 // trim from start | |
499 static inline wstring &ltrim(wstring &s) | |
500 { | |
501 s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<i
nt, int>(std::isspace)))); | |
502 return s; | |
503 } | |
504 | |
505 // trim from end | |
506 static inline wstring &rtrim(wstring &s) | |
507 { | |
508 s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int, int>(
std::isspace))).base(), s.end()); | |
509 return s; | |
510 } | |
511 | |
512 // trim from both ends | |
513 static inline wstring &trim(wstring &s) | |
514 { | |
515 return ltrim(rtrim(s)); | |
516 } | |
517 | |
518 // static | |
519 std::string String16::intToString(int i) | |
520 { | |
521 char buffer[50]; | |
522 std::sprintf(buffer, "%d", i); | |
523 return std::string(buffer); | |
524 } | |
525 | |
526 // static | |
527 std::string String16::doubleToString(double d) | |
528 { | |
529 char buffer[100]; | |
530 std::sprintf(buffer, "%f", d); | |
531 return std::string(buffer); | |
532 } | |
533 | |
534 std::string String16::utf8() const | 358 std::string String16::utf8() const |
535 { | 359 { |
536 unsigned length = this->length(); | 360 unsigned length = this->length(); |
537 | 361 |
538 if (!length) | 362 if (!length) |
539 return std::string(""); | 363 return std::string(""); |
540 | 364 |
541 // Allocate a buffer big enough to hold all the characters | 365 // Allocate a buffer big enough to hold all the characters |
542 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). | 366 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). |
543 // Optimization ideas, if we find this function is hot: | 367 // Optimization ideas, if we find this function is hot: |
(...skipping 26 matching lines...) Expand all Loading... |
570 DCHECK((*characters >= 0xD800) && (*characters <= 0xDBFF)); | 394 DCHECK((*characters >= 0xD800) && (*characters <= 0xDBFF)); |
571 // There should be room left, since one UChar hasn't been | 395 // There should be room left, since one UChar hasn't been |
572 // converted. | 396 // converted. |
573 DCHECK((buffer + 3) <= (buffer + bufferVector.size())); | 397 DCHECK((buffer + 3) <= (buffer + bufferVector.size())); |
574 putUTF8Triple(buffer, *characters); | 398 putUTF8Triple(buffer, *characters); |
575 } | 399 } |
576 | 400 |
577 return std::string(bufferVector.data(), buffer - bufferVector.data()); | 401 return std::string(bufferVector.data(), buffer - bufferVector.data()); |
578 } | 402 } |
579 | 403 |
580 String16 String16::stripWhiteSpace() const | |
581 { | |
582 wstring result(m_impl); | |
583 trim(result); | |
584 return result; | |
585 } | |
586 | |
587 } // namespace protocol | 404 } // namespace protocol |
588 } // namespace blink | 405 } // namespace blink |
OLD | NEW |