Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(865)

Side by Side Diff: third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp

Issue 2226863003: [DevTools] Reduce API surface of String16. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: rebased Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "platform/inspector_protocol/String16STL.h" 5 #include "platform/inspector_protocol/String16.h"
6
7 #include "platform/inspector_protocol/Platform.h"
8 6
9 #include <algorithm> 7 #include <algorithm>
10 #include <cctype> 8 #include <cctype>
11 #include <cstdio> 9 #include <cstdio>
12 #include <functional>
13 #include <locale> 10 #include <locale>
14 11
15 namespace blink { 12 namespace blink {
16 namespace protocol { 13 namespace protocol {
17 14
18 const UChar replacementCharacter = 0xFFFD; 15 const UChar replacementCharacter = 0xFFFD;
19 16 using UChar32 = uint32_t;
20 template<typename CharType> inline bool isASCII(CharType c)
21 {
22 return !(c & ~0x7F);
23 }
24
25 template<typename CharType> inline bool isASCIIAlpha(CharType c)
26 {
27 return (c | 0x20) >= 'a' && (c | 0x20) <= 'z';
28 }
29
30 template<typename CharType> inline bool isASCIIDigit(CharType c)
31 {
32 return c >= '0' && c <= '9';
33 }
34
35 template<typename CharType> inline bool isASCIIAlphanumeric(CharType c)
36 {
37 return isASCIIDigit(c) || isASCIIAlpha(c);
38 }
39
40 template<typename CharType> inline bool isASCIIHexDigit(CharType c)
41 {
42 return isASCIIDigit(c) || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f');
43 }
44
45 template<typename CharType> inline bool isASCIIOctalDigit(CharType c)
46 {
47 return (c >= '0') & (c <= '7');
48 }
49
50 template<typename CharType> inline bool isASCIIPrintable(CharType c)
51 {
52 return c >= ' ' && c <= '~';
53 }
54
55 /*
56 Statistics from a run of Apple's page load test for callers of isASCIISpace:
57
58 character count
59 --------- -----
60 non-spaces 689383
61 20 space 294720
62 0A \n 89059
63 09 \t 28320
64 0D \r 0
65 0C \f 0
66 0B \v 0
67 */
68 template<typename CharType> inline bool isASCIISpace(CharType c)
69 {
70 return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9));
71 }
72
73 extern const LChar ASCIICaseFoldTable[256] = {
74 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c , 0x0d, 0x0e, 0x0f,
75 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c , 0x1d, 0x1e, 0x1f,
76 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c , 0x2d, 0x2e, 0x2f,
77 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c , 0x3d, 0x3e, 0x3f,
78 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c , 0x6d, 0x6e, 0x6f,
79 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c , 0x5d, 0x5e, 0x5f,
80 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c , 0x6d, 0x6e, 0x6f,
81 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c , 0x7d, 0x7e, 0x7f,
82 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c , 0x8d, 0x8e, 0x8f,
83 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c , 0x9d, 0x9e, 0x9f,
84 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac , 0xad, 0xae, 0xaf,
85 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc , 0xbd, 0xbe, 0xbf,
86 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc , 0xcd, 0xce, 0xcf,
87 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc , 0xdd, 0xde, 0xdf,
88 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec , 0xed, 0xee, 0xef,
89 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc , 0xfd, 0xfe, 0xff
90 };
91
92 template<typename CharType> inline int toASCIIHexValue(CharType c)
93 {
94 DCHECK(isASCIIHexDigit(c));
95 return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF;
96 }
97
98 template<typename CharType> inline int toASCIIHexValue(CharType upperValue, CharType lowerValue)
99 {
100 DCHECK(isASCIIHexDigit(upperValue) && isASCIIHexDigit(lowerValue));
101 return ((toASCIIHexValue(upperValue) << 4) & 0xF0) | toASCIIHexValue(lowerValue);
102 }
103
104 inline char lowerNibbleToASCIIHexDigit(char c)
105 {
106 char nibble = c & 0xF;
107 return nibble < 10 ? '0' + nibble : 'A' + nibble - 10;
108 }
109
110 inline char upperNibbleToASCIIHexDigit(char c)
111 {
112 char nibble = (c >> 4) & 0xF;
113 return nibble < 10 ? '0' + nibble : 'A' + nibble - 10;
114 }
115 17
116 inline int inlineUTF8SequenceLengthNonASCII(char b0) 18 inline int inlineUTF8SequenceLengthNonASCII(char b0)
117 { 19 {
118 if ((b0 & 0xC0) != 0xC0) 20 if ((b0 & 0xC0) != 0xC0)
119 return 0; 21 return 0;
120 if ((b0 & 0xE0) == 0xC0) 22 if ((b0 & 0xE0) == 0xC0)
121 return 2; 23 return 2;
122 if ((b0 & 0xF0) == 0xE0) 24 if ((b0 & 0xF0) == 0xE0)
123 return 3; 25 return 3;
124 if ((b0 & 0xF8) == 0xF0) 26 if ((b0 & 0xF8) == 0xF0)
125 return 4; 27 return 4;
126 return 0; 28 return 0;
127 } 29 }
128 30
129 inline int inlineUTF8SequenceLength(char b0) 31 inline int inlineUTF8SequenceLength(char b0)
130 { 32 {
131 return isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0); 33 return String16::isASCII(b0) ? 1 : inlineUTF8SequenceLengthNonASCII(b0);
132 } 34 }
133 35
134 // Once the bits are split out into bytes of UTF-8, this is a mask OR-ed 36 // Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
135 // into the first byte, depending on how many bytes follow. There are 37 // into the first byte, depending on how many bytes follow. There are
136 // as many entries in this table as there are UTF-8 sequence types. 38 // as many entries in this table as there are UTF-8 sequence types.
137 // (I.e., one byte sequence, two byte... etc.). Remember that sequences 39 // (I.e., one byte sequence, two byte... etc.). Remember that sequences
138 // for *legal* UTF-8 will be 4 or fewer bytes total. 40 // for *legal* UTF-8 will be 4 or fewer bytes total.
139 static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 41 static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
140 42
141 typedef enum { 43 typedef enum {
142 conversionOK, // conversion successful 44 conversionOK, // conversion successful
143 sourceExhausted, // partial character in source, but hit end 45 sourceExhausted, // partial character in source, but hit end
144 targetExhausted, // insuff. room in target for conversion 46 targetExhausted, // insuff. room in target for conversion
145 sourceIllegal // source sequence is illegal/malformed 47 sourceIllegal // source sequence is illegal/malformed
146 } ConversionResult; 48 } ConversionResult;
147 49
148 ConversionResult convertLatin1ToUTF8(
149 const LChar** sourceStart, const LChar* sourceEnd,
150 char** targetStart, char* targetEnd)
151 {
152 ConversionResult result = conversionOK;
153 const LChar* source = *sourceStart;
154 char* target = *targetStart;
155 while (source < sourceEnd) {
156 UChar32 ch;
157 unsigned short bytesToWrite = 0;
158 const UChar32 byteMask = 0xBF;
159 const UChar32 byteMark = 0x80;
160 const LChar* oldSource = source; // In case we have to back up because of target overflow.
161 ch = static_cast<unsigned short>(*source++);
162
163 // Figure out how many bytes the result will require
164 if (ch < (UChar32)0x80)
165 bytesToWrite = 1;
166 else
167 bytesToWrite = 2;
168
169 target += bytesToWrite;
170 if (target > targetEnd) {
171 source = oldSource; // Back up source pointer!
172 target -= bytesToWrite;
173 result = targetExhausted;
174 break;
175 }
176 switch (bytesToWrite) { // note: everything falls through.
177 case 2:
178 *--target = (char)((ch | byteMark) & byteMask);
179 ch >>= 6;
180 case 1:
181 *--target = (char)(ch | firstByteMark[bytesToWrite]);
182 }
183 target += bytesToWrite;
184 }
185 *sourceStart = source;
186 *targetStart = target;
187 return result;
188 }
189
190 ConversionResult convertUTF16ToUTF8( 50 ConversionResult convertUTF16ToUTF8(
191 const UChar** sourceStart, const UChar* sourceEnd, 51 const UChar** sourceStart, const UChar* sourceEnd,
192 char** targetStart, char* targetEnd, bool strict) 52 char** targetStart, char* targetEnd, bool strict)
193 { 53 {
194 ConversionResult result = conversionOK; 54 ConversionResult result = conversionOK;
195 const UChar* source = *sourceStart; 55 const UChar* source = *sourceStart;
196 char* target = *targetStart; 56 char* target = *targetStart;
197 while (source < sourceEnd) { 57 while (source < sourceEnd) {
198 UChar32 ch; 58 UChar32 ch;
199 unsigned short bytesToWrite = 0; 59 unsigned short bytesToWrite = 0;
(...skipping 288 matching lines...) Expand 10 before | Expand all | Expand 10 after
488 348
489 UChar* bufferCurrent = bufferStart; 349 UChar* bufferCurrent = bufferStart;
490 const char* stringCurrent = stringStart; 350 const char* stringCurrent = stringStart;
491 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent, bufferCurrent + buffer.size(), 0, true) != conversionOK) 351 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent, bufferCurrent + buffer.size(), 0, true) != conversionOK)
492 return String16(); 352 return String16();
493 353
494 unsigned utf16Length = bufferCurrent - bufferStart; 354 unsigned utf16Length = bufferCurrent - bufferStart;
495 return String16(bufferStart, utf16Length); 355 return String16(bufferStart, utf16Length);
496 } 356 }
497 357
498 // trim from start
499 static inline wstring &ltrim(wstring &s)
500 {
501 s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<int, int>(std::isspace))));
502 return s;
503 }
504
505 // trim from end
506 static inline wstring &rtrim(wstring &s)
507 {
508 s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int, int>(std::isspace))).base(), s.end());
509 return s;
510 }
511
512 // trim from both ends
513 static inline wstring &trim(wstring &s)
514 {
515 return ltrim(rtrim(s));
516 }
517
518 // static
519 std::string String16::intToString(int i)
520 {
521 char buffer[50];
522 std::sprintf(buffer, "%d", i);
523 return std::string(buffer);
524 }
525
526 // static
527 std::string String16::doubleToString(double d)
528 {
529 char buffer[100];
530 std::sprintf(buffer, "%f", d);
531 return std::string(buffer);
532 }
533
534 std::string String16::utf8() const 358 std::string String16::utf8() const
535 { 359 {
536 unsigned length = this->length(); 360 unsigned length = this->length();
537 361
538 if (!length) 362 if (!length)
539 return std::string(""); 363 return std::string("");
540 364
541 // Allocate a buffer big enough to hold all the characters 365 // Allocate a buffer big enough to hold all the characters
542 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). 366 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
543 // Optimization ideas, if we find this function is hot: 367 // Optimization ideas, if we find this function is hot:
(...skipping 26 matching lines...) Expand all
570 DCHECK((*characters >= 0xD800) && (*characters <= 0xDBFF)); 394 DCHECK((*characters >= 0xD800) && (*characters <= 0xDBFF));
571 // There should be room left, since one UChar hasn't been 395 // There should be room left, since one UChar hasn't been
572 // converted. 396 // converted.
573 DCHECK((buffer + 3) <= (buffer + bufferVector.size())); 397 DCHECK((buffer + 3) <= (buffer + bufferVector.size()));
574 putUTF8Triple(buffer, *characters); 398 putUTF8Triple(buffer, *characters);
575 } 399 }
576 400
577 return std::string(bufferVector.data(), buffer - bufferVector.data()); 401 return std::string(bufferVector.data(), buffer - bufferVector.data());
578 } 402 }
579 403
580 String16 String16::stripWhiteSpace() const
581 {
582 wstring result(m_impl);
583 trim(result);
584 return result;
585 }
586
587 } // namespace protocol 404 } // namespace protocol
588 } // namespace blink 405 } // namespace blink
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698