Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(96)

Side by Side Diff: third_party/WebKit/Source/platform/inspector_protocol/String16STL.cpp

Issue 2044343002: DevTools: update V8Inspector to work with the new v8_inspector API. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "platform/inspector_protocol/String16STL.h" 5 #include "platform/inspector_protocol/String16STL.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <cctype> 8 #include <cctype>
9 #include <cstdio>
9 #include <functional> 10 #include <functional>
10 #include <locale> 11 #include <locale>
11 12
13 #define DCHECK(k)
dgozman 2016/06/08 15:56:56 Not needed, as you have it in Platform.h
14
12 namespace blink { 15 namespace blink {
13 namespace protocol { 16 namespace protocol {
14 17
15 const UChar replacementCharacter = 0xFFFD; 18 const UChar replacementCharacter = 0xFFFD;
16 19
// Returns true when |c| has no bits set above the low seven, i.e. its value
// lies in the ASCII range 0x00..0x7F.
template<typename CharType> inline bool isASCII(CharType c)
{
    return (c & ~0x7F) == 0;
}
21 24
(...skipping 245 matching lines...) Expand 10 before | Expand all | Expand 10 after
267 case 1: 270 case 1:
268 *--target = (char)(ch | firstByteMark[bytesToWrite]); 271 *--target = (char)(ch | firstByteMark[bytesToWrite]);
269 } 272 }
270 target += bytesToWrite; 273 target += bytesToWrite;
271 } 274 }
272 *sourceStart = source; 275 *sourceStart = source;
273 *targetStart = target; 276 *targetStart = target;
274 return result; 277 return result;
275 } 278 }
276 279
280 /**
281 * Is this code point a BMP code point (U+0000..U+ffff)?
282 * @param c 32-bit code point
283 * @return TRUE or FALSE
284 * @stable ICU 2.8
285 */
286 #define U_IS_BMP(c) ((uint32_t)(c) <= 0xffff)
287
288 /**
289 * Is this code point a supplementary code point (U+10000..U+10ffff)?
290 * @param c 32-bit code point
291 * @return TRUE or FALSE
292 * @stable ICU 2.8
293 */
294 #define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c) - 0x10000) <= 0xfffff)
295
296 /**
297 * Is this code point a surrogate (U+d800..U+dfff)?
298 * @param c 32-bit code point
299 * @return TRUE or FALSE
300 * @stable ICU 2.4
301 */
302 #define U_IS_SURROGATE(c) (((c) & 0xfffff800) == 0xd800)
303
304 /**
305 * Get the lead surrogate (0xd800..0xdbff) for a
306 * supplementary code point (0x10000..0x10ffff).
307 * @param supplementary 32-bit code point (U+10000..U+10ffff)
308 * @return lead surrogate (U+d800..U+dbff) for supplementary
309 * @stable ICU 2.4
310 */
311 #define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xd7c0)
312
313 /**
314 * Get the trail surrogate (0xdc00..0xdfff) for a
315 * supplementary code point (0x10000..0x10ffff).
316 * @param supplementary 32-bit code point (U+10000..U+10ffff)
317 * @return trail surrogate (U+dc00..U+dfff) for supplementary
318 * @stable ICU 2.4
319 */
320 #define U16_TRAIL(supplementary) (UChar)(((supplementary) & 0x3ff) | 0xdc00)
321
// Reports whether the |length| bytes starting at |source| form a single
// well-formed UTF-8 sequence.
//
// This must be called with the length pre-determined by the first byte.
// If presented with a length > 4 (or < 1), this returns false. The Unicode
// definition of UTF-8 goes up to 4-byte sequences.
static bool isLegalUTF8(const unsigned char* source, int length)
{
    unsigned char a;
    const unsigned char* srcptr = source + length;
    switch (length) {
    default:
        return false;
    // Everything else falls through when "true"...
    case 4:
        if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
            return false;
        // fall through
    case 3:
        if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
            return false;
        // fall through
    case 2:
        // The second byte must be a continuation byte (0x80..0xBF) for every
        // lead byte. Checking only the upper bound here would let the 0xED
        // and 0xF4 cases below, which test just an upper bound themselves,
        // accept a second byte below 0x80.
        if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
            return false;

        // Lead-byte specific narrowing of the second byte.
        // no fall-through in this inner switch
        switch (*source) {
        case 0xE0:
            // Anything below 0xA0 would be an overlong 3-byte form.
            if (a < 0xA0)
                return false;
            break;
        case 0xED:
            // Anything above 0x9F would encode a surrogate (U+D800..U+DFFF).
            if (a > 0x9F)
                return false;
            break;
        case 0xF0:
            // Anything below 0x90 would be an overlong 4-byte form.
            if (a < 0x90)
                return false;
            break;
        case 0xF4:
            // Anything above 0x8F would exceed U+10FFFF.
            if (a > 0x8F)
                return false;
            break;
        default:
            break;
        }
        // fall through
    case 1:
        // 0x80..0xBF are continuation bytes and 0xC0/0xC1 can only start
        // overlong forms, so none of them may begin a sequence.
        if (*source >= 0x80 && *source < 0xC2)
            return false;
    }
    // Lead bytes above 0xF4 would encode values beyond U+10FFFF.
    if (*source > 0xF4)
        return false;
    return true;
}
374
375 // Magic values subtracted from a buffer value during UTF8 conversion.
376 // This table contains as many values as there might be trailing bytes
377 // in a UTF-8 sequence.
378 static const UChar32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E20 80UL, 0x03C82080UL, static_cast<UChar32>(0xFA082080UL), static_cast<UChar32>(0x8 2082080UL) };
379
380 static inline UChar32 readUTF8Sequence(const char*& sequence, unsigned length)
381 {
382 UChar32 character = 0;
383
384 // The cases all fall through.
385 switch (length) {
386 case 6:
387 character += static_cast<unsigned char>(*sequence++);
388 character <<= 6;
389 case 5:
390 character += static_cast<unsigned char>(*sequence++);
391 character <<= 6;
392 case 4:
393 character += static_cast<unsigned char>(*sequence++);
394 character <<= 6;
395 case 3:
396 character += static_cast<unsigned char>(*sequence++);
397 character <<= 6;
398 case 2:
399 character += static_cast<unsigned char>(*sequence++);
400 character <<= 6;
401 case 1:
402 character += static_cast<unsigned char>(*sequence++);
403 }
404
405 return character - offsetsFromUTF8[length - 1];
406 }
407
408 ConversionResult convertUTF8ToUTF16(
409 const char** sourceStart, const char* sourceEnd,
410 UChar** targetStart, UChar* targetEnd, bool* sourceAllASCII, bool strict)
411 {
412 ConversionResult result = conversionOK;
413 const char* source = *sourceStart;
414 UChar* target = *targetStart;
415 UChar orAllData = 0;
416 while (source < sourceEnd) {
417 int utf8SequenceLength = inlineUTF8SequenceLength(*source);
418 if (sourceEnd - source < utf8SequenceLength) {
419 result = sourceExhausted;
420 break;
421 }
422 // Do this check whether lenient or strict
423 if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(source), utf8Seq uenceLength)) {
424 result = sourceIllegal;
425 break;
426 }
427
428 UChar32 character = readUTF8Sequence(source, utf8SequenceLength);
429
430 if (target >= targetEnd) {
431 source -= utf8SequenceLength; // Back up source pointer!
432 result = targetExhausted;
433 break;
434 }
435
436 if (U_IS_BMP(character)) {
437 // UTF-16 surrogate values are illegal in UTF-32
438 if (U_IS_SURROGATE(character)) {
439 if (strict) {
440 source -= utf8SequenceLength; // return to the illegal value itself
441 result = sourceIllegal;
442 break;
443 }
444 *target++ = replacementCharacter;
445 orAllData |= replacementCharacter;
446 } else {
447 *target++ = static_cast<UChar>(character); // normal case
448 orAllData |= character;
449 }
450 } else if (U_IS_SUPPLEMENTARY(character)) {
451 // target is a character in range 0xFFFF - 0x10FFFF
452 if (target + 1 >= targetEnd) {
453 source -= utf8SequenceLength; // Back up source pointer!
454 result = targetExhausted;
455 break;
456 }
457 *target++ = U16_LEAD(character);
458 *target++ = U16_TRAIL(character);
459 orAllData = 0xffff;
460 } else {
461 if (strict) {
462 source -= utf8SequenceLength; // return to the start
463 result = sourceIllegal;
464 break; // Bail out; shouldn't continue
465 } else {
466 *target++ = replacementCharacter;
467 orAllData |= replacementCharacter;
468 }
469 }
470 }
471 *sourceStart = source;
472 *targetStart = target;
473
474 if (sourceAllASCII)
475 *sourceAllASCII = !(orAllData & ~0x7f);
476
477 return result;
478 }
479
277 // Helper to write a three-byte UTF-8 code point to the buffer, caller must chec k room is available. 480 // Helper to write a three-byte UTF-8 code point to the buffer, caller must chec k room is available.
278 static inline void putUTF8Triple(char*& buffer, UChar ch) 481 static inline void putUTF8Triple(char*& buffer, UChar ch)
279 { 482 {
280 DCHECK_GE(ch, 0x0800); 483 DCHECK_GE(ch, 0x0800);
281 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); 484 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
282 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); 485 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
283 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); 486 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
284 } 487 }
285 488
489 String16 String16::fromUTF8(const char* stringStart, size_t length)
490 {
491 if (!stringStart || !length)
492 return String16();
493
494 std::vector<UChar> buffer(length);
495 UChar* bufferStart = buffer.data();
496
497 UChar* bufferCurrent = bufferStart;
498 const char* stringCurrent = stringStart;
499 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &bufferCurrent, bufferCurrent + buffer.size(), 0, true) != conversionOK)
500 return String16();
501
502 unsigned utf16Length = bufferCurrent - bufferStart;
503 return String16(bufferStart, utf16Length);
504 }
505
286 // trim from start 506 // trim from start
287 static inline wstring &ltrim(wstring &s) 507 static inline wstring &ltrim(wstring &s)
288 { 508 {
289 s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<i nt, int>(std::isspace)))); 509 s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<i nt, int>(std::isspace))));
290 return s; 510 return s;
291 } 511 }
292 512
293 // trim from end 513 // trim from end
294 static inline wstring &rtrim(wstring &s) 514 static inline wstring &rtrim(wstring &s)
295 { 515 {
296 s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int, int>( std::isspace))).base(), s.end()); 516 s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int, int>( std::isspace))).base(), s.end());
297 return s; 517 return s;
298 } 518 }
299 519
300 // trim from both ends 520 // trim from both ends
301 static inline wstring &trim(wstring &s) 521 static inline wstring &trim(wstring &s)
302 { 522 {
303 return ltrim(rtrim(s)); 523 return ltrim(rtrim(s));
304 } 524 }
305 525
526 // static
527 std::string String16::intToString(int i)
528 {
529 char buffer[50];
530 std::sprintf(buffer, "%d", i);
531 return std::string(buffer);
532 }
533
534 // static
535 std::string String16::doubleToString(double d)
536 {
537 char buffer[100];
538 std::sprintf(buffer, "%f", d);
539 return std::string(buffer);
540 }
541
306 std::string String16::utf8() const 542 std::string String16::utf8() const
307 { 543 {
308 unsigned length = this->length(); 544 unsigned length = this->length();
309 545
310 if (!length) 546 if (!length)
311 return std::string(""); 547 return std::string("");
312 548
313 // Allocate a buffer big enough to hold all the characters 549 // Allocate a buffer big enough to hold all the characters
314 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). 550 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
315 // Optimization ideas, if we find this function is hot: 551 // Optimization ideas, if we find this function is hot:
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
357 593
358 String16 String16::stripWhiteSpace() const 594 String16 String16::stripWhiteSpace() const
359 { 595 {
360 wstring result(m_impl); 596 wstring result(m_impl);
361 trim(result); 597 trim(result);
362 return result; 598 return result;
363 } 599 }
364 600
365 } // namespace protocol 601 } // namespace protocol
366 } // namespace blink 602 } // namespace blink
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698