Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(34)

Side by Side Diff: Source/core/css/CSSTokenizer-in.cpp

Issue 196353018: Smaller CSSParser UTF16 buffers for escaped strings. (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Dropped /* static */ and rewrote comment. Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Source/core/css/CSSTokenizer.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org) 2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com) 3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com)
4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved. 4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved.
5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com> 5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com>
6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org> 6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org>
7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved. 8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved.
9 * Copyright (C) 2012 Intel Corporation. All rights reserved. 9 * Copyright (C) 2012 Intel Corporation. All rights reserved.
10 * 10 *
(...skipping 286 matching lines...) Expand 10 before | Expand all | Expand 10 after
297 { 297 {
298 return m_currentCharacter8; 298 return m_currentCharacter8;
299 } 299 }
300 300
301 template <> 301 template <>
302 inline UChar*& CSSTokenizer::currentCharacter<UChar>() 302 inline UChar*& CSSTokenizer::currentCharacter<UChar>()
303 { 303 {
304 return m_currentCharacter16; 304 return m_currentCharacter16;
305 } 305 }
306 306
307 UChar*& CSSTokenizer::currentCharacter16() 307 UChar* CSSTokenizer::getStringBuffer16(size_t len)
308 { 308 {
309 if (!m_currentCharacter16) { 309 OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]);
310 m_dataStart16 = adoptArrayPtr(new UChar[m_length]);
311 m_currentCharacter16 = m_dataStart16.get();
312 }
313 310
314 return m_currentCharacter16; 311 UChar* bufferPtr = buffer.get();
312
313 m_cssStrings16.append(buffer.release());
314 return bufferPtr;
315 } 315 }
316 316
317 template <> 317 template <>
318 inline LChar* CSSTokenizer::dataStart<LChar>() 318 inline LChar* CSSTokenizer::dataStart<LChar>()
319 { 319 {
320 return m_dataStart8.get(); 320 return m_dataStart8.get();
321 } 321 }
322 322
323 template <> 323 template <>
324 inline UChar* CSSTokenizer::dataStart<UChar>() 324 inline UChar* CSSTokenizer::dataStart<UChar>()
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
405 if (unicode > 0x10ffff) 405 if (unicode > 0x10ffff)
406 unicode = 0xfffd; 406 unicode = 0xfffd;
407 407
408 // Optional space after the escape sequence. 408 // Optional space after the escape sequence.
409 if (isHTMLSpace<CharacterType>(*src)) 409 if (isHTMLSpace<CharacterType>(*src))
410 ++src; 410 ++src;
411 411
412 return unicode; 412 return unicode;
413 } 413 }
414 414
415 return *currentCharacter<CharacterType>()++; 415 return *src++;
416 } 416 }
417 417
418 template <> 418 template <>
419 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode ) 419 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode )
420 { 420 {
421 ASSERT(unicode <= 0xff); 421 ASSERT(unicode <= 0xff);
422 *result = unicode; 422 *result = unicode;
423 423
424 ++result; 424 ++result;
425 } 425 }
426 426
427 template <> 427 template <>
428 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode ) 428 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode )
429 { 429 {
430 // Replace unicode with a surrogate pairs when it is bigger than 0xffff 430 // Replace unicode with a surrogate pairs when it is bigger than 0xffff
431 if (U16_LENGTH(unicode) == 2) { 431 if (U16_LENGTH(unicode) == 2) {
432 *result++ = U16_LEAD(unicode); 432 *result++ = U16_LEAD(unicode);
433 *result = U16_TRAIL(unicode); 433 *result = U16_TRAIL(unicode);
434 } else { 434 } else {
435 *result = unicode; 435 *result = unicode;
436 } 436 }
437 437
438 ++result; 438 ++result;
439 } 439 }
440 440
441 template <typename SrcCharacterType>
442 size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src)
443 {
444 // The decoded form of an identifier (after resolving escape
445 // sequences) will not contain more characters (ASCII or UTF-16
446 // codepoints) than the input. This code can therefore ignore
447 // escape sequences completely.
448 SrcCharacterType* start = src;
449 do {
450 if (LIKELY(*src != '\\'))
451 src++;
452 else
453 parseEscape<SrcCharacterType>(src);
454 } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));
455
456 return src - start;
457 }
458
441 template <typename SrcCharacterType, typename DestCharacterType> 459 template <typename SrcCharacterType, typename DestCharacterType>
442 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCh aracterType*& result, bool& hasEscape) 460 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCh aracterType*& result, bool& hasEscape)
443 { 461 {
444 hasEscape = false; 462 hasEscape = false;
445 do { 463 do {
446 if (LIKELY(*src != '\\')) { 464 if (LIKELY(*src != '\\')) {
447 *result++ = *src++; 465 *result++ = *src++;
448 } else { 466 } else {
449 hasEscape = true; 467 hasEscape = true;
450 SrcCharacterType* savedEscapeStart = src; 468 SrcCharacterType* savedEscapeStart = src;
(...skipping 13 matching lines...) Expand all
464 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin g& resultString, bool& hasEscape) 482 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin g& resultString, bool& hasEscape)
465 { 483 {
466 // If a valid identifier start is found, we can safely 484 // If a valid identifier start is found, we can safely
467 // parse the identifier until the next invalid character. 485 // parse the identifier until the next invalid character.
468 ASSERT(isIdentifierStart<CharacterType>()); 486 ASSERT(isIdentifierStart<CharacterType>());
469 487
470 CharacterType* start = currentCharacter<CharacterType>(); 488 CharacterType* start = currentCharacter<CharacterType>();
471 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), res ult, hasEscape))) { 489 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), res ult, hasEscape))) {
472 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue 490 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
473 ASSERT(is8BitSource()); 491 ASSERT(is8BitSource());
474 UChar*& result16 = currentCharacter16(); 492 UChar* result16 = getStringBuffer16((result - start) + peekMaxIdentifier Len(result));
475 UChar* start16 = result16; 493 UChar* start16 = result16;
476 int i = 0; 494 int i = 0;
477 for (; i < result - start; i++) 495 for (; i < result - start; i++)
478 result16[i] = start[i]; 496 result16[i] = start[i];
479 497
480 result16 += i; 498 result16 += i;
481 499
482 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, has Escape); 500 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, has Escape);
483 501
484 resultString.init(start16, result16 - start16); 502 resultString.init(start16, result16 - start16);
485 503
486 return; 504 return;
487 } 505 }
488 506
489 resultString.init(start, result - start); 507 resultString.init(start, result - start);
490 } 508 }
491 509
510 template <typename SrcCharacterType>
511 size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote)
512 {
513 // A string can't be longer than the ASCII characters used to write
514 // it down so use that as the upper limit.
rune 2014/03/17 20:18:35 I just noticed now this comment has the same issue
Daniel Bratell 2014/03/17 20:25:17 Done.
515 SrcCharacterType* start = src;
516 while (true) {
517 if (UNLIKELY(*src == quote)) {
518 // String parsing is done.
519 ++src;
520 break;
521 }
522 if (UNLIKELY(!*src)) {
523 // String parsing is done, but don't advance pointer if at the end of input.
524 break;
525 }
526 ASSERT(*src > '\r' || (*src < '\n' && *src) || *src == '\v');
527
528 if (LIKELY(src[0] != '\\'))
529 src++;
530 else if (src[1] == '\n' || src[1] == '\f')
531 src += 2;
532 else if (src[1] == '\r')
533 src += src[2] == '\n' ? 3 : 2;
534 else
535 parseEscape<SrcCharacterType>(src);
536 }
537
538 return src - start;
539 }
540
492 template <typename SrcCharacterType, typename DestCharacterType> 541 template <typename SrcCharacterType, typename DestCharacterType>
493 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharac terType*& result, UChar quote) 542 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharac terType*& result, UChar quote)
494 { 543 {
495 while (true) { 544 while (true) {
496 if (UNLIKELY(*src == quote)) { 545 if (UNLIKELY(*src == quote)) {
497 // String parsing is done. 546 // String parsing is done.
498 ++src; 547 ++src;
499 return true; 548 return true;
500 } 549 }
501 if (UNLIKELY(!*src)) { 550 if (UNLIKELY(!*src)) {
(...skipping 23 matching lines...) Expand all
525 } 574 }
526 575
527 template <typename CharacterType> 576 template <typename CharacterType>
528 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& r esultString, UChar quote) 577 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& r esultString, UChar quote)
529 { 578 {
530 CharacterType* start = currentCharacter<CharacterType>(); 579 CharacterType* start = currentCharacter<CharacterType>();
531 580
532 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) { 581 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) {
533 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue 582 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
534 ASSERT(is8BitSource()); 583 ASSERT(is8BitSource());
535 UChar*& result16 = currentCharacter16(); 584 UChar* result16 = getStringBuffer16((result - start) + peekMaxStringLen( result, quote));
536 UChar* start16 = result16; 585 UChar* start16 = result16;
537 int i = 0; 586 int i = 0;
538 for (; i < result - start; i++) 587 for (; i < result - start; i++)
539 result16[i] = start[i]; 588 result16[i] = start[i];
540 589
541 result16 += i; 590 result16 += i;
542 591
543 parseStringInternal(currentCharacter<CharacterType>(), result16, quote); 592 parseStringInternal(currentCharacter<CharacterType>(), result16, quote);
544 593
545 resultString.init(start16, result16 - start16); 594 resultString.init(start16, result16 - start16);
(...skipping 27 matching lines...) Expand all
573 } 622 }
574 } 623 }
575 624
576 end = skipWhiteSpace(end); 625 end = skipWhiteSpace(end);
577 if (*end != ')') 626 if (*end != ')')
578 return false; 627 return false;
579 628
580 return true; 629 return true;
581 } 630 }
582 631
632 template <typename SrcCharacterType>
633 inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote)
634 {
635 // A URI can't be longer than the ASCII characters used to write
636 // it down so use that as the upper limit.
rune 2014/03/17 20:18:35 I just noticed now this comment has the same issue
Daniel Bratell 2014/03/17 20:25:17 Done.
637 SrcCharacterType* start = src;
638 if (quote) {
639 ASSERT(quote == '"' || quote == '\'');
640 return peekMaxStringLen(src, quote);
641 }
642
643 while (isURILetter(*src)) {
644 if (LIKELY(*src != '\\'))
645 src++;
646 else
647 parseEscape<SrcCharacterType>(src);
648 }
649
650 return src - start;
651 }
652
583 template <typename SrcCharacterType, typename DestCharacterType> 653 template <typename SrcCharacterType, typename DestCharacterType>
584 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacter Type*& dest, UChar quote) 654 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacter Type*& dest, UChar quote)
585 { 655 {
586 if (quote) { 656 if (quote) {
587 ASSERT(quote == '"' || quote == '\''); 657 ASSERT(quote == '"' || quote == '\'');
588 return parseStringInternal(src, dest, quote); 658 return parseStringInternal(src, dest, quote);
589 } 659 }
590 660
591 while (isURILetter(*src)) { 661 while (isURILetter(*src)) {
592 if (LIKELY(*src != '\\')) { 662 if (LIKELY(*src != '\\')) {
593 *dest++ = *src++; 663 *dest++ = *src++;
594 } else { 664 } else {
595 unsigned unicode = parseEscape<SrcCharacterType>(src); 665 unsigned unicode = parseEscape<SrcCharacterType>(src);
596 if (unicode > 0xff && sizeof(SrcCharacterType) == 1) 666 if (unicode > 0xff && sizeof(DestCharacterType) == 1)
Daniel Bratell 2014/03/17 16:01:07 Note: This is an unrelated bug fix. Before this fi
Daniel Bratell 2014/03/17 20:15:36 Done.
597 return false; 667 return false;
598 UnicodeToChars(dest, unicode); 668 UnicodeToChars(dest, unicode);
599 } 669 }
600 } 670 }
601 671
602 return true; 672 return true;
603 } 673 }
604 674
605 template <typename CharacterType> 675 template <typename CharacterType>
606 inline void CSSTokenizer::parseURI(CSSParserString& string) 676 inline void CSSTokenizer::parseURI(CSSParserString& string)
607 { 677 {
608 CharacterType* uriStart; 678 CharacterType* uriStart;
609 CharacterType* uriEnd; 679 CharacterType* uriEnd;
610 UChar quote; 680 UChar quote;
611 if (!findURI(uriStart, uriEnd, quote)) 681 if (!findURI(uriStart, uriEnd, quote))
612 return; 682 return;
613 683
614 CharacterType* dest = currentCharacter<CharacterType>() = uriStart; 684 CharacterType* dest = currentCharacter<CharacterType>() = uriStart;
615 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote)) ) { 685 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote)) ) {
616 string.init(uriStart, dest - uriStart); 686 string.init(uriStart, dest - uriStart);
617 } else { 687 } else {
618 // An escape sequence was encountered that can't be stored in 8 bits. 688 // An escape sequence was encountered that can't be stored in 8 bits.
619 // Reset the current character to the start of the URI and re-parse with 689 // Reset the current character to the start of the URI and re-parse with
620 // a 16-bit destination. 690 // a 16-bit destination.
621 ASSERT(is8BitSource()); 691 ASSERT(is8BitSource());
622 UChar* uriStart16 = currentCharacter16(); 692 UChar* result16 = getStringBuffer16(peekMaxURILen(uriStart, quote));
693 UChar* uriStart16 = result16;
623 currentCharacter<CharacterType>() = uriStart; 694 currentCharacter<CharacterType>() = uriStart;
624 bool result = parseURIInternal(currentCharacter<CharacterType>(), curren tCharacter16(), quote); 695 bool result = parseURIInternal(currentCharacter<CharacterType>(), result 16, quote);
625 ASSERT_UNUSED(result, result); 696 ASSERT_UNUSED(result, result);
626 string.init(uriStart16, currentCharacter16() - uriStart16); 697 string.init(uriStart16, result16 - uriStart16);
627 } 698 }
628 699
629 currentCharacter<CharacterType>() = uriEnd + 1; 700 currentCharacter<CharacterType>() = uriEnd + 1;
630 m_token = URI; 701 m_token = URI;
631 } 702 }
632 703
633 template <typename CharacterType> 704 template <typename CharacterType>
634 inline bool CSSTokenizer::parseUnicodeRange() 705 inline bool CSSTokenizer::parseUnicodeRange()
635 { 706 {
636 CharacterType* character = currentCharacter<CharacterType>() + 1; 707 CharacterType* character = currentCharacter<CharacterType>() + 1;
(...skipping 889 matching lines...) Expand 10 before | Expand all | Expand 10 after
1526 m_dataStart16[length - 1] = 0; 1597 m_dataStart16[length - 1] = 0;
1527 1598
1528 m_is8BitSource = false; 1599 m_is8BitSource = false;
1529 m_currentCharacter8 = 0; 1600 m_currentCharacter8 = 0;
1530 m_currentCharacter16 = m_dataStart16.get(); 1601 m_currentCharacter16 = m_dataStart16.get();
1531 setTokenStart<UChar>(m_currentCharacter16); 1602 setTokenStart<UChar>(m_currentCharacter16);
1532 m_lexFunc = &CSSTokenizer::realLex<UChar>; 1603 m_lexFunc = &CSSTokenizer::realLex<UChar>;
1533 } 1604 }
1534 1605
1535 } // namespace WebCore 1606 } // namespace WebCore
OLDNEW
« no previous file with comments | « Source/core/css/CSSTokenizer.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698