Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Side by Side Diff: Source/core/css/CSSTokenizer-in.cpp

Issue 196353018: Smaller CSSParser UTF16 buffers for escaped strings. (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Source/core/css/CSSTokenizer.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org) 2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com) 3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com)
4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved. 4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved.
5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com> 5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com>
6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org> 6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org>
7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved. 8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved.
9 * Copyright (C) 2012 Intel Corporation. All rights reserved. 9 * Copyright (C) 2012 Intel Corporation. All rights reserved.
10 * 10 *
(...skipping 286 matching lines...) Expand 10 before | Expand all | Expand 10 after
297 { 297 {
298 return m_currentCharacter8; 298 return m_currentCharacter8;
299 } 299 }
300 300
301 template <> 301 template <>
302 inline UChar*& CSSTokenizer::currentCharacter<UChar>() 302 inline UChar*& CSSTokenizer::currentCharacter<UChar>()
303 { 303 {
304 return m_currentCharacter16; 304 return m_currentCharacter16;
305 } 305 }
306 306
307 UChar*& CSSTokenizer::currentCharacter16() 307 UChar* CSSTokenizer::getStringBuffer16(size_t len)
308 { 308 {
309 if (!m_currentCharacter16) { 309 OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]);
310 m_dataStart16 = adoptArrayPtr(new UChar[m_length]);
311 m_currentCharacter16 = m_dataStart16.get();
312 }
313 310
314 return m_currentCharacter16; 311 UChar* bufferPtr = buffer.get();
312
313 m_cssStrings16.append(buffer.release());
314 return bufferPtr;
315 } 315 }
316 316
317 template <> 317 template <>
318 inline LChar* CSSTokenizer::dataStart<LChar>() 318 inline LChar* CSSTokenizer::dataStart<LChar>()
319 { 319 {
320 return m_dataStart8.get(); 320 return m_dataStart8.get();
321 } 321 }
322 322
323 template <> 323 template <>
324 inline UChar* CSSTokenizer::dataStart<UChar>() 324 inline UChar* CSSTokenizer::dataStart<UChar>()
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
379 currentCharacter += currentCharacter[2] == '\n' ? 3 : 2; 379 currentCharacter += currentCharacter[2] == '\n' ? 3 : 2;
380 } else { 380 } else {
381 currentCharacter = checkAndSkipEscape(currentCharacter); 381 currentCharacter = checkAndSkipEscape(currentCharacter);
382 if (!currentCharacter) 382 if (!currentCharacter)
383 return 0; 383 return 0;
384 } 384 }
385 } 385 }
386 } 386 }
387 387
388 template <typename CharacterType> 388 template <typename CharacterType>
389 /* static */
rune 2014/03/14 12:50:23 There's no precedent for indication of static lik
Daniel Bratell 2014/03/17 20:15:35 Done.
389 unsigned CSSTokenizer::parseEscape(CharacterType*& src) 390 unsigned CSSTokenizer::parseEscape(CharacterType*& src)
390 { 391 {
391 ASSERT(*src == '\\' && isCSSEscape(src[1])); 392 ASSERT(*src == '\\' && isCSSEscape(src[1]));
392 393
393 unsigned unicode = 0; 394 unsigned unicode = 0;
394 395
395 ++src; 396 ++src;
396 if (isASCIIHexDigit(*src)) { 397 if (isASCIIHexDigit(*src)) {
397 398
398 int length = 6; 399 int length = 6;
399 400
400 do { 401 do {
401 unicode = (unicode << 4) + toASCIIHexValue(*src++); 402 unicode = (unicode << 4) + toASCIIHexValue(*src++);
402 } while (--length && isASCIIHexDigit(*src)); 403 } while (--length && isASCIIHexDigit(*src));
403 404
404 // Characters above 0x10ffff are not handled. 405 // Characters above 0x10ffff are not handled.
405 if (unicode > 0x10ffff) 406 if (unicode > 0x10ffff)
406 unicode = 0xfffd; 407 unicode = 0xfffd;
407 408
408 // Optional space after the escape sequence. 409 // Optional space after the escape sequence.
409 if (isHTMLSpace<CharacterType>(*src)) 410 if (isHTMLSpace<CharacterType>(*src))
410 ++src; 411 ++src;
411 412
412 return unicode; 413 return unicode;
413 } 414 }
414 415
415 return *currentCharacter<CharacterType>()++; 416 return *src++;
416 } 417 }
417 418
418 template <> 419 template <>
420 /* static */
rune 2014/03/14 12:50:23 Likewise.
Daniel Bratell 2014/03/17 20:15:35 Done.
419 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode ) 421 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode )
420 { 422 {
421 ASSERT(unicode <= 0xff); 423 ASSERT(unicode <= 0xff);
422 *result = unicode; 424 *result = unicode;
423 425
424 ++result; 426 ++result;
425 } 427 }
426 428
427 template <> 429 template <>
430 /* static */
rune 2014/03/14 12:50:23 Likewise.
Daniel Bratell 2014/03/17 20:15:35 Done.
428 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode ) 431 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode )
429 { 432 {
430 // Replace unicode with a surrogate pairs when it is bigger than 0xffff 433 // Replace unicode with a surrogate pairs when it is bigger than 0xffff
431 if (U16_LENGTH(unicode) == 2) { 434 if (U16_LENGTH(unicode) == 2) {
432 *result++ = U16_LEAD(unicode); 435 *result++ = U16_LEAD(unicode);
433 *result = U16_TRAIL(unicode); 436 *result = U16_TRAIL(unicode);
434 } else { 437 } else {
435 *result = unicode; 438 *result = unicode;
436 } 439 }
437 440
438 ++result; 441 ++result;
439 } 442 }
440 443
444 template <typename SrcCharacterType>
445 /* static */
446 size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src)
447 {
448 // An identifier can't be longer than the ASCII characters used to
449 // write it down so use that count as upper limit.
rune 2014/03/14 12:50:23 By that you mean that: "For escapes, the number of
Daniel Bratell 2014/03/17 15:55:35 That is what it means, but considering that this c
Daniel Bratell 2014/03/17 20:15:35 Done.
Daniel Bratell 2014/03/17 20:15:35 Done.
Daniel Bratell 2014/03/17 20:15:35 Done.
450 SrcCharacterType* start = src;
451 do {
452 if (LIKELY(*src != '\\'))
453 src++;
454 else
455 parseEscape<SrcCharacterType>(src);
456 } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));
457
458 return src - start;
459 }
460
441 template <typename SrcCharacterType, typename DestCharacterType> 461 template <typename SrcCharacterType, typename DestCharacterType>
462 /* static */
rune 2014/03/14 12:50:23 and here.
Daniel Bratell 2014/03/17 20:15:35 Done.
442 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCh aracterType*& result, bool& hasEscape) 463 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCh aracterType*& result, bool& hasEscape)
443 { 464 {
444 hasEscape = false; 465 hasEscape = false;
445 do { 466 do {
446 if (LIKELY(*src != '\\')) { 467 if (LIKELY(*src != '\\')) {
447 *result++ = *src++; 468 *result++ = *src++;
448 } else { 469 } else {
449 hasEscape = true; 470 hasEscape = true;
450 SrcCharacterType* savedEscapeStart = src; 471 SrcCharacterType* savedEscapeStart = src;
451 unsigned unicode = parseEscape<SrcCharacterType>(src); 472 unsigned unicode = parseEscape<SrcCharacterType>(src);
(...skipping 12 matching lines...) Expand all
464 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin g& resultString, bool& hasEscape) 485 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin g& resultString, bool& hasEscape)
465 { 486 {
466 // If a valid identifier start is found, we can safely 487 // If a valid identifier start is found, we can safely
467 // parse the identifier until the next invalid character. 488 // parse the identifier until the next invalid character.
468 ASSERT(isIdentifierStart<CharacterType>()); 489 ASSERT(isIdentifierStart<CharacterType>());
469 490
470 CharacterType* start = currentCharacter<CharacterType>(); 491 CharacterType* start = currentCharacter<CharacterType>();
471 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), res ult, hasEscape))) { 492 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), res ult, hasEscape))) {
472 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue 493 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
473 ASSERT(is8BitSource()); 494 ASSERT(is8BitSource());
474 UChar*& result16 = currentCharacter16(); 495 UChar* result16 = getStringBuffer16((result - start) + peekMaxIdentifier Len(result));
475 UChar* start16 = result16; 496 UChar* start16 = result16;
476 int i = 0; 497 int i = 0;
477 for (; i < result - start; i++) 498 for (; i < result - start; i++)
478 result16[i] = start[i]; 499 result16[i] = start[i];
479 500
480 result16 += i; 501 result16 += i;
481 502
482 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, has Escape); 503 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, has Escape);
483 504
484 resultString.init(start16, result16 - start16); 505 resultString.init(start16, result16 - start16);
485 506
486 return; 507 return;
487 } 508 }
488 509
489 resultString.init(start, result - start); 510 resultString.init(start, result - start);
490 } 511 }
491 512
513 template <typename SrcCharacterType>
514 /* static */
rune 2014/03/14 12:50:23 Same.
Daniel Bratell 2014/03/17 20:15:35 Done.
515 size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote)
516 {
517 // A string can't be longer than the ASCII characters used to write
518 // it down so use that as the upper limit.
519 SrcCharacterType* start = src;
520 while (true) {
521 if (UNLIKELY(*src == quote)) {
522 // String parsing is done.
523 ++src;
524 break;
525 }
526 if (UNLIKELY(!*src)) {
527 // String parsing is done, but don't advance pointer if at the end of input.
528 break;
529 }
530 ASSERT(*src > '\r' || (*src < '\n' && *src) || *src == '\v');
531
532 if (LIKELY(src[0] != '\\'))
533 src++;
534 else if (src[1] == '\n' || src[1] == '\f')
535 src += 2;
536 else if (src[1] == '\r')
537 src += src[2] == '\n' ? 3 : 2;
538 else
539 parseEscape<SrcCharacterType>(src);
540 }
541
542 return src - start;
543 }
544
492 template <typename SrcCharacterType, typename DestCharacterType> 545 template <typename SrcCharacterType, typename DestCharacterType>
546 /* static */
rune 2014/03/14 12:50:23 Same.
Daniel Bratell 2014/03/17 20:15:35 Done.
493 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharac terType*& result, UChar quote) 547 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharac terType*& result, UChar quote)
494 { 548 {
495 while (true) { 549 while (true) {
496 if (UNLIKELY(*src == quote)) { 550 if (UNLIKELY(*src == quote)) {
497 // String parsing is done. 551 // String parsing is done.
498 ++src; 552 ++src;
499 return true; 553 return true;
500 } 554 }
501 if (UNLIKELY(!*src)) { 555 if (UNLIKELY(!*src)) {
501 // String parsing is done, but don't advance pointer if at the end of input. 556 // String parsing is done, but don't advance pointer if at the end of input.
(...skipping 22 matching lines...) Expand all
525 } 579 }
526 580
527 template <typename CharacterType> 581 template <typename CharacterType>
528 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& r esultString, UChar quote) 582 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& r esultString, UChar quote)
529 { 583 {
530 CharacterType* start = currentCharacter<CharacterType>(); 584 CharacterType* start = currentCharacter<CharacterType>();
531 585
532 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) { 586 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) {
533 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue 587 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
534 ASSERT(is8BitSource()); 588 ASSERT(is8BitSource());
535 UChar*& result16 = currentCharacter16(); 589 UChar* result16 = getStringBuffer16((result - start) + peekMaxStringLen( result, quote));
536 UChar* start16 = result16; 590 UChar* start16 = result16;
537 int i = 0; 591 int i = 0;
538 for (; i < result - start; i++) 592 for (; i < result - start; i++)
539 result16[i] = start[i]; 593 result16[i] = start[i];
540 594
541 result16 += i; 595 result16 += i;
542 596
543 parseStringInternal(currentCharacter<CharacterType>(), result16, quote); 597 parseStringInternal(currentCharacter<CharacterType>(), result16, quote);
544 598
545 resultString.init(start16, result16 - start16); 599 resultString.init(start16, result16 - start16);
(...skipping 27 matching lines...) Expand all
573 } 627 }
574 } 628 }
575 629
576 end = skipWhiteSpace(end); 630 end = skipWhiteSpace(end);
577 if (*end != ')') 631 if (*end != ')')
578 return false; 632 return false;
579 633
580 return true; 634 return true;
581 } 635 }
582 636
637 template <typename SrcCharacterType>
638 /* static */
rune 2014/03/14 12:50:23 Same.
Daniel Bratell 2014/03/17 20:15:35 Done.
639 inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote)
640 {
641 // A URI can't be longer than the ASCII characters used to write
642 // it down so use that as the upper limit.
643 SrcCharacterType* start = src;
644 if (quote) {
645 ASSERT(quote == '"' || quote == '\'');
646 return peekMaxStringLen(src, quote);
647 }
648
649 while (isURILetter(*src)) {
650 if (LIKELY(*src != '\\'))
651 src++;
652 else
653 parseEscape<SrcCharacterType>(src);
654 }
655
656 return src - start;
657 }
658
583 template <typename SrcCharacterType, typename DestCharacterType> 659 template <typename SrcCharacterType, typename DestCharacterType>
660 /* static */
rune 2014/03/14 12:50:23 Same.
Daniel Bratell 2014/03/17 20:15:35 Done.
584 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacter Type*& dest, UChar quote) 661 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacter Type*& dest, UChar quote)
585 { 662 {
586 if (quote) { 663 if (quote) {
587 ASSERT(quote == '"' || quote == '\''); 664 ASSERT(quote == '"' || quote == '\'');
588 return parseStringInternal(src, dest, quote); 665 return parseStringInternal(src, dest, quote);
589 } 666 }
590 667
591 while (isURILetter(*src)) { 668 while (isURILetter(*src)) {
592 if (LIKELY(*src != '\\')) { 669 if (LIKELY(*src != '\\')) {
593 *dest++ = *src++; 670 *dest++ = *src++;
594 } else { 671 } else {
595 unsigned unicode = parseEscape<SrcCharacterType>(src); 672 unsigned unicode = parseEscape<SrcCharacterType>(src);
596 if (unicode > 0xff && sizeof(SrcCharacterType) == 1) 673 if (unicode > 0xff && sizeof(DestCharacterType) == 1)
597 return false; 674 return false;
598 UnicodeToChars(dest, unicode); 675 UnicodeToChars(dest, unicode);
599 } 676 }
600 } 677 }
601 678
602 return true; 679 return true;
603 } 680 }
604 681
605 template <typename CharacterType> 682 template <typename CharacterType>
606 inline void CSSTokenizer::parseURI(CSSParserString& string) 683 inline void CSSTokenizer::parseURI(CSSParserString& string)
607 { 684 {
608 CharacterType* uriStart; 685 CharacterType* uriStart;
609 CharacterType* uriEnd; 686 CharacterType* uriEnd;
610 UChar quote; 687 UChar quote;
611 if (!findURI(uriStart, uriEnd, quote)) 688 if (!findURI(uriStart, uriEnd, quote))
612 return; 689 return;
613 690
614 CharacterType* dest = currentCharacter<CharacterType>() = uriStart; 691 CharacterType* dest = currentCharacter<CharacterType>() = uriStart;
615 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote)) ) { 692 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote)) ) {
616 string.init(uriStart, dest - uriStart); 693 string.init(uriStart, dest - uriStart);
617 } else { 694 } else {
618 // An escape sequence was encountered that can't be stored in 8 bits. 695 // An escape sequence was encountered that can't be stored in 8 bits.
619 // Reset the current character to the start of the URI and re-parse with 696 // Reset the current character to the start of the URI and re-parse with
620 // a 16-bit destination. 697 // a 16-bit destination.
621 ASSERT(is8BitSource()); 698 ASSERT(is8BitSource());
622 UChar* uriStart16 = currentCharacter16(); 699 UChar* result16 = getStringBuffer16(peekMaxURILen(uriStart, quote));
700 UChar* uriStart16 = result16;
623 currentCharacter<CharacterType>() = uriStart; 701 currentCharacter<CharacterType>() = uriStart;
624 bool result = parseURIInternal(currentCharacter<CharacterType>(), curren tCharacter16(), quote); 702 bool result = parseURIInternal(currentCharacter<CharacterType>(), result 16, quote);
625 ASSERT_UNUSED(result, result); 703 ASSERT_UNUSED(result, result);
626 string.init(uriStart16, currentCharacter16() - uriStart16); 704 string.init(uriStart16, result16 - uriStart16);
627 } 705 }
628 706
629 currentCharacter<CharacterType>() = uriEnd + 1; 707 currentCharacter<CharacterType>() = uriEnd + 1;
630 m_token = URI; 708 m_token = URI;
631 } 709 }
632 710
633 template <typename CharacterType> 711 template <typename CharacterType>
634 inline bool CSSTokenizer::parseUnicodeRange() 712 inline bool CSSTokenizer::parseUnicodeRange()
635 { 713 {
636 CharacterType* character = currentCharacter<CharacterType>() + 1; 714 CharacterType* character = currentCharacter<CharacterType>() + 1;
(...skipping 889 matching lines...) Expand 10 before | Expand all | Expand 10 after
1526 m_dataStart16[length - 1] = 0; 1604 m_dataStart16[length - 1] = 0;
1527 1605
1528 m_is8BitSource = false; 1606 m_is8BitSource = false;
1529 m_currentCharacter8 = 0; 1607 m_currentCharacter8 = 0;
1530 m_currentCharacter16 = m_dataStart16.get(); 1608 m_currentCharacter16 = m_dataStart16.get();
1531 setTokenStart<UChar>(m_currentCharacter16); 1609 setTokenStart<UChar>(m_currentCharacter16);
1532 m_lexFunc = &CSSTokenizer::realLex<UChar>; 1610 m_lexFunc = &CSSTokenizer::realLex<UChar>;
1533 } 1611 }
1534 1612
1535 } // namespace WebCore 1613 } // namespace WebCore
OLDNEW
« no previous file with comments | « Source/core/css/CSSTokenizer.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698