Source/core/css/CSSTokenizer-in.cpp - Issue 196353018: Smaller CSSParser UTF16 buffers for escaped strings.

Side by Side Diff: Source/core/css/CSSTokenizer-in.cpp

Issue 196353018: Smaller CSSParser UTF16 buffers for escaped strings. (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Dropped /* static */ and rewrote comment. Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org)	2 * Copyright (C) 2003 Lars Knoll (knoll@kde.org)

3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com)	3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde@carewolf.com)

4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved.	4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved.

5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com>	5 * Copyright (C) 2007 Nicholas Shanks <webkit@nickshanks.com>

6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org>	6 * Copyright (C) 2008 Eric Seidel <eric@webkit.org>

7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)	7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)

8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved.	8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved.

9 * Copyright (C) 2012 Intel Corporation. All rights reserved.	9 * Copyright (C) 2012 Intel Corporation. All rights reserved.

10 *	10 *

(...skipping 286 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
297 {	297 {

298 return m_currentCharacter8;	298 return m_currentCharacter8;

299 }	299 }

300	300

301 template <>	301 template <>

302 inline UChar*& CSSTokenizer::currentCharacter<UChar>()	302 inline UChar*& CSSTokenizer::currentCharacter<UChar>()

303 {	303 {

304 return m_currentCharacter16;	304 return m_currentCharacter16;

305 }	305 }

306	306

307 UChar*& CSSTokenizer::currentCharacter16()	307 UChar* CSSTokenizer::getStringBuffer16(size_t len)

308 {	308 {

309 if (!m_currentCharacter16) {	309 OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]);

310 m_dataStart16 = adoptArrayPtr(new UChar[m_length]);

311 m_currentCharacter16 = m_dataStart16.get();

312 }

313	310

314 return m_currentCharacter16;	311 UChar* bufferPtr = buffer.get();

	312

	313 m_cssStrings16.append(buffer.release());

	314 return bufferPtr;

315 }	315 }

316	316

317 template <>	317 template <>

318 inline LChar* CSSTokenizer::dataStart<LChar>()	318 inline LChar* CSSTokenizer::dataStart<LChar>()

319 {	319 {

320 return m_dataStart8.get();	320 return m_dataStart8.get();

321 }	321 }

322	322

323 template <>	323 template <>

324 inline UChar* CSSTokenizer::dataStart<UChar>()	324 inline UChar* CSSTokenizer::dataStart<UChar>()

(...skipping 80 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
405 if (unicode > 0x10ffff)	405 if (unicode > 0x10ffff)

406 unicode = 0xfffd;	406 unicode = 0xfffd;

407	407

408 // Optional space after the escape sequence.	408 // Optional space after the escape sequence.

409 if (isHTMLSpace<CharacterType>(*src))	409 if (isHTMLSpace<CharacterType>(*src))

410 ++src;	410 ++src;

411	411

412 return unicode;	412 return unicode;

413 }	413 }

414	414

415 return *currentCharacter<CharacterType>()++;	415 return *src++;

416 }	416 }

417	417

418 template <>	418 template <>

419 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode )	419 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode )

420 {	420 {

421 ASSERT(unicode <= 0xff);	421 ASSERT(unicode <= 0xff);

422 *result = unicode;	422 *result = unicode;

423	423

424 ++result;	424 ++result;

425 }	425 }

426	426

427 template <>	427 template <>

428 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode )	428 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode )

429 {	429 {

430 // Replace unicode with a surrogate pairs when it is bigger than 0xffff	430 // Replace unicode with a surrogate pairs when it is bigger than 0xffff

431 if (U16_LENGTH(unicode) == 2) {	431 if (U16_LENGTH(unicode) == 2) {

432 *result++ = U16_LEAD(unicode);	432 *result++ = U16_LEAD(unicode);

433 *result = U16_TRAIL(unicode);	433 *result = U16_TRAIL(unicode);

434 } else {	434 } else {

435 *result = unicode;	435 *result = unicode;

436 }	436 }

437	437

438 ++result;	438 ++result;

439 }	439 }

440	440

	441 template <typename SrcCharacterType>

	442 size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src)

	443 {

	444 // The decoded form of an identifier (after resolving escape

	445 // sequences) will not contain more characters (ASCII or UTF-16

	446 // codepoints) than the input. This code can therefore ignore

	447 // escape sequences completely.

	448 SrcCharacterType* start = src;

	449 do {

	450 if (LIKELY(*src != '\\'))

	451 src++;

	452 else

	453 parseEscape<SrcCharacterType>(src);

	454 } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));

	455

	456 return src - start;

	457 }

	458

441 template <typename SrcCharacterType, typename DestCharacterType>	459 template <typename SrcCharacterType, typename DestCharacterType>

442 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCharacterType*& result, bool& hasEscape)	460 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCharacterType*& result, bool& hasEscape)

443 {	461 {

444 hasEscape = false;	462 hasEscape = false;

445 do {	463 do {

446 if (LIKELY(*src != '\\')) {	464 if (LIKELY(*src != '\\')) {

447 *result++ = *src++;	465 *result++ = *src++;

448 } else {	466 } else {

449 hasEscape = true;	467 hasEscape = true;

450 SrcCharacterType* savedEscapeStart = src;	468 SrcCharacterType* savedEscapeStart = src;

(...skipping 13 matching lines...) Expand all Loading...
464 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserString& resultString, bool& hasEscape)	482 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserString& resultString, bool& hasEscape)

465 {	483 {

466 // If a valid identifier start is found, we can safely	484 // If a valid identifier start is found, we can safely

467 // parse the identifier until the next invalid character.	485 // parse the identifier until the next invalid character.

468 ASSERT(isIdentifierStart<CharacterType>());	486 ASSERT(isIdentifierStart<CharacterType>());

469	487

470 CharacterType* start = currentCharacter<CharacterType>();	488 CharacterType* start = currentCharacter<CharacterType>();

471 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), result, hasEscape))) {	489 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), result, hasEscape))) {

472 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue	490 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue

473 ASSERT(is8BitSource());	491 ASSERT(is8BitSource());

474 UChar*& result16 = currentCharacter16();	492 UChar* result16 = getStringBuffer16((result - start) + peekMaxIdentifierLen(result));

475 UChar* start16 = result16;	493 UChar* start16 = result16;

476 int i = 0;	494 int i = 0;

477 for (; i < result - start; i++)	495 for (; i < result - start; i++)

478 result16[i] = start[i];	496 result16[i] = start[i];

479	497

480 result16 += i;	498 result16 += i;

481	499

482 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, hasEscape);	500 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, hasEscape);

483	501

484 resultString.init(start16, result16 - start16);	502 resultString.init(start16, result16 - start16);

485	503

486 return;	504 return;

487 }	505 }

488	506

489 resultString.init(start, result - start);	507 resultString.init(start, result - start);

490 }	508 }

491	509

	510 template <typename SrcCharacterType>

	511 size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote)

	512 {

	513 // A string can't be longer than the ASCII characters used to write

	514 // it down so use that as the upper limit.
	rune 2014/03/17 20:18:35 I just noticed now this comment has the same issue as I commented on in the previous patch set. Daniel Bratell 2014/03/17 20:25:17 Done.
	515 SrcCharacterType* start = src;

	516 while (true) {

	517 if (UNLIKELY(*src == quote)) {

	518 // String parsing is done.

	519 ++src;

	520 break;

	521 }

	522 if (UNLIKELY(!*src)) {

	523 // String parsing is done, but don't advance pointer if at the end of input.

	524 break;

	525 }

	526 ASSERT(*src > '\r' || (*src < '\n' && *src) || *src == '\v');

	527

	528 if (LIKELY(src[0] != '\\'))

	529 src++;

	530 else if (src[1] == '\n' || src[1] == '\f')

	531 src += 2;

	532 else if (src[1] == '\r')

	533 src += src[2] == '\n' ? 3 : 2;

	534 else

	535 parseEscape<SrcCharacterType>(src);

	536 }

	537

	538 return src - start;

	539 }

	540

492 template <typename SrcCharacterType, typename DestCharacterType>	541 template <typename SrcCharacterType, typename DestCharacterType>

493 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharacterType*& result, UChar quote)	542 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharacterType*& result, UChar quote)

494 {	543 {

495 while (true) {	544 while (true) {

496 if (UNLIKELY(*src == quote)) {	545 if (UNLIKELY(*src == quote)) {

497 // String parsing is done.	546 // String parsing is done.

498 ++src;	547 ++src;

499 return true;	548 return true;

500 }	549 }

501 if (UNLIKELY(!*src)) {	550 if (UNLIKELY(!*src)) {

(...skipping 23 matching lines...) Expand all Loading...
525 }	574 }

526	575

527 template <typename CharacterType>	576 template <typename CharacterType>

528 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& resultString, UChar quote)	577 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& resultString, UChar quote)

529 {	578 {

530 CharacterType* start = currentCharacter<CharacterType>();	579 CharacterType* start = currentCharacter<CharacterType>();

531	580

532 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) {	581 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) {

533 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue	582 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue

534 ASSERT(is8BitSource());	583 ASSERT(is8BitSource());

535 UChar*& result16 = currentCharacter16();	584 UChar* result16 = getStringBuffer16((result - start) + peekMaxStringLen(result, quote));

536 UChar* start16 = result16;	585 UChar* start16 = result16;

537 int i = 0;	586 int i = 0;

538 for (; i < result - start; i++)	587 for (; i < result - start; i++)

539 result16[i] = start[i];	588 result16[i] = start[i];

540	589

541 result16 += i;	590 result16 += i;

542	591

543 parseStringInternal(currentCharacter<CharacterType>(), result16, quote);	592 parseStringInternal(currentCharacter<CharacterType>(), result16, quote);

544	593

545 resultString.init(start16, result16 - start16);	594 resultString.init(start16, result16 - start16);

(...skipping 27 matching lines...) Expand all Loading...
573 }	622 }

574 }	623 }

575	624

576 end = skipWhiteSpace(end);	625 end = skipWhiteSpace(end);

577 if (*end != ')')	626 if (*end != ')')

578 return false;	627 return false;

579	628

580 return true;	629 return true;

581 }	630 }

582	631

	632 template <typename SrcCharacterType>

	633 inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote)

	634 {

	635 // A URI can't be longer than the ASCII characters used to write

	636 // it down so use that as the upper limit.
	rune 2014/03/17 20:18:35 I just noticed now this comment has the same issue as I commented on in the previous patch set. Daniel Bratell 2014/03/17 20:25:17 Done.
	637 SrcCharacterType* start = src;

	638 if (quote) {

	639 ASSERT(quote == '"' || quote == '\'');

	640 return peekMaxStringLen(src, quote);

	641 }

	642

	643 while (isURILetter(*src)) {

	644 if (LIKELY(*src != '\\'))

	645 src++;

	646 else

	647 parseEscape<SrcCharacterType>(src);

	648 }

	649

	650 return src - start;

	651 }

	652

583 template <typename SrcCharacterType, typename DestCharacterType>	653 template <typename SrcCharacterType, typename DestCharacterType>

584 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacterType*& dest, UChar quote)	654 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacterType*& dest, UChar quote)

585 {	655 {

586 if (quote) {	656 if (quote) {

587 ASSERT(quote == '"' || quote == '\'');	657 ASSERT(quote == '"' || quote == '\'');

588 return parseStringInternal(src, dest, quote);	658 return parseStringInternal(src, dest, quote);

589 }	659 }

590	660

591 while (isURILetter(*src)) {	661 while (isURILetter(*src)) {

592 if (LIKELY(*src != '\\')) {	662 if (LIKELY(*src != '\\')) {

593 *dest++ = *src++;	663 *dest++ = *src++;

594 } else {	664 } else {

595 unsigned unicode = parseEscape<SrcCharacterType>(src);	665 unsigned unicode = parseEscape<SrcCharacterType>(src);

596 if (unicode > 0xff && sizeof(SrcCharacterType) == 1)	666 if (unicode > 0xff && sizeof(DestCharacterType) == 1)
	Daniel Bratell 2014/03/17 16:01:07 Note: This is an unrelated bug fix. Before this fix sizeof(SrcCharacterType) == sizeof(DestCharacterType) when this code was run so the bug wasn't noticed. Daniel Bratell 2014/03/17 20:15:36 Done.
597 return false;	667 return false;

598 UnicodeToChars(dest, unicode);	668 UnicodeToChars(dest, unicode);

599 }	669 }

600 }	670 }

601	671

602 return true;	672 return true;

603 }	673 }

604	674

605 template <typename CharacterType>	675 template <typename CharacterType>

606 inline void CSSTokenizer::parseURI(CSSParserString& string)	676 inline void CSSTokenizer::parseURI(CSSParserString& string)

607 {	677 {

608 CharacterType* uriStart;	678 CharacterType* uriStart;

609 CharacterType* uriEnd;	679 CharacterType* uriEnd;

610 UChar quote;	680 UChar quote;

611 if (!findURI(uriStart, uriEnd, quote))	681 if (!findURI(uriStart, uriEnd, quote))

612 return;	682 return;

613	683

614 CharacterType* dest = currentCharacter<CharacterType>() = uriStart;	684 CharacterType* dest = currentCharacter<CharacterType>() = uriStart;

615 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))) {	685 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))) {

616 string.init(uriStart, dest - uriStart);	686 string.init(uriStart, dest - uriStart);

617 } else {	687 } else {

618 // An escape sequence was encountered that can't be stored in 8 bits.	688 // An escape sequence was encountered that can't be stored in 8 bits.

619 // Reset the current character to the start of the URI and re-parse with	689 // Reset the current character to the start of the URI and re-parse with

620 // a 16-bit destination.	690 // a 16-bit destination.

621 ASSERT(is8BitSource());	691 ASSERT(is8BitSource());

622 UChar* uriStart16 = currentCharacter16();	692 UChar* result16 = getStringBuffer16(peekMaxURILen(uriStart, quote));

	693 UChar* uriStart16 = result16;

623 currentCharacter<CharacterType>() = uriStart;	694 currentCharacter<CharacterType>() = uriStart;

624 bool result = parseURIInternal(currentCharacter<CharacterType>(), currentCharacter16(), quote);	695 bool result = parseURIInternal(currentCharacter<CharacterType>(), result16, quote);

625 ASSERT_UNUSED(result, result);	696 ASSERT_UNUSED(result, result);

626 string.init(uriStart16, currentCharacter16() - uriStart16);	697 string.init(uriStart16, result16 - uriStart16);

627 }	698 }

628	699

629 currentCharacter<CharacterType>() = uriEnd + 1;	700 currentCharacter<CharacterType>() = uriEnd + 1;

630 m_token = URI;	701 m_token = URI;

631 }	702 }

632	703

633 template <typename CharacterType>	704 template <typename CharacterType>

634 inline bool CSSTokenizer::parseUnicodeRange()	705 inline bool CSSTokenizer::parseUnicodeRange()

635 {	706 {

636 CharacterType* character = currentCharacter<CharacterType>() + 1;	707 CharacterType* character = currentCharacter<CharacterType>() + 1;

(...skipping 889 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1526 m_dataStart16[length - 1] = 0;	1597 m_dataStart16[length - 1] = 0;

1527	1598

1528 m_is8BitSource = false;	1599 m_is8BitSource = false;

1529 m_currentCharacter8 = 0;	1600 m_currentCharacter8 = 0;

1530 m_currentCharacter16 = m_dataStart16.get();	1601 m_currentCharacter16 = m_dataStart16.get();

1531 setTokenStart<UChar>(m_currentCharacter16);	1602 setTokenStart<UChar>(m_currentCharacter16);

1532 m_lexFunc = &CSSTokenizer::realLex<UChar>;	1603 m_lexFunc = &CSSTokenizer::realLex<UChar>;

1533 }	1604 }

1534	1605

1535 } // namespace WebCore	1606 } // namespace WebCore

OLD	NEW

« no previous file with comments | « Source/core/css/CSSTokenizer.h ('k') | no next file » | no next file with comments »