Source/core/css/CSSTokenizer-in.cpp - Issue 196353018: Smaller CSSParser UTF16 buffers for escaped strings.

Unified Diff: Source/core/css/CSSTokenizer-in.cpp

Issue 196353018: Smaller CSSParser UTF16 buffers for escaped strings. (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: Source/core/css/CSSTokenizer-in.cpp

diff --git a/Source/core/css/CSSTokenizer-in.cpp b/Source/core/css/CSSTokenizer-in.cpp

index 9c3fb0a771ba9cefc78639b74188eee04bf2973b..86bca4eed90403c1f822ce115c4b9bd7b8b8c1f1 100644

--- a/Source/core/css/CSSTokenizer-in.cpp

+++ b/Source/core/css/CSSTokenizer-in.cpp

@@ -304,14 +304,14 @@ inline UChar*& CSSTokenizer::currentCharacter<UChar>()

return m_currentCharacter16;

}

-UChar*& CSSTokenizer::currentCharacter16()

+UChar* CSSTokenizer::getStringBuffer16(size_t len)

{

- if (!m_currentCharacter16) {

- m_dataStart16 = adoptArrayPtr(new UChar[m_length]);

- m_currentCharacter16 = m_dataStart16.get();

- }

+ OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]);

- return m_currentCharacter16;

+ UChar* bufferPtr = buffer.get();

+ m_cssStrings16.append(buffer.release());

+ return bufferPtr;

}

template <>

@@ -386,6 +386,7 @@ static inline CharacterType* checkAndSkipString(CharacterType* currentCharacter,

}

template <typename CharacterType>

+/* static */

rune 2014/03/14 12:50:23 There's no precedent for indication of static lik…

Daniel Bratell 2014/03/17 20:15:35 Done.

unsigned CSSTokenizer::parseEscape(CharacterType*& src)

{

ASSERT(*src == '\\' && isCSSEscape(src[1]));

@@ -412,10 +413,11 @@ unsigned CSSTokenizer::parseEscape(CharacterType*& src)

return unicode;

}

- return *currentCharacter<CharacterType>()++;

+ return *src++;

}

template <>

+/* static */

rune 2014/03/14 12:50:23 Likewise.

Daniel Bratell 2014/03/17 20:15:35 Done.

inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode)

{

ASSERT(unicode <= 0xff);

@@ -425,6 +427,7 @@ inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode

}

template <>

+/* static */

rune 2014/03/14 12:50:23 Likewise.

Daniel Bratell 2014/03/17 20:15:35 Done.

inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode)

{

// Replace unicode with a surrogate pairs when it is bigger than 0xffff

@@ -438,7 +441,25 @@ inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode

++result;

}

+template <typename SrcCharacterType>

+/* static */

+size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src)

+ // An identifier can't be longer than the ASCII characters used to

+ // write it down so use that count as upper limit.

rune 2014/03/14 12:50:23 By that you mean that: "For escapes, the number of…

Daniel Bratell 2014/03/17 15:55:35 That is what it means, but considering that this c…

Daniel Bratell 2014/03/17 20:15:35 Done.

+ SrcCharacterType* start = src;

+ do {

+ if (LIKELY(*src != '\\'))

+ src++;

+ else

+ parseEscape<SrcCharacterType>(src);

+ } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));

+ return src - start;

template <typename SrcCharacterType, typename DestCharacterType>

+/* static */

rune 2014/03/14 12:50:23 and here.

Daniel Bratell 2014/03/17 20:15:35 Done.

inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCharacterType*& result, bool& hasEscape)

{

hasEscape = false;

@@ -471,7 +492,7 @@ inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin

if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), result, hasEscape))) {

// Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue

ASSERT(is8BitSource());

- UChar*& result16 = currentCharacter16();

+ UChar* result16 = getStringBuffer16((result - start) + peekMaxIdentifierLen(result));

UChar* start16 = result16;

int i = 0;

for (; i < result - start; i++)

@@ -489,7 +510,40 @@ inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin

resultString.init(start, result - start);

}

+template <typename SrcCharacterType>

+/* static */

rune 2014/03/14 12:50:23 Same.

Daniel Bratell 2014/03/17 20:15:35 Done.

+size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote)

+ // A string can't be longer than the ASCII characters used to write

+ // it down so use that as the upper limit.

+ SrcCharacterType* start = src;

+ while (true) {

+ if (UNLIKELY(*src == quote)) {

+ // String parsing is done.

+ ++src;

+ break;

+ }

+ if (UNLIKELY(!*src)) {

+ // String parsing is done, but don't advance pointer if at the end of input.

+ break;

+ }

+ ASSERT(*src > '\r' || (*src < '\n' && *src) || *src == '\v');

+ if (LIKELY(src[0] != '\\'))

+ src++;

+ else if (src[1] == '\n' || src[1] == '\f')

+ src += 2;

+ else if (src[1] == '\r')

+ src += src[2] == '\n' ? 3 : 2;

+ else

+ parseEscape<SrcCharacterType>(src);

+ }

+ return src - start;

template <typename SrcCharacterType, typename DestCharacterType>

+/* static */

rune 2014/03/14 12:50:23 Same.

Daniel Bratell 2014/03/17 20:15:35 Done.

inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharacterType*& result, UChar quote)

{

while (true) {

@@ -532,7 +586,7 @@ inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& r

if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) {

// Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue

ASSERT(is8BitSource());

- UChar*& result16 = currentCharacter16();

+ UChar* result16 = getStringBuffer16((result - start) + peekMaxStringLen(result, quote));

UChar* start16 = result16;

int i = 0;

for (; i < result - start; i++)

@@ -580,7 +634,30 @@ inline bool CSSTokenizer::findURI(CharacterType*& start, CharacterType*& end, UC

return true;

}

+template <typename SrcCharacterType>

+/* static */

rune 2014/03/14 12:50:23 Same.

Daniel Bratell 2014/03/17 20:15:35 Done.

+inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote)

+ // A URI can't be longer than the ASCII characters used to write

+ // it down so use that as the upper limit.

+ SrcCharacterType* start = src;

+ if (quote) {

+ ASSERT(quote == '"' || quote == '\'');

+ return peekMaxStringLen(src, quote);

+ }

+ while (isURILetter(*src)) {

+ if (LIKELY(*src != '\\'))

+ src++;

+ else

+ parseEscape<SrcCharacterType>(src);

+ }

+ return src - start;

template <typename SrcCharacterType, typename DestCharacterType>

+/* static */

rune 2014/03/14 12:50:23 Same.

Daniel Bratell 2014/03/17 20:15:35 Done.

inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacterType*& dest, UChar quote)

{

if (quote) {

@@ -593,7 +670,7 @@ inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacter

*dest++ = *src++;

} else {

unsigned unicode = parseEscape<SrcCharacterType>(src);

- if (unicode > 0xff && sizeof(SrcCharacterType) == 1)

+ if (unicode > 0xff && sizeof(DestCharacterType) == 1)

return false;

UnicodeToChars(dest, unicode);

}

@@ -619,11 +696,12 @@ inline void CSSTokenizer::parseURI(CSSParserString& string)

// Reset the current character to the start of the URI and re-parse with

// a 16-bit destination.

ASSERT(is8BitSource());

- UChar* uriStart16 = currentCharacter16();

+ UChar* result16 = getStringBuffer16(peekMaxURILen(uriStart, quote));

+ UChar* uriStart16 = result16;

currentCharacter<CharacterType>() = uriStart;

- bool result = parseURIInternal(currentCharacter<CharacterType>(), currentCharacter16(), quote);

+ bool result = parseURIInternal(currentCharacter<CharacterType>(), result16, quote);

ASSERT_UNUSED(result, result);

- string.init(uriStart16, currentCharacter16() - uriStart16);

+ string.init(uriStart16, result16 - uriStart16);

}

currentCharacter<CharacterType>() = uriEnd + 1;

« no previous file with comments | « Source/core/css/CSSTokenizer.h ('k') | no next file » | no next file with comments »