Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(616)

Unified Diff: Source/core/css/CSSTokenizer-in.cpp

Issue 196353018: Smaller CSSParser UTF16 buffers for escaped strings. (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Rewrote comments. Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« Source/core/css/CSSTokenizer.h ('K') | « Source/core/css/CSSTokenizer.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: Source/core/css/CSSTokenizer-in.cpp
diff --git a/Source/core/css/CSSTokenizer-in.cpp b/Source/core/css/CSSTokenizer-in.cpp
index 9c3fb0a771ba9cefc78639b74188eee04bf2973b..123e8463a31a4896f33cb303f6bffc0990930f28 100644
--- a/Source/core/css/CSSTokenizer-in.cpp
+++ b/Source/core/css/CSSTokenizer-in.cpp
@@ -304,14 +304,14 @@ inline UChar*& CSSTokenizer::currentCharacter<UChar>()
return m_currentCharacter16;
}
-UChar*& CSSTokenizer::currentCharacter16()
+UChar* CSSTokenizer::getStringBuffer16(size_t len)
Julien - ping for review 2014/03/20 20:57:39 We usually don't put the word "get" on getters as
Daniel Bratell 2014/03/21 15:14:39 Done.
{
- if (!m_currentCharacter16) {
- m_dataStart16 = adoptArrayPtr(new UChar[m_length]);
- m_currentCharacter16 = m_dataStart16.get();
- }
+ OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]);
- return m_currentCharacter16;
+ UChar* bufferPtr = buffer.get();
+
+ m_cssStrings16.append(buffer.release());
+ return bufferPtr;
}
template <>
@@ -412,7 +412,7 @@ unsigned CSSTokenizer::parseEscape(CharacterType*& src)
return unicode;
}
- return *currentCharacter<CharacterType>()++;
+ return *src++;
}
template <>
@@ -438,6 +438,24 @@ inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode
++result;
}
+template <typename SrcCharacterType>
+size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src)
+{
+ // Returns the number of source characters spanned by the identifier
+ // starting at src (src is a by-value copy, so the caller's position is kept).
+ // The decoded form (after resolving escape sequences) never holds more
+ // characters (ASCII or UTF-16 code units) than the input, so escapes are only stepped over.
+ SrcCharacterType* start = src;
+ do {
+ if (LIKELY(*src != '\\'))
+ src++;
+ else
+ parseEscape<SrcCharacterType>(src);
+ } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));
+
+ return src - start;
+}
+
template <typename SrcCharacterType, typename DestCharacterType>
inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCharacterType*& result, bool& hasEscape)
{
@@ -471,7 +489,7 @@ inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin
if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), result, hasEscape))) {
// Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
ASSERT(is8BitSource());
- UChar*& result16 = currentCharacter16();
+ UChar* result16 = getStringBuffer16((result - start) + peekMaxIdentifierLen(result));
UChar* start16 = result16;
int i = 0;
for (; i < result - start; i++)
@@ -489,6 +507,39 @@ inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserStrin
resultString.init(start, result - start);
}
+template <typename SrcCharacterType>
+size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote)
+{
+ // Returns the number of source characters spanned by the CSS string
+ // starting at src, counting the closing quote when one is found.
+ // The decoded form (after resolving escape sequences) never holds more
+ // characters (ASCII or UTF-16 code units) than the input, so escapes are only stepped over.
+ SrcCharacterType* start = src;
+ while (true) {
+ if (UNLIKELY(*src == quote)) {
+ // Closing quote reached: count it and stop.
+ ++src;
+ break;
+ }
+ if (UNLIKELY(!*src)) {
+ // Zero character (end of input): stop without advancing past it.
+ break;
+ }
+ ASSERT(*src > '\r' || (*src < '\n' && *src) || *src == '\v');
+
+ if (LIKELY(src[0] != '\\'))
+ src++;
+ else if (src[1] == '\n' || src[1] == '\f')
+ src += 2;
+ else if (src[1] == '\r')
+ src += src[2] == '\n' ? 3 : 2;
Julien - ping for review 2014/03/20 20:57:39 This looks awfully like checkAndSkipString, maybe
+ else
+ parseEscape<SrcCharacterType>(src);
+ }
+
+ return src - start;
+}
+
template <typename SrcCharacterType, typename DestCharacterType>
inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharacterType*& result, UChar quote)
{
@@ -532,7 +583,7 @@ inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& r
if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) {
// Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
ASSERT(is8BitSource());
- UChar*& result16 = currentCharacter16();
+ UChar* result16 = getStringBuffer16((result - start) + peekMaxStringLen(result, quote));
UChar* start16 = result16;
int i = 0;
for (; i < result - start; i++)
@@ -580,6 +631,29 @@ inline bool CSSTokenizer::findURI(CharacterType*& start, CharacterType*& end, UC
return true;
}
+template <typename SrcCharacterType>
+inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote)
+{
+ // Returns the number of source characters spanned by the URI starting
+ // at src; when quote is non-zero the count comes from peekMaxStringLen().
+ // The decoded form (after resolving escape sequences) never holds more
+ // characters (ASCII or UTF-16 code units) than the input, so escapes are only stepped over.
+ SrcCharacterType* start = src;
+ if (quote) {
+ ASSERT(quote == '"' || quote == '\'');
+ return peekMaxStringLen(src, quote);
+ }
+
+ while (isURILetter(*src)) {
+ if (LIKELY(*src != '\\'))
+ src++;
+ else
+ parseEscape<SrcCharacterType>(src);
+ }
+
+ return src - start;
+}
+
template <typename SrcCharacterType, typename DestCharacterType>
inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacterType*& dest, UChar quote)
{
@@ -593,7 +667,7 @@ inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacter
*dest++ = *src++;
} else {
unsigned unicode = parseEscape<SrcCharacterType>(src);
- if (unicode > 0xff && sizeof(SrcCharacterType) == 1)
+ if (unicode > 0xff && sizeof(DestCharacterType) == 1)
return false;
UnicodeToChars(dest, unicode);
}
@@ -619,11 +693,12 @@ inline void CSSTokenizer::parseURI(CSSParserString& string)
// Reset the current character to the start of the URI and re-parse with
// a 16-bit destination.
ASSERT(is8BitSource());
- UChar* uriStart16 = currentCharacter16();
+ UChar* result16 = getStringBuffer16(peekMaxURILen(uriStart, quote));
+ UChar* uriStart16 = result16;
currentCharacter<CharacterType>() = uriStart;
- bool result = parseURIInternal(currentCharacter<CharacterType>(), currentCharacter16(), quote);
+ bool result = parseURIInternal(currentCharacter<CharacterType>(), result16, quote);
ASSERT_UNUSED(result, result);
- string.init(uriStart16, currentCharacter16() - uriStart16);
+ string.init(uriStart16, result16 - uriStart16);
}
currentCharacter<CharacterType>() = uriEnd + 1;
« Source/core/css/CSSTokenizer.h ('K') | « Source/core/css/CSSTokenizer.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698