third_party/WebKit/Source/wtf/text/StringImpl.h - Issue 2585063002: Cache contains only ascii in StringImpl

Side by Side Diff: third_party/WebKit/Source/wtf/text/StringImpl.h

Issue 2585063002: Cache contains only ascii in StringImpl (Closed)

Patch Set: reviews Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)	2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)

3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights	3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2013 Apple Inc. All rights

4 * reserved.	4 * reserved.

5 * Copyright (C) 2009 Google Inc. All rights reserved.	5 * Copyright (C) 2009 Google Inc. All rights reserved.

6 *	6 *

7 * This library is free software; you can redistribute it and/or	7 * This library is free software; you can redistribute it and/or

8 * modify it under the terms of the GNU Library General Public	8 * modify it under the terms of the GNU Library General Public

9 * License as published by the Free Software Foundation; either	9 * License as published by the Free Software Foundation; either

10 * version 2 of the License, or (at your option) any later version.	10 * version 2 of the License, or (at your option) any later version.

(...skipping 12 matching lines...) Expand all Loading...
23	23

24 #ifndef StringImpl_h	24 #ifndef StringImpl_h

25 #define StringImpl_h	25 #define StringImpl_h

26	26

27 #include "wtf/ASCIICType.h"	27 #include "wtf/ASCIICType.h"

28 #include "wtf/Forward.h"	28 #include "wtf/Forward.h"

29 #include "wtf/HashMap.h"	29 #include "wtf/HashMap.h"

30 #include "wtf/StringHasher.h"	30 #include "wtf/StringHasher.h"

31 #include "wtf/Vector.h"	31 #include "wtf/Vector.h"

32 #include "wtf/WTFExport.h"	32 #include "wtf/WTFExport.h"

	33 #include "wtf/text/ASCIIFastPath.h"

33 #include "wtf/text/Unicode.h"	34 #include "wtf/text/Unicode.h"

34 #include <limits.h>	35 #include <limits.h>

35 #include <string.h>	36 #include <string.h>

36	37

37 #if OS(MACOSX)	38 #if OS(MACOSX)

38 typedef const struct __CFString* CFStringRef;	39 typedef const struct __CFString* CFStringRef;

39 #endif	40 #endif

40	41

41 #ifdef __OBJC__	42 #ifdef __OBJC__

42 @class NSString;	43 @class NSString;

(...skipping 83 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
126	127

127 // Used to construct static strings, which have a special refCount that can	128 // Used to construct static strings, which have a special refCount that can

128 // never hit zero. This means that the static string will never be	129 // never hit zero. This means that the static string will never be

129 // destroyed, which is important because static strings will be shared	130 // destroyed, which is important because static strings will be shared

130 // across threads & ref-counted in a non-threadsafe manner.	131 // across threads & ref-counted in a non-threadsafe manner.

131 enum ConstructEmptyStringTag { ConstructEmptyString };	132 enum ConstructEmptyStringTag { ConstructEmptyString };

132 explicit StringImpl(ConstructEmptyStringTag)	133 explicit StringImpl(ConstructEmptyStringTag)

133 : m_refCount(1),	134 : m_refCount(1),

134 m_length(0),	135 m_length(0),

135 m_hash(0),	136 m_hash(0),

	137 m_containsOnlyASCII(false),
	esprehn 2017/01/04 20:30:37: true. Charlie Harrison 2017/01/04 20:53:51: Done.
	138 m_needsASCIICheck(true),
	esprehn 2017/01/04 20:30:37: false. Charlie Harrison 2017/01/04 20:53:51: Done.
136 m_isAtomic(false),	139 m_isAtomic(false),

137 m_is8Bit(true),	140 m_is8Bit(true),

138 m_isStatic(true) {	141 m_isStatic(true) {

139 // Ensure that the hash is computed so that AtomicStringHash can call	142 // Ensure that the hash is computed so that AtomicStringHash can call

140 // existingHash() with impunity. The empty string is special because it	143 // existingHash() with impunity. The empty string is special because it

141 // is never entered into AtomicString's HashKey, but still needs to	144 // is never entered into AtomicString's HashKey, but still needs to

142 // compare correctly.	145 // compare correctly.

143 STRING_STATS_ADD_8BIT_STRING(m_length);	146 STRING_STATS_ADD_8BIT_STRING(m_length);

144 hash();	147 hash();

145 }	148 }

146	149

147 enum ConstructEmptyString16BitTag { ConstructEmptyString16Bit };	150 enum ConstructEmptyString16BitTag { ConstructEmptyString16Bit };

148 explicit StringImpl(ConstructEmptyString16BitTag)	151 explicit StringImpl(ConstructEmptyString16BitTag)

149 : m_refCount(1),	152 : m_refCount(1),

150 m_length(0),	153 m_length(0),

151 m_hash(0),	154 m_hash(0),

	155 m_containsOnlyASCII(false),
	esprehn 2017/01/04 20:30:37: m_containsOnlyASCII(true), the string is empty. Charlie Harrison 2017/01/04 20:53:51: Done.
	156 m_needsASCIICheck(true),
	esprehn 2017/01/04 20:30:37: false. Charlie Harrison 2017/01/04 20:53:50: Done.
152 m_isAtomic(false),	157 m_isAtomic(false),

153 m_is8Bit(false),	158 m_is8Bit(false),

154 m_isStatic(true) {	159 m_isStatic(true) {

155 STRING_STATS_ADD_16BIT_STRING(m_length);	160 STRING_STATS_ADD_16BIT_STRING(m_length);

156 hash();	161 hash();

157 }	162 }

158	163

159 // FIXME: there has to be a less hacky way to do this.	164 // FIXME: there has to be a less hacky way to do this.

160 enum Force8Bit { Force8BitConstructor };	165 enum Force8Bit { Force8BitConstructor };

161 StringImpl(unsigned length, Force8Bit)	166 StringImpl(unsigned length, Force8Bit)

162 : m_refCount(1),	167 : m_refCount(1),

163 m_length(length),	168 m_length(length),

164 m_hash(0),	169 m_hash(0),

	170 m_containsOnlyASCII(false),
	esprehn 2017/01/04 20:30:37: (!length). Charlie Harrison 2017/01/04 20:53:51: Done.
	171 m_needsASCIICheck(true),
	esprehn 2017/01/04 20:30:37: (length). Charlie Harrison 2017/01/04 20:53:50: Done.
165 m_isAtomic(false),	172 m_isAtomic(false),

166 m_is8Bit(true),	173 m_is8Bit(true),

167 m_isStatic(false) {	174 m_isStatic(false) {

168 DCHECK(m_length);	175 DCHECK(m_length);

169 STRING_STATS_ADD_8BIT_STRING(m_length);	176 STRING_STATS_ADD_8BIT_STRING(m_length);

170 }	177 }

171	178

172 StringImpl(unsigned length)	179 StringImpl(unsigned length)

173 : m_refCount(1),	180 : m_refCount(1),

174 m_length(length),	181 m_length(length),

175 m_hash(0),	182 m_hash(0),

	183 m_containsOnlyASCII(false),
	esprehn 2017/01/04 20:30:37: (!length). Charlie Harrison 2017/01/04 20:53:50: Done.
	184 m_needsASCIICheck(true),
	esprehn 2017/01/04 20:30:37: (length). Charlie Harrison 2017/01/04 20:53:50: Done.
176 m_isAtomic(false),	185 m_isAtomic(false),

177 m_is8Bit(false),	186 m_is8Bit(false),

178 m_isStatic(false) {	187 m_isStatic(false) {

179 DCHECK(m_length);	188 DCHECK(m_length);

180 STRING_STATS_ADD_16BIT_STRING(m_length);	189 STRING_STATS_ADD_16BIT_STRING(m_length);

181 }	190 }

182	191

183 enum StaticStringTag { StaticString };	192 enum StaticStringTag { StaticString };

184 StringImpl(unsigned length, unsigned hash, StaticStringTag)	193 StringImpl(unsigned length, unsigned hash, StaticStringTag)

185 : m_refCount(1),	194 : m_refCount(1),

186 m_length(length),	195 m_length(length),

187 m_hash(hash),	196 m_hash(hash),

	197 m_containsOnlyASCII(false),

	198 m_needsASCIICheck(true),
	esprehn 2017/01/04 20:30:37: ditto. Charlie Harrison 2017/01/04 20:53:50: Done.
188 m_isAtomic(false),	199 m_isAtomic(false),

189 m_is8Bit(true),	200 m_is8Bit(true),

190 m_isStatic(true) {}	201 m_isStatic(true) {}

191	202

192 public:	203 public:

193 ~StringImpl();	204 ~StringImpl();

194	205

195 static StringImpl* createStatic(const char* string,	206 static StringImpl* createStatic(const char* string,

196 unsigned length,	207 unsigned length,

197 unsigned hash);	208 unsigned hash);

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
246	257

247 size_t charactersSizeInBytes() const {	258 size_t charactersSizeInBytes() const {

248 return length() * (is8Bit() ? sizeof(LChar) : sizeof(UChar));	259 return length() * (is8Bit() ? sizeof(LChar) : sizeof(UChar));

249 }	260 }

250	261

251 bool isAtomic() const { return m_isAtomic; }	262 bool isAtomic() const { return m_isAtomic; }

252 void setIsAtomic(bool isAtomic) { m_isAtomic = isAtomic; }	263 void setIsAtomic(bool isAtomic) { m_isAtomic = isAtomic; }

253	264

254 bool isStatic() const { return m_isStatic; }	265 bool isStatic() const { return m_isStatic; }

255	266

	267 bool containsOnlyASCII() const;

	268

256 bool isSafeToSendToAnotherThread() const;	269 bool isSafeToSendToAnotherThread() const;

257	270

258 // The high bits of 'hash' are always empty, but we prefer to store our	271 // The high bits of 'hash' are always empty, but we prefer to store our

259 // flags in the low bits because it makes them slightly more efficient to	272 // flags in the low bits because it makes them slightly more efficient to

260 // access. So, we shift left and right when setting and getting our hash	273 // access. So, we shift left and right when setting and getting our hash

261 // code.	274 // code.

262 void setHash(unsigned hash) const {	275 void setHash(unsigned hash) const {

263 DCHECK(!hasHash());	276 DCHECK(!hasHash());

264 // Multiple clients assume that StringHasher is the canonical string	277 // Multiple clients assume that StringHasher is the canonical string

265 // hash function.	278 // hash function.

(...skipping 221 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
487 DCHECK(hasHash());	500 DCHECK(hasHash());

488 DCHECK_EQ(existingHash(), StringHasher::computeHashAndMaskTop8Bits(	501 DCHECK_EQ(existingHash(), StringHasher::computeHashAndMaskTop8Bits(

489 characters8(), length()));	502 characters8(), length()));

490 }	503 }

491 #endif	504 #endif

492	505

493 private:	506 private:

494 unsigned m_refCount;	507 unsigned m_refCount;

495 const unsigned m_length;	508 const unsigned m_length;

496 mutable unsigned m_hash : 24;	509 mutable unsigned m_hash : 24;

	510 mutable unsigned m_containsOnlyASCII : 1;

	511 mutable unsigned m_needsASCIICheck : 1;

497 unsigned m_isAtomic : 1;	512 unsigned m_isAtomic : 1;

498 const unsigned m_is8Bit : 1;	513 const unsigned m_is8Bit : 1;

499 const unsigned m_isStatic : 1;	514 const unsigned m_isStatic : 1;

500 };	515 };

501	516

502 template <>	517 template <>

503 ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const {	518 ALWAYS_INLINE const LChar* StringImpl::getCharacters<LChar>() const {

504 return characters8();	519 return characters8();

505 }	520 }

506	521

(...skipping 13 matching lines...) Expand all Loading...
520 return equal(a, reinterpret_cast<const LChar*>(b), length);	535 return equal(a, reinterpret_cast<const LChar*>(b), length);

521 }	536 }

522 inline bool equal(const LChar* a, StringImpl* b) {	537 inline bool equal(const LChar* a, StringImpl* b) {

523 return equal(b, a);	538 return equal(b, a);

524 }	539 }

525 inline bool equal(const char* a, StringImpl* b) {	540 inline bool equal(const char* a, StringImpl* b) {

526 return equal(b, reinterpret_cast<const LChar*>(a));	541 return equal(b, reinterpret_cast<const LChar*>(a));

527 }	542 }

528 WTF_EXPORT bool equalNonNull(const StringImpl* a, const StringImpl* b);	543 WTF_EXPORT bool equalNonNull(const StringImpl* a, const StringImpl* b);

529	544

	545 ALWAYS_INLINE bool StringImpl::containsOnlyASCII() const {

	546 if (m_needsASCIICheck) {

	547 m_containsOnlyASCII =

	548 !length() ||
	esprehn 2017/01/04 20:30:37: Strings are immutable, so you don't need the length check here; you can do it in the constructor. Charlie Harrison 2017/01/04 20:53:50: Done.
	549 (is8Bit() ? charactersAreAllASCII(characters8(), length())

	550 : charactersAreAllASCII(characters16(), length()));
	esprehn 2017/01/04 20:30:37: This is inlining a lot of code, which I know the old code did, but instead I think you want to put the slow path in a separate out-of-line method, and only ALWAYS_INLINE the fast path. That'll actually be faster since it makes the functions smaller. Suggested shape: ALWAYS_INLINE bool StringImpl::containsOnlyASCII() const { if (m_needsASCIICheck) updateContainsOnlyASCII(); return m_containsOnlyASCII; } — and updateContainsOnlyASCII is in the .cpp file, out of line. Charlie Harrison 2017/01/04 20:53:50: SGTM. Done.
	551 m_needsASCIICheck = false;

	552 }

	553 return m_containsOnlyASCII;

	554 }

	555

530 template <typename CharType>	556 template <typename CharType>

531 ALWAYS_INLINE bool equal(const CharType* a,	557 ALWAYS_INLINE bool equal(const CharType* a,

532 const CharType* b,	558 const CharType* b,

533 unsigned length) {	559 unsigned length) {

534 return !memcmp(a, b, length * sizeof(CharType));	560 return !memcmp(a, b, length * sizeof(CharType));

535 }	561 }

536	562

537 ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) {	563 ALWAYS_INLINE bool equal(const LChar* a, const UChar* b, unsigned length) {

538 for (unsigned i = 0; i < length; ++i) {	564 for (unsigned i = 0; i < length; ++i) {

539 if (a[i] != b[i])	565 if (a[i] != b[i])

(...skipping 298 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
838 using WTF::TextCaseASCIIInsensitive;	864 using WTF::TextCaseASCIIInsensitive;

839 using WTF::TextCaseUnicodeInsensitive;	865 using WTF::TextCaseUnicodeInsensitive;

840 using WTF::TextCaseSensitive;	866 using WTF::TextCaseSensitive;

841 using WTF::TextCaseSensitivity;	867 using WTF::TextCaseSensitivity;

842 using WTF::equal;	868 using WTF::equal;

843 using WTF::equalNonNull;	869 using WTF::equalNonNull;

844 using WTF::lengthOfNullTerminatedString;	870 using WTF::lengthOfNullTerminatedString;

845 using WTF::reverseFind;	871 using WTF::reverseFind;

846	872

847 #endif	873 #endif

OLD	NEW

« no previous file with comments | « no previous file | third_party/WebKit/Source/wtf/text/WTFString.h » ('j') | no next file with comments »