OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (C) 2013 Google, Inc. All Rights Reserved. | |
3 * | |
4 * Redistribution and use in source and binary forms, with or without | |
5 * modification, are permitted provided that the following conditions | |
6 * are met: | |
7 * 1. Redistributions of source code must retain the above copyright | |
8 * notice, this list of conditions and the following disclaimer. | |
9 * 2. Redistributions in binary form must reproduce the above copyright | |
10 * notice, this list of conditions and the following disclaimer in the | |
11 * documentation and/or other materials provided with the distribution. | |
12 * | |
13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY | |
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR | |
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
24 */ | |
25 | |
26 #include "config.h" | |
27 #include "core/html/parser/HTMLIdentifier.h" | |
28 | |
29 #include "HTMLNames.h" | |
30 #include "wtf/HashMap.h" | |
31 #include "wtf/MainThread.h" | |
32 #include "wtf/text/StringHash.h" | |
33 | |
34 namespace WebCore { | |
35 | |
36 using namespace HTMLNames; | |
37 | |
// Lookup table of known identifiers. Keys are StringImpl hash values (see
// addNames(), which inserts name->hash()); values are the corresponding
// StringImpl pointers.
typedef HashMap<unsigned, StringImpl*, AlreadyHashed> IdentifierTable;

// Length of the longest name registered through addNames(). findIfKnown()
// uses it to reject over-long input before computing a hash.
unsigned HTMLIdentifier::maxNameLength = 0;
41 | |
42 static IdentifierTable& identifierTable() | |
43 { | |
44 DEFINE_STATIC_LOCAL(IdentifierTable, table, ()); | |
45 ASSERT(isMainThread() || !table.isEmpty()); | |
46 return table; | |
47 } | |
48 | |
49 #ifndef NDEBUG | |
50 bool HTMLIdentifier::isKnown(const StringImpl* string) | |
51 { | |
52 const IdentifierTable& table = identifierTable(); | |
53 return table.contains(string->hash()); | |
54 } | |
55 #endif | |
56 | |
57 StringImpl* HTMLIdentifier::findIfKnown(const UChar* characters, unsigned length
) | |
58 { | |
59 // We don't need to try hashing if we know the string is too long. | |
60 if (length > maxNameLength) | |
61 return 0; | |
62 // computeHashAndMaskTop8Bits is the function StringImpl::hash() uses. | |
63 unsigned hash = StringHasher::computeHashAndMaskTop8Bits(characters, length)
; | |
64 const IdentifierTable& table = identifierTable(); | |
65 ASSERT(!table.isEmpty()); | |
66 | |
67 IdentifierTable::const_iterator it = table.find(hash); | |
68 if (it == table.end()) | |
69 return 0; | |
70 // It's possible to have hash collisions between arbitrary strings and | |
71 // known identifiers (e.g. "bvvfg" collides with "script"). | |
72 // However ASSERTs in addNames() guard against there ever being collisions | |
73 // between known identifiers. | |
74 if (!equal(it->value, characters, length)) | |
75 return 0; | |
76 return it->value; | |
77 } | |
78 | |
79 const String& HTMLIdentifier::asString() const | |
80 { | |
81 ASSERT(isMainThread()); | |
82 return m_string; | |
83 } | |
84 | |
85 const StringImpl* HTMLIdentifier::asStringImpl() const | |
86 { | |
87 return m_string.impl(); | |
88 } | |
89 | |
90 void HTMLIdentifier::addNames(const QualifiedName* const* names, unsigned namesC
ount) | |
91 { | |
92 IdentifierTable& table = identifierTable(); | |
93 for (unsigned i = 0; i < namesCount; ++i) { | |
94 StringImpl* name = names[i]->localName().impl(); | |
95 unsigned hash = name->hash(); | |
96 IdentifierTable::AddResult addResult = table.add(hash, name); | |
97 maxNameLength = std::max(maxNameLength, name->length()); | |
98 // Ensure we're using the same hashing algorithm to get and set. | |
99 ASSERT_UNUSED(addResult, !addResult.isNewEntry || HTMLIdentifier::findIf
Known(String(name).charactersWithNullTermination().data(), name->length()) == na
me); | |
100 // We expect some hash collisions, but only for identical strings. | |
101 // Since all of these names are AtomicStrings pointers should be equal. | |
102 // Note: If you hit this ASSERT, then we had a hash collision among | |
103 // HTMLNames strings, and we need to re-design how we use this hash! | |
104 ASSERT_UNUSED(addResult, !addResult.isNewEntry || name == addResult.iter
ator->value); | |
105 } | |
106 } | |
107 | |
108 void HTMLIdentifier::init() | |
109 { | |
110 ASSERT(isMainThread()); // Not technically necessary, but this is our curren
t expected usage. | |
111 static bool isInitialized = false; | |
112 if (isInitialized) | |
113 return; | |
114 isInitialized = true; | |
115 | |
116 // FIXME: We should atomize small whitespace (\n, \n\n, etc.) | |
117 addNames(getHTMLTags(), HTMLTagsCount); | |
118 addNames(getHTMLAttrs(), HTMLAttrsCount); | |
119 } | |
120 | |
121 } | |
OLD | NEW |