Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(162)

Unified Diff: net/base/registry_controlled_domains/registry_controlled_domain.cc

Issue 197183002: Reduce footprint of registry controlled domain table (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: net/base/registry_controlled_domains/registry_controlled_domain.cc
diff --git a/net/base/registry_controlled_domains/registry_controlled_domain.cc b/net/base/registry_controlled_domains/registry_controlled_domain.cc
index 56d5ed9d6a176864f6f4beb9af06d9d9cfafd485..feebc2de97df288bd01dd853195219ddd4b11fb8 100644
--- a/net/base/registry_controlled_domains/registry_controlled_domain.cc
+++ b/net/base/registry_controlled_domains/registry_controlled_domain.cc
@@ -53,27 +53,125 @@
#include "url/gurl.h"
#include "url/url_parse.h"
-#include "effective_tld_names.cc"
-
namespace net {
namespace registry_controlled_domains {
namespace {
+#include "effective_tld_names-inc.cc"
+
+// See make_dafsa.py for documentation of the generated dafsa byte array.
+
+const unsigned char* graph = kDafsa;  // Array passed to LookupString(); reassigned by SetFindDomainGraph().
+
+int LookupString(const unsigned char* pos, const char* key, int length) {  // Returns the low 4 bits of the matched end byte, or -1 when |key| does not match.
Ryan Sleevi 2014/03/19 03:08:32 1) Needs documentation 2) int for byte length = be
+ const char* end = key + length;  // One past the last key character.
+ while (true) {
+ // Read links. Each link encodes an offset; |child| accumulates them from |pos|.
+ const unsigned char* child = pos;
Ryan Sleevi 2014/03/18 01:23:58 Generally speaking, this level of manual string ma
+ while (true) {
+ bool is_last = (*pos & 0x80) != 0;  // High bit marks this node's final link.
Ryan Sleevi 2014/03/19 03:08:32 Normally within net/ we encapsulate a lot of the s
Olle Liljenzin 2014/03/19 14:19:42 Iterators may increase readability by adding a fam
+ switch (*pos & 0x60) {  // Bits 0x60 select the width of the encoded offset.
+ case 0x60: // Read three byte offset
+ child += ((pos[0] & 0x1F) << 16) | (pos[1] << 8) | pos[2];
+ pos += 3;
+ break;
+
+ case 0x40: // Read two byte offset
+ child += ((pos[0] & 0x1F) << 8) | pos[1];
+ pos += 2;
+ break;
+
+ default: // Read one byte offset
+ child += pos[0] & 0x3F;
+ pos += 1;
+ }
+ if (key == end) {
+ // End of key reached. A matching child node must be labeled by a
+ // single byte in range 0x80-0x9F encoding the return value.
Ryan Sleevi 2014/03/29 02:14:29 As a justification for why we need to split this c
+ if (!(*child & 0x60)) {
+ // A return value must always be last in a label. If it is not, the
+ // byte array is corrupt.
+ DCHECK(*child & 0x80);
+
+ // Extract return value.
+ return *child & 0x0F;
+ }
+ // The key matches and is exhausted, but child has more characters.
+ if (is_last) {
+ return -1;
Ryan Sleevi 2014/03/19 03:08:32 Not a fan of magic values. If you took base::Strin
+ }
+ // Try next child.
+ } else {
+ // Child node has a single-character label (high bit set).
+ if (*child & 0x80) {
+ // If key matches char in child node label.
+ if ((*child & 0x7F) == *key) {
+ // Consume matching label. Step down in child node and read links.
+ ++key;
+ ++child;
+ pos = child;  // No break: the link loop restarts at this node's links.
+ } else {
+ // Key doesn't match label in this child node.
+ if (is_last) {
+ return -1;
+ }
+ // Try next child.
+ }
+ } else {
+ // Child node has a multi-character label.
+ if (*child == *key) {
+ // Found a matching link. Step down in child node.
+ ++key;
+ pos = child + 1;
+ break;  // Leave the link loop; the rest of the label is compared below.
+ } else {
+ // Key doesn't match label in this child node.
+ if (is_last) {
+ return -1;
+ }
+ // Try next child.
+ }
+ }
+ }
+ }
+ // Compare key with node label. First character is already consumed.
+ while (true) {
+ if (key == end) {
+ // End of key reached.
+ if (!(*pos & 0x60)) {
+ // Extract return value.
+ return *pos & 0x0F;
+ }
+ // Node label contains more characters that must match.
+ return -1;
+ }
+ if (*pos & 0x80) {
+ // Last character in node label.
+ if (*key & 0x80 || *key < 0x20 || (*key | 0x80) != *pos) {
+ // Not printable 7-bit ASCII in key or key didn't match.
+ return -1;
+ } else {
+ ++key;
+ ++pos;
+ break;  // Label fully matched; control returns to the outer loop.
+ // Read links to child nodes.
+ }
+ } else {
+ if (*key++ != *pos++) {
+ // Key doesn't match node label.
+ return -1;
+ }
+ // Key matches so far and there are more characters to check in this
+ // node label.
+ }
+ }
+ }
+}
const int kExceptionRule = 1;  // Bit flags tested against the value LookupString() returns.
const int kWildcardRule = 2;
const int kPrivateRule = 4;
-const FindDomainPtr kDefaultFindDomainFunction = Perfect_Hash::FindDomain;
-
-// 'stringpool' is defined as a macro by the gperf-generated
-// "effective_tld_names.cc". Provide a real constant value for it instead.
-const char* const kDefaultStringPool = stringpool;
-#undef stringpool
-
-FindDomainPtr g_find_domain_function = kDefaultFindDomainFunction;
-const char* g_stringpool = kDefaultStringPool;
-
size_t GetRegistryLengthImpl(
const std::string& host,
UnknownRegistryFilter unknown_filter,
@@ -106,45 +204,39 @@ size_t GetRegistryLengthImpl(
while (1) {
const char* domain_str = host.data() + curr_start;
int domain_length = host_check_len - curr_start;
Ryan Sleevi 2014/03/19 03:08:32 this should have been a size_t, IINM.
- const DomainRule* rule = g_find_domain_function(domain_str, domain_length);
-
- // We need to compare the string after finding a match because the
- // no-collisions of perfect hashing only refers to items in the set. Since
- // we're searching for arbitrary domains, there could be collisions.
- // Furthermore, if the apparent match is a private registry and we're not
- // including those, it can't be an actual match.
- if (rule) {
- bool do_check = !(rule->type & kPrivateRule) ||
- private_filter == INCLUDE_PRIVATE_REGISTRIES;
- if (do_check && base::strncasecmp(domain_str,
- g_stringpool + rule->name_offset,
- domain_length) == 0) {
- // Exception rules override wildcard rules when the domain is an exact
- // match, but wildcards take precedence when there's a subdomain.
- if (rule->type & kWildcardRule && (prev_start != std::string::npos)) {
- // If prev_start == host_check_begin, then the host is the registry
- // itself, so return 0.
- return (prev_start == host_check_begin) ?
- 0 : (host.length() - prev_start);
- }
+ int type = LookupString(graph, domain_str, domain_length);
+ bool do_check =
+ type != -1 && (!(type & kPrivateRule) ||
+ private_filter == INCLUDE_PRIVATE_REGISTRIES);
+
+ // If the apparent match is a private registry and we're not including
+ // those, it can't be an actual match.
+ if (do_check) {
+ // Exception rules override wildcard rules when the domain is an exact
+ // match, but wildcards take precedence when there's a subdomain.
+ if (type & kWildcardRule && (prev_start != std::string::npos)) {
+ // If prev_start == host_check_begin, then the host is the registry
+ // itself, so return 0.
+ return (prev_start == host_check_begin) ? 0
+ : (host.length() - prev_start);
+ }
- if (rule->type & kExceptionRule) {
- if (next_dot == std::string::npos) {
- // If we get here, we had an exception rule with no dots (e.g.
- // "!foo"). This would only be valid if we had a corresponding
- // wildcard rule, which would have to be "*". But we explicitly
- // disallow that case, so this kind of rule is invalid.
- NOTREACHED() << "Invalid exception rule";
- return 0;
- }
- return host.length() - next_dot - 1;
+ if (type & kExceptionRule) {
+ if (next_dot == std::string::npos) {
+ // If we get here, we had an exception rule with no dots (e.g.
+ // "!foo"). This would only be valid if we had a corresponding
+ // wildcard rule, which would have to be "*". But we explicitly
+ // disallow that case, so this kind of rule is invalid.
+ NOTREACHED() << "Invalid exception rule";
+ return 0;
}
-
- // If curr_start == host_check_begin, then the host is the registry
- // itself, so return 0.
- return (curr_start == host_check_begin) ?
- 0 : (host.length() - curr_start);
+ return host.length() - next_dot - 1;
}
+
+ // If curr_start == host_check_begin, then the host is the registry
+ // itself, so return 0.
+ return (curr_start == host_check_begin) ? 0
+ : (host.length() - curr_start);
}
if (next_dot >= host_check_len) // Catches std::string::npos as well.
@@ -264,10 +356,8 @@ size_t GetRegistryLength(
return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter);
}
-void SetFindDomainFunctionAndStringPoolForTesting(FindDomainPtr function,
- const char* stringpool) {
- g_find_domain_function = function ? function : kDefaultFindDomainFunction;
- g_stringpool = stringpool ? stringpool : kDefaultStringPool;
+void SetFindDomainGraph(const unsigned char* domains) {
+ graph = domains ? domains : kDafsa;  // A null |domains| points |graph| back at kDafsa.
}
} // namespace registry_controlled_domains

Powered by Google App Engine
This is Rietveld 408576698