Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(494)

Side by Side Diff: net/base/registry_controlled_domains/registry_controlled_domain.cc

Issue 197183002: Reduce footprint of registry controlled domain table (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Added bounds checks Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene, 5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene,
6 // later modified by others), but almost entirely rewritten for Chrome. 6 // later modified by others), but almost entirely rewritten for Chrome.
7 // (netwerk/dns/src/nsEffectiveTLDService.cpp) 7 // (netwerk/dns/src/nsEffectiveTLDService.cpp)
8 /* ***** BEGIN LICENSE BLOCK ***** 8 /* ***** BEGIN LICENSE BLOCK *****
9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
10 * 10 *
(...skipping 27 matching lines...) Expand all
38 * use your version of this file under the terms of the MPL, indicate your 38 * use your version of this file under the terms of the MPL, indicate your
39 * decision by deleting the provisions above and replace them with the notice 39 * decision by deleting the provisions above and replace them with the notice
40 * and other provisions required by the GPL or the LGPL. If you do not delete 40 * and other provisions required by the GPL or the LGPL. If you do not delete
41 * the provisions above, a recipient may use your version of this file under 41 * the provisions above, a recipient may use your version of this file under
42 * the terms of any one of the MPL, the GPL or the LGPL. 42 * the terms of any one of the MPL, the GPL or the LGPL.
43 * 43 *
44 * ***** END LICENSE BLOCK ***** */ 44 * ***** END LICENSE BLOCK ***** */
45 45
46 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 46 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
47 47
48 #include <limits>
48 #include "base/logging.h" 49 #include "base/logging.h"
49 #include "base/strings/string_util.h" 50 #include "base/strings/string_util.h"
50 #include "base/strings/utf_string_conversions.h" 51 #include "base/strings/utf_string_conversions.h"
51 #include "net/base/net_module.h" 52 #include "net/base/net_module.h"
52 #include "net/base/net_util.h" 53 #include "net/base/net_util.h"
53 #include "url/gurl.h" 54 #include "url/gurl.h"
54 #include "url/url_parse.h" 55 #include "url/url_parse.h"
55 56
56 #include "effective_tld_names.cc"
57
58 namespace net { 57 namespace net {
59 namespace registry_controlled_domains { 58 namespace registry_controlled_domains {
60 59
61 namespace { 60 namespace {
61 #include "effective_tld_names-inc.cc"
62
63 // See make_dafsa.py for documentation of the generated dafsa byte array.
64
65 const unsigned char* graph = kDafsa;
66 size_t graph_length = sizeof(kDafsa);
Ryan Sleevi 2014/04/23 01:52:18 Per http://google-styleguide.googlecode.com/svn/tr
67
68 // Read next offset from pos.
69 // Returns true if an offset could be read, false otherwise.
70 bool GetNextOffset(const unsigned char*& pos, const unsigned char* end,
71 const unsigned char*& offset) {
Ryan Sleevi 2014/04/23 01:52:18 Per http://google-styleguide.googlecode.com/svn/tr
Olle Liljenzin 2014/04/24 09:30:00 Then it should be "** pos, * end, ** offset". The
72 if (pos < end) {
Ryan Sleevi 2014/04/23 01:52:18 Convention in Chromium is to handle errors early,
73 // When reading an offset the byte array must always contain at least
74 // three more bytes to consume. First the offset to read, then a node
75 // to skip over and finally a destination node. No object can be smaller
76 // than one byte.
77 CHECK(pos + 2 < end);
78 size_t bytes_consumed;
79 switch (*pos & 0x60) {
80 case 0x60: // Read three byte offset
81 offset += ((pos[0] & 0x1F) << 16) | (pos[1] << 8) | pos[2];
82 bytes_consumed = 3;
83 break;
84 case 0x40: // Read two byte offset
85 offset += ((pos[0] & 0x1F) << 8) | pos[1];
86 bytes_consumed = 2;
87 break;
88 default:
89 offset += pos[0] & 0x3F;
90 bytes_consumed = 1;
91 }
92 if ((*pos & 0x80) != 0) {
93 pos = end;
94 } else {
95 pos += bytes_consumed;
96 }
97 return true;
98 }
99 CHECK(pos == end);
100 return false;
101 }
102
103 // Check if byte at offset is last in label.
104 bool IsEOL(const unsigned char* offset, const unsigned char* end) {
105 CHECK(offset < end);
Ryan Sleevi 2014/04/23 01:52:18 CHECK_LT (and all other occurrences below)
106 return (*offset & 0x80) != 0;
107 }
108
109 // Check if byte at offset matches first character in key.
110 // This version matches characters not last in label.
111 bool IsMatch(const unsigned char* offset, const unsigned char* end,
112 const char* key) {
113 CHECK(offset < end);
114 return *offset == *key;
115 }
116
117 // Check if byte at offset matches first character in key.
118 // This version matches characters last in label.
119 bool IsEndCharMatch(const unsigned char* offset, const unsigned char* end,
120 const char* key) {
121 CHECK(offset < end);
122 return *offset == (*key | 0x80);
123 }
124
125 // Read return value at offset.
126 // Returns true if a return value could be read, false otherwise.
127 bool GetReturnValue(const unsigned char* offset, const unsigned char* end,
128 int* return_value) {
129 CHECK(offset < end);
130 if ((*offset & 0xe0) == 0x80) {
Ryan Sleevi 2014/04/23 01:52:18 nit: If you're going to capitalize 0x0F (line 131)
131 *return_value = *offset & 0x0F;
132 return true;
133 }
134 return false;
135 }
136
137 int kFatal = -1;
Ryan Sleevi 2014/04/23 01:52:18 const int Having looked through this, I suspect t
138
139 // Lookup a domain key in a byte array generated by make_dafsa.py.
140 // The rule type is returned if key is found, otherwise kFatal is returned.
141 int LookupString(const unsigned char* graph, size_t length, const char* key,
142 size_t key_length) {
143 const unsigned char* pos = graph;
144 const unsigned char* end = graph + length;
145 const unsigned char* offset = pos;
146 const char* key_end = key + key_length;
147 while (GetNextOffset(pos, end, offset)) {
148 // char <char>+ end_char offsets
149 // char <char>+ return value
150 // char end_char offsets
151 // char return value
152 // end_char offsets
153 // return_value
154 bool did_consume = false;
155 if (key != key_end && !IsEOL(offset, end)) {
156 // Leading <char> is not a match. Don't dive into this child
157 if (!IsMatch(offset, end, key)) continue;
Ryan Sleevi 2014/04/23 01:52:18 style nit: Convention has been to eschew the one-l
158 did_consume = true;
159 ++offset;
160 ++key;
161 // Possible matches at this point:
162 // <char>+ end_char offsets
163 // <char>+ return value
164 // end_char offsets
165 // return value
166 // Remove all remaining <char> nodes possible
167 while (!IsEOL(offset, end) && key != key_end) {
168 if (!IsMatch(offset, end, key)) return kFatal;
Ryan Sleevi 2014/04/23 01:52:18 Should comment why this is kFatal. I'm not entirel
169 ++key;
170 ++offset;
171 }
172 }
173 // Possible matches:
Ryan Sleevi 2014/04/23 01:52:18 s/Possible matches/Possible matches at this point:
174 // end_char offsets
175 // return_value
176 // If one or more <char> elements were consumed, a failure
177 // to match is terminal. Otherwise, try the next node.
178 if (key == key_end) {
179 int return_value;
180 if (GetReturnValue(offset, end, &return_value)) return return_value;
181 if (did_consume) return kFatal;
182 continue;
183 }
184 if (!IsEndCharMatch(offset, end, key)) {
185 if (did_consume) return kFatal; // Unexpected
186 continue;
187 }
188 ++key;
189 pos = ++offset; // Dive into child
190 }
191 return kFatal; // No match
192 }
62 193
63 const int kExceptionRule = 1; 194 const int kExceptionRule = 1;
64 const int kWildcardRule = 2; 195 const int kWildcardRule = 2;
65 const int kPrivateRule = 4; 196 const int kPrivateRule = 4;
Ryan Sleevi 2014/04/23 01:52:18 While this is not code within a class, convention
66 197
67 const FindDomainPtr kDefaultFindDomainFunction = Perfect_Hash::FindDomain;
68
69 // 'stringpool' is defined as a macro by the gperf-generated
70 // "effective_tld_names.cc". Provide a real constant value for it instead.
71 const char* const kDefaultStringPool = stringpool;
72 #undef stringpool
73
74 FindDomainPtr g_find_domain_function = kDefaultFindDomainFunction;
75 const char* g_stringpool = kDefaultStringPool;
76
77 size_t GetRegistryLengthImpl( 198 size_t GetRegistryLengthImpl(
78 const std::string& host, 199 const std::string& host,
79 UnknownRegistryFilter unknown_filter, 200 UnknownRegistryFilter unknown_filter,
80 PrivateRegistryFilter private_filter) { 201 PrivateRegistryFilter private_filter) {
81 DCHECK(!host.empty()); 202 DCHECK(!host.empty());
82 203
83 // Skip leading dots. 204 // Skip leading dots.
84 const size_t host_check_begin = host.find_first_not_of('.'); 205 const size_t host_check_begin = host.find_first_not_of('.');
85 if (host_check_begin == std::string::npos) 206 if (host_check_begin == std::string::npos)
86 return 0; // Host is only dots. 207 return 0; // Host is only dots.
(...skipping 11 matching lines...) Expand all
98 219
99 // Walk up the domain tree, most specific to least specific, 220 // Walk up the domain tree, most specific to least specific,
100 // looking for matches at each level. 221 // looking for matches at each level.
101 size_t prev_start = std::string::npos; 222 size_t prev_start = std::string::npos;
102 size_t curr_start = host_check_begin; 223 size_t curr_start = host_check_begin;
103 size_t next_dot = host.find('.', curr_start); 224 size_t next_dot = host.find('.', curr_start);
104 if (next_dot >= host_check_len) // Catches std::string::npos as well. 225 if (next_dot >= host_check_len) // Catches std::string::npos as well.
105 return 0; // This can't have a registry + domain. 226 return 0; // This can't have a registry + domain.
106 while (1) { 227 while (1) {
107 const char* domain_str = host.data() + curr_start; 228 const char* domain_str = host.data() + curr_start;
108 int domain_length = host_check_len - curr_start; 229 size_t domain_length = host_check_len - curr_start;
109 const DomainRule* rule = g_find_domain_function(domain_str, domain_length); 230 int type = LookupString(graph, graph_length, domain_str, domain_length);
231 bool do_check =
232 type != -1 && (!(type & kPrivateRule) ||
Ryan Sleevi 2014/04/23 01:52:18 s/-1/kFatal
233 private_filter == INCLUDE_PRIVATE_REGISTRIES);
110 234
111 // We need to compare the string after finding a match because the 235 // If the apparent match is a private registry and we're not including
112 // no-collisions of perfect hashing only refers to items in the set. Since 236 // those, it can't be an actual match.
Ryan Sleevi 2014/04/23 01:52:18 s/ those/ those/
113 // we're searching for arbitrary domains, there could be collisions. 237 if (do_check) {
114 // Furthermore, if the apparent match is a private registry and we're not 238 // Exception rules override wildcard rules when the domain is an exact
115 // including those, it can't be an actual match. 239 // match, but wildcards take precedence when there's a subdomain.
116 if (rule) { 240 if (type & kWildcardRule && (prev_start != std::string::npos)) {
117 bool do_check = !(rule->type & kPrivateRule) || 241 // If prev_start == host_check_begin, then the host is the registry
118 private_filter == INCLUDE_PRIVATE_REGISTRIES; 242 // itself, so return 0.
119 if (do_check && base::strncasecmp(domain_str, 243 return (prev_start == host_check_begin) ? 0
120 g_stringpool + rule->name_offset, 244 : (host.length() - prev_start);
121 domain_length) == 0) { 245 }
122 // Exception rules override wildcard rules when the domain is an exact 246
123 // match, but wildcards take precedence when there's a subdomain. 247 if (type & kExceptionRule) {
124 if (rule->type & kWildcardRule && (prev_start != std::string::npos)) { 248 if (next_dot == std::string::npos) {
125 // If prev_start == host_check_begin, then the host is the registry 249 // If we get here, we had an exception rule with no dots (e.g.
126 // itself, so return 0. 250 // "!foo"). This would only be valid if we had a corresponding
127 return (prev_start == host_check_begin) ? 251 // wildcard rule, which would have to be "*". But we explicitly
128 0 : (host.length() - prev_start); 252 // disallow that case, so this kind of rule is invalid.
253 NOTREACHED() << "Invalid exception rule";
254 return 0;
129 } 255 }
256 return host.length() - next_dot - 1;
257 }
130 258
131 if (rule->type & kExceptionRule) { 259 // If curr_start == host_check_begin, then the host is the registry
132 if (next_dot == std::string::npos) { 260 // itself, so return 0.
133 // If we get here, we had an exception rule with no dots (e.g. 261 return (curr_start == host_check_begin) ? 0
134 // "!foo"). This would only be valid if we had a corresponding 262 : (host.length() - curr_start);
135 // wildcard rule, which would have to be "*". But we explicitly
136 // disallow that case, so this kind of rule is invalid.
137 NOTREACHED() << "Invalid exception rule";
138 return 0;
139 }
140 return host.length() - next_dot - 1;
141 }
142
143 // If curr_start == host_check_begin, then the host is the registry
144 // itself, so return 0.
145 return (curr_start == host_check_begin) ?
146 0 : (host.length() - curr_start);
147 }
148 } 263 }
149 264
150 if (next_dot >= host_check_len) // Catches std::string::npos as well. 265 if (next_dot >= host_check_len) // Catches std::string::npos as well.
151 break; 266 break;
152 267
153 prev_start = curr_start; 268 prev_start = curr_start;
154 curr_start = next_dot + 1; 269 curr_start = next_dot + 1;
155 next_dot = host.find('.', curr_start); 270 next_dot = host.find('.', curr_start);
156 } 271 }
157 272
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
257 PrivateRegistryFilter private_filter) { 372 PrivateRegistryFilter private_filter) {
258 url_canon::CanonHostInfo host_info; 373 url_canon::CanonHostInfo host_info;
259 const std::string canon_host(CanonicalizeHost(host, &host_info)); 374 const std::string canon_host(CanonicalizeHost(host, &host_info));
260 if (canon_host.empty()) 375 if (canon_host.empty())
261 return std::string::npos; 376 return std::string::npos;
262 if (host_info.IsIPAddress()) 377 if (host_info.IsIPAddress())
263 return 0; 378 return 0;
264 return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter); 379 return GetRegistryLengthImpl(canon_host, unknown_filter, private_filter);
265 } 380 }
266 381
267 void SetFindDomainFunctionAndStringPoolForTesting(FindDomainPtr function, 382 void SetFindDomainGraph() {
268 const char* stringpool) { 383 graph = kDafsa;
269 g_find_domain_function = function ? function : kDefaultFindDomainFunction; 384 graph_length = sizeof(kDafsa);
270 g_stringpool = stringpool ? stringpool : kDefaultStringPool; 385 }
386
387 void SetFindDomainGraph(const unsigned char* domains, size_t length) {
388 CHECK(domains);
389 CHECK(length > 0);
Ryan Sleevi 2014/04/23 01:52:18 CHECK_GT, although really, since it's size_t CHEC
390 graph = domains;
391 graph_length = length;
271 } 392 }
272 393
273 } // namespace registry_controlled_domains 394 } // namespace registry_controlled_domains
274 } // namespace net 395 } // namespace net
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698