Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(116)

Side by Side Diff: net/base/registry_controlled_domains/registry_controlled_domain.h

Issue 15140003: Add support for split Public Suffix List distinctions. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebased again Created 7 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene, 5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene,
6 // later modified by others), but almost entirely rewritten for Chrome. 6 // later modified by others), but almost entirely rewritten for Chrome.
7 // (netwerk/dns/src/nsEffectiveTLDService.h) 7 // (netwerk/dns/src/nsEffectiveTLDService.h)
8 /* ***** BEGIN LICENSE BLOCK ***** 8 /* ***** BEGIN LICENSE BLOCK *****
9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
10 * 10 *
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after
116 #include <string> 116 #include <string>
117 117
118 #include "base/basictypes.h" 118 #include "base/basictypes.h"
119 #include "net/base/net_export.h" 119 #include "net/base/net_export.h"
120 120
121 class GURL; 121 class GURL;
122 122
123 struct DomainRule; 123 struct DomainRule;
124 124
125 namespace net { 125 namespace net {
126 namespace registry_controlled_domains {
126 127
127 class NET_EXPORT RegistryControlledDomainService { 128 // This enum is a required parameter to all public methods declared for this
128 public: 129 // service. The Public Suffix List (http://publicsuffix.org/) this service
129 // Returns the registered, organization-identifying host and all its registry 130 // uses as a data source splits all effective-TLDs into two groups. The main
130 // information, but no subdomains, from the given GURL. Returns an empty 131 // group describes registries that are acknowledged by ICANN. The second group
131 // string if the GURL is invalid, has no host (e.g. a file: URL), has multiple 132 // contains a list of private additions for domains that enable external users
132 // trailing dots, is an IP address, has only one subcomponent (i.e. no dots 133 // to create subdomains, such as appspot.com.
133 // other than leading/trailing ones), or is itself a recognized registry 134 // The PrivateRegistryFilter enum lets you choose whether you want to include the
134 // identifier. If no matching rule is found in the effective-TLD data (or in 135 // private additions in your lookup.
135 // the default data, if the resource failed to load), the last subcomponent of 136 // See this for example use cases:
136 // the host is assumed to be the registry. 137 // https://wiki.mozilla.org/Public_Suffix_List/Use_Cases
137 // 138 enum NET_EXPORT PrivateRegistryFilter {
138 // Examples: 139 EXCLUDE_PRIVATE_REGISTRIES = 0,
139 // http://www.google.com/file.html -> "google.com" (com) 140 INCLUDE_PRIVATE_REGISTRIES
140 // http://..google.com/file.html -> "google.com" (com)
141 // http://google.com./file.html -> "google.com." (com)
142 // http://a.b.co.uk/file.html -> "b.co.uk" (co.uk)
143 // file:///C:/bar.html -> "" (no host)
144 // http://foo.com../file.html -> "" (multiple trailing dots)
145 // http://192.168.0.1/file.html -> "" (IP address)
146 // http://bar/file.html -> "" (no subcomponents)
147 // http://co.uk/file.html -> "" (host is a registry)
148 // http://foo.bar/file.html -> "foo.bar" (no rule; assume bar)
149 static std::string GetDomainAndRegistry(const GURL& gurl);
150
151 // Like the GURL version, but takes a host (which is canonicalized internally)
152 // instead of a full GURL.
153 static std::string GetDomainAndRegistry(const std::string& host);
154
155 // This convenience function returns true if the two GURLs both have hosts
156 // and one of the following is true:
157 // * They each have a known domain and registry, and it is the same for both
158 // URLs. Note that this means the trailing dot, if any, must match too.
159 // * They don't have known domains/registries, but the hosts are identical.
160 // Effectively, callers can use this function to check whether the input URLs
161 // represent hosts "on the same site".
162 static bool SameDomainOrHost(const GURL& gurl1, const GURL& gurl2);
163
164 // Finds the length in bytes of the registrar portion of the host in the
165 // given GURL. Returns std::string::npos if the GURL is invalid or has no
166 // host (e.g. a file: URL). Returns 0 if the GURL has multiple trailing dots,
167 // is an IP address, has no subcomponents, or is itself a recognized registry
168 // identifier. If no matching rule is found in the effective-TLD data (or in
169 // the default data, if the resource failed to load), returns 0 if
170 // |allow_unknown_registries| is false, or the length of the last subcomponent
171 // if |allow_unknown_registries| is true.
172 //
173 // Examples:
174 // http://www.google.com/file.html -> 3 (com)
175 // http://..google.com/file.html -> 3 (com)
176 // http://google.com./file.html -> 4 (com)
177 // http://a.b.co.uk/file.html -> 5 (co.uk)
178 // file:///C:/bar.html -> std::string::npos (no host)
179 // http://foo.com../file.html -> 0 (multiple trailing
180 // dots)
181 // http://192.168.0.1/file.html -> 0 (IP address)
182 // http://bar/file.html -> 0 (no subcomponents)
183 // http://co.uk/file.html -> 0 (host is a registry)
184 // http://foo.bar/file.html -> 0 or 3, depending (no rule; assume
185 // bar)
186 static size_t GetRegistryLength(const GURL& gurl,
187 bool allow_unknown_registries);
188
189 // Like the GURL version, but takes a host (which is canonicalized internally)
190 // instead of a full GURL.
191 static size_t GetRegistryLength(const std::string& host,
192 bool allow_unknown_registries);
193
194 private:
195 friend class RegistryControlledDomainTest;
196
197 // Internal workings of the static public methods. See above.
198 static std::string GetDomainAndRegistryImpl(const std::string& host);
199 static size_t GetRegistryLengthImpl(const std::string& host,
200 bool allow_unknown_registries);
201
202 typedef const struct DomainRule* (*FindDomainPtr)(const char *, unsigned int);
203
204 // Used for unit tests, so that a different perfect hash map from the full
205 // list is used. Set to NULL to use the Default function.
206 static void UseFindDomainFunction(FindDomainPtr function);
207
208 // Function that returns a DomainRule given a domain.
209 static FindDomainPtr find_domain_function_;
210
211
212 DISALLOW_IMPLICIT_CONSTRUCTORS(RegistryControlledDomainService);
213 }; 141 };
214 142
143 // This enum is a required parameter to the GetRegistryLength functions
144 // declared for this service. Whenever there is no matching rule in the
145 // effective-TLD data (or in the default data, if the resource failed to
146 // load), the result will be dependent on which enum value was passed in.
147 // If EXCLUDE_UNKNOWN_REGISTRIES was passed in, the resulting registry length
148 // will be 0. If INCLUDE_UNKNOWN_REGISTRIES was passed in, the resulting
149 // registry length will be the length of the last subcomponent (e.g. 3 for
150 // foobar.baz).
151 enum NET_EXPORT UnknownRegistryFilter {
152 EXCLUDE_UNKNOWN_REGISTRIES = 0,
153 INCLUDE_UNKNOWN_REGISTRIES
154 };
155
156 // Returns the registered, organization-identifying host and all its registry
157 // information, but no subdomains, from the given GURL. Returns an empty
158 // string if the GURL is invalid, has no host (e.g. a file: URL), has multiple
159 // trailing dots, is an IP address, has only one subcomponent (i.e. no dots
160 // other than leading/trailing ones), or is itself a recognized registry
161 // identifier. If no matching rule is found in the effective-TLD data (or in
162 // the default data, if the resource failed to load), the last subcomponent of
163 // the host is assumed to be the registry.
164 //
165 // Examples:
166 // http://www.google.com/file.html -> "google.com" (com)
167 // http://..google.com/file.html -> "google.com" (com)
168 // http://google.com./file.html -> "google.com." (com)
169 // http://a.b.co.uk/file.html -> "b.co.uk" (co.uk)
170 // file:///C:/bar.html -> "" (no host)
171 // http://foo.com../file.html -> "" (multiple trailing dots)
172 // http://192.168.0.1/file.html -> "" (IP address)
173 // http://bar/file.html -> "" (no subcomponents)
174 // http://co.uk/file.html -> "" (host is a registry)
175 // http://foo.bar/file.html -> "foo.bar" (no rule; assume bar)
176 NET_EXPORT std::string GetDomainAndRegistry(const GURL& gurl,
177 PrivateRegistryFilter filter);
178
179 // Like the GURL version, but takes a host (which is canonicalized internally)
180 // instead of a full GURL.
181 NET_EXPORT std::string GetDomainAndRegistry(const std::string& host,
182 PrivateRegistryFilter filter);
183
184 // This convenience function returns true if the two GURLs both have hosts
185 // and one of the following is true:
186 // * They each have a known domain and registry, and it is the same for both
187 // URLs. Note that this means the trailing dot, if any, must match too.
188 // * They don't have known domains/registries, but the hosts are identical.
189 // Effectively, callers can use this function to check whether the input URLs
190 // represent hosts "on the same site".
191 NET_EXPORT bool SameDomainOrHost(const GURL& gurl1, const GURL& gurl2,
192 PrivateRegistryFilter filter);
193
194 // Finds the length in bytes of the registry portion of the host in the
195 // given GURL. Returns std::string::npos if the GURL is invalid or has no
196 // host (e.g. a file: URL). Returns 0 if the GURL has multiple trailing dots,
197 // is an IP address, has no subcomponents, or is itself a recognized registry
198 // identifier. The result is also dependent on the UnknownRegistryFilter.
199 // If no matching rule is found in the effective-TLD data (or in
200 // the default data, if the resource failed to load), returns 0 if
201 // |unknown_filter| is EXCLUDE_UNKNOWN_REGISTRIES, or the length of the last
202 // subcomponent if |unknown_filter| is INCLUDE_UNKNOWN_REGISTRIES.
203 //
204 // Examples:
205 // http://www.google.com/file.html -> 3 (com)
206 // http://..google.com/file.html -> 3 (com)
207 // http://google.com./file.html -> 4 (com)
208 // http://a.b.co.uk/file.html -> 5 (co.uk)
209 // file:///C:/bar.html -> std::string::npos (no host)
210 // http://foo.com../file.html -> 0 (multiple trailing
211 // dots)
212 // http://192.168.0.1/file.html -> 0 (IP address)
213 // http://bar/file.html -> 0 (no subcomponents)
214 // http://co.uk/file.html -> 0 (host is a registry)
215 // http://foo.bar/file.html -> 0 or 3, depending (no rule; assume
216 // bar)
217 NET_EXPORT size_t GetRegistryLength(const GURL& gurl,
218 UnknownRegistryFilter unknown_filter,
219 PrivateRegistryFilter private_filter);
220
221 // Like the GURL version, but takes a host (which is canonicalized internally)
222 // instead of a full GURL.
223 NET_EXPORT size_t GetRegistryLength(const std::string& host,
224 UnknownRegistryFilter unknown_filter,
225 PrivateRegistryFilter private_filter);
226
227 typedef const struct DomainRule* (*FindDomainPtr)(const char *, unsigned int);
228
229 // Used for unit tests, so that a different perfect hash map from the full
230 // list is used. Set to NULL to use the Default function.
231 NET_EXPORT_PRIVATE void SetFindDomainFunctionForTesting(
232 FindDomainPtr fn);
233
234 } // namespace registry_controlled_domains
215 } // namespace net 235 } // namespace net
216 236
217 #endif // NET_BASE_REGISTRY_CONTROLLED_DOMAINS_REGISTRY_CONTROLLED_DOMAIN_H_ 237 #endif // NET_BASE_REGISTRY_CONTROLLED_DOMAINS_REGISTRY_CONTROLLED_DOMAIN_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698