OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene, | 5 // NB: Modelled after Mozilla's code (originally written by Pamela Greene, |
6 // later modified by others), but almost entirely rewritten for Chrome. | 6 // later modified by others), but almost entirely rewritten for Chrome. |
7 // (netwerk/dns/src/nsEffectiveTLDService.h) | 7 // (netwerk/dns/src/nsEffectiveTLDService.h) |
8 /* ***** BEGIN LICENSE BLOCK ***** | 8 /* ***** BEGIN LICENSE BLOCK ***** |
9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 | 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
10 * | 10 * |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
116 #include <string> | 116 #include <string> |
117 | 117 |
118 #include "base/basictypes.h" | 118 #include "base/basictypes.h" |
119 #include "net/base/net_export.h" | 119 #include "net/base/net_export.h" |
120 | 120 |
121 class GURL; | 121 class GURL; |
122 | 122 |
123 struct DomainRule; | 123 struct DomainRule; |
124 | 124 |
125 namespace net { | 125 namespace net { |
| 126 namespace registry_controlled_domains { |
126 | 127 |
127 class NET_EXPORT RegistryControlledDomainService { | 128 // This enum is a required parameter to all public methods declared for this |
128 public: | 129 // service. The Public Suffix List (http://publicsuffix.org/) this service |
129 // Returns the registered, organization-identifying host and all its registry | 130 // uses as a data source splits all effective-TLDs into two groups. The main |
130 // information, but no subdomains, from the given GURL. Returns an empty | 131 // group describes registries that are acknowledged by ICANN. The second group |
131 // string if the GURL is invalid, has no host (e.g. a file: URL), has multiple | 132 // contains a list of private additions for domains that enable external users |
132 // trailing dots, is an IP address, has only one subcomponent (i.e. no dots | 133 // to create subdomains, such as appspot.com. |
133 // other than leading/trailing ones), or is itself a recognized registry | 134 // The PrivateRegistryFilter enum lets you choose whether to include the |
134 // identifier. If no matching rule is found in the effective-TLD data (or in | 135 // private additions in your lookup. |
135 // the default data, if the resource failed to load), the last subcomponent of | 136 // See this for example use cases: |
136 // the host is assumed to be the registry. | 137 // https://wiki.mozilla.org/Public_Suffix_List/Use_Cases |
137 // | 138 enum NET_EXPORT PrivateRegistryFilter { |
138 // Examples: | 139 EXCLUDE_PRIVATE_REGISTRIES = 0, |
139 // http://www.google.com/file.html -> "google.com" (com) | 140 INCLUDE_PRIVATE_REGISTRIES |
140 // http://..google.com/file.html -> "google.com" (com) | |
141 // http://google.com./file.html -> "google.com." (com) | |
142 // http://a.b.co.uk/file.html -> "b.co.uk" (co.uk) | |
143 // file:///C:/bar.html -> "" (no host) | |
144 // http://foo.com../file.html -> "" (multiple trailing dots) | |
145 // http://192.168.0.1/file.html -> "" (IP address) | |
146 // http://bar/file.html -> "" (no subcomponents) | |
147 // http://co.uk/file.html -> "" (host is a registry) | |
148 // http://foo.bar/file.html -> "foo.bar" (no rule; assume bar) | |
149 static std::string GetDomainAndRegistry(const GURL& gurl); | |
150 | |
151 // Like the GURL version, but takes a host (which is canonicalized internally) | |
152 // instead of a full GURL. | |
153 static std::string GetDomainAndRegistry(const std::string& host); | |
154 | |
155 // This convenience function returns true if the two GURLs both have hosts | |
156 // and one of the following is true: | |
157 // * They each have a known domain and registry, and it is the same for both | |
158 // URLs. Note that this means the trailing dot, if any, must match too. | |
159 // * They don't have known domains/registries, but the hosts are identical. | |
160 // Effectively, callers can use this function to check whether the input URLs | |
161 // represent hosts "on the same site". | |
162 static bool SameDomainOrHost(const GURL& gurl1, const GURL& gurl2); | |
163 | |
164 // Finds the length in bytes of the registrar portion of the host in the | |
165 // given GURL. Returns std::string::npos if the GURL is invalid or has no | |
166 // host (e.g. a file: URL). Returns 0 if the GURL has multiple trailing dots, | |
167 // is an IP address, has no subcomponents, or is itself a recognized registry | |
168 // identifier. If no matching rule is found in the effective-TLD data (or in | |
169 // the default data, if the resource failed to load), returns 0 if | |
170 // |allow_unknown_registries| is false, or the length of the last subcomponent | |
171 // if |allow_unknown_registries| is true. | |
172 // | |
173 // Examples: | |
174 // http://www.google.com/file.html -> 3 (com) | |
175 // http://..google.com/file.html -> 3 (com) | |
176 // http://google.com./file.html -> 4 (com) | |
177 // http://a.b.co.uk/file.html -> 5 (co.uk) | |
178 // file:///C:/bar.html -> std::string::npos (no host) | |
179 // http://foo.com../file.html -> 0 (multiple trailing | |
180 // dots) | |
181 // http://192.168.0.1/file.html -> 0 (IP address) | |
182 // http://bar/file.html -> 0 (no subcomponents) | |
183 // http://co.uk/file.html -> 0 (host is a registry) | |
184 // http://foo.bar/file.html -> 0 or 3, depending (no rule; assume | |
185 // bar) | |
186 static size_t GetRegistryLength(const GURL& gurl, | |
187 bool allow_unknown_registries); | |
188 | |
189 // Like the GURL version, but takes a host (which is canonicalized internally) | |
190 // instead of a full GURL. | |
191 static size_t GetRegistryLength(const std::string& host, | |
192 bool allow_unknown_registries); | |
193 | |
194 private: | |
195 friend class RegistryControlledDomainTest; | |
196 | |
197 // Internal workings of the static public methods. See above. | |
198 static std::string GetDomainAndRegistryImpl(const std::string& host); | |
199 static size_t GetRegistryLengthImpl(const std::string& host, | |
200 bool allow_unknown_registries); | |
201 | |
202 typedef const struct DomainRule* (*FindDomainPtr)(const char *, unsigned int); | |
203 | |
204 // Used for unit tests, so that a different perfect hash map from the full | |
205 // list is used. Set to NULL to use the Default function. | |
206 static void UseFindDomainFunction(FindDomainPtr function); | |
207 | |
208 // Function that returns a DomainRule given a domain. | |
209 static FindDomainPtr find_domain_function_; | |
210 | |
211 | |
212 DISALLOW_IMPLICIT_CONSTRUCTORS(RegistryControlledDomainService); | |
213 }; | 141 }; |
214 | 142 |
| 143 // This enum is a required parameter to the GetRegistryLength functions |
| 144 // declared for this service. Whenever there is no matching rule in the |
| 145 // effective-TLD data (or in the default data, if the resource failed to |
| 146 // load), the result will be dependent on which enum value was passed in. |
| 147 // If EXCLUDE_UNKNOWN_REGISTRIES was passed in, the resulting registry length |
| 148 // will be 0. If INCLUDE_UNKNOWN_REGISTRIES was passed in, the resulting |
| 149 // registry length will be the length of the last subcomponent (e.g. 3 for |
| 150 // foobar.baz). |
| 151 enum NET_EXPORT UnknownRegistryFilter { |
| 152 EXCLUDE_UNKNOWN_REGISTRIES = 0, |
| 153 INCLUDE_UNKNOWN_REGISTRIES |
| 154 }; |
| 155 |
| 156 // Returns the registered, organization-identifying host and all its registry |
| 157 // information, but no subdomains, from the given GURL. Returns an empty |
| 158 // string if the GURL is invalid, has no host (e.g. a file: URL), has multiple |
| 159 // trailing dots, is an IP address, has only one subcomponent (i.e. no dots |
| 160 // other than leading/trailing ones), or is itself a recognized registry |
| 161 // identifier. If no matching rule is found in the effective-TLD data (or in |
| 162 // the default data, if the resource failed to load), the last subcomponent of |
| 163 // the host is assumed to be the registry. |
| 164 // |
| 165 // Examples: |
| 166 // http://www.google.com/file.html -> "google.com" (com) |
| 167 // http://..google.com/file.html -> "google.com" (com) |
| 168 // http://google.com./file.html -> "google.com." (com) |
| 169 // http://a.b.co.uk/file.html -> "b.co.uk" (co.uk) |
| 170 // file:///C:/bar.html -> "" (no host) |
| 171 // http://foo.com../file.html -> "" (multiple trailing dots) |
| 172 // http://192.168.0.1/file.html -> "" (IP address) |
| 173 // http://bar/file.html -> "" (no subcomponents) |
| 174 // http://co.uk/file.html -> "" (host is a registry) |
| 175 // http://foo.bar/file.html -> "foo.bar" (no rule; assume bar) |
| 176 NET_EXPORT std::string GetDomainAndRegistry(const GURL& gurl, |
| 177 PrivateRegistryFilter filter); |
| 178 |
| 179 // Like the GURL version, but takes a host (which is canonicalized internally) |
| 180 // instead of a full GURL. |
| 181 NET_EXPORT std::string GetDomainAndRegistry(const std::string& host, |
| 182 PrivateRegistryFilter filter); |
| 183 |
| 184 // This convenience function returns true if the two GURLs both have hosts |
| 185 // and one of the following is true: |
| 186 // * They each have a known domain and registry, and it is the same for both |
| 187 // URLs. Note that this means the trailing dot, if any, must match too. |
| 188 // * They don't have known domains/registries, but the hosts are identical. |
| 189 // Effectively, callers can use this function to check whether the input URLs |
| 190 // represent hosts "on the same site". |
| 191 NET_EXPORT bool SameDomainOrHost(const GURL& gurl1, const GURL& gurl2, |
| 192 PrivateRegistryFilter filter); |
| 193 |
| 194 // Finds the length in bytes of the registry portion of the host in the |
| 195 // given GURL. Returns std::string::npos if the GURL is invalid or has no |
| 196 // host (e.g. a file: URL). Returns 0 if the GURL has multiple trailing dots, |
| 197 // is an IP address, has no subcomponents, or is itself a recognized registry |
| 198 // identifier. The result is also dependent on the UnknownRegistryFilter. |
| 199 // If no matching rule is found in the effective-TLD data (or in |
| 200 // the default data, if the resource failed to load), returns 0 if |
| 201 // |unknown_filter| is EXCLUDE_UNKNOWN_REGISTRIES, or the length of the last |
| 202 // subcomponent if |unknown_filter| is INCLUDE_UNKNOWN_REGISTRIES. |
| 203 // |
| 204 // Examples: |
| 205 // http://www.google.com/file.html -> 3 (com) |
| 206 // http://..google.com/file.html -> 3 (com) |
| 207 // http://google.com./file.html -> 4 (com) |
| 208 // http://a.b.co.uk/file.html -> 5 (co.uk) |
| 209 // file:///C:/bar.html -> std::string::npos (no host) |
| 210 // http://foo.com../file.html -> 0 (multiple trailing |
| 211 // dots) |
| 212 // http://192.168.0.1/file.html -> 0 (IP address) |
| 213 // http://bar/file.html -> 0 (no subcomponents) |
| 214 // http://co.uk/file.html -> 0 (host is a registry) |
| 215 // http://foo.bar/file.html -> 0 or 3, depending (no rule; assume |
| 216 // bar) |
| 217 NET_EXPORT size_t GetRegistryLength(const GURL& gurl, |
| 218 UnknownRegistryFilter unknown_filter, |
| 219 PrivateRegistryFilter private_filter); |
| 220 |
| 221 // Like the GURL version, but takes a host (which is canonicalized internally) |
| 222 // instead of a full GURL. |
| 223 NET_EXPORT size_t GetRegistryLength(const std::string& host, |
| 224 UnknownRegistryFilter unknown_filter, |
| 225 PrivateRegistryFilter private_filter); |
| 226 |
| 227 typedef const struct DomainRule* (*FindDomainPtr)(const char *, unsigned int); |
| 228 |
| 229 // Used for unit tests, so that a different perfect hash map from the full |
| 230 // list is used. Set to NULL to use the default function. |
| 231 NET_EXPORT_PRIVATE void SetFindDomainFunctionForTesting( |
| 232 FindDomainPtr fn); |
| 233 |
| 234 } // namespace registry_controlled_domains |
215 } // namespace net | 235 } // namespace net |
216 | 236 |
217 #endif // NET_BASE_REGISTRY_CONTROLLED_DOMAINS_REGISTRY_CONTROLLED_DOMAIN_H_ | 237 #endif // NET_BASE_REGISTRY_CONTROLLED_DOMAINS_REGISTRY_CONTROLLED_DOMAIN_H_ |
OLD | NEW |