Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(312)

Side by Side Diff: chrome/browser/search_engines/template_url.h

Issue 10908226: Introduces a search term extraction mechanism working for arbitrary search providers. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebased Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_ 5 #ifndef CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_
6 #define CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_ 6 #define CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_
7 7
8 #include <string> 8 #include <string>
9 #include <vector> 9 #include <vector>
10 10
11 #include "base/gtest_prod_util.h" 11 #include "base/gtest_prod_util.h"
12 #include "base/time.h" 12 #include "base/time.h"
13 #include "chrome/browser/search_engines/template_url_id.h" 13 #include "chrome/browser/search_engines/template_url_id.h"
14 #include "googleurl/src/gurl.h" 14 #include "googleurl/src/gurl.h"
15 #include "googleurl/src/url_parse.h"
15 16
16 class Profile; 17 class Profile;
17 class SearchTermsData; 18 class SearchTermsData;
18 class TemplateURL; 19 class TemplateURL;
19 20
20 21
21 // TemplateURLRef ------------------------------------------------------------- 22 // TemplateURLRef -------------------------------------------------------------
22 23
23 // A TemplateURLRef represents a single URL within the larger TemplateURL class 24 // A TemplateURLRef represents a single URL within the larger TemplateURL class
24 // (which represents an entire "search engine", see below). If 25 // (which represents an entire "search engine", see below). If
25 // SupportsReplacement() is true, this URL has placeholders in it, for which 26 // SupportsReplacement() is true, this URL has placeholders in it, for which
26 // callers can substitute values to get a "real" URL using ReplaceSearchTerms(). 27 // callers can substitute values to get a "real" URL using ReplaceSearchTerms().
27 // 28 //
28 // TemplateURLRefs always have a non-NULL |owner_| TemplateURL, which they 29 // TemplateURLRefs always have a non-NULL |owner_| TemplateURL, which they
29 // access in order to get at important data like the underlying URL string or 30 // access in order to get at important data like the underlying URL string or
30 // the associated Profile. 31 // the associated Profile.
31 class TemplateURLRef { 32 class TemplateURLRef {
32 public: 33 public:
33 // Magic numbers to pass to ReplaceSearchTerms() for the |accepted_suggestion| 34 // Magic numbers to pass to ReplaceSearchTerms() for the |accepted_suggestion|
34 // parameter. Most callers aren't using Suggest capabilities and should just 35 // parameter. Most callers aren't using Suggest capabilities and should just
35 // pass NO_SUGGESTIONS_AVAILABLE. 36 // pass NO_SUGGESTIONS_AVAILABLE.
36 // NOTE: Because positive values are meaningful, make sure these are negative! 37 // NOTE: Because positive values are meaningful, make sure these are negative!
37 enum AcceptedSuggestion { 38 enum AcceptedSuggestion {
38 NO_SUGGESTION_CHOSEN = -1, 39 NO_SUGGESTION_CHOSEN = -1,
39 NO_SUGGESTIONS_AVAILABLE = -2, 40 NO_SUGGESTIONS_AVAILABLE = -2,
40 }; 41 };
41 42
42 // Which kind of URL within our owner we are. This allows us to get at the 43 // Which kind of URL within our owner we are. This allows us to get at the
43 // correct string field. 44 // correct string field. Use |INDEXED| to indicate that the numerical
45 // |index_in_owner_| should be used instead.
44 enum Type { 46 enum Type {
45 SEARCH, 47 SEARCH,
46 SUGGEST, 48 SUGGEST,
47 INSTANT, 49 INSTANT,
50 INDEXED
48 }; 51 };
49 52
50 // This struct encapsulates arguments passed to 53 // This struct encapsulates arguments passed to
51 // TemplateURLRef::ReplaceSearchTerms methods. By default, only search_terms 54 // TemplateURLRef::ReplaceSearchTerms methods. By default, only search_terms
52 // is required and is passed in the constructor. 55 // is required and is passed in the constructor.
53 struct SearchTermsArgs { 56 struct SearchTermsArgs {
54 explicit SearchTermsArgs(const string16& search_terms); 57 explicit SearchTermsArgs(const string16& search_terms);
55 58
56 // The search terms (query). 59 // The search terms (query).
57 const string16 search_terms; 60 const string16 search_terms;
58 // The original (input) query. 61 // The original (input) query.
59 string16 original_query; 62 string16 original_query;
60 // The optional assisted query stats, aka AQS, used for logging purposes. 63 // The optional assisted query stats, aka AQS, used for logging purposes.
61 // This string contains impressions of all autocomplete matches shown 64 // This string contains impressions of all autocomplete matches shown
62 // at the query submission time. For privacy reasons, we require the 65 // at the query submission time. For privacy reasons, we require the
63 // search provider to support HTTPS protocol in order to receive the AQS 66 // search provider to support HTTPS protocol in order to receive the AQS
64 // param. 67 // param.
65 // For more details, see http://goto.google.com/binary-clients-logging . 68 // For more details, see http://goto.google.com/binary-clients-logging .
66 std::string assisted_query_stats; 69 std::string assisted_query_stats;
67 70
68 // TODO: Remove along with "aq" CGI param. 71 // TODO: Remove along with "aq" CGI param.
69 int accepted_suggestion; 72 int accepted_suggestion;
70 }; 73 };
71 74
72 TemplateURLRef(TemplateURL* owner, Type type); 75 TemplateURLRef(TemplateURL* owner, Type type);
76 TemplateURLRef(TemplateURL* owner, size_t index_in_owner);
73 ~TemplateURLRef(); 77 ~TemplateURLRef();
74 78
75 // Returns the raw URL. None of the parameters will have been replaced. 79 // Returns the raw URL. None of the parameters will have been replaced.
76 std::string GetURL() const; 80 std::string GetURL() const;
77 81
78 // Returns true if this URL supports replacement. 82 // Returns true if this URL supports replacement.
79 bool SupportsReplacement() const; 83 bool SupportsReplacement() const;
80 84
81 // Like SupportsReplacement but usable on threads other than the UI thread. 85 // Like SupportsReplacement but usable on threads other than the UI thread.
82 bool SupportsReplacementUsingTermsData( 86 bool SupportsReplacementUsingTermsData(
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
121 // the key of the search term, otherwise this returns an empty string. 125 // the key of the search term, otherwise this returns an empty string.
122 const std::string& GetSearchTermKey() const; 126 const std::string& GetSearchTermKey() const;
123 127
124 // Converts the specified term in our owner's encoding to a string16. 128 // Converts the specified term in our owner's encoding to a string16.
125 string16 SearchTermToString16(const std::string& term) const; 129 string16 SearchTermToString16(const std::string& term) const;
126 130
127 // Returns true if this TemplateURLRef has a replacement term of 131 // Returns true if this TemplateURLRef has a replacement term of
128 // {google:baseURL} or {google:baseSuggestURL}. 132 // {google:baseURL} or {google:baseSuggestURL}.
129 bool HasGoogleBaseURLs() const; 133 bool HasGoogleBaseURLs() const;
130 134
135 // Extracts the search terms from the provided |url| using the template URL
136 // referred to. In case of failure, an empty string is returned.
137 string16 ExtractSearchTermsFromURL(const GURL& url) const;
138
131 private: 139 private:
132 friend class TemplateURL; 140 friend class TemplateURL;
133 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, SetPrepopulatedAndParse); 141 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, SetPrepopulatedAndParse);
134 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterKnown); 142 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterKnown);
135 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterUnknown); 143 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterUnknown);
136 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLEmpty); 144 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLEmpty);
137 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoTemplateEnd); 145 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoTemplateEnd);
138 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoKnownParameters); 146 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoKnownParameters);
139 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLTwoParameters); 147 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLTwoParameters);
140 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNestedParameter); 148 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNestedParameter);
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
199 void ParseIfNecessary() const; 207 void ParseIfNecessary() const;
200 208
201 // Like ParseIfNecessary but usable on threads other than the UI thread. 209 // Like ParseIfNecessary but usable on threads other than the UI thread.
202 void ParseIfNecessaryUsingTermsData( 210 void ParseIfNecessaryUsingTermsData(
203 const SearchTermsData& search_terms_data) const; 211 const SearchTermsData& search_terms_data) const;
204 212
205 // Extracts the query key and host from the url. 213 // Extracts the query key and host from the url.
206 void ParseHostAndSearchTermKey( 214 void ParseHostAndSearchTermKey(
207 const SearchTermsData& search_terms_data) const; 215 const SearchTermsData& search_terms_data) const;
208 216
217 // Extract query key and host given a list of parameters coming from the URL
218 // query or ref.
219 void FindSearchTermsKey(const std::string& params) const;
220
209 // The TemplateURL that contains us. This should outlive us. 221 // The TemplateURL that contains us. This should outlive us.
210 TemplateURL* const owner_; 222 TemplateURL* const owner_;
211 223
212 // What kind of URL we are. 224 // What kind of URL we are.
213 const Type type_; 225 const Type type_;
214 226
227 // If |type_| is |INDEXED|, |index_in_owner_| is used instead to refer to a
228 // URL within our owner.
229 const size_t index_in_owner_;
230
215 // Whether the URL has been parsed. 231 // Whether the URL has been parsed.
216 mutable bool parsed_; 232 mutable bool parsed_;
217 233
218 // Whether the url was successfully parsed. 234 // Whether the url was successfully parsed.
219 mutable bool valid_; 235 mutable bool valid_;
220 236
221 // The parsed URL. All terms have been stripped out of this with 237 // The parsed URL. All terms have been stripped out of this with
222 // replacements_ giving the index of the terms to replace. 238 // replacements_ giving the index of the terms to replace.
223 mutable std::string parsed_url_; 239 mutable std::string parsed_url_;
224 240
225 // Do we support replacement? 241 // Do we support replacement?
226 mutable bool supports_replacements_; 242 mutable bool supports_replacements_;
227 243
228 // The replaceable parts of url (parsed_url_). These are ordered by index 244 // The replaceable parts of url (parsed_url_). These are ordered by index
229 // into the string, and may be empty. 245 // into the string, and may be empty.
230 mutable Replacements replacements_; 246 mutable Replacements replacements_;
231 247
232 // Host, path and key of the search term. These are only set if the url 248 // Host, path, key and location of the search term. These are only set if the
233 // contains one search term. 249 // url contains one search term.
234 mutable std::string host_; 250 mutable std::string host_;
235 mutable std::string path_; 251 mutable std::string path_;
236 mutable std::string search_term_key_; 252 mutable std::string search_term_key_;
253 mutable url_parse::Parsed::ComponentType search_term_key_location_;
237 254
238 // Whether the contained URL is a pre-populated URL. 255 // Whether the contained URL is a pre-populated URL.
239 bool prepopulated_; 256 bool prepopulated_;
240 257
241 DISALLOW_COPY_AND_ASSIGN(TemplateURLRef); 258 DISALLOW_COPY_AND_ASSIGN(TemplateURLRef);
242 }; 259 };
243 260
244 261
245 // TemplateURLData ------------------------------------------------------------ 262 // TemplateURLData ------------------------------------------------------------
246 263
(...skipping 11 matching lines...) Expand all
258 275
259 // The shortcut for this TemplateURL. |keyword| must be non-empty. 276 // The shortcut for this TemplateURL. |keyword| must be non-empty.
260 void SetKeyword(const string16& keyword); 277 void SetKeyword(const string16& keyword);
261 const string16& keyword() const { return keyword_; } 278 const string16& keyword() const { return keyword_; }
262 279
263 // The raw URL for the TemplateURL, which may not be valid as-is (e.g. because 280 // The raw URL for the TemplateURL, which may not be valid as-is (e.g. because
264 // it requires substitutions first). This must be non-empty. 281 // it requires substitutions first). This must be non-empty.
265 void SetURL(const std::string& url); 282 void SetURL(const std::string& url);
266 const std::string& url() const { return url_; } 283 const std::string& url() const { return url_; }
267 284
285 // Alternate URL patterns that can be used to match search terms.
286 // The URLs must not contain commas.
287 const std::vector<std::string>& alternate_urls() const {
288 return alternate_urls_;
289 }
290
291 // Returns a serialized version of |alternate_urls| as a comma-separated list.
292 std::string SerializeAlternateURLs() const;
293
294 // Deserialize a comma-separated list of URLs into |alternate_urls|.
295 void DeserializeAndSetAlternateURLs(const std::string& alternate_urls);
296
268 // Optional additional raw URLs. 297 // Optional additional raw URLs.
269 std::string suggestions_url; 298 std::string suggestions_url;
270 std::string instant_url; 299 std::string instant_url;
271 300
272 // Optional favicon for the TemplateURL. 301 // Optional favicon for the TemplateURL.
273 GURL favicon_url; 302 GURL favicon_url;
274 303
275 // URL to the OSD file this came from. May be empty. 304 // URL to the OSD file this came from. May be empty.
276 GURL originating_url; 305 GURL originating_url;
277 306
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
319 int usage_count; 348 int usage_count;
320 349
321 // If this TemplateURL comes from prepopulated data the prepopulate_id is > 0. 350 // If this TemplateURL comes from prepopulated data the prepopulate_id is > 0.
322 int prepopulate_id; 351 int prepopulate_id;
323 352
324 // The primary unique identifier for Sync. This set on all TemplateURLs 353 // The primary unique identifier for Sync. This set on all TemplateURLs
325 // regardless of whether they have been associated with Sync. 354 // regardless of whether they have been associated with Sync.
326 std::string sync_guid; 355 std::string sync_guid;
327 356
328 private: 357 private:
358 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, SerializeAlternateURLs);
359
329 // Private so we can enforce using the setters and thus enforce that these 360 // Private so we can enforce using the setters and thus enforce that these
330 // fields are never empty. 361 // fields are never empty.
331 string16 keyword_; 362 string16 keyword_;
332 std::string url_; 363 std::string url_;
364
365 // A list of URL patterns that can be used, in addition to |url_| and
366 // |instant_url|, to extract search terms from a URL. Enforce use of a setter
367 // since |alternate_urls| cannot contain commas as they are serialized as a
368 // comma-separated list.
369 std::vector<std::string> alternate_urls_;
333 }; 370 };
334 371
335 372
336 // TemplateURL ---------------------------------------------------------------- 373 // TemplateURL ----------------------------------------------------------------
337 374
338 // A TemplateURL represents a single "search engine", defined primarily as a 375 // A TemplateURL represents a single "search engine", defined primarily as a
339 // subset of the Open Search Description Document 376 // subset of the Open Search Description Document
340 // (http://www.opensearch.org/Specifications/OpenSearch) plus some extensions. 377 // (http://www.opensearch.org/Specifications/OpenSearch) plus some extensions.
341 // One TemplateURL contains several TemplateURLRefs, which correspond to various 378 // One TemplateURL contains several TemplateURLRefs, which correspond to various
342 // different capabilities (e.g. doing searches or getting suggestions), as well 379 // different capabilities (e.g. doing searches or getting suggestions), as well
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
415 // update the keyword to reflect the current Google base URL TLD. 452 // update the keyword to reflect the current Google base URL TLD.
416 bool IsGoogleSearchURLWithReplaceableKeyword() const; 453 bool IsGoogleSearchURLWithReplaceableKeyword() const;
417 454
418 // Returns true if the keywords match or if 455 // Returns true if the keywords match or if
419 // IsGoogleSearchURLWithReplaceableKeyword() is true for both TemplateURLs. 456 // IsGoogleSearchURLWithReplaceableKeyword() is true for both TemplateURLs.
420 bool HasSameKeywordAs(const TemplateURL& other) const; 457 bool HasSameKeywordAs(const TemplateURL& other) const;
421 458
422 std::string GetExtensionId() const; 459 std::string GetExtensionId() const;
423 bool IsExtensionKeyword() const; 460 bool IsExtensionKeyword() const;
424 461
462 // Returns the total number of URLs comprised in this template, including
463 // search, instant and alternate URLs.
464 size_t URLCount() const;
465
466 // Obtain the URL given an |index|. Alternate URLs start at index 0, followed
467 // by instant and then the regular search URL. This allows us to prioritize
468 // some pattern, so if a search term is present both in the query and the ref,
469 // we can prioritize the ref one. The |index| must be less than URLCount().
470 const std::string& GetURL(size_t index) const;
471
472 // Use the various URLs comprised in this template to match the provided |url|
473 // and extract search terms from it. If successful, the search terms are
474 // returned. In case of failure, an empty string is returned.
475 string16 ExtractSearchTermsFromURL(const GURL& url);
476
425 private: 477 private:
426 friend class TemplateURLService; 478 friend class TemplateURLService;
427 479
428 void CopyFrom(const TemplateURL& other); 480 void CopyFrom(const TemplateURL& other);
429 481
430 void SetURL(const std::string& url); 482 void SetURL(const std::string& url);
431 void SetPrepopulateId(int id); 483 void SetPrepopulateId(int id);
432 484
433 // Resets the keyword if IsGoogleSearchURLWithReplaceableKeyword() or |force|. 485 // Resets the keyword if IsGoogleSearchURLWithReplaceableKeyword() or |force|.
434 // The |force| parameter is useful when the existing keyword is known to be 486 // The |force| parameter is useful when the existing keyword is known to be
435 // a placeholder. The resulting keyword is generated using 487 // a placeholder. The resulting keyword is generated using
436 // TemplateURLService::GenerateSearchURL() and 488 // TemplateURLService::GenerateSearchURL() and
437 // TemplateURLService::GenerateKeyword(). 489 // TemplateURLService::GenerateKeyword().
438 void ResetKeywordIfNecessary(bool force); 490 void ResetKeywordIfNecessary(bool force);
439 491
440 Profile* profile_; 492 Profile* profile_;
441 TemplateURLData data_; 493 TemplateURLData data_;
442 TemplateURLRef url_ref_; 494 TemplateURLRef url_ref_;
443 TemplateURLRef suggestions_url_ref_; 495 TemplateURLRef suggestions_url_ref_;
444 TemplateURLRef instant_url_ref_; 496 TemplateURLRef instant_url_ref_;
445 497
446 // TODO(sky): Add date last parsed OSD file. 498 // TODO(sky): Add date last parsed OSD file.
447 499
448 DISALLOW_COPY_AND_ASSIGN(TemplateURL); 500 DISALLOW_COPY_AND_ASSIGN(TemplateURL);
449 }; 501 };
450 502
451 #endif // CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_ 503 #endif // CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698