Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(153)

Side by Side Diff: chrome/browser/search_engines/template_url.h

Issue 10908226: Introduces a search term extraction mechanism working for arbitrary search providers. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fixed keyword_table_unittest Created 8 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_ 5 #ifndef CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_
6 #define CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_ 6 #define CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_
7 7
8 #include <string> 8 #include <string>
9 #include <vector> 9 #include <vector>
10 10
11 #include "base/gtest_prod_util.h" 11 #include "base/gtest_prod_util.h"
12 #include "base/time.h" 12 #include "base/time.h"
13 #include "chrome/browser/common/url_database/template_url_id.h" 13 #include "chrome/browser/common/url_database/template_url_id.h"
14 #include "googleurl/src/gurl.h" 14 #include "googleurl/src/gurl.h"
15 #include "googleurl/src/url_parse.h"
15 16
16 class Profile; 17 class Profile;
17 class SearchTermsData; 18 class SearchTermsData;
18 class TemplateURL; 19 class TemplateURL;
19 20
20 21
21 // TemplateURLRef ------------------------------------------------------------- 22 // TemplateURLRef -------------------------------------------------------------
22 23
23 // A TemplateURLRef represents a single URL within the larger TemplateURL class 24 // A TemplateURLRef represents a single URL within the larger TemplateURL class
24 // (which represents an entire "search engine", see below). If 25 // (which represents an entire "search engine", see below). If
25 // SupportsReplacement() is true, this URL has placeholders in it, for which 26 // SupportsReplacement() is true, this URL has placeholders in it, for which
26 // callers can substitute values to get a "real" URL using ReplaceSearchTerms(). 27 // callers can substitute values to get a "real" URL using ReplaceSearchTerms().
27 // 28 //
28 // TemplateURLRefs always have a non-NULL |owner_| TemplateURL, which they 29 // TemplateURLRefs always have a non-NULL |owner_| TemplateURL, which they
29 // access in order to get at important data like the underlying URL string or 30 // access in order to get at important data like the underlying URL string or
30 // the associated Profile. 31 // the associated Profile.
31 class TemplateURLRef { 32 class TemplateURLRef {
32 public: 33 public:
33 // Magic numbers to pass to ReplaceSearchTerms() for the |accepted_suggestion| 34 // Magic numbers to pass to ReplaceSearchTerms() for the |accepted_suggestion|
34 // parameter. Most callers aren't using Suggest capabilities and should just 35 // parameter. Most callers aren't using Suggest capabilities and should just
35 // pass NO_SUGGESTIONS_AVAILABLE. 36 // pass NO_SUGGESTIONS_AVAILABLE.
36 // NOTE: Because positive values are meaningful, make sure these are negative! 37 // NOTE: Because positive values are meaningful, make sure these are negative!
37 enum AcceptedSuggestion { 38 enum AcceptedSuggestion {
38 NO_SUGGESTION_CHOSEN = -1, 39 NO_SUGGESTION_CHOSEN = -1,
39 NO_SUGGESTIONS_AVAILABLE = -2, 40 NO_SUGGESTIONS_AVAILABLE = -2,
40 }; 41 };
41 42
42 // Which kind of URL within our owner we are. This allows us to get at the 43 // Which kind of URL within our owner we are. This allows us to get at the
43 // correct string field. 44 // correct string field. Use |INDEXED| to indicate that the numerical |index_in_owner_|
dhollowa 2012/09/28 18:06:29 nit: |index_in_owner_|
beaudoin 2012/10/02 17:43:29 Good catch! :) Done.
45 // should be used instead.
44 enum Type { 46 enum Type {
45 SEARCH, 47 SEARCH,
46 SUGGEST, 48 SUGGEST,
47 INSTANT, 49 INSTANT,
50 INDEXED
48 }; 51 };
49 52
50 // This struct encapsulates arguments passed to 53 // This struct encapsulates arguments passed to
51 // TemplateURLRef::ReplaceSearchTerms methods. By default, only search_terms 54 // TemplateURLRef::ReplaceSearchTerms methods. By default, only search_terms
52 // is required and is passed in the constructor. 55 // is required and is passed in the constructor.
53 struct SearchTermsArgs { 56 struct SearchTermsArgs {
54 explicit SearchTermsArgs(const string16& search_terms); 57 explicit SearchTermsArgs(const string16& search_terms);
55 58
56 // The search terms (query). 59 // The search terms (query).
57 const string16 search_terms; 60 const string16 search_terms;
58 // The original (input) query. 61 // The original (input) query.
59 string16 original_query; 62 string16 original_query;
60 // The optional assisted query stats, aka AQS, used for logging purposes. 63 // The optional assisted query stats, aka AQS, used for logging purposes.
61 // This string contains impressions of all autocomplete matches shown 64 // This string contains impressions of all autocomplete matches shown
62 // at the query submission time. For privacy reasons, we require the 65 // at the query submission time. For privacy reasons, we require the
63 // search provider to support HTTPS protocol in order to receive the AQS 66 // search provider to support HTTPS protocol in order to receive the AQS
64 // param. 67 // param.
65 // For more details, see http://goto.google.com/binary-clients-logging . 68 // For more details, see http://goto.google.com/binary-clients-logging .
66 std::string assisted_query_stats; 69 std::string assisted_query_stats;
67 70
68 // TODO: Remove along with "aq" CGI param. 71 // TODO: Remove along with "aq" CGI param.
69 int accepted_suggestion; 72 int accepted_suggestion;
70 }; 73 };
71 74
72 TemplateURLRef(TemplateURL* owner, Type type); 75 TemplateURLRef(TemplateURL* owner, Type type);
76 TemplateURLRef(TemplateURL* owner, size_t index_in_owner);
73 ~TemplateURLRef(); 77 ~TemplateURLRef();
74 78
75 // Returns the raw URL. None of the parameters will have been replaced. 79 // Returns the raw URL. None of the parameters will have been replaced.
76 std::string GetURL() const; 80 std::string GetURL() const;
77 81
78 // Returns true if this URL supports replacement. 82 // Returns true if this URL supports replacement.
79 bool SupportsReplacement() const; 83 bool SupportsReplacement() const;
80 84
81 // Like SupportsReplacement but usable on threads other than the UI thread. 85 // Like SupportsReplacement but usable on threads other than the UI thread.
82 bool SupportsReplacementUsingTermsData( 86 bool SupportsReplacementUsingTermsData(
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
121 // the key of the search term, otherwise this returns an empty string. 125 // the key of the search term, otherwise this returns an empty string.
122 const std::string& GetSearchTermKey() const; 126 const std::string& GetSearchTermKey() const;
123 127
124 // Converts the specified term in our owner's encoding to a string16. 128 // Converts the specified term in our owner's encoding to a string16.
125 string16 SearchTermToString16(const std::string& term) const; 129 string16 SearchTermToString16(const std::string& term) const;
126 130
127 // Returns true if this TemplateURLRef has a replacement term of 131 // Returns true if this TemplateURLRef has a replacement term of
128 // {google:baseURL} or {google:baseSuggestURL}. 132 // {google:baseURL} or {google:baseSuggestURL}.
129 bool HasGoogleBaseURLs() const; 133 bool HasGoogleBaseURLs() const;
130 134
135 // Use the alternate URLs and the search URL to match the provided |url|
136 // and extract |search_terms| from it. Returns false and an empty
137 // |search_terms| if no search terms can be matched.
138 bool ExtractSearchTermsFromURL(const GURL& url,
139 string16* search_terms) const;
140
131 private: 141 private:
132 friend class TemplateURL; 142 friend class TemplateURL;
133 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, SetPrepopulatedAndParse); 143 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, SetPrepopulatedAndParse);
134 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterKnown); 144 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterKnown);
135 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterUnknown); 145 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterUnknown);
136 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLEmpty); 146 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLEmpty);
137 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoTemplateEnd); 147 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoTemplateEnd);
138 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoKnownParameters); 148 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoKnownParameters);
139 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLTwoParameters); 149 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLTwoParameters);
140 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNestedParameter); 150 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNestedParameter);
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
199 void ParseIfNecessary() const; 209 void ParseIfNecessary() const;
200 210
201 // Like ParseIfNecessary but usable on threads other than the UI thread. 211 // Like ParseIfNecessary but usable on threads other than the UI thread.
202 void ParseIfNecessaryUsingTermsData( 212 void ParseIfNecessaryUsingTermsData(
203 const SearchTermsData& search_terms_data) const; 213 const SearchTermsData& search_terms_data) const;
204 214
205 // Extracts the query key and host from the url. 215 // Extracts the query key and host from the url.
206 void ParseHostAndSearchTermKey( 216 void ParseHostAndSearchTermKey(
207 const SearchTermsData& search_terms_data) const; 217 const SearchTermsData& search_terms_data) const;
208 218
219 // Extract query key and host given a list of parameters coming from the URL
220 // query or ref.
221 void FindSearchTermsKey(const std::string& params) const;
222
209 // The TemplateURL that contains us. This should outlive us. 223 // The TemplateURL that contains us. This should outlive us.
210 TemplateURL* const owner_; 224 TemplateURL* const owner_;
211 225
212 // What kind of URL we are. 226 // What kind of URL we are.
213 const Type type_; 227 const Type type_;
214 228
229 // If |type_| is |INDEXED|, |index_in_owner_| is used instead to refer to a URL
dhollowa 2012/09/28 18:06:29 nit: |index_in_owner_|
beaudoin 2012/10/02 17:43:29 Done.
230 // within our owner.
231 const size_t index_in_owner_;
232
215 // Whether the URL has been parsed. 233 // Whether the URL has been parsed.
216 mutable bool parsed_; 234 mutable bool parsed_;
217 235
218 // Whether the url was successfully parsed. 236 // Whether the url was successfully parsed.
219 mutable bool valid_; 237 mutable bool valid_;
220 238
221 // The parsed URL. All terms have been stripped out of this with 239 // The parsed URL. All terms have been stripped out of this with
222 // replacements_ giving the index of the terms to replace. 240 // replacements_ giving the index of the terms to replace.
223 mutable std::string parsed_url_; 241 mutable std::string parsed_url_;
224 242
225 // Do we support replacement? 243 // Do we support replacement?
226 mutable bool supports_replacements_; 244 mutable bool supports_replacements_;
227 245
228 // The replaceable parts of url (parsed_url_). These are ordered by index 246 // The replaceable parts of url (parsed_url_). These are ordered by index
229 // into the string, and may be empty. 247 // into the string, and may be empty.
230 mutable Replacements replacements_; 248 mutable Replacements replacements_;
231 249
232 // Host, path and key of the search term. These are only set if the url 250 // Host, path, key and location of the search term. These are only set if the
233 // contains one search term. 251 // url contains one search term.
234 mutable std::string host_; 252 mutable std::string host_;
235 mutable std::string path_; 253 mutable std::string path_;
236 mutable std::string search_term_key_; 254 mutable std::string search_term_key_;
255 mutable url_parse::Parsed::ComponentType search_term_key_location_;
237 256
238 // Whether the contained URL is a pre-populated URL. 257 // Whether the contained URL is a pre-populated URL.
239 bool prepopulated_; 258 bool prepopulated_;
240 259
241 DISALLOW_COPY_AND_ASSIGN(TemplateURLRef); 260 DISALLOW_COPY_AND_ASSIGN(TemplateURLRef);
242 }; 261 };
243 262
244 263
245 // TemplateURLData ------------------------------------------------------------ 264 // TemplateURLData ------------------------------------------------------------
246 265
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
318 // the New Tab page, de-emphasizing the omnibox as "where you go to search". 337 // the New Tab page, de-emphasizing the omnibox as "where you go to search".
319 int usage_count; 338 int usage_count;
320 339
321 // If this TemplateURL comes from prepopulated data the prepopulate_id is > 0. 340 // If this TemplateURL comes from prepopulated data the prepopulate_id is > 0.
322 int prepopulate_id; 341 int prepopulate_id;
323 342
324 // The primary unique identifier for Sync. This is set on all TemplateURLs 343 // The primary unique identifier for Sync. This is set on all TemplateURLs
325 // regardless of whether they have been associated with Sync. 344 // regardless of whether they have been associated with Sync.
326 std::string sync_guid; 345 std::string sync_guid;
327 346
347 // A list of URL patterns that can be used, in addition to |url_|, to extract
348 // search terms from a URL.
349 std::vector<std::string> alternate_urls;
350
328 private: 351 private:
352 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, SerializeAlternateURLs);
353
329 // Private so we can enforce using the setters and thus enforce that these 354 // Private so we can enforce using the setters and thus enforce that these
330 // fields are never empty. 355 // fields are never empty.
331 string16 keyword_; 356 string16 keyword_;
332 std::string url_; 357 std::string url_;
333 }; 358 };
334 359
335 360
336 // TemplateURL ---------------------------------------------------------------- 361 // TemplateURL ----------------------------------------------------------------
337 362
338 // A TemplateURL represents a single "search engine", defined primarily as a 363 // A TemplateURL represents a single "search engine", defined primarily as a
(...skipping 23 matching lines...) Expand all
362 const string16& short_name() const { return data_.short_name; } 387 const string16& short_name() const { return data_.short_name; }
363 // An accessor for the short_name, but adjusted so it can be appropriately 388 // An accessor for the short_name, but adjusted so it can be appropriately
364 // displayed even if it is LTR and the UI is RTL. 389 // displayed even if it is LTR and the UI is RTL.
365 string16 AdjustedShortNameForLocaleDirection() const; 390 string16 AdjustedShortNameForLocaleDirection() const;
366 391
367 const string16& keyword() const { return data_.keyword(); } 392 const string16& keyword() const { return data_.keyword(); }
368 393
369 const std::string& url() const { return data_.url(); } 394 const std::string& url() const { return data_.url(); }
370 const std::string& suggestions_url() const { return data_.suggestions_url; } 395 const std::string& suggestions_url() const { return data_.suggestions_url; }
371 const std::string& instant_url() const { return data_.instant_url; } 396 const std::string& instant_url() const { return data_.instant_url; }
397 const std::vector<std::string>& alternate_urls() const {
398 return data_.alternate_urls;
399 }
372 const GURL& favicon_url() const { return data_.favicon_url; } 400 const GURL& favicon_url() const { return data_.favicon_url; }
373 401
374 const GURL& originating_url() const { return data_.originating_url; } 402 const GURL& originating_url() const { return data_.originating_url; }
375 403
376 bool show_in_default_list() const { return data_.show_in_default_list; } 404 bool show_in_default_list() const { return data_.show_in_default_list; }
377 // Returns true if show_in_default_list() is true and this TemplateURL has a 405 // Returns true if show_in_default_list() is true and this TemplateURL has a
378 // TemplateURLRef that supports replacement. 406 // TemplateURLRef that supports replacement.
379 bool ShowInDefaultList() const; 407 bool ShowInDefaultList() const;
380 408
381 bool safe_for_autoreplace() const { return data_.safe_for_autoreplace; } 409 bool safe_for_autoreplace() const { return data_.safe_for_autoreplace; }
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
415 // update the keyword to reflect the current Google base URL TLD. 443 // update the keyword to reflect the current Google base URL TLD.
416 bool IsGoogleSearchURLWithReplaceableKeyword() const; 444 bool IsGoogleSearchURLWithReplaceableKeyword() const;
417 445
418 // Returns true if the keywords match or if 446 // Returns true if the keywords match or if
419 // IsGoogleSearchURLWithReplaceableKeyword() is true for both TemplateURLs. 447 // IsGoogleSearchURLWithReplaceableKeyword() is true for both TemplateURLs.
420 bool HasSameKeywordAs(const TemplateURL& other) const; 448 bool HasSameKeywordAs(const TemplateURL& other) const;
421 449
422 std::string GetExtensionId() const; 450 std::string GetExtensionId() const;
423 bool IsExtensionKeyword() const; 451 bool IsExtensionKeyword() const;
424 452
453 // Returns the total number of URLs comprised in this template, including
454 // search and alternate URLs.
455 size_t URLCount() const;
456
457 // Obtain the URL given an |index|. Alternate URLs start at index 0, followed
458 // by the regular search URL. This allows us to prioritize some pattern, so if
459 // a search term is present both in the query and the ref, we can prioritize
460 // the ref one. The |index| must be less than URLCount().
461 const std::string& GetURL(size_t index) const;
462
463 // Use the alternate URLs and the search URL to match the provided |url|
464 // and extract |search_terms| from it. Returns false and an empty
465 // |search_terms| if no search terms can be matched or if this is not an
466 // instant extended URL.
467 bool ExtractSearchTermsFromInstantExtendedURL(const GURL& url,
468 string16* search_terms);
469
425 private: 470 private:
426 friend class TemplateURLService; 471 friend class TemplateURLService;
427 472
428 void CopyFrom(const TemplateURL& other); 473 void CopyFrom(const TemplateURL& other);
429 474
430 void SetURL(const std::string& url); 475 void SetURL(const std::string& url);
431 void SetPrepopulateId(int id); 476 void SetPrepopulateId(int id);
432 477
433 // Resets the keyword if IsGoogleSearchURLWithReplaceableKeyword() or |force|. 478 // Resets the keyword if IsGoogleSearchURLWithReplaceableKeyword() or |force|.
434 // The |force| parameter is useful when the existing keyword is known to be 479 // The |force| parameter is useful when the existing keyword is known to be
435 // a placeholder. The resulting keyword is generated using 480 // a placeholder. The resulting keyword is generated using
436 // TemplateURLService::GenerateSearchURL() and 481 // TemplateURLService::GenerateSearchURL() and
437 // TemplateURLService::GenerateKeyword(). 482 // TemplateURLService::GenerateKeyword().
438 void ResetKeywordIfNecessary(bool force); 483 void ResetKeywordIfNecessary(bool force);
439 484
440 Profile* profile_; 485 Profile* profile_;
441 TemplateURLData data_; 486 TemplateURLData data_;
442 TemplateURLRef url_ref_; 487 TemplateURLRef url_ref_;
443 TemplateURLRef suggestions_url_ref_; 488 TemplateURLRef suggestions_url_ref_;
444 TemplateURLRef instant_url_ref_; 489 TemplateURLRef instant_url_ref_;
445 490
446 // TODO(sky): Add date last parsed OSD file. 491 // TODO(sky): Add date last parsed OSD file.
447 492
448 DISALLOW_COPY_AND_ASSIGN(TemplateURL); 493 DISALLOW_COPY_AND_ASSIGN(TemplateURL);
449 }; 494 };
450 495
451 #endif // CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_ 496 #endif // CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698