Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(214)

Side by Side Diff: chrome/browser/search_engines/template_url.h

Issue 10908226: Introduces a search term extraction mechanism working for arbitrary search providers. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Removed version_46.sql, committed separately. Created 8 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_ 5 #ifndef CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_
6 #define CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_ 6 #define CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_
7 7
8 #include <string> 8 #include <string>
9 #include <vector> 9 #include <vector>
10 10
11 #include "base/gtest_prod_util.h" 11 #include "base/gtest_prod_util.h"
12 #include "base/time.h" 12 #include "base/time.h"
13 #include "chrome/browser/search_engines/template_url_id.h" 13 #include "chrome/browser/search_engines/template_url_id.h"
14 #include "googleurl/src/gurl.h" 14 #include "googleurl/src/gurl.h"
15 #include "googleurl/src/url_parse.h"
15 16
16 class Profile; 17 class Profile;
17 class SearchTermsData; 18 class SearchTermsData;
18 class TemplateURL; 19 class TemplateURL;
19 20
20 21
21 // TemplateURLRef ------------------------------------------------------------- 22 // TemplateURLRef -------------------------------------------------------------
22 23
23 // A TemplateURLRef represents a single URL within the larger TemplateURL class 24 // A TemplateURLRef represents a single URL within the larger TemplateURL class
24 // (which represents an entire "search engine", see below). If 25 // (which represents an entire "search engine", see below). If
25 // SupportsReplacement() is true, this URL has placeholders in it, for which 26 // SupportsReplacement() is true, this URL has placeholders in it, for which
26 // callers can substitute values to get a "real" URL using ReplaceSearchTerms(). 27 // callers can substitute values to get a "real" URL using ReplaceSearchTerms().
27 // 28 //
28 // TemplateURLRefs always have a non-NULL |owner_| TemplateURL, which they 29 // TemplateURLRefs always have a non-NULL |owner_| TemplateURL, which they
29 // access in order to get at important data like the underlying URL string or 30 // access in order to get at important data like the underlying URL string or
30 // the associated Profile. 31 // the associated Profile.
31 class TemplateURLRef { 32 class TemplateURLRef {
32 public: 33 public:
33 // Magic numbers to pass to ReplaceSearchTerms() for the |accepted_suggestion| 34 // Magic numbers to pass to ReplaceSearchTerms() for the |accepted_suggestion|
34 // parameter. Most callers aren't using Suggest capabilities and should just 35 // parameter. Most callers aren't using Suggest capabilities and should just
35 // pass NO_SUGGESTIONS_AVAILABLE. 36 // pass NO_SUGGESTIONS_AVAILABLE.
36 // NOTE: Because positive values are meaningful, make sure these are negative! 37 // NOTE: Because positive values are meaningful, make sure these are negative!
37 enum AcceptedSuggestion { 38 enum AcceptedSuggestion {
38 NO_SUGGESTION_CHOSEN = -1, 39 NO_SUGGESTION_CHOSEN = -1,
39 NO_SUGGESTIONS_AVAILABLE = -2, 40 NO_SUGGESTIONS_AVAILABLE = -2,
40 }; 41 };
41 42
42 // Which kind of URL within our owner we are. This allows us to get at the 43 // Which kind of URL within our owner we are. This allows us to get at the
43 // correct string field. 44 // correct string field. Use |INDEXED| to indicate that the numerical
45 // |index_in_owner_| should be used instead.
44 enum Type { 46 enum Type {
45 SEARCH, 47 SEARCH,
46 SUGGEST, 48 SUGGEST,
47 INSTANT, 49 INSTANT,
50 INDEXED
48 }; 51 };
49 52
50 // This struct encapsulates arguments passed to 53 // This struct encapsulates arguments passed to
51 // TemplateURLRef::ReplaceSearchTerms methods. By default, only search_terms 54 // TemplateURLRef::ReplaceSearchTerms methods. By default, only search_terms
52 // is required and is passed in the constructor. 55 // is required and is passed in the constructor.
53 struct SearchTermsArgs { 56 struct SearchTermsArgs {
54 explicit SearchTermsArgs(const string16& search_terms); 57 explicit SearchTermsArgs(const string16& search_terms);
55 58
56 // The search terms (query). 59 // The search terms (query).
57 const string16 search_terms; 60 const string16 search_terms;
58 // The original (input) query. 61 // The original (input) query.
59 string16 original_query; 62 string16 original_query;
60 // The optional assisted query stats, aka AQS, used for logging purposes. 63 // The optional assisted query stats, aka AQS, used for logging purposes.
61 // This string contains impressions of all autocomplete matches shown 64 // This string contains impressions of all autocomplete matches shown
62 // at the query submission time. For privacy reasons, we require the 65 // at the query submission time. For privacy reasons, we require the
63 // search provider to support HTTPS protocol in order to receive the AQS 66 // search provider to support HTTPS protocol in order to receive the AQS
64 // param. 67 // param.
65 // For more details, see http://goto.google.com/binary-clients-logging . 68 // For more details, see http://goto.google.com/binary-clients-logging .
66 std::string assisted_query_stats; 69 std::string assisted_query_stats;
67 70
68 // TODO: Remove along with "aq" CGI param. 71 // TODO: Remove along with "aq" CGI param.
69 int accepted_suggestion; 72 int accepted_suggestion;
70 }; 73 };
71 74
72 TemplateURLRef(TemplateURL* owner, Type type); 75 TemplateURLRef(TemplateURL* owner, Type type);
76 TemplateURLRef(TemplateURL* owner, size_t index_in_owner);
73 ~TemplateURLRef(); 77 ~TemplateURLRef();
74 78
75 // Returns the raw URL. None of the parameters will have been replaced. 79 // Returns the raw URL. None of the parameters will have been replaced.
76 std::string GetURL() const; 80 std::string GetURL() const;
77 81
78 // Returns true if this URL supports replacement. 82 // Returns true if this URL supports replacement.
79 bool SupportsReplacement() const; 83 bool SupportsReplacement() const;
80 84
81 // Like SupportsReplacement but usable on threads other than the UI thread. 85 // Like SupportsReplacement but usable on threads other than the UI thread.
82 bool SupportsReplacementUsingTermsData( 86 bool SupportsReplacementUsingTermsData(
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
121 // the key of the search term, otherwise this returns an empty string. 125 // the key of the search term, otherwise this returns an empty string.
122 const std::string& GetSearchTermKey() const; 126 const std::string& GetSearchTermKey() const;
123 127
124 // Converts the specified term in our owner's encoding to a string16. 128 // Converts the specified term in our owner's encoding to a string16.
125 string16 SearchTermToString16(const std::string& term) const; 129 string16 SearchTermToString16(const std::string& term) const;
126 130
127 // Returns true if this TemplateURLRef has a replacement term of 131 // Returns true if this TemplateURLRef has a replacement term of
128 // {google:baseURL} or {google:baseSuggestURL}. 132 // {google:baseURL} or {google:baseSuggestURL}.
129 bool HasGoogleBaseURLs() const; 133 bool HasGoogleBaseURLs() const;
130 134
135 // Use the pattern referred to by this TemplateURLRef to match the provided
136 // |url| and extract |search_terms| from it. Returns true if the pattern
137 // matches, even if |search_terms| is empty. Returns false and an empty
138 // |search_terms| if the pattern does not match.
139 bool ExtractSearchTermsFromURL(const GURL& url,
140 string16* search_terms) const;
141
131 private: 142 private:
132 friend class TemplateURL; 143 friend class TemplateURL;
133 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, SetPrepopulatedAndParse); 144 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, SetPrepopulatedAndParse);
134 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterKnown); 145 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterKnown);
135 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterUnknown); 146 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseParameterUnknown);
136 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLEmpty); 147 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLEmpty);
137 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoTemplateEnd); 148 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoTemplateEnd);
138 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoKnownParameters); 149 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNoKnownParameters);
139 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLTwoParameters); 150 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLTwoParameters);
140 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNestedParameter); 151 FRIEND_TEST_ALL_PREFIXES(TemplateURLTest, ParseURLNestedParameter);
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
205 // Extracts the query key and host from the url. 216 // Extracts the query key and host from the url.
206 void ParseHostAndSearchTermKey( 217 void ParseHostAndSearchTermKey(
207 const SearchTermsData& search_terms_data) const; 218 const SearchTermsData& search_terms_data) const;
208 219
209 // The TemplateURL that contains us. This should outlive us. 220 // The TemplateURL that contains us. This should outlive us.
210 TemplateURL* const owner_; 221 TemplateURL* const owner_;
211 222
212 // What kind of URL we are. 223 // What kind of URL we are.
213 const Type type_; 224 const Type type_;
214 225
226 // If |type_| is |INDEXED|, this |index_in_owner_| is used instead to refer to
227 // a url within our owner.
228 const size_t index_in_owner_;
229
215 // Whether the URL has been parsed. 230 // Whether the URL has been parsed.
216 mutable bool parsed_; 231 mutable bool parsed_;
217 232
218 // Whether the url was successfully parsed. 233 // Whether the url was successfully parsed.
219 mutable bool valid_; 234 mutable bool valid_;
220 235
221 // The parsed URL. All terms have been stripped out of this with 236 // The parsed URL. All terms have been stripped out of this with
222 // replacements_ giving the index of the terms to replace. 237 // replacements_ giving the index of the terms to replace.
223 mutable std::string parsed_url_; 238 mutable std::string parsed_url_;
224 239
225 // Do we support replacement? 240 // Do we support replacement?
226 mutable bool supports_replacements_; 241 mutable bool supports_replacements_;
227 242
228 // The replaceable parts of url (parsed_url_). These are ordered by index 243 // The replaceable parts of url (parsed_url_). These are ordered by index
229 // into the string, and may be empty. 244 // into the string, and may be empty.
230 mutable Replacements replacements_; 245 mutable Replacements replacements_;
231 246
232 // Host, path and key of the search term. These are only set if the url 247 // Host, path, key and location of the search term. These are only set if the
233 // contains one search term. 248 // url contains one search term.
234 mutable std::string host_; 249 mutable std::string host_;
235 mutable std::string path_; 250 mutable std::string path_;
236 mutable std::string search_term_key_; 251 mutable std::string search_term_key_;
252 mutable url_parse::Parsed::ComponentType search_term_key_location_;
237 253
238 // Whether the contained URL is a pre-populated URL. 254 // Whether the contained URL is a pre-populated URL.
239 bool prepopulated_; 255 bool prepopulated_;
240 256
241 DISALLOW_COPY_AND_ASSIGN(TemplateURLRef); 257 DISALLOW_COPY_AND_ASSIGN(TemplateURLRef);
242 }; 258 };
243 259
244 260
245 // TemplateURLData ------------------------------------------------------------ 261 // TemplateURLData ------------------------------------------------------------
246 262
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
318 // the New Tab page, de-emphasizing the omnibox as "where you go to search". 334 // the New Tab page, de-emphasizing the omnibox as "where you go to search".
319 int usage_count; 335 int usage_count;
320 336
321 // If this TemplateURL comes from prepopulated data the prepopulate_id is > 0. 337 // If this TemplateURL comes from prepopulated data the prepopulate_id is > 0.
322 int prepopulate_id; 338 int prepopulate_id;
323 339
324 // The primary unique identifier for Sync. This is set on all TemplateURLs 340 // The primary unique identifier for Sync. This is set on all TemplateURLs
325 // regardless of whether they have been associated with Sync. 341 // regardless of whether they have been associated with Sync.
326 std::string sync_guid; 342 std::string sync_guid;
327 343
344 // A list of URL patterns that can be used, in addition to |url_|, to extract
345 // search terms from a URL.
346 std::vector<std::string> alternate_urls;
347
328 private: 348 private:
329 // Private so we can enforce using the setters and thus enforce that these 349 // Private so we can enforce using the setters and thus enforce that these
330 // fields are never empty. 350 // fields are never empty.
331 string16 keyword_; 351 string16 keyword_;
332 std::string url_; 352 std::string url_;
333 }; 353 };
334 354
335 355
336 // TemplateURL ---------------------------------------------------------------- 356 // TemplateURL ----------------------------------------------------------------
337 357
(...skipping 24 matching lines...) Expand all
362 const string16& short_name() const { return data_.short_name; } 382 const string16& short_name() const { return data_.short_name; }
363 // An accessor for the short_name, but adjusted so it can be appropriately 383 // An accessor for the short_name, but adjusted so it can be appropriately
364 // displayed even if it is LTR and the UI is RTL. 384 // displayed even if it is LTR and the UI is RTL.
365 string16 AdjustedShortNameForLocaleDirection() const; 385 string16 AdjustedShortNameForLocaleDirection() const;
366 386
367 const string16& keyword() const { return data_.keyword(); } 387 const string16& keyword() const { return data_.keyword(); }
368 388
369 const std::string& url() const { return data_.url(); } 389 const std::string& url() const { return data_.url(); }
370 const std::string& suggestions_url() const { return data_.suggestions_url; } 390 const std::string& suggestions_url() const { return data_.suggestions_url; }
371 const std::string& instant_url() const { return data_.instant_url; } 391 const std::string& instant_url() const { return data_.instant_url; }
392 const std::vector<std::string>& alternate_urls() const {
393 return data_.alternate_urls;
394 }
372 const GURL& favicon_url() const { return data_.favicon_url; } 395 const GURL& favicon_url() const { return data_.favicon_url; }
373 396
374 const GURL& originating_url() const { return data_.originating_url; } 397 const GURL& originating_url() const { return data_.originating_url; }
375 398
376 bool show_in_default_list() const { return data_.show_in_default_list; } 399 bool show_in_default_list() const { return data_.show_in_default_list; }
377 // Returns true if show_in_default_list() is true and this TemplateURL has a 400 // Returns true if show_in_default_list() is true and this TemplateURL has a
378 // TemplateURLRef that supports replacement. 401 // TemplateURLRef that supports replacement.
379 bool ShowInDefaultList() const; 402 bool ShowInDefaultList() const;
380 403
381 bool safe_for_autoreplace() const { return data_.safe_for_autoreplace; } 404 bool safe_for_autoreplace() const { return data_.safe_for_autoreplace; }
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
415 // update the keyword to reflect the current Google base URL TLD. 438 // update the keyword to reflect the current Google base URL TLD.
416 bool IsGoogleSearchURLWithReplaceableKeyword() const; 439 bool IsGoogleSearchURLWithReplaceableKeyword() const;
417 440
418 // Returns true if the keywords match or if 441 // Returns true if the keywords match or if
419 // IsGoogleSearchURLWithReplaceableKeyword() is true for both TemplateURLs. 442 // IsGoogleSearchURLWithReplaceableKeyword() is true for both TemplateURLs.
420 bool HasSameKeywordAs(const TemplateURL& other) const; 443 bool HasSameKeywordAs(const TemplateURL& other) const;
421 444
422 std::string GetExtensionId() const; 445 std::string GetExtensionId() const;
423 bool IsExtensionKeyword() const; 446 bool IsExtensionKeyword() const;
424 447
448 // Returns the total number of URLs contained in this template, including
449 // search and alternate URLs.
450 size_t URLCount() const;
451
452 // Gets the search URL at the given index. The alternate URLs, if any, are
453 // numbered starting at 0, and the primary search URL follows. This is used
454 // to decode the search term given a search URL (see
455 // ExtractSearchTermsFromURL()).
456 const std::string& GetURL(size_t index) const;
457
458 // Use the alternate URLs and the search URL to match the provided |url|
459 // and extract |search_terms| from it. Returns false and an empty
460 // |search_terms| if no search terms can be matched. The order in which the
461 // alternate URLs are listed dictates their priority, the URL at index 0
462 // is treated as the highest priority and the primary search URL is treated
463 // as the lowest priority (see GetURL()). For example, if a TemplateURL has
464 // alternate URL "http://foo/#q={searchTerms}" and search URL
465 // "http://foo/?q={searchTerms}", and the URL to be decoded is
466 // "http://foo/?q=a#q=b", the alternate URL will match first and the decoded
467 // search term will be "b".
468 bool ExtractSearchTermsFromURL(const GURL& url, string16* search_terms);
469
425 private: 470 private:
426 friend class TemplateURLService; 471 friend class TemplateURLService;
427 472
428 void CopyFrom(const TemplateURL& other); 473 void CopyFrom(const TemplateURL& other);
429 474
430 void SetURL(const std::string& url); 475 void SetURL(const std::string& url);
431 void SetPrepopulateId(int id); 476 void SetPrepopulateId(int id);
432 477
433 // Resets the keyword if IsGoogleSearchURLWithReplaceableKeyword() or |force|. 478 // Resets the keyword if IsGoogleSearchURLWithReplaceableKeyword() or |force|.
434 // The |force| parameter is useful when the existing keyword is known to be 479 // The |force| parameter is useful when the existing keyword is known to be
435 // a placeholder. The resulting keyword is generated using 480 // a placeholder. The resulting keyword is generated using
436 // TemplateURLService::GenerateSearchURL() and 481 // TemplateURLService::GenerateSearchURL() and
437 // TemplateURLService::GenerateKeyword(). 482 // TemplateURLService::GenerateKeyword().
438 void ResetKeywordIfNecessary(bool force); 483 void ResetKeywordIfNecessary(bool force);
439 484
440 Profile* profile_; 485 Profile* profile_;
441 TemplateURLData data_; 486 TemplateURLData data_;
442 TemplateURLRef url_ref_; 487 TemplateURLRef url_ref_;
443 TemplateURLRef suggestions_url_ref_; 488 TemplateURLRef suggestions_url_ref_;
444 TemplateURLRef instant_url_ref_; 489 TemplateURLRef instant_url_ref_;
445 490
446 // TODO(sky): Add date last parsed OSD file. 491 // TODO(sky): Add date last parsed OSD file.
447 492
448 DISALLOW_COPY_AND_ASSIGN(TemplateURL); 493 DISALLOW_COPY_AND_ASSIGN(TemplateURL);
449 }; 494 };
450 495
451 #endif // CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_ 496 #endif // CHROME_BROWSER_SEARCH_ENGINES_TEMPLATE_URL_H_
OLDNEW
« no previous file with comments | « chrome/browser/policy/policy_browsertest.cc ('k') | chrome/browser/search_engines/template_url.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698