Chromium Code Reviews| Index: chrome/browser/autocomplete/scored_history_match_builder.h |
| diff --git a/chrome/browser/autocomplete/scored_history_match_builder.h b/chrome/browser/autocomplete/scored_history_match_builder.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..ee457b703c1eb4c3f465287dccae521301574651 |
| --- /dev/null |
| +++ b/chrome/browser/autocomplete/scored_history_match_builder.h |
| @@ -0,0 +1,126 @@ |
| +// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#ifndef CHROME_BROWSER_AUTOCOMPLETE_SCORED_HISTORY_MATCH_BUILDER_H_ |
| +#define CHROME_BROWSER_AUTOCOMPLETE_SCORED_HISTORY_MATCH_BUILDER_H_ |
| + |
| +#include <vector> |
| + |
| +#include "base/callback.h" |
| +#include "base/strings/string16.h" |
| +#include "components/history/core/browser/history_types.h" |
| +#include "components/history/core/browser/in_memory_url_index_types.h" |
| +#include "components/history/core/browser/scored_history_match.h" |
| +#include "testing/gtest/include/gtest/gtest_prod.h" |
| + |
| +class ScoredHistoryMatchBuilderTest; |
| + |
| +// ScoredHistoryMatchBuilder creates new history matches with a raw score |
| +// calculated for the history item given in |row| with recent visits as |
| +// indicated in |visits|. |
| +// |
| +// First determines if the row qualifies by seeing if all of the terms in |
| +// |terms_vector| occur in |row|. If so, calculates a raw score. This raw |
| +// score is in part determined by whether the matches occur at word boundaries, |
| +// the locations of which are stored in |word_starts|. For some terms, it's |
| +// appropriate to look for the word boundary within the term. For instance, the |
| +// term ".net" should look for a word boundary at the "n". These offsets (".net" |
| +// should have an offset of 1) come from |terms_to_word_starts_offsets|. |
| +// |is_bookmarked| is used to determine if the match's URL is referenced by any |
| +// bookmarks, which can also affect the raw score. The raw score allows the |
| +// matches to be ordered and can be used to influence the final score |
| +// calculated by the client of this index. If the row does not qualify the raw |
| +// score will be 0. |languages| is used to help parse/format the URL before |
| +// looking for the terms. |
| +class ScoredHistoryMatchBuilder : public history::ScoredHistoryMatch::Builder { |
| + public: |
| + // Returns whether |url| is bookmarked, used to affect the score. Must support |
| + // being called multiple time. |
|
Mark P
2015/02/04 19:54:04
nit: time->times
|
| + typedef base::Callback<bool(const GURL& url)> IsBookmarkedCallback; |
| + |
| + explicit ScoredHistoryMatchBuilder(const IsBookmarkedCallback& is_bookmarked); |
| + ~ScoredHistoryMatchBuilder() override; |
| + |
| + // Returns |term_matches| after removing all matches in the range |
| + // [|start_pos|, |end_pos|) that are not at a word break. |
| + // start_pos == string::npos is treated as start_pos = length of string. |
| + // (In other words, no matches will be filtered.) |
| + // end_pos == string::npos is treated as end_pos = length of string. |
| + static history::TermMatches FilterTermMatchesByWordStarts( |
| + const history::TermMatches& term_matches, |
| + const history::WordStarts& terms_to_word_starts_offsets, |
| + const history::WordStarts& word_starts, |
| + size_t start_pos, |
| + size_t end_pos); |
| + |
| + private: |
| + friend class ScoredHistoryMatchBuilderTest; |
| + FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchBuilderTest, ScoringBookmarks); |
| + FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchBuilderTest, ScoringScheme); |
| + FRIEND_TEST_ALL_PREFIXES(ScoredHistoryMatchBuilderTest, ScoringTLD); |
| + |
| + // Initialize ScoredHistoryMatchBuilder statics. |
| + void Init(); |
| + |
| + // Returns a topicality score based on how many matches appear in the |
| + // url and the page's title and where they are (e.g., at word |
| + // boundaries). Revises url_matches and title_matches of |
| + // |scored_history_match| in the process so they only reflect matches |
| + // used for scoring. (For instance, some mid-word matches are not given |
| + // credit in scoring.) |
| + static float GetTopicalityScore( |
| + const int num_terms, |
| + const base::string16& cleaned_up_url, |
| + const history::WordStarts& terms_to_word_starts_offsets, |
| + const history::RowWordStarts& word_starts, |
| + history::ScoredHistoryMatch* scored_history_match); |
| + |
| + // Returns a recency score based on |last_visit_days_ago|, which is |
| + // how many days ago the page was last visited. |
| + static float GetRecencyScore(int last_visit_days_ago); |
| + |
| + // Examines the first kMaxVisitsToScore visits and returns a score (higher |
| + // is better) based on the rate of visits, whether the page is bookmarked, and |
| + // how often those visits are typed navigations (i.e., explicitly |
| + // invoked by the user). |now| is passed in to avoid unnecessarily |
| + // recomputing it frequently. |
| + static float GetFrequency(const base::Time& now, |
| + const bool bookmarked, |
| + const history::VisitInfoVector& visits); |
| + |
| + // Combines the two component scores into a final score that's |
| + // an appropriate value to use as a relevancy score. |
| + static float GetFinalRelevancyScore(float topicality_score, |
| + float frequency_score); |
| + |
| + // history::ScoredHistoryMatch::Builder implementation. |
| + history::ScoredHistoryMatch Build( |
| + const history::URLRow& row, |
| + const history::VisitInfoVector& visits, |
| + const std::string& languages, |
| + const base::string16& lower_string, |
| + const history::String16Vector& terms_vector, |
| + const history::WordStarts& terms_to_word_starts_offsets, |
| + const history::RowWordStarts& word_starts, |
| + const base::Time now) const override; |
| + |
| + // Untyped visits to bookmarked pages score this, compared to 1 for |
| + // untyped visits to non-bookmarked pages and 20 for typed visits. |
| + static int bookmark_value_; |
| + |
| + // If true, we allow input terms to match in the TLD (e.g., .com). |
| + static bool allow_tld_matches_; |
| + |
| + // If true, we allow input terms to match in the scheme (e.g., http://). |
| + static bool allow_scheme_matches_; |
| + |
| + // The IsBookmarkedCallback to use to check whether a URL is bookmarked. May |
| + // be unset during testing. |
| + IsBookmarkedCallback is_bookmarked_; |
| + |
| + DISALLOW_COPY_AND_ASSIGN(ScoredHistoryMatchBuilder); |
| +}; |
| + |
| +#endif // CHROME_BROWSER_AUTOCOMPLETE_SCORED_HISTORY_MATCH_BUILDER_H_ |