components/omnibox/browser/scored_history_match.cc - Issue 2421373003: Omnibox: Improve HQP Scoring for Terms that Start with Punctuation

Unified Diff: components/omnibox/browser/scored_history_match.cc

Issue 2421373003: Omnibox: Improve HQP Scoring for Terms that Start with Punctuation (Closed)

Patch Set: "restore dcheck" (created 4 years, 2 months ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« no previous file with comments | « no previous file | components/omnibox/browser/scored_history_match_unittest.cc » ('j') | components/omnibox/browser/scored_history_match_unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: components/omnibox/browser/scored_history_match.cc

diff --git a/components/omnibox/browser/scored_history_match.cc b/components/omnibox/browser/scored_history_match.cc

index 6d1b0eac25d2fbf3488bab6e4e0e8ec1e4f5bf1b..6568463d96972dca6df03491462f459e343ab8e0 100644

--- a/components/omnibox/browser/scored_history_match.cc

+++ b/components/omnibox/browser/scored_history_match.cc

@@ -473,31 +473,35 @@ float ScoredHistoryMatch::GetTopicalityScore(

0, colon_pos);

}

for (const auto& url_match : url_matches) {

- const size_t term_offset = terms_to_word_starts_offsets[url_match.term_num];

+ // Calculate the offset in the URL string where the meaningful (word) part

+ // of the term starts. This takes into account times when a term starts

+ // with punctuation such as "/foo".

+ const size_t term_word_offset =

+ url_match.offset + terms_to_word_starts_offsets[url_match.term_num];

// Advance next_word_starts until it's >= the position of the term we're

// considering (adjusted for where the word begins within the term).

while ((next_word_starts != end_word_starts) &&

- (*next_word_starts < (url_match.offset + term_offset))) {

+ (*next_word_starts < term_word_offset)) {

++next_word_starts;

}

const bool at_word_boundary =

(next_word_starts != end_word_starts) &&

- (*next_word_starts == url_match.offset + term_offset);

+ (*next_word_starts == term_word_offset);

if ((question_mark_pos != std::string::npos) &&

- (url_match.offset > question_mark_pos)) {

+ (term_word_offset >= question_mark_pos)) {

// The match is in a CGI ?... fragment.

DCHECK(at_word_boundary);

term_scores[url_match.term_num] += 5;

} else if ((end_of_hostname_pos != std::string::npos) &&

- (url_match.offset > end_of_hostname_pos)) {

+ (term_word_offset >= end_of_hostname_pos)) {

// The match is in the path.

DCHECK(at_word_boundary);

term_scores[url_match.term_num] += 8;

} else if ((colon_pos == std::string::npos) ||

- (url_match.offset > colon_pos)) {

+ (term_word_offset >= colon_pos)) {

// The match is in the hostname.

if ((last_part_of_hostname_pos == std::string::npos) ||

- (url_match.offset < last_part_of_hostname_pos)) {

+ (term_word_offset < last_part_of_hostname_pos)) {

// Either there are no dots in the hostname or this match isn't

// the last dotted component.

term_scores[url_match.term_num] += at_word_boundary ? 10 : 2;

@@ -524,19 +528,22 @@ float ScoredHistoryMatch::GetTopicalityScore(

title_matches, terms_to_word_starts_offsets,

word_starts.title_word_starts_, 0, std::string::npos);

for (const auto& title_match : title_matches) {

- const size_t term_offset =

- terms_to_word_starts_offsets[title_match.term_num];

+ // Calculate the offset in the title string where the meaningful (word) part

+ // of the term starts. This takes into account times when a term starts

+ // with punctuation such as "/foo".

+ const size_t term_word_offset =

+ title_match.offset + terms_to_word_starts_offsets[title_match.term_num];

// Advance next_word_starts until it's >= the position of the term we're

// considering (adjusted for where the word begins within the term).

while ((next_word_starts != end_word_starts) &&

- (*next_word_starts < (title_match.offset + term_offset))) {

+ (*next_word_starts < term_word_offset)) {

++next_word_starts;

++word_num;

}

if (word_num >= num_title_words_to_allow_)

break; // only count the first ten words

DCHECK(next_word_starts != end_word_starts);

- DCHECK_EQ(*next_word_starts, title_match.offset + term_offset)

+ DCHECK_EQ(*next_word_starts, term_word_offset)

<< "not at word boundary";

term_scores[title_match.term_num] += 8;

}

« no previous file with comments | « no previous file | components/omnibox/browser/scored_history_match_unittest.cc » ('j') | components/omnibox/browser/scored_history_match_unittest.cc » ('J')