| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/omnibox/browser/scored_history_match.h" | 5 #include "components/omnibox/browser/scored_history_match.h" |
| 6 | 6 |
| 7 #include <math.h> | 7 #include <math.h> |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include <algorithm> |
| 10 #include <vector> | 10 #include <vector> |
| (...skipping 455 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 466 url_matches = FilterTermMatchesByWordStarts( | 466 url_matches = FilterTermMatchesByWordStarts( |
| 467 url_matches, terms_to_word_starts_offsets, word_starts.url_word_starts_, | 467 url_matches, terms_to_word_starts_offsets, word_starts.url_word_starts_, |
| 468 end_of_hostname_pos, std::string::npos); | 468 end_of_hostname_pos, std::string::npos); |
| 469 if (colon_pos != std::string::npos) { | 469 if (colon_pos != std::string::npos) { |
| 470 // Also filter matches not at a word boundary and in the scheme. | 470 // Also filter matches not at a word boundary and in the scheme. |
| 471 url_matches = FilterTermMatchesByWordStarts( | 471 url_matches = FilterTermMatchesByWordStarts( |
| 472 url_matches, terms_to_word_starts_offsets, word_starts.url_word_starts_, | 472 url_matches, terms_to_word_starts_offsets, word_starts.url_word_starts_, |
| 473 0, colon_pos); | 473 0, colon_pos); |
| 474 } | 474 } |
| 475 for (const auto& url_match : url_matches) { | 475 for (const auto& url_match : url_matches) { |
| 476 const size_t term_offset = terms_to_word_starts_offsets[url_match.term_num]; | 476 // Calculate the offset in the URL string where the meaningful (word) part |
| 477 // of the term starts. This takes into account times when a term starts |
| 478 // with punctuation such as "/foo". |
| 479 const size_t term_word_offset = |
| 480 url_match.offset + terms_to_word_starts_offsets[url_match.term_num]; |
| 477 // Advance next_word_starts until it's >= the position of the term we're | 481 // Advance next_word_starts until it's >= the position of the term we're |
| 478 // considering (adjusted for where the word begins within the term). | 482 // considering (adjusted for where the word begins within the term). |
| 479 while ((next_word_starts != end_word_starts) && | 483 while ((next_word_starts != end_word_starts) && |
| 480 (*next_word_starts < (url_match.offset + term_offset))) { | 484 (*next_word_starts < term_word_offset)) { |
| 481 ++next_word_starts; | 485 ++next_word_starts; |
| 482 } | 486 } |
| 483 const bool at_word_boundary = | 487 const bool at_word_boundary = |
| 484 (next_word_starts != end_word_starts) && | 488 (next_word_starts != end_word_starts) && |
| 485 (*next_word_starts == url_match.offset + term_offset); | 489 (*next_word_starts == term_word_offset); |
| 486 if ((question_mark_pos != std::string::npos) && | 490 if ((question_mark_pos != std::string::npos) && |
| 487 (url_match.offset > question_mark_pos)) { | 491 (term_word_offset >= question_mark_pos)) { |
| 488 // The match is in a CGI ?... fragment. | 492 // The match is in a CGI ?... fragment. |
| 489 DCHECK(at_word_boundary); | 493 DCHECK(at_word_boundary); |
| 490 term_scores[url_match.term_num] += 5; | 494 term_scores[url_match.term_num] += 5; |
| 491 } else if ((end_of_hostname_pos != std::string::npos) && | 495 } else if ((end_of_hostname_pos != std::string::npos) && |
| 492 (url_match.offset > end_of_hostname_pos)) { | 496 (term_word_offset >= end_of_hostname_pos)) { |
| 493 // The match is in the path. | 497 // The match is in the path. |
| 494 DCHECK(at_word_boundary); | 498 DCHECK(at_word_boundary); |
| 495 term_scores[url_match.term_num] += 8; | 499 term_scores[url_match.term_num] += 8; |
| 496 } else if ((colon_pos == std::string::npos) || | 500 } else if ((colon_pos == std::string::npos) || |
| 497 (url_match.offset > colon_pos)) { | 501 (term_word_offset >= colon_pos)) { |
| 498 // The match is in the hostname. | 502 // The match is in the hostname. |
| 499 if ((last_part_of_hostname_pos == std::string::npos) || | 503 if ((last_part_of_hostname_pos == std::string::npos) || |
| 500 (url_match.offset < last_part_of_hostname_pos)) { | 504 (term_word_offset < last_part_of_hostname_pos)) { |
| 501 // Either there are no dots in the hostname or this match isn't | 505 // Either there are no dots in the hostname or this match isn't |
| 502 // the last dotted component. | 506 // the last dotted component. |
| 503 term_scores[url_match.term_num] += at_word_boundary ? 10 : 2; | 507 term_scores[url_match.term_num] += at_word_boundary ? 10 : 2; |
| 504 } else { | 508 } else { |
| 505 // The match is in the last part of a dotted hostname (usually this | 509 // The match is in the last part of a dotted hostname (usually this |
| 506 // is the top-level domain .com, .net, etc.). | 510 // is the top-level domain .com, .net, etc.). |
| 507 if (allow_tld_matches_) | 511 if (allow_tld_matches_) |
| 508 term_scores[url_match.term_num] += at_word_boundary ? 10 : 0; | 512 term_scores[url_match.term_num] += at_word_boundary ? 10 : 0; |
| 509 } | 513 } |
| 510 } else { | 514 } else { |
| 511 // The match is in the protocol (a.k.a. scheme). | 515 // The match is in the protocol (a.k.a. scheme). |
| 512 // Matches not at a word boundary should have been filtered already. | 516 // Matches not at a word boundary should have been filtered already. |
| 513 DCHECK(at_word_boundary); | 517 DCHECK(at_word_boundary); |
| 514 match_in_scheme = true; | 518 match_in_scheme = true; |
| 515 if (allow_scheme_matches_) | 519 if (allow_scheme_matches_) |
| 516 term_scores[url_match.term_num] += 10; | 520 term_scores[url_match.term_num] += 10; |
| 517 } | 521 } |
| 518 } | 522 } |
| 519 // Now do the analogous loop over all matches in the title. | 523 // Now do the analogous loop over all matches in the title. |
| 520 next_word_starts = word_starts.title_word_starts_.begin(); | 524 next_word_starts = word_starts.title_word_starts_.begin(); |
| 521 end_word_starts = word_starts.title_word_starts_.end(); | 525 end_word_starts = word_starts.title_word_starts_.end(); |
| 522 size_t word_num = 0; | 526 size_t word_num = 0; |
| 523 title_matches = FilterTermMatchesByWordStarts( | 527 title_matches = FilterTermMatchesByWordStarts( |
| 524 title_matches, terms_to_word_starts_offsets, | 528 title_matches, terms_to_word_starts_offsets, |
| 525 word_starts.title_word_starts_, 0, std::string::npos); | 529 word_starts.title_word_starts_, 0, std::string::npos); |
| 526 for (const auto& title_match : title_matches) { | 530 for (const auto& title_match : title_matches) { |
| 527 const size_t term_offset = | 531 // Calculate the offset in the title string where the meaningful (word) part |
| 528 terms_to_word_starts_offsets[title_match.term_num]; | 532 // of the term starts. This takes into account times when a term starts |
| 533 // with punctuation such as "/foo". |
| 534 const size_t term_word_offset = |
| 535 title_match.offset + terms_to_word_starts_offsets[title_match.term_num]; |
| 529 // Advance next_word_starts until it's >= the position of the term we're | 536 // Advance next_word_starts until it's >= the position of the term we're |
| 530 // considering (adjusted for where the word begins within the term). | 537 // considering (adjusted for where the word begins within the term). |
| 531 while ((next_word_starts != end_word_starts) && | 538 while ((next_word_starts != end_word_starts) && |
| 532 (*next_word_starts < (title_match.offset + term_offset))) { | 539 (*next_word_starts < term_word_offset)) { |
| 533 ++next_word_starts; | 540 ++next_word_starts; |
| 534 ++word_num; | 541 ++word_num; |
| 535 } | 542 } |
| 536 if (word_num >= num_title_words_to_allow_) | 543 if (word_num >= num_title_words_to_allow_) |
| 537 break; // only count the first ten words | 544 break; // only count the first ten words |
| 538 DCHECK(next_word_starts != end_word_starts); | 545 DCHECK(next_word_starts != end_word_starts); |
| 539 DCHECK_EQ(*next_word_starts, title_match.offset + term_offset) | 546 DCHECK_EQ(*next_word_starts, term_word_offset) |
| 540 << "not at word boundary"; | 547 << "not at word boundary"; |
| 541 term_scores[title_match.term_num] += 8; | 548 term_scores[title_match.term_num] += 8; |
| 542 } | 549 } |
| 543 // TODO(mpearson): Restore logic for penalizing out-of-order matches. | 550 // TODO(mpearson): Restore logic for penalizing out-of-order matches. |
| 544 // (Perhaps discount them by 0.8?) | 551 // (Perhaps discount them by 0.8?) |
| 545 // TODO(mpearson): Consider: if the earliest match occurs late in the string, | 552 // TODO(mpearson): Consider: if the earliest match occurs late in the string, |
| 546 // should we discount it? | 553 // should we discount it? |
| 547 // TODO(mpearson): Consider: do we want to score based on how much of the | 554 // TODO(mpearson): Consider: do we want to score based on how much of the |
| 548 // input string the input covers? (I'm leaning toward no.) | 555 // input string the input covers? (I'm leaning toward no.) |
| 549 | 556 |
| (...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 715 base::StringToDouble(it->first, &bucket.first); | 722 base::StringToDouble(it->first, &bucket.first); |
| 716 DCHECK(is_valid_intermediate_score); | 723 DCHECK(is_valid_intermediate_score); |
| 717 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); | 724 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); |
| 718 DCHECK(is_valid_hqp_score); | 725 DCHECK(is_valid_hqp_score); |
| 719 hqp_buckets->push_back(bucket); | 726 hqp_buckets->push_back(bucket); |
| 720 } | 727 } |
| 721 return true; | 728 return true; |
| 722 } | 729 } |
| 723 return false; | 730 return false; |
| 724 } | 731 } |
| OLD | NEW |