Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/omnibox/browser/scored_history_match.h" | 5 #include "components/omnibox/browser/scored_history_match.h" |
| 6 | 6 |
| 7 #include <math.h> | 7 #include <math.h> |
| 8 | 8 |
| 9 #include <algorithm> | 9 #include <algorithm> |
| 10 #include <vector> | 10 #include <vector> |
| (...skipping 450 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 461 // The + 3 skips the // that probably appears in the protocol | 461 // The + 3 skips the // that probably appears in the protocol |
| 462 // after the colon. If the protocol doesn't have two slashes after | 462 // after the colon. If the protocol doesn't have two slashes after |
| 463 // the colon, that's okay--all this ends up doing is starting our | 463 // the colon, that's okay--all this ends up doing is starting our |
| 464 // search for the next / a few characters into the hostname. The | 464 // search for the next / a few characters into the hostname. The |
| 465 // only times this can cause problems is if we have a protocol without | 465 // only times this can cause problems is if we have a protocol without |
| 466 // a // after the colon and the hostname is only one or two characters. | 466 // a // after the colon and the hostname is only one or two characters. |
| 467 // This isn't worth worrying about. | 467 // This isn't worth worrying about. |
| 468 const size_t end_of_hostname_pos = (colon_pos != std::string::npos) | 468 const size_t end_of_hostname_pos = (colon_pos != std::string::npos) |
| 469 ? url.find('/', colon_pos + 3) | 469 ? url.find('/', colon_pos + 3) |
| 470 : url.find('/'); | 470 : url.find('/'); |
| 471 size_t last_part_of_hostname_pos = (end_of_hostname_pos != std::string::npos) | 471 const size_t last_part_of_hostname_pos = |
| 472 ? url.rfind('.', end_of_hostname_pos) | 472 (end_of_hostname_pos != std::string::npos) |
| 473 : url.rfind('.'); | 473 ? url.rfind('.', end_of_hostname_pos) |
| 474 : url.rfind('.'); | |
| 475 // Find the port in the last part of the hostname if we've identified such. | |
| 476 // Otherwise, find it starting from later in the URL and looking backwards. | |
| 477 size_t port_pos = (last_part_of_hostname_pos != std::string::npos) | |
| 478 ? url.find(':', last_part_of_hostname_pos) | |
| 479 : url.rfind(':', | |
| 480 (end_of_hostname_pos != std::string::npos) | |
| 481 ? end_of_hostname_pos | |
| 482 : url.length()); | |
| 483 // If we've found the colon in the scheme, that's not the port! | |
| 484 if (port_pos <= colon_pos) | |
| 485 port_pos = std::string::npos; | |
|
Peter Kasting
2016/09/20 20:44:55
I'm really uncomfortable with all this code.
A cl
| |
| 474 // Loop through all URL matches and score them appropriately. | 486 // Loop through all URL matches and score them appropriately. |
| 475 // First, filter all matches not at a word boundary and in the path (or | 487 // First, filter all matches not at a word boundary and in the path (or |
| 476 // later). | 488 // later). |
| 477 url_matches = FilterTermMatchesByWordStarts( | 489 url_matches = FilterTermMatchesByWordStarts( |
| 478 url_matches, terms_to_word_starts_offsets, word_starts.url_word_starts_, | 490 url_matches, terms_to_word_starts_offsets, word_starts.url_word_starts_, |
| 479 end_of_hostname_pos, std::string::npos); | 491 end_of_hostname_pos, std::string::npos); |
| 480 if (colon_pos != std::string::npos) { | 492 if (colon_pos != std::string::npos) { |
| 481 // Also filter matches not at a word boundary and in the scheme. | 493 // Also filter matches not at a word boundary and in the scheme. |
| 482 url_matches = FilterTermMatchesByWordStarts( | 494 url_matches = FilterTermMatchesByWordStarts( |
| 483 url_matches, terms_to_word_starts_offsets, word_starts.url_word_starts_, | 495 url_matches, terms_to_word_starts_offsets, word_starts.url_word_starts_, |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 497 if ((question_mark_pos != std::string::npos) && | 509 if ((question_mark_pos != std::string::npos) && |
| 498 (url_match.offset > question_mark_pos)) { | 510 (url_match.offset > question_mark_pos)) { |
| 499 // The match is in a CGI ?... fragment. | 511 // The match is in a CGI ?... fragment. |
| 500 DCHECK(at_word_boundary); | 512 DCHECK(at_word_boundary); |
| 501 term_scores[url_match.term_num] += 5; | 513 term_scores[url_match.term_num] += 5; |
| 502 } else if ((end_of_hostname_pos != std::string::npos) && | 514 } else if ((end_of_hostname_pos != std::string::npos) && |
| 503 (url_match.offset > end_of_hostname_pos)) { | 515 (url_match.offset > end_of_hostname_pos)) { |
| 504 // The match is in the path. | 516 // The match is in the path. |
| 505 DCHECK(at_word_boundary); | 517 DCHECK(at_word_boundary); |
| 506 term_scores[url_match.term_num] += 8; | 518 term_scores[url_match.term_num] += 8; |
| 519 } else if ((port_pos != std::string::npos) && | |
| 520 (url_match.offset > port_pos)) { | |
| 521 // The match is in the port. | |
| 522 // (This'll also trigger for the last component of raw IPv6 addresses. | |
| 523 // This situation isn't worth worrying about.) | |
| 524 term_scores[url_match.term_num] += at_word_boundary ? 5 : 0; | |
| 507 } else if ((colon_pos == std::string::npos) || | 525 } else if ((colon_pos == std::string::npos) || |
| 508 (url_match.offset > colon_pos)) { | 526 (url_match.offset > colon_pos)) { |
| 509 // The match is in the hostname. | 527 // The match is in the hostname. |
| 510 if ((last_part_of_hostname_pos == std::string::npos) || | 528 if ((last_part_of_hostname_pos == std::string::npos) || |
| 511 (url_match.offset < last_part_of_hostname_pos)) { | 529 (url_match.offset < last_part_of_hostname_pos)) { |
| 512 // Either there are no dots in the hostname or this match isn't | 530 // Either there are no dots in the hostname or this match isn't |
| 513 // the last dotted component. | 531 // the last dotted component. |
| 514 term_scores[url_match.term_num] += at_word_boundary ? 10 : 2; | 532 term_scores[url_match.term_num] += at_word_boundary ? 10 : 2; |
| 515 } else { | 533 } else { |
| 516 // The match is in the last part of a dotted hostname (usually this | 534 // The match is in the last part of a dotted hostname (usually this |
| (...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 729 base::StringToDouble(it->first, &bucket.first); | 747 base::StringToDouble(it->first, &bucket.first); |
| 730 DCHECK(is_valid_intermediate_score); | 748 DCHECK(is_valid_intermediate_score); |
| 731 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); | 749 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); |
| 732 DCHECK(is_valid_hqp_score); | 750 DCHECK(is_valid_hqp_score); |
| 733 hqp_buckets->push_back(bucket); | 751 hqp_buckets->push_back(bucket); |
| 734 } | 752 } |
| 735 return true; | 753 return true; |
| 736 } | 754 } |
| 737 return false; | 755 return false; |
| 738 } | 756 } |
| OLD | NEW |