Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(182)

Side by Side Diff: components/omnibox/browser/scored_history_match.cc

Issue 2355053003: Omnibox - Allow Matching in Port Number (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/scored_history_match.h" 5 #include "components/omnibox/browser/scored_history_match.h"
6 6
7 #include <math.h> 7 #include <math.h>
8 8
9 #include <algorithm> 9 #include <algorithm>
10 #include <vector> 10 #include <vector>
(...skipping 450 matching lines...) Expand 10 before | Expand all | Expand 10 after
461 // The + 3 skips the // that probably appears in the protocol 461 // The + 3 skips the // that probably appears in the protocol
462 // after the colon. If the protocol doesn't have two slashes after 462 // after the colon. If the protocol doesn't have two slashes after
463 // the colon, that's okay--all this ends up doing is starting our 463 // the colon, that's okay--all this ends up doing is starting our
464 // search for the next / a few characters into the hostname. The 464 // search for the next / a few characters into the hostname. The
465 // only time this can cause problems is if we have a protocol without 465 // only time this can cause problems is if we have a protocol without
466 // a // after the colon and the hostname is only one or two characters. 466 // a // after the colon and the hostname is only one or two characters.
467 // This isn't worth worrying about. 467 // This isn't worth worrying about.
468 const size_t end_of_hostname_pos = (colon_pos != std::string::npos) 468 const size_t end_of_hostname_pos = (colon_pos != std::string::npos)
469 ? url.find('/', colon_pos + 3) 469 ? url.find('/', colon_pos + 3)
470 : url.find('/'); 470 : url.find('/');
471 size_t last_part_of_hostname_pos = (end_of_hostname_pos != std::string::npos) 471 const size_t last_part_of_hostname_pos =
472 ? url.rfind('.', end_of_hostname_pos) 472 (end_of_hostname_pos != std::string::npos)
473 : url.rfind('.'); 473 ? url.rfind('.', end_of_hostname_pos)
474 : url.rfind('.');
475 // Find the port in the last part of the hostname if we've identified such.
476 // Otherwise, find it starting from later in the URL and looking backwards.
477 size_t port_pos = (last_part_of_hostname_pos != std::string::npos)
478 ? url.find(':', last_part_of_hostname_pos)
479 : url.rfind(':',
480 (end_of_hostname_pos != std::string::npos)
481 ? end_of_hostname_pos
482 : url.length());
483 // If we've found the colon in the scheme, that's not the port!
484 if (port_pos <= colon_pos)
485 port_pos = std::string::npos;
Peter Kasting 2016/09/20 20:44:55 I'm really uncomfortable with all this code. A cl... [comment truncated in this view]
474 // Loop through all URL matches and score them appropriately. 486 // Loop through all URL matches and score them appropriately.
475 // First, filter all matches not at a word boundary and in the path (or 487 // First, filter all matches not at a word boundary and in the path (or
476 // later). 488 // later).
477 url_matches = FilterTermMatchesByWordStarts( 489 url_matches = FilterTermMatchesByWordStarts(
478 url_matches, terms_to_word_starts_offsets, word_starts.url_word_starts_, 490 url_matches, terms_to_word_starts_offsets, word_starts.url_word_starts_,
479 end_of_hostname_pos, std::string::npos); 491 end_of_hostname_pos, std::string::npos);
480 if (colon_pos != std::string::npos) { 492 if (colon_pos != std::string::npos) {
481 // Also filter matches not at a word boundary and in the scheme. 493 // Also filter matches not at a word boundary and in the scheme.
482 url_matches = FilterTermMatchesByWordStarts( 494 url_matches = FilterTermMatchesByWordStarts(
483 url_matches, terms_to_word_starts_offsets, word_starts.url_word_starts_, 495 url_matches, terms_to_word_starts_offsets, word_starts.url_word_starts_,
(...skipping 13 matching lines...) Expand all
497 if ((question_mark_pos != std::string::npos) && 509 if ((question_mark_pos != std::string::npos) &&
498 (url_match.offset > question_mark_pos)) { 510 (url_match.offset > question_mark_pos)) {
499 // The match is in a CGI ?... fragment. 511 // The match is in a CGI ?... fragment.
500 DCHECK(at_word_boundary); 512 DCHECK(at_word_boundary);
501 term_scores[url_match.term_num] += 5; 513 term_scores[url_match.term_num] += 5;
502 } else if ((end_of_hostname_pos != std::string::npos) && 514 } else if ((end_of_hostname_pos != std::string::npos) &&
503 (url_match.offset > end_of_hostname_pos)) { 515 (url_match.offset > end_of_hostname_pos)) {
504 // The match is in the path. 516 // The match is in the path.
505 DCHECK(at_word_boundary); 517 DCHECK(at_word_boundary);
506 term_scores[url_match.term_num] += 8; 518 term_scores[url_match.term_num] += 8;
519 } else if ((port_pos != std::string::npos) &&
520 (url_match.offset > port_pos)) {
521 // The match is in the port.
522 // (This'll also trigger for the last component of raw IPv6 addresses.
523 // This situation isn't worth worrying about.)
524 term_scores[url_match.term_num] += at_word_boundary ? 5 : 0;
507 } else if ((colon_pos == std::string::npos) || 525 } else if ((colon_pos == std::string::npos) ||
508 (url_match.offset > colon_pos)) { 526 (url_match.offset > colon_pos)) {
509 // The match is in the hostname. 527 // The match is in the hostname.
510 if ((last_part_of_hostname_pos == std::string::npos) || 528 if ((last_part_of_hostname_pos == std::string::npos) ||
511 (url_match.offset < last_part_of_hostname_pos)) { 529 (url_match.offset < last_part_of_hostname_pos)) {
512 // Either there are no dots in the hostname or this match isn't 530 // Either there are no dots in the hostname or this match isn't
513 // the last dotted component. 531 // the last dotted component.
514 term_scores[url_match.term_num] += at_word_boundary ? 10 : 2; 532 term_scores[url_match.term_num] += at_word_boundary ? 10 : 2;
515 } else { 533 } else {
516 // The match is in the last part of a dotted hostname (usually this 534 // The match is in the last part of a dotted hostname (usually this
(...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after
729 base::StringToDouble(it->first, &bucket.first); 747 base::StringToDouble(it->first, &bucket.first);
730 DCHECK(is_valid_intermediate_score); 748 DCHECK(is_valid_intermediate_score);
731 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second); 749 bool is_valid_hqp_score = base::StringToInt(it->second, &bucket.second);
732 DCHECK(is_valid_hqp_score); 750 DCHECK(is_valid_hqp_score);
733 hqp_buckets->push_back(bucket); 751 hqp_buckets->push_back(bucket);
734 } 752 }
735 return true; 753 return true;
736 } 754 }
737 return false; 755 return false;
738 } 756 }
OLD | NEW
« no previous file with comments | « components/omnibox/browser/scored_history_match.h ('k') | components/omnibox/browser/scored_history_match_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698