Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(435)

Side by Side Diff: chrome/browser/page_load_metrics/observers/from_gws_page_load_metrics_observer.cc

Issue 2936543002: Move Google search related util methods to page_load_metrics_util (Closed)
Patch Set: incorporated falken's comment Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/page_load_metrics/observers/from_gws_page_load_metrics_observer.h" 5 #include "chrome/browser/page_load_metrics/observers/from_gws_page_load_metrics_observer.h"
6 #include <string> 6 #include <string>
7 7
8 #include "base/metrics/histogram_macros.h" 8 #include "base/metrics/histogram_macros.h"
9 #include "base/strings/string_util.h" 9 #include "base/strings/string_util.h"
10 #include "chrome/browser/page_load_metrics/page_load_metrics_util.h" 10 #include "chrome/browser/page_load_metrics/page_load_metrics_util.h"
(...skipping 298 matching lines...) Expand 10 before | Expand all | Expand 10 after
309 return time_to_interaction.value() + 309 return time_to_interaction.value() +
310 base::TimeDelta::FromMilliseconds(1000) > 310 base::TimeDelta::FromMilliseconds(1000) >
311 abort_info.time_to_abort; 311 abort_info.time_to_abort;
312 } else { 312 } else {
313 return time_to_interaction > abort_info.time_to_abort; 313 return time_to_interaction > abort_info.time_to_abort;
314 } 314 }
315 } 315 }
316 316
317 } // namespace 317 } // namespace
318 318
319 // See
320 // https://docs.google.com/document/d/1jNPZ6Aeh0KV6umw1yZrrkfXRfxWNruwu7FELLx_cpOg/edit
321 // for additional details.
322
323 // static
324 bool FromGWSPageLoadMetricsLogger::IsGoogleSearchHostname(const GURL& url) {
325 base::Optional<std::string> result =
326 page_load_metrics::GetGoogleHostnamePrefix(url);
327 return result && result.value() == "www";
328 }
329
330 // static
331 bool FromGWSPageLoadMetricsLogger::IsGoogleSearchResultUrl(const GURL& url) {
332 // NOTE: we do not require 'q=' in the query, as AJAXy search may instead
333 // store the query in the URL fragment.
334 if (!IsGoogleSearchHostname(url)) {
335 return false;
336 }
337
338 if (!QueryContainsComponentPrefix(url.query_piece(), "q=") &&
339 !QueryContainsComponentPrefix(url.ref_piece(), "q=")) {
340 return false;
341 }
342
343 const base::StringPiece path = url.path_piece();
344 return path == "/search" || path == "/webhp" || path == "/custom" ||
345 path == "/";
346 }
347
348 // static
349 bool FromGWSPageLoadMetricsLogger::IsGoogleSearchRedirectorUrl(
350 const GURL& url) {
351 if (!IsGoogleSearchHostname(url))
352 return false;
353
354 // The primary search redirector. Google search result redirects are
355 // differentiated from other general google redirects by 'source=web' in the
356 // query string.
357 if (url.path_piece() == "/url" && url.has_query() &&
358 QueryContainsComponent(url.query_piece(), "source=web")) {
359 return true;
360 }
361
362 // Intent-based navigations from search are redirected through a second
363 // redirector, which receives its redirect URL in the fragment/hash/ref
364 // portion of the URL (the portion after '#'). We don't check for the presence
365 // of certain params in the ref since this redirector is only used for
366 // redirects from search.
367 return url.path_piece() == "/searchurl/r.html" && url.has_ref();
368 }
369
370 // static
371 bool FromGWSPageLoadMetricsLogger::QueryContainsComponent(
372 const base::StringPiece query,
373 const base::StringPiece component) {
374 return QueryContainsComponentHelper(query, component, false);
375 }
376
377 // static
378 bool FromGWSPageLoadMetricsLogger::QueryContainsComponentPrefix(
379 const base::StringPiece query,
380 const base::StringPiece component) {
381 return QueryContainsComponentHelper(query, component, true);
382 }
383
384 // static
385 bool FromGWSPageLoadMetricsLogger::QueryContainsComponentHelper(
386 const base::StringPiece query,
387 const base::StringPiece component,
388 bool component_is_prefix) {
389 if (query.empty() || component.empty() ||
390 component.length() > query.length()) {
391 return false;
392 }
393
394 // Verify that the provided query string does not include the query or
395 // fragment start character, as the logic below depends on this character not
396 // being included.
397 DCHECK(query[0] != '?' && query[0] != '#');
398
399 // We shouldn't try to find matches beyond the point where there aren't enough
400 // characters left in query to fully match the component.
401 const size_t last_search_start = query.length() - component.length();
402
403 // We need to search for matches in a loop, rather than stopping at the first
404 // match, because we may initially match a substring that isn't a full query
405 // string component. Consider, for instance, the query string 'ab=cd&b=c'. If
406 // we search for component 'b=c', the first substring match will be characters
407 // 1-3 (zero-based) in the query string. However, this isn't a full component
408 // (the full component is ab=cd) so the match will fail. Thus, we must
409 // continue our search to find the second substring match, which in the
410 // example is at characters 6-8 (the end of the query string) and is a
411 // successful component match.
412 for (size_t start_offset = 0; start_offset <= last_search_start;
413 start_offset += component.length()) {
414 start_offset = query.find(component, start_offset);
415 if (start_offset == std::string::npos) {
416 // We searched to end of string and did not find a match.
417 return false;
418 }
419 // Verify that the character prior to the component is valid (either we're
420 // at the beginning of the query string, or are preceded by an ampersand).
421 if (start_offset != 0 && query[start_offset - 1] != '&') {
422 continue;
423 }
424 if (!component_is_prefix) {
425 // Verify that the character after the component substring is valid
426 // (either we're at the end of the query string, or are followed by an
427 // ampersand).
428 const size_t after_offset = start_offset + component.length();
429 if (after_offset < query.length() && query[after_offset] != '&') {
430 continue;
431 }
432 }
433 return true;
434 }
435 return false;
436 }
437
438 FromGWSPageLoadMetricsLogger::FromGWSPageLoadMetricsLogger() {} 319 FromGWSPageLoadMetricsLogger::FromGWSPageLoadMetricsLogger() {}
439 320
440 void FromGWSPageLoadMetricsLogger::SetPreviouslyCommittedUrl(const GURL& url) { 321 void FromGWSPageLoadMetricsLogger::SetPreviouslyCommittedUrl(const GURL& url) {
441 previously_committed_url_is_search_results_ = IsGoogleSearchResultUrl(url); 322 previously_committed_url_is_search_results_ =
323 page_load_metrics::IsGoogleSearchResultUrl(url);
442 previously_committed_url_is_search_redirector_ = 324 previously_committed_url_is_search_redirector_ =
443 IsGoogleSearchRedirectorUrl(url); 325 page_load_metrics::IsGoogleSearchRedirectorUrl(url);
444 } 326 }
445 327
446 void FromGWSPageLoadMetricsLogger::SetProvisionalUrl(const GURL& url) { 328 void FromGWSPageLoadMetricsLogger::SetProvisionalUrl(const GURL& url) {
447 provisional_url_has_search_hostname_ = IsGoogleSearchHostname(url); 329 provisional_url_has_search_hostname_ =
330 page_load_metrics::IsGoogleSearchHostname(url);
448 } 331 }
449 332
450 FromGWSPageLoadMetricsObserver::FromGWSPageLoadMetricsObserver() {} 333 FromGWSPageLoadMetricsObserver::FromGWSPageLoadMetricsObserver() {}
451 334
452 page_load_metrics::PageLoadMetricsObserver::ObservePolicy 335 page_load_metrics::PageLoadMetricsObserver::ObservePolicy
453 FromGWSPageLoadMetricsObserver::OnStart( 336 FromGWSPageLoadMetricsObserver::OnStart(
454 content::NavigationHandle* navigation_handle, 337 content::NavigationHandle* navigation_handle,
455 const GURL& currently_committed_url, 338 const GURL& currently_committed_url,
456 bool started_in_foreground) { 339 bool started_in_foreground) {
457 logger_.SetPreviouslyCommittedUrl(currently_committed_url); 340 logger_.SetPreviouslyCommittedUrl(currently_committed_url);
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after
594 return; 477 return;
595 478
596 LogProvisionalAborts(abort_info); 479 LogProvisionalAborts(abort_info);
597 480
598 LogForegroundDurations(page_load_metrics::mojom::PageLoadTiming(), extra_info, 481 LogForegroundDurations(page_load_metrics::mojom::PageLoadTiming(), extra_info,
599 base::TimeTicks()); 482 base::TimeTicks());
600 } 483 }
601 484
602 bool FromGWSPageLoadMetricsLogger::ShouldLogFailedProvisionalLoadMetrics() { 485 bool FromGWSPageLoadMetricsLogger::ShouldLogFailedProvisionalLoadMetrics() {
603 // See comment in ShouldLogPostCommitMetrics above the call to 486 // See comment in ShouldLogPostCommitMetrics above the call to
604 // IsGoogleSearchHostname for more info on this if test. 487 // page_load_metrics::IsGoogleSearchHostname for more info on this if test.
605 if (provisional_url_has_search_hostname_) 488 if (provisional_url_has_search_hostname_)
606 return false; 489 return false;
607 490
608 return previously_committed_url_is_search_results_ || 491 return previously_committed_url_is_search_results_ ||
609 previously_committed_url_is_search_redirector_; 492 previously_committed_url_is_search_redirector_;
610 } 493 }
611 494
612 bool FromGWSPageLoadMetricsLogger::ShouldLogPostCommitMetrics(const GURL& url) { 495 bool FromGWSPageLoadMetricsLogger::ShouldLogPostCommitMetrics(const GURL& url) {
613 DCHECK(!url.is_empty()); 496 DCHECK(!url.is_empty());
614 497
615 // If this page has a URL on a known google search hostname, then it may be a 498 // If this page has a URL on a known google search hostname, then it may be a
616 // page associated with search (either a search results page, or a search 499 // page associated with search (either a search results page, or a search
617 // redirector url), so we should not log stats. We could try to detect only 500 // redirector url), so we should not log stats. We could try to detect only
618 // the specific known search URLs here, and log navigations to other pages on 501 // the specific known search URLs here, and log navigations to other pages on
619 // the google search hostname (for example, a search for 'about google' 502 // the google search hostname (for example, a search for 'about google'
620 // includes a result for https://www.google.com/about/), however, we assume 503 // includes a result for https://www.google.com/about/), however, we assume
621 // these cases are relatively uncommon, and we run the risk of logging metrics 504 // these cases are relatively uncommon, and we run the risk of logging metrics
622 // for some search redirector URLs. Thus we choose the more conservative 505 // for some search redirector URLs. Thus we choose the more conservative
623 // approach of ignoring all urls on known search hostnames. 506 // approach of ignoring all urls on known search hostnames.
624 if (IsGoogleSearchHostname(url)) 507 if (page_load_metrics::IsGoogleSearchHostname(url))
625 return false; 508 return false;
626 509
627 // We're only interested in tracking navigations (e.g. clicks) initiated via 510 // We're only interested in tracking navigations (e.g. clicks) initiated via
628 // links. Note that the redirector will mask these, so don't enforce this if 511 // links. Note that the redirector will mask these, so don't enforce this if
629 // the navigation came from a redirect url. TODO(csharrison): Use this signal 512 // the navigation came from a redirect url. TODO(csharrison): Use this signal
630 // for provisional loads when the content APIs allow for it. 513 // for provisional loads when the content APIs allow for it.
631 if (previously_committed_url_is_search_results_ && 514 if (previously_committed_url_is_search_results_ &&
632 navigation_initiated_via_link_) { 515 navigation_initiated_via_link_) {
633 return true; 516 return true;
634 } 517 }
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after
748 first_user_interaction_after_paint_ = 631 first_user_interaction_after_paint_ =
749 base::TimeTicks::Now() - navigation_start_; 632 base::TimeTicks::Now() - navigation_start_;
750 } 633 }
751 } 634 }
752 635
753 void FromGWSPageLoadMetricsLogger::FlushMetricsOnAppEnterBackground( 636 void FromGWSPageLoadMetricsLogger::FlushMetricsOnAppEnterBackground(
754 const page_load_metrics::mojom::PageLoadTiming& timing, 637 const page_load_metrics::mojom::PageLoadTiming& timing,
755 const page_load_metrics::PageLoadExtraInfo& extra_info) { 638 const page_load_metrics::PageLoadExtraInfo& extra_info) {
756 LogForegroundDurations(timing, extra_info, base::TimeTicks::Now()); 639 LogForegroundDurations(timing, extra_info, base::TimeTicks::Now());
757 } 640 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698