third_party/WebKit/Source/platform/weborigin/KURL.cpp - Issue 2428093002: [KURL] Avoid re-hashing input if is already canonicalized

Unified Diff: third_party/WebKit/Source/platform/weborigin/KURL.cpp

Issue 2428093002: [KURL] Avoid re-hashing input if is already canonicalized (Closed)

Patch Set: null check :P Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: third_party/WebKit/Source/platform/weborigin/KURL.cpp

diff --git a/third_party/WebKit/Source/platform/weborigin/KURL.cpp b/third_party/WebKit/Source/platform/weborigin/KURL.cpp

index 666ec1043795a491dc0698c7ab5ff94c498730f4..18d0abb949960dd45e8fc5b9c13db1a25e2d8ee9 100644

--- a/third_party/WebKit/Source/platform/weborigin/KURL.cpp

+++ b/third_party/WebKit/Source/platform/weborigin/KURL.cpp

@@ -29,6 +29,7 @@

#include "platform/weborigin/KnownPorts.h"

#include "url/url_util.h"

+#include "wtf/CheckedNumeric.h"

#include "wtf/PtrUtil.h"

#include "wtf/StdLibExtras.h"

#include "wtf/text/CString.h"

@@ -36,6 +37,7 @@

#include "wtf/text/StringUTF8Adaptor.h"

#include "wtf/text/TextEncoding.h"

#include <algorithm>

+#include <limits>

#ifndef NDEBUG

#include <stdio.h>

#endif

@@ -717,26 +719,14 @@ bool protocolIs(const String& url, const char* protocol) {

void KURL::init(const KURL& base,

const String& relative,

const WTF::TextEncoding* queryEncoding) {

- if (!relative.isNull() && relative.is8Bit()) {

- StringUTF8Adaptor relativeUTF8(relative);

- init(base, relativeUTF8.data(), relativeUTF8.length(), queryEncoding);

- } else

- init(base, relative.characters16(), relative.length(), queryEncoding);

- initProtocolIsInHTTPFamily();

- initInnerURL();

-template <typename CHAR>

-void KURL::init(const KURL& base,

- const CHAR* relative,

- int relativeLength,

- const WTF::TextEncoding* queryEncoding) {

// As a performance optimization, we do not use the charset converter

// if encoding is UTF-8 or other Unicode encodings. Note that this is

// per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be more

// efficient with no charset converter object because it can do UTF-8

// internally with no extra copies.

+ StringUTF8Adaptor baseUTF8(base.getString());

// We feel free to make the charset converter object every time since it's

// just a wrapper around a reference.

KURLCharsetConverter charsetConverterObject(queryEncoding);

@@ -745,16 +735,46 @@ void KURL::init(const KURL& base,

? 0

: &charsetConverterObject;

- StringUTF8Adaptor baseUTF8(base.getString());

+ // Clamp to int max to avoid overflow.

url::RawCanonOutputT<char> output;

- m_isValid = url::ResolveRelative(baseUTF8.data(), baseUTF8.length(),

- base.m_parsed, relative, relativeLength,

- charsetConverter, &output, &m_parsed);

+ if (!relative.isNull() && relative.is8Bit()) {

+ StringUTF8Adaptor relativeUTF8(relative);

+ CheckedNumeric<int> relativeLength(

esprehn 2016/11/04 22:55:59 I think you want to use clampTo() in here instead

Charlie Harrison 2016/11/05 02:23:13 Aha, thanks. Done.

+ CheckedNumeric<size_t>(relativeUTF8.length()));

+ m_isValid = url::ResolveRelative(

+ baseUTF8.data(), baseUTF8.length(), base.m_parsed, relativeUTF8.data(),

+ relativeLength.ValueOrDefault(std::numeric_limits<int>::max()),

+ charsetConverter, &output, &m_parsed);

+ } else {

+ CheckedNumeric<int> relativeLength(

esprehn 2016/11/04 22:55:59 ditto

Charlie Harrison 2016/11/05 02:23:13 Done.

+ CheckedNumeric<unsigned>(relative.length()));

+ m_isValid = url::ResolveRelative(

+ baseUTF8.data(), baseUTF8.length(), base.m_parsed,

+ relative.characters16(),

+ relativeLength.ValueOrDefault(std::numeric_limits<int>::max()),

+ charsetConverter, &output, &m_parsed);

+ }

- // See FIXME in KURLPrivate in the header. If canonicalization has not

- // changed the string, we can avoid an extra allocation by using assignment.

- m_string = AtomicString::fromUTF8(output.data(), output.length());

+ // AtomicString::fromUTF8 will re-hash the raw output and check the

+ // AtomicStringTable (addWithTranslator) for the string. This can be very

+ // expensive for large URLs. However, since many URLs are generated from

+ // existing AtomicStrings (which already have their hashes computed), this

+ // fast path is used if the input string is already canonicalized.

+ //

+ // Because this optimization does not apply to non-AtomicStrings, explicitly

+ // check that the input is Atomic before moving forward with it. This also

+ // gets around the problem that marking non-Atomic input as Atomic will render

+ // the string thread unsafe.

esprehn 2016/11/04 22:55:59 fwiw I don't think the thread safety is an issue,

Charlie Harrison 2016/11/05 02:23:13 The KURL is not thread safe, but the String passed

+ if (!relative.isNull() && relative.impl()->isAtomic() &&

+ StringView(output.data(), static_cast<unsigned>(output.length())) ==

+ relative) {

+ m_string = relative;

+ } else {

+ m_string = AtomicString::fromUTF8(output.data(), output.length());

+ }

+ initProtocolIsInHTTPFamily();

+ initInnerURL();

}

void KURL::initInnerURL() {

« no previous file with comments | « third_party/WebKit/Source/platform/weborigin/KURL.h ('k') | no next file » | no next file with comments »