Chromium Code Reviews| Index: third_party/WebKit/Source/platform/weborigin/KURL.cpp |
| diff --git a/third_party/WebKit/Source/platform/weborigin/KURL.cpp b/third_party/WebKit/Source/platform/weborigin/KURL.cpp |
| index 666ec1043795a491dc0698c7ab5ff94c498730f4..18d0abb949960dd45e8fc5b9c13db1a25e2d8ee9 100644 |
| --- a/third_party/WebKit/Source/platform/weborigin/KURL.cpp |
| +++ b/third_party/WebKit/Source/platform/weborigin/KURL.cpp |
| @@ -29,6 +29,7 @@ |
| #include "platform/weborigin/KnownPorts.h" |
| #include "url/url_util.h" |
| +#include "wtf/CheckedNumeric.h" |
| #include "wtf/PtrUtil.h" |
| #include "wtf/StdLibExtras.h" |
| #include "wtf/text/CString.h" |
| @@ -36,6 +37,7 @@ |
| #include "wtf/text/StringUTF8Adaptor.h" |
| #include "wtf/text/TextEncoding.h" |
| #include <algorithm> |
| +#include <limits> |
| #ifndef NDEBUG |
| #include <stdio.h> |
| #endif |
| @@ -717,26 +719,14 @@ bool protocolIs(const String& url, const char* protocol) { |
| void KURL::init(const KURL& base, |
| const String& relative, |
| const WTF::TextEncoding* queryEncoding) { |
| - if (!relative.isNull() && relative.is8Bit()) { |
| - StringUTF8Adaptor relativeUTF8(relative); |
| - init(base, relativeUTF8.data(), relativeUTF8.length(), queryEncoding); |
| - } else |
| - init(base, relative.characters16(), relative.length(), queryEncoding); |
| - initProtocolIsInHTTPFamily(); |
| - initInnerURL(); |
| -} |
| - |
| -template <typename CHAR> |
| -void KURL::init(const KURL& base, |
| - const CHAR* relative, |
| - int relativeLength, |
| - const WTF::TextEncoding* queryEncoding) { |
| // As a performance optimization, we do not use the charset converter |
| // if encoding is UTF-8 or other Unicode encodings. Note that this is |
| // per HTML5 2.5.3 (resolving URL). The URL canonicalizer will be more |
| // efficient with no charset converter object because it can do UTF-8 |
| // internally with no extra copies. |
| + StringUTF8Adaptor baseUTF8(base.getString()); |
| + |
| // We feel free to make the charset converter object every time since it's |
| // just a wrapper around a reference. |
| KURLCharsetConverter charsetConverterObject(queryEncoding); |
| @@ -745,16 +735,46 @@ void KURL::init(const KURL& base, |
| ? 0 |
| : &charsetConverterObject; |
| - StringUTF8Adaptor baseUTF8(base.getString()); |
| - |
| + // Clamp to int max to avoid overflow. |
| url::RawCanonOutputT<char> output; |
| - m_isValid = url::ResolveRelative(baseUTF8.data(), baseUTF8.length(), |
| - base.m_parsed, relative, relativeLength, |
| - charsetConverter, &output, &m_parsed); |
| + if (!relative.isNull() && relative.is8Bit()) { |
| + StringUTF8Adaptor relativeUTF8(relative); |
| + CheckedNumeric<int> relativeLength( |
|
esprehn
2016/11/04 22:55:59
I think you want to use clampTo() in here instead
Charlie Harrison
2016/11/05 02:23:13
Aha, thanks. Done.
|
| + CheckedNumeric<size_t>(relativeUTF8.length())); |
| + m_isValid = url::ResolveRelative( |
| + baseUTF8.data(), baseUTF8.length(), base.m_parsed, relativeUTF8.data(), |
| + relativeLength.ValueOrDefault(std::numeric_limits<int>::max()), |
| + charsetConverter, &output, &m_parsed); |
| + } else { |
| + CheckedNumeric<int> relativeLength( |
|
esprehn
2016/11/04 22:55:59
ditto
Charlie Harrison
2016/11/05 02:23:13
Done.
|
| + CheckedNumeric<unsigned>(relative.length())); |
| + m_isValid = url::ResolveRelative( |
| + baseUTF8.data(), baseUTF8.length(), base.m_parsed, |
| + relative.characters16(), |
| + relativeLength.ValueOrDefault(std::numeric_limits<int>::max()), |
| + charsetConverter, &output, &m_parsed); |
| + } |
| - // See FIXME in KURLPrivate in the header. If canonicalization has not |
| - // changed the string, we can avoid an extra allocation by using assignment. |
| - m_string = AtomicString::fromUTF8(output.data(), output.length()); |
| + // AtomicString::fromUTF8 will re-hash the raw output and check the |
| + // AtomicStringTable (addWithTranslator) for the string. This can be very |
| + // expensive for large URLs. However, since many URLs are generated from |
| + // existing AtomicStrings (which already have their hashes computed), this |
| + // fast path is used if the input string is already canonicalized. |
| + // |
| + // Because this optimization does not apply to non-AtomicStrings, explicitly |
| + // check that the input is Atomic before moving forward with it. This also |
| + // gets around the problem that marking non-Atomic input as Atomic will render |
| + // the string thread unsafe. |
|
esprehn
2016/11/04 22:55:59
fwiw I don't think the thread safety is an issue,
Charlie Harrison
2016/11/05 02:23:13
The KURL is not thread safe, but the String passed
|
| + if (!relative.isNull() && relative.impl()->isAtomic() && |
| + StringView(output.data(), static_cast<unsigned>(output.length())) == |
| + relative) { |
| + m_string = relative; |
| + } else { |
| + m_string = AtomicString::fromUTF8(output.data(), output.length()); |
| + } |
| + |
| + initProtocolIsInHTTPFamily(); |
| + initInnerURL(); |
| } |
| void KURL::initInnerURL() { |