Chromium Code Reviews| Index: net/base/net_util.cc |
| =================================================================== |
| --- net/base/net_util.cc (revision 221015) |
| +++ net/base/net_util.cc (working copy) |
| @@ -83,6 +83,8 @@ |
| namespace { |
| +typedef std::vector<size_t> Offsets; |
| + |
| // what we prepend to get a file URL |
| static const base::FilePath::CharType kFileURLPrefix[] = |
| FILE_PATH_LITERAL("file:///"); |
| @@ -445,8 +447,7 @@ |
| } |
| // Clamps the offsets in |offsets_for_adjustment| to the length of |str|. |
| -void LimitOffsets(const base::string16& str, |
| - std::vector<size_t>* offsets_for_adjustment) { |
| +void LimitOffsets(const base::string16& str, Offsets* offsets_for_adjustment) { |
| if (offsets_for_adjustment) { |
| std::for_each(offsets_for_adjustment->begin(), |
| offsets_for_adjustment->end(), |
| @@ -461,10 +462,9 @@ |
| // |
| // We may want to skip this step in the case of file URLs to allow unicode |
| // UNC hostnames regardless of encodings. |
| -base::string16 IDNToUnicodeWithOffsets( |
| - const std::string& host, |
| - const std::string& languages, |
| - std::vector<size_t>* offsets_for_adjustment) { |
| +base::string16 IDNToUnicodeWithOffsets(const std::string& host, |
| + const std::string& languages, |
| + Offsets* offsets_for_adjustment) { |
| // Convert the ASCII input to a base::string16 for ICU. |
| base::string16 input16; |
| input16.reserve(host.length()); |
| @@ -508,56 +508,6 @@ |
| return out16; |
| } |
| -// Transforms |original_offsets| by subtracting |component_begin| from all |
| -// offsets. Any offset which was not at least this large to begin with is set |
| -// to std::string::npos. |
| -std::vector<size_t> OffsetsIntoComponent( |
|
msw
2013/09/06 00:31:08
Leave this helper intact, instead of writing two n
Peter Kasting
2013/09/06 19:04:36
Restored these two functions. I originally gutted
|
| - const std::vector<size_t>& original_offsets, |
| - size_t component_begin) { |
| - DCHECK_NE(std::string::npos, component_begin); |
| - std::vector<size_t> offsets_into_component(original_offsets); |
| - for (std::vector<size_t>::iterator i(offsets_into_component.begin()); |
| - i != offsets_into_component.end(); ++i) { |
| - if (*i != std::string::npos) |
| - *i = (*i < component_begin) ? std::string::npos : (*i - component_begin); |
| - } |
| - return offsets_into_component; |
| -} |
| - |
| -// Called after we transform a component and append it to an output string. |
| -// Maps |transformed_offsets|, which represent offsets into the transformed |
| -// component itself, into appropriate offsets for the output string, by adding |
| -// |output_component_begin| to each. Determines which offsets need mapping by |
| -// checking to see which of the |original_offsets| were within the designated |
| -// original component, using its provided endpoints. |
| -void AdjustForComponentTransform( |
| - const std::vector<size_t>& original_offsets, |
| - size_t original_component_begin, |
| - size_t original_component_end, |
| - const std::vector<size_t>& transformed_offsets, |
| - size_t output_component_begin, |
| - std::vector<size_t>* offsets_for_adjustment) { |
| - if (!offsets_for_adjustment) |
| - return; |
| - |
| - DCHECK_NE(std::string::npos, original_component_begin); |
| - DCHECK_NE(std::string::npos, original_component_end); |
| - DCHECK_NE(base::string16::npos, output_component_begin); |
| - size_t offsets_size = offsets_for_adjustment->size(); |
| - DCHECK_EQ(offsets_size, original_offsets.size()); |
| - DCHECK_EQ(offsets_size, transformed_offsets.size()); |
| - for (size_t i = 0; i < offsets_size; ++i) { |
| - size_t original_offset = original_offsets[i]; |
| - if ((original_offset >= original_component_begin) && |
| - (original_offset < original_component_end)) { |
| - size_t transformed_offset = transformed_offsets[i]; |
| - (*offsets_for_adjustment)[i] = |
| - (transformed_offset == base::string16::npos) ? |
| - base::string16::npos : (output_component_begin + transformed_offset); |
| - } |
| - } |
| -} |
| - |
| // If |component| is valid, its begin is incremented by |delta|. |
| void AdjustComponent(int delta, url_parse::Component* component) { |
| if (!component->is_valid()) |
| @@ -579,25 +529,30 @@ |
| } |
| // Helper for FormatUrlWithOffsets(). |
| -base::string16 FormatViewSourceUrl( |
| - const GURL& url, |
| - const std::vector<size_t>& original_offsets, |
| - const std::string& languages, |
| - FormatUrlTypes format_types, |
| - UnescapeRule::Type unescape_rules, |
| - url_parse::Parsed* new_parsed, |
| - size_t* prefix_end, |
| - std::vector<size_t>* offsets_for_adjustment) { |
| +base::string16 FormatViewSourceUrl(const GURL& url, |
| + const Offsets& original_offsets, |
| + const std::string& languages, |
| + FormatUrlTypes format_types, |
| + UnescapeRule::Type unescape_rules, |
| + url_parse::Parsed* new_parsed, |
| + size_t* prefix_end, |
| + Offsets* offsets_for_adjustment) { |
| DCHECK(new_parsed); |
| const char kViewSource[] = "view-source:"; |
| const size_t kViewSourceLength = arraysize(kViewSource) - 1; |
| - std::vector<size_t> offsets_into_url( |
| - OffsetsIntoComponent(original_offsets, kViewSourceLength)); |
| - GURL real_url(url.possibly_invalid_spec().substr(kViewSourceLength)); |
| + GURL underlying_url(url.possibly_invalid_spec().substr(kViewSourceLength)); |
| + Offsets offsets_into_underlying_url(original_offsets); |
| + for (Offsets::iterator i(offsets_into_underlying_url.begin()); |
|
msw
2013/09/06 00:31:08
nit: use consistent iter/index looping if you're c
Peter Kasting
2013/09/06 19:04:36
This is no longer so noticeably inconsistent, sinc
|
| + i != offsets_into_underlying_url.end(); ++i) { |
| + *i = ((*i == std::string::npos) || (*i < kViewSourceLength)) ? |
|
msw
2013/09/06 00:31:08
Why should offsets into trimmed leading text go to
Peter Kasting
2013/09/06 19:04:36
It doesn't actually matter what we set them to, si
|
| + std::string::npos : (*i - kViewSourceLength); |
| + } |
| + |
| base::string16 result(ASCIIToUTF16(kViewSource) + |
| - FormatUrlWithOffsets(real_url, languages, format_types, unescape_rules, |
| - new_parsed, prefix_end, &offsets_into_url)); |
| + FormatUrlWithOffsets(underlying_url, languages, format_types, |
| + unescape_rules, new_parsed, prefix_end, |
| + &offsets_into_underlying_url)); |
| // Adjust position values. |
| if (new_parsed->scheme.is_nonempty()) { |
| @@ -610,9 +565,13 @@ |
| AdjustComponents(kViewSourceLength, new_parsed); |
| if (prefix_end) |
| *prefix_end += kViewSourceLength; |
| - AdjustForComponentTransform(original_offsets, kViewSourceLength, |
| - url.possibly_invalid_spec().length(), offsets_into_url, kViewSourceLength, |
| - offsets_for_adjustment); |
| + for (size_t i = 0; i < original_offsets.size(); ++i) { |
|
msw
2013/09/06 00:31:08
Why does this loop use |original_offsets| at all?
Peter Kasting
2013/09/06 19:04:36
This is now moot since this once again calls Adjus
|
| + size_t new_offset = offsets_into_underlying_url[i]; |
| + if (original_offsets[i] >= kViewSourceLength) { |
| + (*offsets_for_adjustment)[i] = (new_offset == base::string16::npos) ? |
|
msw
2013/09/06 00:31:08
offsets_for_adjustment may be NULL, check for that
Peter Kasting
2013/09/06 19:04:36
This is now moot since this once again calls Adjus
|
| + base::string16::npos : (new_offset + kViewSourceLength); |
| + } |
| + } |
| LimitOffsets(result, offsets_for_adjustment); |
| return result; |
| } |
| @@ -622,9 +581,8 @@ |
| AppendComponentTransform() {} |
| virtual ~AppendComponentTransform() {} |
| - virtual base::string16 Execute( |
| - const std::string& component_text, |
| - std::vector<size_t>* offsets_into_component) const = 0; |
| + virtual base::string16 Execute(const std::string& component_text, |
| + Offsets* offsets_into_component) const = 0; |
| // NOTE: No DISALLOW_COPY_AND_ASSIGN here, since gcc < 4.3.0 requires an |
| // accessible copy constructor in order to call AppendFormattedComponent() |
| @@ -640,7 +598,7 @@ |
| private: |
| virtual base::string16 Execute( |
| const std::string& component_text, |
| - std::vector<size_t>* offsets_into_component) const OVERRIDE { |
| + Offsets* offsets_into_component) const OVERRIDE { |
| return IDNToUnicodeWithOffsets(component_text, languages_, |
| offsets_into_component); |
| } |
| @@ -657,7 +615,7 @@ |
| private: |
| virtual base::string16 Execute( |
| const std::string& component_text, |
| - std::vector<size_t>* offsets_into_component) const OVERRIDE { |
| + Offsets* offsets_into_component) const OVERRIDE { |
| return (unescape_rules_ == UnescapeRule::NONE) ? |
| base::UTF8ToUTF16AndAdjustOffsets(component_text, |
| offsets_into_component) : |
| @@ -668,34 +626,75 @@ |
| const UnescapeRule::Type unescape_rules_; |
| }; |
| +// Transforms the portion of |spec| covered by |original_component| according to |
| +// |transform|. Appends the result to |output|. If |output_component| is |
| +// non-NULL, its start and length are set to the transformed component's new |
| +// start and length. For each element in |original_offsets| which is at least |
| +// as large as original_component.begin, the corresponding element of |
| +// |offsets_for_adjustment| is transformed appropriately. |
| void AppendFormattedComponent(const std::string& spec, |
| const url_parse::Component& original_component, |
| - const std::vector<size_t>& original_offsets, |
| + const Offsets& original_offsets, |
| const AppendComponentTransform& transform, |
| base::string16* output, |
| url_parse::Component* output_component, |
| - std::vector<size_t>* offsets_for_adjustment) { |
| + Offsets* offsets_for_adjustment) { |
| DCHECK(output); |
| if (original_component.is_nonempty()) { |
| size_t original_component_begin = |
| static_cast<size_t>(original_component.begin); |
| size_t output_component_begin = output->length(); |
| - if (output_component) |
| - output_component->begin = static_cast<int>(output_component_begin); |
| + std::string component_str(spec, original_component_begin, |
| + static_cast<size_t>(original_component.len)); |
| - std::vector<size_t> offsets_into_component = |
| - OffsetsIntoComponent(original_offsets, original_component_begin); |
| - output->append(transform.Execute(std::string(spec, original_component_begin, |
| - static_cast<size_t>(original_component.len)), &offsets_into_component)); |
| + // Transform |original_offsets| into a vector of offsets relative to |
|
msw
2013/09/06 00:31:08
nit: 'vector'
Peter Kasting
2013/09/06 19:04:36
This is now moot.
|
| + // |component_str|. |
| + Offsets offsets_into_component(original_offsets); |
| + size_t original_component_end = |
| + static_cast<size_t>(original_component.end()); |
| + for (Offsets::iterator i(offsets_into_component.begin()); |
| + i != offsets_into_component.end(); ++i) { |
| + // If the offset originally pointed into this component, adjust down by |
| + // |original_component_begin|. (Other offsets are ignored, since it |
|
msw
2013/09/06 00:31:08
Is it even worthwhile to ignore other offsets? It
Peter Kasting
2013/09/06 19:04:36
You're right, we can unconditionally subtract sinc
|
| + // doesn't matter what the transform does with them, as we won't be paying |
| + // attention to them below). |
| + if ((*i >= original_component_begin) && (*i < original_component_end)) |
| + *i -= original_component_begin; |
| + } |
| + // Now format |component_str| and adjust the offsets accordingly. |
| + output->append(transform.Execute(component_str, &offsets_into_component)); |
| + |
| + if (offsets_for_adjustment) { |
| + // Transform back to absolute offsets by checking where each element of |
| + // |original_offsets| pointed. |
| + DCHECK_EQ(original_offsets.size(), offsets_for_adjustment->size()); |
| + for (size_t i = 0; i < original_offsets.size(); ++i) { |
| + size_t original_offset = original_offsets[i]; |
| + if ((original_offset >= original_component_begin) && |
| + (original_offset < original_component_end)) { |
| + // This offset originally pointed into the transformed component. |
|
msw
2013/09/06 00:31:08
I find it odd that we are adjusting offsets within
Peter Kasting
2013/09/06 19:04:36
Yes, any time the path needs escaping or unescapin
|
| + // Adjust the transformed relative offset back up by |
| + // |output_component_begin|. |
| + (*offsets_for_adjustment)[i] = |
| + (offsets_into_component[i] == base::string16::npos) ? |
| + base::string16::npos : |
| + (offsets_into_component[i] + output_component_begin); |
| + } else if ((original_offset >= original_component_end) && |
| + (original_offset != std::string::npos)) { |
|
msw
2013/09/06 00:31:08
nit: fix indent
Peter Kasting
2013/09/06 19:04:36
Done.
|
| + // This offset pointed after the transformed component. Adjust by the |
| + // overall difference in the transformed strings to this point. |
| + (*offsets_for_adjustment)[i] = |
| + original_offset - original_component_end + output->length(); |
|
msw
2013/09/06 00:31:08
I suppose either approach should yield the same
Peter Kasting
2013/09/06 19:04:36
They do yield the same result.
Now that this code
|
| + } |
| + } |
| + } |
| + |
| if (output_component) { |
| + output_component->begin = static_cast<int>(output_component_begin); |
| output_component->len = |
| static_cast<int>(output->length() - output_component_begin); |
| } |
| - AdjustForComponentTransform(original_offsets, original_component_begin, |
| - static_cast<size_t>(original_component.end()), |
| - offsets_into_component, output_component_begin, |
| - offsets_for_adjustment); |
| } else if (output_component) { |
| output_component->reset(); |
| } |
| @@ -1637,7 +1636,7 @@ |
| void AppendFormattedHost(const GURL& url, |
| const std::string& languages, |
| base::string16* output) { |
| - std::vector<size_t> offsets; |
| + Offsets offsets; |
| AppendFormattedComponent(url.possibly_invalid_spec(), |
| url.parsed_for_possibly_invalid_spec().host, offsets, |
| HostComponentTransform(languages), output, NULL, NULL); |
| @@ -1650,13 +1649,13 @@ |
| UnescapeRule::Type unescape_rules, |
| url_parse::Parsed* new_parsed, |
| size_t* prefix_end, |
| - std::vector<size_t>* offsets_for_adjustment) { |
| + Offsets* offsets_for_adjustment) { |
| url_parse::Parsed parsed_temp; |
| if (!new_parsed) |
| new_parsed = &parsed_temp; |
| else |
| *new_parsed = url_parse::Parsed(); |
| - std::vector<size_t> original_offsets; |
| + Offsets original_offsets; |
| if (offsets_for_adjustment) |
| original_offsets = *offsets_for_adjustment; |
| @@ -1668,7 +1667,8 @@ |
| if (url.SchemeIs(kViewSource) && |
| !StartsWithASCII(url.possibly_invalid_spec(), kViewSourceTwice, false)) { |
| return FormatViewSourceUrl(url, original_offsets, languages, format_types, |
| - unescape_rules, new_parsed, prefix_end, offsets_for_adjustment); |
| + unescape_rules, new_parsed, prefix_end, |
| + offsets_for_adjustment); |
| } |
| // We handle both valid and invalid URLs (this will give us the spec |
| @@ -1726,32 +1726,13 @@ |
| AppendFormattedComponent(spec, parsed.username, original_offsets, |
| NonHostComponentTransform(unescape_rules), &url_string, |
| &new_parsed->username, offsets_for_adjustment); |
| - if (parsed.password.is_valid()) { |
| - size_t colon = parsed.username.end(); |
|
msw
2013/09/06 00:31:08
What was this code doing and why is it no longer n
Peter Kasting
2013/09/06 19:04:36
This is somewhat subtle.
Before this change, Appe
|
| - DCHECK_EQ(static_cast<size_t>(parsed.password.begin - 1), colon); |
| - std::vector<size_t>::const_iterator colon_iter = |
| - std::find(original_offsets.begin(), original_offsets.end(), colon); |
| - if (colon_iter != original_offsets.end()) { |
| - (*offsets_for_adjustment)[colon_iter - original_offsets.begin()] = |
| - url_string.length(); |
| - } |
| + if (parsed.password.is_valid()) |
| url_string.push_back(':'); |
| - } |
| AppendFormattedComponent(spec, parsed.password, original_offsets, |
| NonHostComponentTransform(unescape_rules), &url_string, |
| &new_parsed->password, offsets_for_adjustment); |
| - if (parsed.username.is_valid() || parsed.password.is_valid()) { |
| - size_t at_sign = (parsed.password.is_valid() ? |
|
msw
2013/09/06 00:31:08
Ditto: What was this code doing and why is it no l
|
| - parsed.password : parsed.username).end(); |
| - DCHECK_EQ(static_cast<size_t>(parsed.host.begin - 1), at_sign); |
| - std::vector<size_t>::const_iterator at_sign_iter = |
| - std::find(original_offsets.begin(), original_offsets.end(), at_sign); |
| - if (at_sign_iter != original_offsets.end()) { |
| - (*offsets_for_adjustment)[at_sign_iter - original_offsets.begin()] = |
| - url_string.length(); |
| - } |
| + if (parsed.username.is_valid() || parsed.password.is_valid()) |
| url_string.push_back('@'); |
| - } |
| } |
| if (prefix_end) |
| *prefix_end = static_cast<size_t>(url_string.length()); |
| @@ -1779,6 +1760,10 @@ |
| AppendFormattedComponent(spec, parsed.path, original_offsets, |
| NonHostComponentTransform(unescape_rules), &url_string, |
| &new_parsed->path, offsets_for_adjustment); |
| + } else { |
| + base::OffsetAdjuster offset_adjuster(offsets_for_adjustment); |
|
msw
2013/09/06 00:31:08
What is this doing? Adjusting offsets past a remov
Peter Kasting
2013/09/06 19:04:36
Yes.
This only really matters for when we have an
|
| + offset_adjuster.Add(base::OffsetAdjuster::Adjustment( |
| + url_string.length(), parsed.path.len, 0)); |
| } |
| if (parsed.query.is_valid()) |
| url_string.push_back('?'); |
| @@ -1787,27 +1772,12 @@ |
| &new_parsed->query, offsets_for_adjustment); |
| // Ref. This is valid, unescaped UTF-8, so we can just convert. |
| - if (parsed.ref.is_valid()) { |
| + if (parsed.ref.is_valid()) |
| url_string.push_back('#'); |
| - size_t original_ref_begin = static_cast<size_t>(parsed.ref.begin); |
| - size_t output_ref_begin = url_string.length(); |
| - new_parsed->ref.begin = static_cast<int>(output_ref_begin); |
| + AppendFormattedComponent(spec, parsed.ref, original_offsets, |
|
msw
2013/09/06 00:31:08
Nice!
Peter Kasting
2013/09/06 19:04:36
Yeah... old code made me facepalm.
|
| + NonHostComponentTransform(UnescapeRule::NONE), &url_string, |
| + &new_parsed->ref, offsets_for_adjustment); |
| - std::vector<size_t> offsets_into_ref( |
| - OffsetsIntoComponent(original_offsets, original_ref_begin)); |
| - if (parsed.ref.len > 0) { |
| - url_string.append(base::UTF8ToUTF16AndAdjustOffsets( |
| - spec.substr(original_ref_begin, static_cast<size_t>(parsed.ref.len)), |
| - &offsets_into_ref)); |
| - } |
| - |
| - new_parsed->ref.len = |
| - static_cast<int>(url_string.length() - new_parsed->ref.begin); |
| - AdjustForComponentTransform(original_offsets, original_ref_begin, |
| - static_cast<size_t>(parsed.ref.end()), offsets_into_ref, |
| - output_ref_begin, offsets_for_adjustment); |
| - } |
| - |
| // If we need to strip out http do it after the fact. This way we don't need |
| // to worry about how offset_for_adjustment is interpreted. |
| if (omit_http && StartsWith(url_string, ASCIIToUTF16(kHTTP), true)) { |
| @@ -1838,7 +1808,7 @@ |
| url_parse::Parsed* new_parsed, |
| size_t* prefix_end, |
| size_t* offset_for_adjustment) { |
| - std::vector<size_t> offsets; |
| + Offsets offsets; |
| if (offset_for_adjustment) |
| offsets.push_back(*offset_for_adjustment); |
| base::string16 result = FormatUrlWithOffsets(url, languages, format_types, |