Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(161)

Side by Side Diff: url/url_canon_etc.cc

Issue 2895953002: Update dangling markup mitigations. (Closed)
Patch Set: Test. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Canonicalizers for random bits that aren't big enough for their own files. 5 // Canonicalizers for random bits that aren't big enough for their own files.
6 6
7 #include <string.h> 7 #include <string.h>
8 8
9 #include "url/url_canon.h" 9 #include "url/url_canon.h"
10 #include "url/url_canon_internal.h" 10 #include "url/url_canon_internal.h"
11 11
12 namespace url { 12 namespace url {
13 13
14 namespace { 14 namespace {
15 15
16 // Returns true if the given character should be removed from the middle of a 16 // Returns true if the given character should be removed from the middle of a
17 // URL. 17 // URL.
18 inline bool IsRemovableURLWhitespace(int ch) { 18 inline bool IsRemovableURLWhitespace(int ch) {
19 return ch == '\r' || ch == '\n' || ch == '\t'; 19 return ch == '\r' || ch == '\n' || ch == '\t';
20 } 20 }
21 21
22 // Backend for RemoveURLWhitespace (see declaration in url_canon.h). 22 // Backend for RemoveURLWhitespace (see declaration in url_canon.h).
23 // It sucks that we have to do this, since this takes about 13% of the total URL 23 // It sucks that we have to do this, since this takes about 13% of the total URL
24 // canonicalization time. 24 // canonicalization time.
25 template<typename CHAR> 25 template <typename CHAR>
26 const CHAR* DoRemoveURLWhitespace(const CHAR* input, int input_len, 26 const CHAR* DoRemoveURLWhitespace(const CHAR* input,
27 int input_len,
27 CanonOutputT<CHAR>* buffer, 28 CanonOutputT<CHAR>* buffer,
28 int* output_len) { 29 int* output_len,
30 bool* potentially_dangling_markup) {
29 // Fast verification that there's nothing that needs removal. This is the 99% 31 // Fast verification that there's nothing that needs removal. This is the 99%
30 // case, so we want it to be fast and don't care about impacting the speed 32 // case, so we want it to be fast and don't care about impacting the speed
31 // when we do find whitespace. 33 // when we do find whitespace.
32 int found_whitespace = false; 34 int found_whitespace = false;
33 for (int i = 0; i < input_len; i++) { 35 for (int i = 0; i < input_len; i++) {
34 if (!IsRemovableURLWhitespace(input[i])) 36 if (!IsRemovableURLWhitespace(input[i]))
35 continue; 37 continue;
36 found_whitespace = true; 38 found_whitespace = true;
37 break; 39 break;
38 } 40 }
39 41
40 if (!found_whitespace) { 42 if (!found_whitespace) {
41 // Didn't find any whitespace, we don't need to do anything. We can just 43 // Didn't find any whitespace, we don't need to do anything. We can just
42 // return the input as the output. 44 // return the input as the output.
43 *output_len = input_len; 45 *output_len = input_len;
44 return input; 46 return input;
45 } 47 }
46 48
47 // Remove the whitespace into the new buffer and return it. 49 // Remove the whitespace into the new buffer and return it.
48 for (int i = 0; i < input_len; i++) { 50 for (int i = 0; i < input_len; i++) {
49 if (!IsRemovableURLWhitespace(input[i])) 51 if (!IsRemovableURLWhitespace(input[i])) {
52 if (potentially_dangling_markup && input[i] == 0x3C)
53 *potentially_dangling_markup = true;
50 buffer->push_back(input[i]); 54 buffer->push_back(input[i]);
55 }
51 } 56 }
52 *output_len = buffer->length(); 57 *output_len = buffer->length();
53 return buffer->data(); 58 return buffer->data();
54 } 59 }
55 60
56 // Contains the canonical version of each possible input letter in the scheme 61 // Contains the canonical version of each possible input letter in the scheme
57 // (basically, lower-cased). The corresponding entry will be 0 if the letter 62 // (basically, lower-cased). The corresponding entry will be 0 if the letter
58 // is not allowed in a scheme. 63 // is not allowed in a scheme.
59 const char kSchemeCanonical[0x80] = { 64 const char kSchemeCanonical[0x80] = {
60 // 00-1f: all are invalid 65 // 00-1f: all are invalid
(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after
267 ReadUTFChar(spec, &i, end, &code_point); 272 ReadUTFChar(spec, &i, end, &code_point);
268 AppendUTF8Value(code_point, output); 273 AppendUTF8Value(code_point, output);
269 } 274 }
270 } 275 }
271 276
272 out_ref->len = output->length() - out_ref->begin; 277 out_ref->len = output->length() - out_ref->begin;
273 } 278 }
274 279
275 } // namespace 280 } // namespace
276 281
277 const char* RemoveURLWhitespace(const char* input, int input_len, 282 const char* RemoveURLWhitespace(const char* input,
283 int input_len,
278 CanonOutputT<char>* buffer, 284 CanonOutputT<char>* buffer,
279 int* output_len) { 285 int* output_len,
280 return DoRemoveURLWhitespace(input, input_len, buffer, output_len); 286 bool* potentially_dangling_markup) {
287 return DoRemoveURLWhitespace(input, input_len, buffer, output_len,
288 potentially_dangling_markup);
281 } 289 }
282 290
283 const base::char16* RemoveURLWhitespace(const base::char16* input, 291 const base::char16* RemoveURLWhitespace(const base::char16* input,
284 int input_len, 292 int input_len,
285 CanonOutputT<base::char16>* buffer, 293 CanonOutputT<base::char16>* buffer,
286 int* output_len) { 294 int* output_len,
287 return DoRemoveURLWhitespace(input, input_len, buffer, output_len); 295 bool* potentially_dangling_markup) {
296 return DoRemoveURLWhitespace(input, input_len, buffer, output_len,
297 potentially_dangling_markup);
288 } 298 }
289 299
290 char CanonicalSchemeChar(base::char16 ch) { 300 char CanonicalSchemeChar(base::char16 ch) {
291 if (ch >= 0x80) 301 if (ch >= 0x80)
292 return 0; // Non-ASCII is not supported by schemes. 302 return 0; // Non-ASCII is not supported by schemes.
293 return kSchemeCanonical[ch]; 303 return kSchemeCanonical[ch];
294 } 304 }
295 305
296 bool CanonicalizeScheme(const char* spec, 306 bool CanonicalizeScheme(const char* spec,
297 const Component& scheme, 307 const Component& scheme,
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
358 } 368 }
359 369
360 void CanonicalizeRef(const base::char16* spec, 370 void CanonicalizeRef(const base::char16* spec,
361 const Component& ref, 371 const Component& ref,
362 CanonOutput* output, 372 CanonOutput* output,
363 Component* out_ref) { 373 Component* out_ref) {
364 DoCanonicalizeRef<base::char16, base::char16>(spec, ref, output, out_ref); 374 DoCanonicalizeRef<base::char16, base::char16>(spec, ref, output, out_ref);
365 } 375 }
366 376
367 } // namespace url 377 } // namespace url
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698