url/url_canon_etc.cc - Issue 2895953002: Update dangling markup mitigations.

Side by Side Diff: url/url_canon_etc.cc

Issue 2895953002: Update dangling markup mitigations. (Closed)

Patch Set: Test. Created 3 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Canonicalizers for random bits that aren't big enough for their own files.	5 // Canonicalizers for random bits that aren't big enough for their own files.

6	6

7 #include <string.h>	7 #include <string.h>

8	8

9 #include "url/url_canon.h"	9 #include "url/url_canon.h"

10 #include "url/url_canon_internal.h"	10 #include "url/url_canon_internal.h"

11	11

12 namespace url {	12 namespace url {

13	13

14 namespace {	14 namespace {

15	15

16 // Returns true if the given character should be removed from the middle of a	16 // Returns true if the given character should be removed from the middle of a

17 // URL.	17 // URL.

18 inline bool IsRemovableURLWhitespace(int ch) {	18 inline bool IsRemovableURLWhitespace(int ch) {

19 return ch == '\r' || ch == '\n' || ch == '\t';	19 return ch == '\r' || ch == '\n' || ch == '\t';

20 }	20 }

21	21

22 // Backend for RemoveURLWhitespace (see declaration in url_canon.h).	22 // Backend for RemoveURLWhitespace (see declaration in url_canon.h).

23 // It sucks that we have to do this, since this takes about 13% of the total URL	23 // It sucks that we have to do this, since this takes about 13% of the total URL

24 // canonicalization time.	24 // canonicalization time.

25 template<typename CHAR>	25 template <typename CHAR>

26 const CHAR* DoRemoveURLWhitespace(const CHAR* input, int input_len,	26 const CHAR* DoRemoveURLWhitespace(const CHAR* input,

	27 int input_len,

27 CanonOutputT<CHAR>* buffer,	28 CanonOutputT<CHAR>* buffer,

28 int* output_len) {	29 int* output_len,

	30 bool* potentially_dangling_markup) {

29 // Fast verification that there's nothing that needs removal. This is the 99%	31 // Fast verification that there's nothing that needs removal. This is the 99%

30 // case, so we want it to be fast and don't care about impacting the speed	32 // case, so we want it to be fast and don't care about impacting the speed

31 // when we do find whitespace.	33 // when we do find whitespace.

32 int found_whitespace = false;	34 int found_whitespace = false;

33 for (int i = 0; i < input_len; i++) {	35 for (int i = 0; i < input_len; i++) {

34 if (!IsRemovableURLWhitespace(input[i]))	36 if (!IsRemovableURLWhitespace(input[i]))

35 continue;	37 continue;

36 found_whitespace = true;	38 found_whitespace = true;

37 break;	39 break;

38 }	40 }

39	41

40 if (!found_whitespace) {	42 if (!found_whitespace) {

41 // Didn't find any whitespace, we don't need to do anything. We can just	43 // Didn't find any whitespace, we don't need to do anything. We can just

42 // return the input as the output.	44 // return the input as the output.

43 *output_len = input_len;	45 *output_len = input_len;

44 return input;	46 return input;

45 }	47 }

46	48

47 // Remove the whitespace into the new buffer and return it.	49 // Remove the whitespace into the new buffer and return it.

48 for (int i = 0; i < input_len; i++) {	50 for (int i = 0; i < input_len; i++) {

49 if (!IsRemovableURLWhitespace(input[i]))	51 if (!IsRemovableURLWhitespace(input[i])) {

	52 if (potentially_dangling_markup && input[i] == 0x3C)

	53 *potentially_dangling_markup = true;

50 buffer->push_back(input[i]);	54 buffer->push_back(input[i]);

	55 }

51 }	56 }

52 *output_len = buffer->length();	57 *output_len = buffer->length();

53 return buffer->data();	58 return buffer->data();

54 }	59 }

55	60

56 // Contains the canonical version of each possible input letter in the scheme	61 // Contains the canonical version of each possible input letter in the scheme

57 // (basically, lower-cased). The corresponding entry will be 0 if the letter	62 // (basically, lower-cased). The corresponding entry will be 0 if the letter

58 // is not allowed in a scheme.	63 // is not allowed in a scheme.

59 const char kSchemeCanonical[0x80] = {	64 const char kSchemeCanonical[0x80] = {

60 // 00-1f: all are invalid	65 // 00-1f: all are invalid

(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
267 ReadUTFChar(spec, &i, end, &code_point);	272 ReadUTFChar(spec, &i, end, &code_point);

268 AppendUTF8Value(code_point, output);	273 AppendUTF8Value(code_point, output);

269 }	274 }

270 }	275 }

271	276

272 out_ref->len = output->length() - out_ref->begin;	277 out_ref->len = output->length() - out_ref->begin;

273 }	278 }

274	279

275 } // namespace	280 } // namespace

276	281

277 const char* RemoveURLWhitespace(const char* input, int input_len,	282 const char* RemoveURLWhitespace(const char* input,

	283 int input_len,

278 CanonOutputT<char>* buffer,	284 CanonOutputT<char>* buffer,

279 int* output_len) {	285 int* output_len,

280 return DoRemoveURLWhitespace(input, input_len, buffer, output_len);	286 bool* potentially_dangling_markup) {

	287 return DoRemoveURLWhitespace(input, input_len, buffer, output_len,

	288 potentially_dangling_markup);

281 }	289 }

282	290

283 const base::char16* RemoveURLWhitespace(const base::char16* input,	291 const base::char16* RemoveURLWhitespace(const base::char16* input,

284 int input_len,	292 int input_len,

285 CanonOutputT<base::char16>* buffer,	293 CanonOutputT<base::char16>* buffer,

286 int* output_len) {	294 int* output_len,

287 return DoRemoveURLWhitespace(input, input_len, buffer, output_len);	295 bool* potentially_dangling_markup) {

	296 return DoRemoveURLWhitespace(input, input_len, buffer, output_len,

	297 potentially_dangling_markup);

288 }	298 }

289	299

290 char CanonicalSchemeChar(base::char16 ch) {	300 char CanonicalSchemeChar(base::char16 ch) {

291 if (ch >= 0x80)	301 if (ch >= 0x80)

292 return 0; // Non-ASCII is not supported by schemes.	302 return 0; // Non-ASCII is not supported by schemes.

293 return kSchemeCanonical[ch];	303 return kSchemeCanonical[ch];

294 }	304 }

295	305

296 bool CanonicalizeScheme(const char* spec,	306 bool CanonicalizeScheme(const char* spec,

297 const Component& scheme,	307 const Component& scheme,

(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
358 }	368 }

359	369

360 void CanonicalizeRef(const base::char16* spec,	370 void CanonicalizeRef(const base::char16* spec,

361 const Component& ref,	371 const Component& ref,

362 CanonOutput* output,	372 CanonOutput* output,

363 Component* out_ref) {	373 Component* out_ref) {

364 DoCanonicalizeRef<base::char16, base::char16>(spec, ref, output, out_ref);	374 DoCanonicalizeRef<base::char16, base::char16>(spec, ref, output, out_ref);

365 }	375 }

366	376

367 } // namespace url	377 } // namespace url

OLD	NEW

« third_party/WebKit/LayoutTests/external/wpt/fetch/dangling-markup-mitigation.tentative.html ('K') | « url/url_canon.h ('k') | url/url_canon_relative.cc » ('j') | no next file with comments »