url/url_canon_host.cc - Issue 2397873002: Reject some previously-escaped chars in hostnames.

Unified Diff: url/url_canon_host.cc

Issue 2397873002: Reject some previously-escaped chars in hostnames.

Patch Set: Some tests fixed Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: url/url_canon_host.cc

diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc

index 76a22369b87648ce3651a94e4ca6aaa460224ac6..433ae1408197e68e90ea6b3b67f464656a248ff4 100644

--- a/url/url_canon_host.cc

+++ b/url/url_canon_host.cc

@@ -10,7 +10,7 @@ namespace url {

namespace {

-// For reference, here's what IE supports:

+// For reference, here's what IE6 supported:

// Key: 0 (disallowed: failure if present in the input)

// + (allowed either escaped or unescaped, and unmodified)

// U (allowed escaped or unescaped but always unescaped if present in

@@ -38,34 +38,23 @@ namespace {

// Surprisingly, space is accepted in the input and always escaped.

// This table lists the canonical version of all characters we allow in the

-// input, with 0 indicating it is disallowed. We use the magic kEscapedHostChar

-// value to indicate that this character should be escaped. We are a little more

-// restrictive than IE, but less restrictive than Firefox.

-//

-// Note that we disallow the % character. We will allow it when part of an

-// escape sequence, of course, but this disallows "%25". Even though IE allows

-// it, allowing it would put us in a funny state. If there was an invalid

-// escape sequence like "%zz", we'll add "%25zz" to the output and fail.

-// Allowing percents means we'll succeed a second time, so validity would change

-// based on how many times you run the canonicalizer. We prefer to always report

-// the same vailidity, so reject this.

-const unsigned char kEsc = 0xff;

+// input, with 0 indicating it is disallowed.

const unsigned char kHostCharLookup[0x80] = {

// 00-1f: all are invalid

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

// ' ' ! " # $ % & ' ( ) * + , - . /

- kEsc,kEsc,kEsc,kEsc,kEsc, 0, kEsc,kEsc,kEsc,kEsc,kEsc, '+',kEsc, '-', '.', 0,

+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', '.', 0,

// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?

- '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', 0 ,kEsc,kEsc,kEsc, 0 ,

+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', 0, 0, 0, 0, 0 ,

// @ A B C D E F G H I J K L M N O

- kEsc, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',

+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',

// P Q R S T U V W X Y Z [ \ ] ^ _

'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '[', 0 , ']', 0 , '_',

// ` a b c d e f g h i j k l m n o

- kEsc, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',

+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',

// p q r s t u v w x y z { | } ~

- 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',kEsc,kEsc,kEsc, 0 , 0 };

+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0 };

const int kTempHostBufferLen = 1024;

typedef RawCanonOutputT<char, kTempHostBufferLen> StackBuffer;

@@ -142,9 +131,6 @@ bool DoSimpleHost(const INCHAR* host,

// Invalid character, add it as percent-escaped and mark as failed.

AppendEscapedChar(source, output);

success = false;

- } else if (replacement == kEsc) {

- // This character is valid but should be escaped.

- AppendEscapedChar(source, output);

} else {

// Common case, the given character is valid in a hostname, the lookup

// table tells us the canonical representation of that character (lower

« no previous file with comments | « net/proxy/proxy_config_service_linux_unittest.cc ('k') | url/url_canon_unittest.cc » ('j') | no next file with comments »