| Index: url/url_canon_host.cc
|
| diff --git a/url/url_canon_host.cc b/url/url_canon_host.cc
|
| index 76a22369b87648ce3651a94e4ca6aaa460224ac6..433ae1408197e68e90ea6b3b67f464656a248ff4 100644
|
| --- a/url/url_canon_host.cc
|
| +++ b/url/url_canon_host.cc
|
| @@ -10,7 +10,7 @@ namespace url {
|
|
|
| namespace {
|
|
|
| -// For reference, here's what IE supports:
|
| +// For reference, here's what IE6 supported:
|
| // Key: 0 (disallowed: failure if present in the input)
|
| // + (allowed either escaped or unescaped, and unmodified)
|
| // U (allowed escaped or unescaped but always unescaped if present in
|
| @@ -38,34 +38,23 @@ namespace {
|
| // Surprisingly, space is accepted in the input and always escaped.
|
|
|
| // This table lists the canonical version of all characters we allow in the
|
| -// input, with 0 indicating it is disallowed. We use the magic kEscapedHostChar
|
| -// value to indicate that this character should be escaped. We are a little more
|
| -// restrictive than IE, but less restrictive than Firefox.
|
| -//
|
| -// Note that we disallow the % character. We will allow it when part of an
|
| -// escape sequence, of course, but this disallows "%25". Even though IE allows
|
| -// it, allowing it would put us in a funny state. If there was an invalid
|
| -// escape sequence like "%zz", we'll add "%25zz" to the output and fail.
|
| -// Allowing percents means we'll succeed a second time, so validity would change
|
| -// based on how many times you run the canonicalizer. We prefer to always report
|
| -// the same vailidity, so reject this.
|
| -const unsigned char kEsc = 0xff;
|
| +// input, with 0 indicating that the character is disallowed.
|
| const unsigned char kHostCharLookup[0x80] = {
|
| // 00-1f: all are invalid
|
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| // ' ' ! " # $ % & ' ( ) * + , - . /
|
| - kEsc,kEsc,kEsc,kEsc,kEsc, 0, kEsc,kEsc,kEsc,kEsc,kEsc, '+',kEsc, '-', '.', 0,
|
| + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', '.', 0,
|
| // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
|
| - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', 0 ,kEsc,kEsc,kEsc, 0 ,
|
| + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', 0, 0, 0, 0, 0 ,
|
| // @ A B C D E F G H I J K L M N O
|
| - kEsc, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
|
| + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
|
| // P Q R S T U V W X Y Z [ \ ] ^ _
|
| 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '[', 0 , ']', 0 , '_',
|
| // ` a b c d e f g h i j k l m n o
|
| - kEsc, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
|
| + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
|
| // p q r s t u v w x y z { | } ~
|
| - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',kEsc,kEsc,kEsc, 0 , 0 };
|
| + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0 };
|
|
|
| const int kTempHostBufferLen = 1024;
|
| typedef RawCanonOutputT<char, kTempHostBufferLen> StackBuffer;
|
| @@ -142,9 +131,6 @@ bool DoSimpleHost(const INCHAR* host,
|
| // Invalid character, add it as percent-escaped and mark as failed.
|
| AppendEscapedChar(source, output);
|
| success = false;
|
| - } else if (replacement == kEsc) {
|
| - // This character is valid but should be escaped.
|
| - AppendEscapedChar(source, output);
|
| } else {
|
| // Common case, the given character is valid in a hostname, the lookup
|
| // table tells us the canonical representation of that character (lower
|
|
|