Index: src/url_util_unittest.cc |
=================================================================== |
--- src/url_util_unittest.cc (revision 137) |
+++ src/url_util_unittest.cc (working copy) |
@@ -30,6 +30,7 @@ |
#include "googleurl/src/url_canon.h" |
#include "googleurl/src/url_canon_stdstring.h" |
#include "googleurl/src/url_parse.h" |
+#include "googleurl/src/url_test_utils.h" |
#include "googleurl/src/url_util.h" |
#include "testing/gtest/include/gtest/gtest.h" |
@@ -65,15 +66,16 @@ |
EXPECT_FALSE(url_util::FindAndCompareScheme("", 0, "", &found_scheme)); |
EXPECT_TRUE(found_scheme == url_parse::Component()); |
- // When there is a whitespace char in scheme, it should canonicalize the url before |
- // comparison. |
+ // When there is a whitespace char in scheme, it should canonicalize the url |
+ // before comparison. |
const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)"; |
EXPECT_TRUE(url_util::FindAndCompareScheme( |
whtspc_str, static_cast<int>(strlen(whtspc_str)), "javascript", |
&found_scheme)); |
EXPECT_TRUE(found_scheme == url_parse::Component(1, 10)); |
- |
- // Control characters should be stripped out on the ends, and kept in the middle. |
+ |
+ // Control characters should be stripped out on the ends, and kept in the |
+ // middle. |
const char ctrl_str[] = "\02jav\02scr\03ipt:alert(1)"; |
EXPECT_FALSE(url_util::FindAndCompareScheme( |
ctrl_str, static_cast<int>(strlen(ctrl_str)), "javascript", |
@@ -160,3 +162,57 @@ |
CheckReplaceScheme("http://google.com/", "about")); |
} |
+TEST(URLUtilTest, DecodeURLEscapeSequences) { |
+ struct DecodeCase { |
+ const char* input; |
+ const char* output; |
+ } decode_cases[] = { |
+ {"hello, world", "hello, world"}, |
+ {"%01%02%03%04%05%06%07%08%09%0a%0B%0C%0D%0e%0f/", |
+ "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/"}, |
+ {"%10%11%12%13%14%15%16%17%18%19%1a%1B%1C%1D%1e%1f/", |
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/"}, |
+ {"%20%21%22%23%24%25%26%27%28%29%2a%2B%2C%2D%2e%2f/", |
+ " !\"#$%&'()*+,-.//"}, |
+ {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/", |
+ "0123456789:;<=>?/"}, |
+ {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/", |
+ "@ABCDEFGHIJKLMNO/"}, |
+ {"%50%51%52%53%54%55%56%57%58%59%5a%5B%5C%5D%5e%5f/", |
+ "PQRSTUVWXYZ[\\]^_/"}, |
+ {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/", |
+ "`abcdefghijklmno/"}, |
+ {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/", |
+ "pqrstuvwxyz{|}~\x7f/"}, |
+ // Test un-UTF-8-ization. |
+ {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"}, |
+ }; |
+ |
+ for (size_t i = 0; i < ARRAYSIZE_UNSAFE(decode_cases); i++) { |
+ const char* input = decode_cases[i].input; |
+ url_canon::RawCanonOutputT<char16> output; |
+ url_util::DecodeURLEscapeSequences(input, strlen(input), &output); |
+ EXPECT_EQ(decode_cases[i].output, |
+ url_test_utils::ConvertUTF16ToUTF8( |
+ string16(output.data(), output.length()))); |
+ } |
+ |
+ // Our decode should decode %00 |
+ const char zero_input[] = "%00"; |
+ url_canon::RawCanonOutputT<char16> zero_output; |
+ url_util::DecodeURLEscapeSequences(zero_input, strlen(zero_input), |
+ &zero_output); |
+ EXPECT_NE("%00", |
+ url_test_utils::ConvertUTF16ToUTF8( |
+ string16(zero_output.data(), zero_output.length()))); |
+ |
+ // Test the error behavior for invalid UTF-8. |
+ const char invalid_input[] = "%e4%a0%e5%a5%bd"; |
+ const char16 invalid_expected[4] = {0x00e4, 0x00a0, 0x597d, 0}; |
+ url_canon::RawCanonOutputT<char16> invalid_output; |
+ url_util::DecodeURLEscapeSequences(invalid_input, strlen(invalid_input), |
+ &invalid_output); |
+ EXPECT_EQ(string16(invalid_expected), |
+ string16(invalid_output.data(), invalid_output.length())); |
+} |
+ |