OLD | NEW |
1 // Copyright 2008, Google Inc. | 1 // Copyright 2008, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
(...skipping 12 matching lines...) Expand all Loading... |
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | 29 |
30 #include "googleurl/src/url_canon.h" | 30 #include "googleurl/src/url_canon.h" |
31 #include "googleurl/src/url_canon_stdstring.h" | 31 #include "googleurl/src/url_canon_stdstring.h" |
32 #include "googleurl/src/url_parse.h" | 32 #include "googleurl/src/url_parse.h" |
| 33 #include "googleurl/src/url_test_utils.h" |
33 #include "googleurl/src/url_util.h" | 34 #include "googleurl/src/url_util.h" |
34 #include "testing/gtest/include/gtest/gtest.h" | 35 #include "testing/gtest/include/gtest/gtest.h" |
35 | 36 |
36 TEST(URLUtilTest, FindAndCompareScheme) { | 37 TEST(URLUtilTest, FindAndCompareScheme) { |
37 url_parse::Component found_scheme; | 38 url_parse::Component found_scheme; |
38 | 39 |
39 // Simple case where the scheme is found and matches. | 40 // Simple case where the scheme is found and matches. |
40 const char kStr1[] = "http://www.com/"; | 41 const char kStr1[] = "http://www.com/"; |
41 EXPECT_TRUE(url_util::FindAndCompareScheme( | 42 EXPECT_TRUE(url_util::FindAndCompareScheme( |
42 kStr1, static_cast<int>(strlen(kStr1)), "http", NULL)); | 43 kStr1, static_cast<int>(strlen(kStr1)), "http", NULL)); |
(...skipping 15 matching lines...) Expand all Loading... |
58 // When there is an empty scheme, it should match the empty scheme. | 59 // When there is an empty scheme, it should match the empty scheme. |
59 const char kStr3[] = ":foo.com/"; | 60 const char kStr3[] = ":foo.com/"; |
60 EXPECT_TRUE(url_util::FindAndCompareScheme( | 61 EXPECT_TRUE(url_util::FindAndCompareScheme( |
61 kStr3, static_cast<int>(strlen(kStr3)), "", &found_scheme)); | 62 kStr3, static_cast<int>(strlen(kStr3)), "", &found_scheme)); |
62 EXPECT_TRUE(found_scheme == url_parse::Component(0, 0)); | 63 EXPECT_TRUE(found_scheme == url_parse::Component(0, 0)); |
63 | 64 |
64 // But when there is no scheme, it should fail. | 65 // But when there is no scheme, it should fail. |
65 EXPECT_FALSE(url_util::FindAndCompareScheme("", 0, "", &found_scheme)); | 66 EXPECT_FALSE(url_util::FindAndCompareScheme("", 0, "", &found_scheme)); |
66 EXPECT_TRUE(found_scheme == url_parse::Component()); | 67 EXPECT_TRUE(found_scheme == url_parse::Component()); |
67 | 68 |
68 // When there is a whitespace char in scheme, it should canonicalize the url before | 69 // When there is a whitespace char in scheme, it should canonicalize the url |
69 // comparison. | 70 // before comparison. |
70 const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)"; | 71 const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)"; |
71 EXPECT_TRUE(url_util::FindAndCompareScheme( | 72 EXPECT_TRUE(url_util::FindAndCompareScheme( |
72 whtspc_str, static_cast<int>(strlen(whtspc_str)), "javascript", | 73 whtspc_str, static_cast<int>(strlen(whtspc_str)), "javascript", |
73 &found_scheme)); | 74 &found_scheme)); |
74 EXPECT_TRUE(found_scheme == url_parse::Component(1, 10)); | 75 EXPECT_TRUE(found_scheme == url_parse::Component(1, 10)); |
75 | 76 |
76 // Control characters should be stripped out on the ends, and kept in the middle. | 77 // Control characters should be stripped out on the ends, and kept in the |
| 78 // middle. |
77 const char ctrl_str[] = "\02jav\02scr\03ipt:alert(1)"; | 79 const char ctrl_str[] = "\02jav\02scr\03ipt:alert(1)"; |
78 EXPECT_FALSE(url_util::FindAndCompareScheme( | 80 EXPECT_FALSE(url_util::FindAndCompareScheme( |
79 ctrl_str, static_cast<int>(strlen(ctrl_str)), "javascript", | 81 ctrl_str, static_cast<int>(strlen(ctrl_str)), "javascript", |
80 &found_scheme)); | 82 &found_scheme)); |
81 EXPECT_TRUE(found_scheme == url_parse::Component(1, 11)); | 83 EXPECT_TRUE(found_scheme == url_parse::Component(1, 11)); |
82 } | 84 } |
83 | 85 |
84 TEST(URLUtilTest, ReplaceComponents) { | 86 TEST(URLUtilTest, ReplaceComponents) { |
85 url_parse::Parsed parsed; | 87 url_parse::Parsed parsed; |
86 url_canon::RawCanonOutputT<char> output; | 88 url_canon::RawCanonOutputT<char> output; |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
153 EXPECT_EQ("file:///E:/foo/", | 155 EXPECT_EQ("file:///E:/foo/", |
154 CheckReplaceScheme("http://localhost/e:foo/", "file")); | 156 CheckReplaceScheme("http://localhost/e:foo/", "file")); |
155 #endif | 157 #endif |
156 | 158 |
157 // This will probably change to "about://google.com/" when we fix | 159 // This will probably change to "about://google.com/" when we fix |
158 // http://crbug.com/160 which should also be an acceptable result. | 160 // http://crbug.com/160 which should also be an acceptable result. |
159 EXPECT_EQ("about://google.com/", | 161 EXPECT_EQ("about://google.com/", |
160 CheckReplaceScheme("http://google.com/", "about")); | 162 CheckReplaceScheme("http://google.com/", "about")); |
161 } | 163 } |
162 | 164 |
| 165 TEST(URLUtilTest, DecodeURLEscapeSequences) { |
| 166 struct DecodeCase { |
| 167 const char* input; |
| 168 const char* output; |
| 169 } decode_cases[] = { |
| 170 {"hello, world", "hello, world"}, |
| 171 {"%01%02%03%04%05%06%07%08%09%0a%0B%0C%0D%0e%0f/", |
| 172 "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/"}, |
| 173 {"%10%11%12%13%14%15%16%17%18%19%1a%1B%1C%1D%1e%1f/", |
| 174 "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/"}, |
| 175 {"%20%21%22%23%24%25%26%27%28%29%2a%2B%2C%2D%2e%2f/", |
| 176 " !\"#$%&'()*+,-.//"}, |
| 177 {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/", |
| 178 "0123456789:;<=>?/"}, |
| 179 {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/", |
| 180 "@ABCDEFGHIJKLMNO/"}, |
| 181 {"%50%51%52%53%54%55%56%57%58%59%5a%5B%5C%5D%5e%5f/", |
| 182 "PQRSTUVWXYZ[\\]^_/"}, |
| 183 {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/", |
| 184 "`abcdefghijklmno/"}, |
| 185 {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/", |
| 186 "pqrstuvwxyz{|}~\x7f/"}, |
| 187 // Escaped UTF-8 bytes decode to UTF-16 that converts back to the same bytes. |
| 188 {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"}, |
| 189 }; |
| 190 |
| 191 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(decode_cases); i++) { |
| 192 const char* input = decode_cases[i].input; |
| 193 url_canon::RawCanonOutputT<char16> output; |
| 194 url_util::DecodeURLEscapeSequences(input, strlen(input), &output); |
| 195 EXPECT_EQ(decode_cases[i].output, |
| 196 url_test_utils::ConvertUTF16ToUTF8( |
| 197 string16(output.data(), output.length()))); |
| 198 } |
| 199 |
| 200 // Our decode should decode %00: the result must differ from the literal "%00". |
| 201 const char zero_input[] = "%00"; |
| 202 url_canon::RawCanonOutputT<char16> zero_output; |
| 203 url_util::DecodeURLEscapeSequences(zero_input, strlen(zero_input), |
| 204 &zero_output); |
| 205 EXPECT_NE("%00", |
| 206 url_test_utils::ConvertUTF16ToUTF8( |
| 207 string16(zero_output.data(), zero_output.length()))); |
| 208 |
| 209 // Invalid UTF-8: 0xe4 and 0xa0 are expected as-is, "%e5%a5%bd" as U+597D. |
| 210 const char invalid_input[] = "%e4%a0%e5%a5%bd"; |
| 211 const char16 invalid_expected[4] = {0x00e4, 0x00a0, 0x597d, 0}; |
| 212 url_canon::RawCanonOutputT<char16> invalid_output; |
| 213 url_util::DecodeURLEscapeSequences(invalid_input, strlen(invalid_input), |
| 214 &invalid_output); |
| 215 EXPECT_EQ(string16(invalid_expected), |
| 216 string16(invalid_output.data(), invalid_output.length())); |
| 217 } |
| 218 |
OLD | NEW |