| OLD | NEW |
| 1 // Copyright 2008, Google Inc. | 1 // Copyright 2008, Google Inc. |
| 2 // All rights reserved. | 2 // All rights reserved. |
| 3 // | 3 // |
| 4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
| 5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
| 6 // met: | 6 // met: |
| 7 // | 7 // |
| 8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
| 9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
| 10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
| (...skipping 12 matching lines...) Expand all Loading... |
| 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | 29 |
| 30 #include "googleurl/src/url_canon.h" | 30 #include "googleurl/src/url_canon.h" |
| 31 #include "googleurl/src/url_canon_stdstring.h" | 31 #include "googleurl/src/url_canon_stdstring.h" |
| 32 #include "googleurl/src/url_parse.h" | 32 #include "googleurl/src/url_parse.h" |
| 33 #include "googleurl/src/url_test_utils.h" |
| 33 #include "googleurl/src/url_util.h" | 34 #include "googleurl/src/url_util.h" |
| 34 #include "testing/gtest/include/gtest/gtest.h" | 35 #include "testing/gtest/include/gtest/gtest.h" |
| 35 | 36 |
| 36 TEST(URLUtilTest, FindAndCompareScheme) { | 37 TEST(URLUtilTest, FindAndCompareScheme) { |
| 37 url_parse::Component found_scheme; | 38 url_parse::Component found_scheme; |
| 38 | 39 |
| 39 // Simple case where the scheme is found and matches. | 40 // Simple case where the scheme is found and matches. |
| 40 const char kStr1[] = "http://www.com/"; | 41 const char kStr1[] = "http://www.com/"; |
| 41 EXPECT_TRUE(url_util::FindAndCompareScheme( | 42 EXPECT_TRUE(url_util::FindAndCompareScheme( |
| 42 kStr1, static_cast<int>(strlen(kStr1)), "http", NULL)); | 43 kStr1, static_cast<int>(strlen(kStr1)), "http", NULL)); |
| (...skipping 15 matching lines...) Expand all Loading... |
| 58 // When there is an empty scheme, it should match the empty scheme. | 59 // When there is an empty scheme, it should match the empty scheme. |
| 59 const char kStr3[] = ":foo.com/"; | 60 const char kStr3[] = ":foo.com/"; |
| 60 EXPECT_TRUE(url_util::FindAndCompareScheme( | 61 EXPECT_TRUE(url_util::FindAndCompareScheme( |
| 61 kStr3, static_cast<int>(strlen(kStr3)), "", &found_scheme)); | 62 kStr3, static_cast<int>(strlen(kStr3)), "", &found_scheme)); |
| 62 EXPECT_TRUE(found_scheme == url_parse::Component(0, 0)); | 63 EXPECT_TRUE(found_scheme == url_parse::Component(0, 0)); |
| 63 | 64 |
| 64 // But when there is no scheme, it should fail. | 65 // But when there is no scheme, it should fail. |
| 65 EXPECT_FALSE(url_util::FindAndCompareScheme("", 0, "", &found_scheme)); | 66 EXPECT_FALSE(url_util::FindAndCompareScheme("", 0, "", &found_scheme)); |
| 66 EXPECT_TRUE(found_scheme == url_parse::Component()); | 67 EXPECT_TRUE(found_scheme == url_parse::Component()); |
| 67 | 68 |
| 68 // When there is a whitespace char in scheme, it should canonicalize the url before | 69 // When there is a whitespace char in scheme, it should canonicalize the url |
| 69 // comparison. | 70 // before comparison. |
| 70 const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)"; | 71 const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)"; |
| 71 EXPECT_TRUE(url_util::FindAndCompareScheme( | 72 EXPECT_TRUE(url_util::FindAndCompareScheme( |
| 72 whtspc_str, static_cast<int>(strlen(whtspc_str)), "javascript", | 73 whtspc_str, static_cast<int>(strlen(whtspc_str)), "javascript", |
| 73 &found_scheme)); | 74 &found_scheme)); |
| 74 EXPECT_TRUE(found_scheme == url_parse::Component(1, 10)); | 75 EXPECT_TRUE(found_scheme == url_parse::Component(1, 10)); |
| 75 | 76 |
| 76 // Control characters should be stripped out on the ends, and kept in the middle. | 77 // Control characters should be stripped out on the ends, and kept in the |
| 78 // middle. |
| 77 const char ctrl_str[] = "\02jav\02scr\03ipt:alert(1)"; | 79 const char ctrl_str[] = "\02jav\02scr\03ipt:alert(1)"; |
| 78 EXPECT_FALSE(url_util::FindAndCompareScheme( | 80 EXPECT_FALSE(url_util::FindAndCompareScheme( |
| 79 ctrl_str, static_cast<int>(strlen(ctrl_str)), "javascript", | 81 ctrl_str, static_cast<int>(strlen(ctrl_str)), "javascript", |
| 80 &found_scheme)); | 82 &found_scheme)); |
| 81 EXPECT_TRUE(found_scheme == url_parse::Component(1, 11)); | 83 EXPECT_TRUE(found_scheme == url_parse::Component(1, 11)); |
| 82 } | 84 } |
| 83 | 85 |
| 84 TEST(URLUtilTest, ReplaceComponents) { | 86 TEST(URLUtilTest, ReplaceComponents) { |
| 85 url_parse::Parsed parsed; | 87 url_parse::Parsed parsed; |
| 86 url_canon::RawCanonOutputT<char> output; | 88 url_canon::RawCanonOutputT<char> output; |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 153 EXPECT_EQ("file:///E:/foo/", | 155 EXPECT_EQ("file:///E:/foo/", |
| 154 CheckReplaceScheme("http://localhost/e:foo/", "file")); | 156 CheckReplaceScheme("http://localhost/e:foo/", "file")); |
| 155 #endif | 157 #endif |
| 156 | 158 |
| 157 // This will probably change to "about://google.com/" when we fix | 159 // This will probably change to "about://google.com/" when we fix |
| 158 // http://crbug.com/160 which should also be an acceptable result. | 160 // http://crbug.com/160 which should also be an acceptable result. |
| 159 EXPECT_EQ("about://google.com/", | 161 EXPECT_EQ("about://google.com/", |
| 160 CheckReplaceScheme("http://google.com/", "about")); | 162 CheckReplaceScheme("http://google.com/", "about")); |
| 161 } | 163 } |
| 162 | 164 |
| 165 TEST(URLUtilTest, DecodeURLEscapeSequences) { |
| 166 struct DecodeCase { |
| 167 const char* input; |
| 168 const char* output; |
| 169 } decode_cases[] = { |
| 170 {"hello, world", "hello, world"}, |
| 171 {"%01%02%03%04%05%06%07%08%09%0a%0B%0C%0D%0e%0f/", |
| 172 "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/"}, |
| 173 {"%10%11%12%13%14%15%16%17%18%19%1a%1B%1C%1D%1e%1f/", |
| 174 "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/"}, |
| 175 {"%20%21%22%23%24%25%26%27%28%29%2a%2B%2C%2D%2e%2f/", |
| 176 " !\"#$%&'()*+,-.//"}, |
| 177 {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/", |
| 178 "0123456789:;<=>?/"}, |
| 179 {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/", |
| 180 "@ABCDEFGHIJKLMNO/"}, |
| 181 {"%50%51%52%53%54%55%56%57%58%59%5a%5B%5C%5D%5e%5f/", |
| 182 "PQRSTUVWXYZ[\\]^_/"}, |
| 183 {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/", |
| 184 "`abcdefghijklmno/"}, |
| 185 {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/", |
| 186 "pqrstuvwxyz{|}~\x7f/"}, |
| 187 // Test un-UTF-8-ization. |
| 188 {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd"}, |
| 189 }; |
| 190 |
| 191 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(decode_cases); i++) { |
| 192 const char* input = decode_cases[i].input; |
| 193 url_canon::RawCanonOutputT<char16> output; |
| 194 url_util::DecodeURLEscapeSequences(input, strlen(input), &output); |
| 195 EXPECT_EQ(decode_cases[i].output, |
| 196 url_test_utils::ConvertUTF16ToUTF8( |
| 197 string16(output.data(), output.length()))); |
| 198 } |
| 199 |
| 200 // Our decode should decode %00 |
| 201 const char zero_input[] = "%00"; |
| 202 url_canon::RawCanonOutputT<char16> zero_output; |
| 203 url_util::DecodeURLEscapeSequences(zero_input, strlen(zero_input), |
| 204 &zero_output); |
| 205 EXPECT_NE("%00", |
| 206 url_test_utils::ConvertUTF16ToUTF8( |
| 207 string16(zero_output.data(), zero_output.length()))); |
| 208 |
| 209 // Test the error behavior for invalid UTF-8. |
| 210 const char invalid_input[] = "%e4%a0%e5%a5%bd"; |
| 211 const char16 invalid_expected[4] = {0x00e4, 0x00a0, 0x597d, 0}; |
| 212 url_canon::RawCanonOutputT<char16> invalid_output; |
| 213 url_util::DecodeURLEscapeSequences(invalid_input, strlen(invalid_input), |
| 214 &invalid_output); |
| 215 EXPECT_EQ(string16(invalid_expected), |
| 216 string16(invalid_output.data(), invalid_output.length())); |
| 217 } |
| 218 |
| OLD | NEW |