| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/basictypes.h" | |
| 6 #include "net/base/data_url.h" | |
| 7 #include "testing/gtest/include/gtest/gtest.h" | |
| 8 #include "url/gurl.h" | |
| 9 | |
namespace {

// One row of the table-driven DataURL parse test: an input URL together with
// the outcome the test expects from parsing it.
struct ParseTestData {
  // The data: URL handed to the parser.
  const char* url;
  // Whether parsing |url| is expected to succeed.
  bool is_valid;
  // Expected outputs. The test only compares these when |is_valid| is true.
  const char* mime_type;
  const char* charset;
  const char* data;
};

}  // namespace
| 21 | |
| 22 TEST(DataURLTest, Parse) { | |
| 23 const ParseTestData tests[] = { | |
| 24 { "data:", | |
| 25 false, | |
| 26 "", | |
| 27 "", | |
| 28 "" }, | |
| 29 | |
| 30 { "data:,", | |
| 31 true, | |
| 32 "text/plain", | |
| 33 "US-ASCII", | |
| 34 "" }, | |
| 35 | |
| 36 { "data:;base64,", | |
| 37 true, | |
| 38 "text/plain", | |
| 39 "US-ASCII", | |
| 40 "" }, | |
| 41 | |
| 42 { "data:;charset=,test", | |
| 43 false, | |
| 44 "", | |
| 45 "", | |
| 46 "" }, | |
| 47 | |
| 48 { "data:TeXt/HtMl,<b>x</b>", | |
| 49 true, | |
| 50 "text/html", | |
| 51 "US-ASCII", | |
| 52 "<b>x</b>" }, | |
| 53 | |
| 54 { "data:,foo", | |
| 55 true, | |
| 56 "text/plain", | |
| 57 "US-ASCII", | |
| 58 "foo" }, | |
| 59 | |
| 60 { "data:;base64,aGVsbG8gd29ybGQ=", | |
| 61 true, | |
| 62 "text/plain", | |
| 63 "US-ASCII", | |
| 64 "hello world" }, | |
| 65 | |
| 66 // Allow invalid mediatype for backward compatibility but set mime_type to | |
| 67 // "text/plain" instead of the invalid mediatype. | |
| 68 { "data:foo,boo", | |
| 69 true, | |
| 70 "text/plain", | |
| 71 "US-ASCII", | |
| 72 "boo" }, | |
| 73 | |
| 74 // When accepting an invalid mediatype, override charset with "US-ASCII" | |
| 75 { "data:foo;charset=UTF-8,boo", | |
| 76 true, | |
| 77 "text/plain", | |
| 78 "US-ASCII", | |
| 79 "boo" }, | |
| 80 | |
| 81 // Invalid mediatype. Includes a slash but the type part is not a token. | |
| 82 { "data:f(oo/bar;baz=1;charset=kk,boo", | |
| 83 true, | |
| 84 "text/plain", | |
| 85 "US-ASCII", | |
| 86 "boo" }, | |
| 87 | |
| 88 { "data:foo/bar;baz=1;charset=kk,boo", | |
| 89 true, | |
| 90 "foo/bar", | |
| 91 "kk", | |
| 92 "boo" }, | |
| 93 | |
| 94 { "data:foo/bar;charset=kk;baz=1,boo", | |
| 95 true, | |
| 96 "foo/bar", | |
| 97 "kk", | |
| 98 "boo" }, | |
| 99 | |
| 100 { "data:text/html,%3Chtml%3E%3Cbody%3E%3Cb%3Ehello%20world" | |
| 101 "%3C%2Fb%3E%3C%2Fbody%3E%3C%2Fhtml%3E", | |
| 102 true, | |
| 103 "text/html", | |
| 104 "US-ASCII", | |
| 105 "<html><body><b>hello world</b></body></html>" }, | |
| 106 | |
| 107 { "data:text/html,<html><body><b>hello world</b></body></html>", | |
| 108 true, | |
| 109 "text/html", | |
| 110 "US-ASCII", | |
| 111 "<html><body><b>hello world</b></body></html>" }, | |
| 112 | |
| 113 // the comma cannot be url-escaped! | |
| 114 { "data:%2Cblah", | |
| 115 false, | |
| 116 "", | |
| 117 "", | |
| 118 "" }, | |
| 119 | |
| 120 // invalid base64 content | |
| 121 { "data:;base64,aGVs_-_-", | |
| 122 false, | |
| 123 "", | |
| 124 "", | |
| 125 "" }, | |
| 126 | |
| 127 // Spaces should be removed from non-text data URLs (we already tested | |
| 128 // spaces above). | |
| 129 { "data:image/fractal,a b c d e f g", | |
| 130 true, | |
| 131 "image/fractal", | |
| 132 "US-ASCII", | |
| 133 "abcdefg" }, | |
| 134 | |
| 135 // Spaces should also be removed from anything base-64 encoded | |
| 136 { "data:;base64,aGVs bG8gd2 9ybGQ=", | |
| 137 true, | |
| 138 "text/plain", | |
| 139 "US-ASCII", | |
| 140 "hello world" }, | |
| 141 | |
| 142 // Other whitespace should also be removed from anything base-64 encoded. | |
| 143 { "data:;base64,aGVs bG8gd2 \n9ybGQ=", | |
| 144 true, | |
| 145 "text/plain", | |
| 146 "US-ASCII", | |
| 147 "hello world" }, | |
| 148 | |
| 149 // In base64 encoding, escaped whitespace should be stripped. | |
| 150 // (This test was taken from acid3) | |
| 151 // http://b/1054495 | |
| 152 { "data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207" | |
| 153 "%20", | |
| 154 true, | |
| 155 "text/javascript", | |
| 156 "US-ASCII", | |
| 157 "d4 = 'four';" }, | |
| 158 | |
| 159 // Only unescaped whitespace should be stripped in non-base64. | |
| 160 // http://b/1157796 | |
| 161 { "data:img/png,A B %20 %0A C", | |
| 162 true, | |
| 163 "img/png", | |
| 164 "US-ASCII", | |
| 165 "AB \nC" }, | |
| 166 | |
| 167 { "data:text/plain;charset=utf-8;base64,SGVsbMO2", | |
| 168 true, | |
| 169 "text/plain", | |
| 170 "utf-8", | |
| 171 "Hell\xC3\xB6" }, | |
| 172 | |
| 173 // Not sufficiently padded. | |
| 174 { "data:;base64,aGVsbG8gd29ybGQ", | |
| 175 true, | |
| 176 "text/plain", | |
| 177 "US-ASCII", | |
| 178 "hello world" }, | |
| 179 | |
| 180 // Bad encoding (truncated). | |
| 181 { "data:;base64,aGVsbG8gd29yb", | |
| 182 false, | |
| 183 "", | |
| 184 "", | |
| 185 "" }, | |
| 186 | |
| 187 // BiDi control characters should be unescaped and preserved as is, and | |
| 188 // should not be replaced with % versions. In the below case, \xE2\x80\x8F | |
| 189 // is the RTL mark and the parsed text should preserve it as is. | |
| 190 { | |
| 191 "data:text/plain;charset=utf-8,\xE2\x80\x8Ftest", | |
| 192 true, | |
| 193 "text/plain", | |
| 194 "utf-8", | |
| 195 "\xE2\x80\x8Ftest"}, | |
| 196 | |
| 197 // Same as above but with Arabic text after RTL mark. | |
| 198 { | |
| 199 "data:text/plain;charset=utf-8," | |
| 200 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1", | |
| 201 true, | |
| 202 "text/plain", | |
| 203 "utf-8", | |
| 204 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"}, | |
| 205 | |
| 206 // RTL mark encoded as %E2%80%8F should be unescaped too. Note that when | |
| 207 // wrapped in a GURL, this URL and the next effectively become the same as | |
| 208 // the previous two URLs. | |
| 209 { | |
| 210 "data:text/plain;charset=utf-8,%E2%80%8Ftest", | |
| 211 true, | |
| 212 "text/plain", | |
| 213 "utf-8", | |
| 214 "\xE2\x80\x8Ftest"}, | |
| 215 | |
| 216 // Same as above but with Arabic text after RTL mark. | |
| 217 { | |
| 218 "data:text/plain;charset=utf-8," | |
| 219 "%E2%80%8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1", | |
| 220 true, | |
| 221 "text/plain", | |
| 222 "utf-8", | |
| 223 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"} | |
| 224 | |
| 225 // TODO(darin): add more interesting tests | |
| 226 }; | |
| 227 | |
| 228 for (size_t i = 0; i < arraysize(tests); ++i) { | |
| 229 std::string mime_type; | |
| 230 std::string charset; | |
| 231 std::string data; | |
| 232 bool ok = | |
| 233 net::DataURL::Parse(GURL(tests[i].url), &mime_type, &charset, &data); | |
| 234 EXPECT_EQ(ok, tests[i].is_valid); | |
| 235 if (tests[i].is_valid) { | |
| 236 EXPECT_EQ(tests[i].mime_type, mime_type); | |
| 237 EXPECT_EQ(tests[i].charset, charset); | |
| 238 EXPECT_EQ(tests[i].data, data); | |
| 239 } | |
| 240 } | |
| 241 } | |
| OLD | NEW |