Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(395)

Side by Side Diff: base/string_util_unittest.cc

Issue 661205: Make IsStringUTF8 reject (U+FDD0 .. U+FDEF) ... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 10 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « base/string_util.cc ('k') | base/utf_string_conversion_utils.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <math.h> 5 #include <math.h>
6 #include <stdarg.h> 6 #include <stdarg.h>
7 7
8 #include <limits> 8 #include <limits>
9 #include <sstream> 9 #include <sstream>
10 10
(...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after
218 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) 218 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
219 EXPECT_FALSE(IsStringUTF8("\xfe\xff")); 219 EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
220 EXPECT_FALSE(IsStringUTF8("\xff\xfe")); 220 EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
221 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4))); 221 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
222 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00")); 222 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
223 223
224 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> 224 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
225 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE 225 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE
226 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE 226 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE
227 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF 227 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF
228
229 // This should also be false, but currently we pass them through.
230 // Disable them for now.
231 #if 0
232 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0 228 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0
233 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF 229 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF
234 #endif
235
236 // Strings in legacy encodings. We can certainly make up strings 230 // Strings in legacy encodings. We can certainly make up strings
237 // in a legacy encoding that are valid in UTF-8, but in real data, 231 // in a legacy encoding that are valid in UTF-8, but in real data,
238 // most of them are invalid as UTF-8. 232 // most of them are invalid as UTF-8.
239 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1 233 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1
240 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC01 in EUC-KR 234 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC01 in EUC-KR
241 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5 235 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5
242 // "abc" with U+201[CD] in windows-125[0-8] 236 // "abc" with U+201[CD] in windows-125[0-8]
243 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94")); 237 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
244 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 238 // U+0639 U+064E U+0644 U+064E in ISO-8859-6
245 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee")); 239 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
(...skipping 1245 matching lines...) Expand 10 before | Expand all | Expand 10 after
1491 EXPECT_FALSE(ContainsOnlyChars("Hello", "")); 1485 EXPECT_FALSE(ContainsOnlyChars("Hello", ""));
1492 1486
1493 EXPECT_TRUE(ContainsOnlyChars("", "1234")); 1487 EXPECT_TRUE(ContainsOnlyChars("", "1234"));
1494 EXPECT_TRUE(ContainsOnlyChars("1", "1234")); 1488 EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1495 EXPECT_TRUE(ContainsOnlyChars("1", "4321")); 1489 EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1496 EXPECT_TRUE(ContainsOnlyChars("123", "4321")); 1490 EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1497 EXPECT_FALSE(ContainsOnlyChars("123a", "4321")); 1491 EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1498 } 1492 }
1499 1493
1500 } // namespace base 1494 } // namespace base
OLDNEW
« no previous file with comments | « base/string_util.cc ('k') | base/utf_string_conversion_utils.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698