Index: third_party/WebKit/Source/platform/TextCodecFuzzer.cpp |
diff --git a/third_party/WebKit/Source/platform/TextCodecFuzzer.cpp b/third_party/WebKit/Source/platform/TextCodecFuzzer.cpp |
new file mode 100644 |
index 0000000000000000000000000000000000000000..7f5a34f738fd22517e1d09dc01f7cc39ec221a00 |
--- /dev/null |
+++ b/third_party/WebKit/Source/platform/TextCodecFuzzer.cpp |
@@ -0,0 +1,150 @@ |
+// Copyright 2017 The Chromium Authors. All rights reserved. |
mmoroz
2017/03/03 09:49:47
2017
Charlie Harrison
2017/03/03 19:33:55
Done.
|
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "wtf/text/TextCodec.h" |
+ |
+#include "platform/testing/BlinkFuzzerTestSupport.h" |
+#include "platform/testing/FuzzedDataProvider.h" |
+#include "wtf/Vector.h" |
+#include "wtf/text/CString.h" |
+#include "wtf/text/TextEncoding.h" |
+#include "wtf/text/TextEncodingRegistry.h" |
+#include "wtf/text/WTFString.h" |
+ |
+using namespace blink; |
+ |
+// TODO(jsbell): This fuzzes code in wtf/ but has dependencies on platform/, |
+// so it must live in the latter directory. Once wtf/ moves into platform/wtf |
+// this should move there as well. |
+ |
+// Empty class with no members; nothing else in this file refers to it. |
+// NOTE(review): presumably exists so that code outside this file can name it |
+// (e.g. in a friend declaration) - confirm before removing. |
+class TextCodecFuzzHarness {}; |
+// libFuzzer initialization entry point. Forwards the fuzzer's command-line |
+// arguments to InitializeBlinkFuzzTest() and always returns 0. |
+extern "C" int LLVMFuzzerInitialize(int* argc, char*** argv) { |
+ InitializeBlinkFuzzTest(argc, argv); |
+ return 0; |
+} |
+ |
+// Fuzzer for WTF::TextCodec. |
+// |
+// The encoding under test is fixed at compile time by whichever encoding macro |
+// (BIG5, UTF_8, ...) the build defines. The leading bytes of each input are |
+// consumed through FuzzedDataProvider to choose stopOnError, the |
+// unencodable-handling mode and the flush behavior. Only the bytes that remain |
+// after that are handed to the codec, so the option bytes are not fuzzed a |
+// second time as payload. |
+// |
+// The remaining bytes are exercised three ways, each with a fresh codec: |
+//   1. decoded as raw bytes, |
+//   2. encoded as an 8-bit (LChar) string, |
+//   3. encoded as a 16-bit (UChar) string, when the length is a multiple of |
+//      sizeof(UChar). |
+// |
+// Always returns 0. |
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { |
mmoroz
2017/03/03 09:49:47
As I understood, we are using first 2 bytes to ini
Charlie Harrison
2017/03/03 19:33:55
Done, but we are actually using 3 bytes.
|
+  // Exactly one of the macros below is expected to be defined by the build. |
+  // If none is, the name is the empty string. |
+  static const String encodingName = |
+#if defined(BIG5) |
+      "Big5" |
+#elif defined(EUC_JP) |
+      "EUC-JP" |
+#elif defined(EUC_KR) |
+      "EUC-KR" |
+#elif defined(GBK) |
+      "GBK" |
+#elif defined(IBM866) |
+      "IBM866" |
+#elif defined(ISO_2022_JP) |
+      "ISO-2022-JP" |
+#elif defined(ISO_8859_10) |
+      "ISO-8859-10" |
+#elif defined(ISO_8859_13) |
+      "ISO-8859-13" |
+#elif defined(ISO_8859_14) |
+      "ISO-8859-14" |
+#elif defined(ISO_8859_15) |
+      "ISO-8859-15" |
+#elif defined(ISO_8859_16) |
+      "ISO-8859-16" |
+#elif defined(ISO_8859_2) |
+      "ISO-8859-2" |
+#elif defined(ISO_8859_3) |
+      "ISO-8859-3" |
+#elif defined(ISO_8859_4) |
+      "ISO-8859-4" |
+#elif defined(ISO_8859_5) |
+      "ISO-8859-5" |
+#elif defined(ISO_8859_6) |
+      "ISO-8859-6" |
+#elif defined(ISO_8859_7) |
+      "ISO-8859-7" |
+#elif defined(ISO_8859_8) |
+      "ISO-8859-8" |
+#elif defined(ISO_8859_8_I) |
+      "ISO-8859-8-I" |
+#elif defined(KOI8_R) |
+      "KOI8-R" |
+#elif defined(KOI8_U) |
+      "KOI8-U" |
+#elif defined(SHIFT_JIS) |
+      "Shift_JIS" |
+#elif defined(UTF_16BE) |
+      "UTF-16BE" |
+#elif defined(UTF_16LE) |
+      "UTF-16LE" |
+#elif defined(UTF_32) |
+      "UTF-32" |
+#elif defined(UTF_32BE) |
+      "UTF-32BE" |
+#elif defined(UTF_32LE) |
+      "UTF-32LE" |
+#elif defined(UTF_8) |
+      "UTF-8" |
+#elif defined(GB18030) |
+      "gb18030" |
+#elif defined(MACINTOSH) |
+      "macintosh" |
+#elif defined(WINDOWS_1250) |
+      "windows-1250" |
+#elif defined(WINDOWS_1251) |
+      "windows-1251" |
+#elif defined(WINDOWS_1252) |
+      "windows-1252" |
+#elif defined(WINDOWS_1253) |
+      "windows-1253" |
+#elif defined(WINDOWS_1254) |
+      "windows-1254" |
+#elif defined(WINDOWS_1255) |
+      "windows-1255" |
+#elif defined(WINDOWS_1256) |
+      "windows-1256" |
+#elif defined(WINDOWS_1257) |
+      "windows-1257" |
+#elif defined(WINDOWS_1258) |
+      "windows-1258" |
+#elif defined(WINDOWS_874) |
+      "windows-874" |
+#elif defined(X_MAC_CYRILLIC) |
+      "x-mac-cyrillic" |
+#elif defined(X_USER_DEFINED) |
+      "x-user-defined" |
+#endif |
+      ""; |
+ |
+  // The return value is intentionally unused here. |
+  // NOTE(review): presumably called for its side effects - confirm. |
+  WTF::getEncodingNamesForTesting(); |
+  static const Vector<WTF::FlushBehavior> flushOptions{ |
+      WTF::DoNotFlush, WTF::FetchEOF, WTF::DataEOF}; |
+  static const Vector<WTF::UnencodableHandling> unencodableHandlingOptions{ |
mmoroz
2017/03/03 09:49:47
Does it make sense to have #118 and #120 inside LL
Charlie Harrison
2017/03/03 19:33:55
Done.
|
+      WTF::QuestionMarksForUnencodables, WTF::EntitiesForUnencodables, |
+      WTF::URLEncodedEntitiesForUnencodables, |
+      WTF::CSSEncodedEntitiesForUnencodables}; |
+ |
+  WTF::TextEncoding encoding(encodingName); |
+ |
+  // Choose the codec options from the front of the fuzzer input. |
+  FuzzedDataProvider fuzzedData(data, size); |
+  bool stopOnError = fuzzedData.ConsumeBool(); |
+  WTF::UnencodableHandling unencodableHandling = |
+      fuzzedData.PickValueInVector(unencodableHandlingOptions); |
+  WTF::FlushBehavior flushBehavior = fuzzedData.PickValueInVector(flushOptions); |
+ |
+  // Everything the provider has not consumed yet is the payload. Using it, |
+  // rather than the raw data/size pair, keeps the option bytes out of the |
+  // text handed to the codec. |
+  const CString payload = fuzzedData.ConsumeRemainingBytes(); |
+ |
+  // 1. Treat the payload as bytes off the wire and decode them. |
+  std::unique_ptr<TextCodec> codec = newTextCodec(encoding); |
+  bool sawError = false; |
+  codec->decode(payload.data(), payload.length(), flushBehavior, |
mmoroz
2017/03/03 09:49:47
I think that we should use `ConsumeRemainingBytes`
Charlie Harrison
2017/03/03 19:33:55
Yes! Sorry this was just a simple error.
|
+                stopOnError, sawError); |
+ |
+  // 2. Treat the payload as an 8-bit string and encode it. |
+  if (payload.length() % sizeof(LChar) == 0) { |
+    std::unique_ptr<TextCodec> latin1Codec = newTextCodec(encoding); |
+    latin1Codec->encode(reinterpret_cast<const LChar*>(payload.data()), |
mmoroz
2017/03/03 09:49:47
The same as for line 135. Let's call `ConsumeRemai
Charlie Harrison
2017/03/03 19:33:55
Done.
|
+                        payload.length() / sizeof(LChar), unencodableHandling); |
+  } |
+  // 3. Treat the payload as a 16-bit string and encode it; skipped when the |
+  // byte count does not divide evenly into UChar units. |
+  if (payload.length() % sizeof(UChar) == 0) { |
+    std::unique_ptr<TextCodec> utf16Codec = newTextCodec(encoding); |
+    utf16Codec->encode(reinterpret_cast<const UChar*>(payload.data()), |
mmoroz
2017/03/03 09:49:47
Why we call `decode` on line 135 and `encode` here
jsbell
2017/03/03 17:35:43
The fuzzer is using the input data in three ways t
Charlie Harrison
2017/03/03 19:33:55
I have updated comments based on this discussion t
|
+                       payload.length() / sizeof(UChar), unencodableHandling); |
+  } |
+ |
+  return 0; |
+} |