OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
mmoroz
2017/03/03 09:49:47
2017
Charlie Harrison
2017/03/03 19:33:55
Done.
| |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "wtf/text/TextCodec.h" | |
6 | |
7 #include "platform/testing/BlinkFuzzerTestSupport.h" | |
8 #include "platform/testing/FuzzedDataProvider.h" | |
9 #include "wtf/Vector.h" | |
10 #include "wtf/text/CString.h" | |
11 #include "wtf/text/TextEncoding.h" | |
12 #include "wtf/text/TextEncodingRegistry.h" | |
13 #include "wtf/text/WTFString.h" | |
14 | |
15 using namespace blink; | |
16 | |
17 // TODO(jsbell): This fuzzes code in wtf/ but has dependencies on platform/, | |
18 // so it must live in the latter directory. Once wtf/ moves into platform/wtf | |
19 // this should move there as well. | |
20 | |
// Empty class with no members. Nothing in the visible part of this file
// refers to it.
// NOTE(review): purpose is not evident from here -- confirm whether some other
// file depends on this name (e.g. a friend declaration) before removing it.
21 class TextCodecFuzzHarness {}; | |
// libFuzzer's optional one-time initialization hook. Forwards the
// command-line arguments to InitializeBlinkFuzzTest() and always returns 0.
22 extern "C" int LLVMFuzzerInitialize(int* argc, char*** argv) { | |
23 InitializeBlinkFuzzTest(argc, argv); | |
24 return 0; | |
25 } | |
26 | |
27 // Fuzzer for WTF::TextCodec. | |
// libFuzzer per-input entry point. For one encoding (fixed at compile time),
// runs the fuzz input through TextCodec::decode() once and through
// TextCodec::encode() up to twice (as LChar and as UChar data). Every result
// is discarded; the function always returns 0.
//
// `data` / `size` describe the raw fuzz input buffer.
28 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { | |
mmoroz
2017/03/03 09:49:47
As I understood, we are using first 2 bytes to ini
Charlie Harrison
2017/03/03 19:33:55
Done, but we are actually using 3 bytes.
| |
// The encoding name is selected by whichever of the macros below is defined
// at build time. The chosen literal is concatenated with the trailing ""; if
// none of the macros is defined the name is the empty string.
29 static String encodingName = | |
30 #if defined(BIG5) | |
31 "Big5" | |
32 #elif defined(EUC_JP) | |
33 "EUC-JP" | |
34 #elif defined(EUC_KR) | |
35 "EUC-KR" | |
36 #elif defined(GBK) | |
37 "GBK" | |
38 #elif defined(IBM866) | |
39 "IBM866" | |
40 #elif defined(ISO_2022_JP) | |
41 "ISO-2022-JP" | |
42 #elif defined(ISO_8859_10) | |
43 "ISO-8859-10" | |
44 #elif defined(ISO_8859_13) | |
45 "ISO-8859-13" | |
46 #elif defined(ISO_8859_14) | |
47 "ISO-8859-14" | |
48 #elif defined(ISO_8859_15) | |
49 "ISO-8859-15" | |
50 #elif defined(ISO_8859_16) | |
51 "ISO-8859-16" | |
52 #elif defined(ISO_8859_2) | |
53 "ISO-8859-2" | |
54 #elif defined(ISO_8859_3) | |
55 "ISO-8859-3" | |
56 #elif defined(ISO_8859_4) | |
57 "ISO-8859-4" | |
58 #elif defined(ISO_8859_5) | |
59 "ISO-8859-5" | |
60 #elif defined(ISO_8859_6) | |
61 "ISO-8859-6" | |
62 #elif defined(ISO_8859_7) | |
63 "ISO-8859-7" | |
64 #elif defined(ISO_8859_8) | |
65 "ISO-8859-8" | |
66 #elif defined(ISO_8859_8_I) | |
67 "ISO-8859-8-I" | |
68 #elif defined(KOI8_R) | |
69 "KOI8-R" | |
70 #elif defined(KOI8_U) | |
71 "KOI8-U" | |
72 #elif defined(SHIFT_JIS) | |
73 "Shift_JIS" | |
74 #elif defined(UTF_16BE) | |
75 "UTF-16BE" | |
76 #elif defined(UTF_16LE) | |
77 "UTF-16LE" | |
78 #elif defined(UTF_32) | |
79 "UTF-32" | |
80 #elif defined(UTF_32BE) | |
81 "UTF-32BE" | |
82 #elif defined(UTF_32LE) | |
83 "UTF-32LE" | |
84 #elif defined(UTF_8) | |
85 "UTF-8" | |
86 #elif defined(GB18030) | |
87 "gb18030" | |
88 #elif defined(MACINTOSH) | |
89 "macintosh" | |
90 #elif defined(WINDOWS_1250) | |
91 "windows-1250" | |
92 #elif defined(WINDOWS_1251) | |
93 "windows-1251" | |
94 #elif defined(WINDOWS_1252) | |
95 "windows-1252" | |
96 #elif defined(WINDOWS_1253) | |
97 "windows-1253" | |
98 #elif defined(WINDOWS_1254) | |
99 "windows-1254" | |
100 #elif defined(WINDOWS_1255) | |
101 "windows-1255" | |
102 #elif defined(WINDOWS_1256) | |
103 "windows-1256" | |
104 #elif defined(WINDOWS_1257) | |
105 "windows-1257" | |
106 #elif defined(WINDOWS_1258) | |
107 "windows-1258" | |
108 #elif defined(WINDOWS_874) | |
109 "windows-874" | |
110 #elif defined(X_MAC_CYRILLIC) | |
111 "x-mac-cyrillic" | |
112 #elif defined(X_USER_DEFINED) | |
113 "x-user-defined" | |
114 #endif | |
115 ""; | |
116 | |
// NOTE(review): the return value is discarded -- presumably this is called
// for a side effect; confirm against the declaration in
// wtf/text/TextEncodingRegistry.h.
117 WTF::getEncodingNamesForTesting(); | |
// Candidate option values; one of each is picked per input below.
// NOTE(review): both vectors are function-local statics -- the thread below
// questions whether they belong inside this function.
118 static const Vector<WTF::FlushBehavior> flushOptions{ | |
119 WTF::DoNotFlush, WTF::FetchEOF, WTF::DataEOF}; | |
120 static const Vector<WTF::UnencodableHandling> unencodableHandlingOptions{ | |
mmoroz
2017/03/03 09:49:47
Does it make sense to have #118 and #120 inside LL
Charlie Harrison
2017/03/03 19:33:55
Done.
| |
121 WTF::QuestionMarksForUnencodables, WTF::EntitiesForUnencodables, | |
122 WTF::URLEncodedEntitiesForUnencodables, | |
123 WTF::CSSEncodedEntitiesForUnencodables}; | |
124 | |
125 WTF::TextEncoding encoding(encodingName); | |
126 | |
// Derive the three per-run options (stop-on-error flag, unencodable handling,
// flush behavior) from the fuzz input via FuzzedDataProvider.
127 FuzzedDataProvider fuzzedData(data, size); | |
128 bool stopOnError = fuzzedData.ConsumeBool(); | |
129 WTF::UnencodableHandling unencodableHandling = | |
130 fuzzedData.PickValueInVector(unencodableHandlingOptions); | |
131 WTF::FlushBehavior flushBehavior = fuzzedData.PickValueInVector(flushOptions); | |
132 | |
// Decode pass: the input is handed to the codec as raw encoded bytes.
// `sawError` is only passed to decode(); it is never read afterwards.
// NOTE(review): decode() is given the original (data, size) buffer, not the
// bytes left in `fuzzedData`, so the bytes already used to pick the options
// above are decoded as well (see the thread below).
133 std::unique_ptr<TextCodec> codec = newTextCodec(encoding); | |
134 bool sawError; | |
135 codec->decode(reinterpret_cast<const char*>(data), size, flushBehavior, | |
mmoroz
2017/03/03 09:49:47
I think that we should use `ConsumeRemainingBytes`
Charlie Harrison
2017/03/03 19:33:55
Yes! Sorry this was just a simple error.
| |
136 stopOnError, sawError); | |
137 | |
// Encode pass 1: reinterpret the input as an array of LChar and encode it
// with a fresh codec. The inner `codec` shadows the one declared above.
// NOTE(review): as above, this uses the full (data, size) buffer.
138 if (size % sizeof(LChar) == 0) { | |
139 std::unique_ptr<TextCodec> codec = newTextCodec(encoding); | |
140 codec->encode(reinterpret_cast<const LChar*>(data), size / sizeof(LChar), | |
mmoroz
2017/03/03 09:49:47
The same as for line 135. Let's call `ConsumeRemai
Charlie Harrison
2017/03/03 19:33:55
Done.
| |
141 unencodableHandling); | |
142 } | |
// Encode pass 2: reinterpret the input as an array of UChar; skipped when
// `size` is not a multiple of sizeof(UChar). Again uses a fresh codec and the
// full (data, size) buffer.
143 if (size % sizeof(UChar) == 0) { | |
144 std::unique_ptr<TextCodec> codec = newTextCodec(encoding); | |
145 codec->encode(reinterpret_cast<const UChar*>(data), size / sizeof(UChar), | |
mmoroz
2017/03/03 09:49:47
Why we call `decode` on line 135 and `encode` here
jsbell
2017/03/03 17:35:43
The fuzzer is using the input data in three ways t
Charlie Harrison
2017/03/03 19:33:55
I have updated comments based on this discussion t
| |
146 unencodableHandling); | |
147 } | |
148 | |
149 return 0; | |
150 } | |
OLD | NEW |