| Index: testing/libfuzzer/fuzzers/unicode_string_codepage_create_fuzzer.cc
|
| diff --git a/testing/libfuzzer/fuzzers/unicode_string_codepage_create_fuzzer.cc b/testing/libfuzzer/fuzzers/unicode_string_codepage_create_fuzzer.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..08806f2bf826bf413a19d2abe49075626f1ec6ad
|
| --- /dev/null
|
| +++ b/testing/libfuzzer/fuzzers/unicode_string_codepage_create_fuzzer.cc
|
| @@ -0,0 +1,97 @@
|
| +// Copyright (c) 2015 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include <algorithm>
|
| +#include <array>
|
| +#include <vector>
|
| +
|
| +#include "third_party/icu/source/common/unicode/unistr.h"
|
| +
|
| +// Taken from third_party/icu/source/data/mappings/convrtrs.txt file.
|
| +static const std::array<const char*, 45> kConverters = {
|
| + {
|
| + "UTF-8",
|
| + "utf-16be",
|
| + "utf-16le",
|
| + "UTF-32",
|
| + "UTF-32BE",
|
| + "UTF-32LE",
|
| + "ibm866-html",
|
| + "iso-8859-2-html",
|
| + "iso-8859-3-html",
|
| + "iso-8859-4-html",
|
| + "iso-8859-5-html",
|
| + "iso-8859-6-html",
|
| + "iso-8859-7-html",
|
| + "iso-8859-8-html",
|
| + "ISO-8859-8-I",
|
| + "iso-8859-10-html",
|
| + "iso-8859-13-html",
|
| + "iso-8859-14-html",
|
| + "iso-8859-15-html",
|
| + "iso-8859-16-html",
|
| + "koi8-r-html",
|
| + "koi8-u-html",
|
| + "macintosh-html",
|
| + "windows-874-html",
|
| + "windows-1250-html",
|
| + "windows-1251-html",
|
| + "windows-1252-html",
|
| + "windows-1253-html",
|
| + "windows-1254-html",
|
| + "windows-1255-html",
|
| + "windows-1256-html",
|
| + "windows-1257-html",
|
| + "windows-1258-html",
|
| + "x-mac-cyrillic-html",
|
| + "windows-936-2000",
|
| + "gb18030",
|
| + "big5-html",
|
| + "euc-jp-html",
|
| + "ISO_2022,locale=ja,version=0",
|
| + "shift_jis-html",
|
| + "euc-kr-html",
|
| + "ISO-2022-KR",
|
| + "ISO-2022-CN",
|
| + "ISO-2022-CN-EXT",
|
| + "HZ-GB-2312"
|
| + }
|
| +};
|
| +
|
| +// Entry point for LibFuzzer.
|
| +extern "C" int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size) {
|
| + if (size < 2)
|
| + return 0;
|
| +
|
| + // Need null-terminated string.
|
| + std::vector<char> buffer(size, 0);
|
| + // First byte will be used for random decisions.
|
| + unsigned char selector = data[0];
|
| + size -= 1;
|
| + std::copy(data + 1, data + size, buffer.data());
|
| +
|
| + // Pointer to a part of fuzzer's data or to some valid codepage value.
|
| + char* codepage;
|
| +
|
| + if (selector & 1) {
|
| + // Use random codepage value provided by fuzzer (split data in two parts).
|
| + // Remove least significant bit because we already used it as input value.
|
| + size_t codepage_length = (selector >> 1) & 0xF;
|
| + if (size <= codepage_length)
|
| + size /= 2;
|
| + else
|
| + size -= codepage_length;
|
| +
|
| + codepage = buffer.data() + size;
|
| + } else {
|
| + // Use one of valid codepage values.
|
| + // Remove least significant bit because we already used it as input value.
|
| + size_t index = (selector >> 1) % kConverters.size();
|
| + codepage = const_cast<char*>(kConverters[index]);
|
| + }
|
| +
|
| + icu::UnicodeString unicode_string(buffer.data(), size, codepage);
|
| +
|
| + return 0;
|
| +}
|
|
|