| Index: base/i18n/streaming_utf8_validator_unittest.cc
|
| diff --git a/base/i18n/streaming_utf8_validator_unittest.cc b/base/i18n/streaming_utf8_validator_unittest.cc
|
| deleted file mode 100644
|
| index 20ea564c0322a68d9ffc39b6a3911f4a0feafcd2..0000000000000000000000000000000000000000
|
| --- a/base/i18n/streaming_utf8_validator_unittest.cc
|
| +++ /dev/null
|
| @@ -1,412 +0,0 @@
|
| -// Copyright 2014 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -
|
| -#include "base/i18n/streaming_utf8_validator.h"
|
| -
|
| -#include <stdio.h>
|
| -#include <string.h>
|
| -
|
| -#include <string>
|
| -
|
| -#include "base/strings/string_piece.h"
|
| -#include "testing/gtest/include/gtest/gtest.h"
|
| -
|
| -// Define BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST to verify that this class
|
| -// accepts exactly the same set of 4-byte strings as ICU-based validation. This
|
| -// tests every possible 4-byte string, so it is too slow to run routinely on
|
| -// low-powered machines.
|
| -//
|
| -// #define BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST
|
| -
|
| -#ifdef BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST
|
| -
|
| -#include "base/basictypes.h"
|
| -#include "base/bind.h"
|
| -#include "base/location.h"
|
| -#include "base/logging.h"
|
| -#include "base/memory/ref_counted.h"
|
| -#include "base/strings/string_util.h"
|
| -#include "base/strings/stringprintf.h"
|
| -#include "base/strings/utf_string_conversion_utils.h"
|
| -#include "base/synchronization/condition_variable.h"
|
| -#include "base/synchronization/lock.h"
|
| -#include "base/threading/sequenced_worker_pool.h"
|
| -#include "third_party/icu/source/common/unicode/utf8.h"
|
| -
|
| -#endif // BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST
|
| -
|
| -namespace base {
|
| -namespace {
|
| -
|
| -// Avoid having to qualify the enum values in the tests.
|
| -const StreamingUtf8Validator::State VALID_ENDPOINT =
|
| - StreamingUtf8Validator::VALID_ENDPOINT;
|
| -const StreamingUtf8Validator::State VALID_MIDPOINT =
|
| - StreamingUtf8Validator::VALID_MIDPOINT;
|
| -const StreamingUtf8Validator::State INVALID = StreamingUtf8Validator::INVALID;
|
| -
|
| -#ifdef BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST
|
| -
|
| -const uint32 kThoroughTestChunkSize = 1 << 24;
|
| -
|
| -class StreamingUtf8ValidatorThoroughTest : public ::testing::Test {
|
| - protected:
|
| - StreamingUtf8ValidatorThoroughTest()
|
| - : all_done_(&lock_), tasks_dispatched_(0), tasks_finished_(0) {}
|
| -
|
| - // This uses the same logic as base::IsStringUTF8 except it considers
|
| - // non-characters valid (and doesn't require a string as input).
|
| - static bool IsStringUtf8(const char* src, int32 src_len) {
|
| - int32 char_index = 0;
|
| -
|
| - while (char_index < src_len) {
|
| - int32 code_point;
|
| - U8_NEXT(src, char_index, src_len, code_point);
|
| - if (!base::IsValidCodepoint(code_point))
|
| - return false;
|
| - }
|
| - return true;
|
| - }
|
| -
|
| - // Converts the passed-in integer to a 4 byte string and then
|
| - // verifies that IsStringUtf8 and StreamingUtf8Validator agree on
|
| - // whether it is valid UTF-8 or not.
|
| - void TestNumber(uint32 n) const {
|
| - char test[sizeof n];
|
| - memcpy(test, &n, sizeof n);
|
| - StreamingUtf8Validator validator;
|
| - EXPECT_EQ(IsStringUtf8(test, sizeof n),
|
| - validator.AddBytes(test, sizeof n) == VALID_ENDPOINT)
|
| - << "Difference of opinion for \""
|
| - << base::StringPrintf("\\x%02X\\x%02X\\x%02X\\x%02X",
|
| - test[0] & 0xFF,
|
| - test[1] & 0xFF,
|
| - test[2] & 0xFF,
|
| - test[3] & 0xFF) << "\"";
|
| - }
|
| -
|
| - public:
|
| - // Tests the 4-byte sequences corresponding to the |size| integers
|
| - // starting at |begin|. This is intended to be run from a worker
|
| - // pool. Signals |all_done_| at the end if it thinks all tasks are
|
| - // finished.
|
| - void TestRange(uint32 begin, uint32 size) {
|
| - for (uint32 i = 0; i < size; ++i) {
|
| - TestNumber(begin + i);
|
| - }
|
| - base::AutoLock al(lock_);
|
| - ++tasks_finished_;
|
| - LOG(INFO) << tasks_finished_ << " / " << tasks_dispatched_
|
| - << " tasks done\n";
|
| - if (tasks_finished_ >= tasks_dispatched_) {
|
| - all_done_.Signal();
|
| - }
|
| - }
|
| -
|
| - protected:
|
| - base::Lock lock_;
|
| - base::ConditionVariable all_done_;
|
| - int tasks_dispatched_;
|
| - int tasks_finished_;
|
| -};
|
| -
|
| -TEST_F(StreamingUtf8ValidatorThoroughTest, TestEverything) {
|
| - scoped_refptr<base::SequencedWorkerPool> pool =
|
| - new base::SequencedWorkerPool(32, "TestEverything");
|
| - base::AutoLock al(lock_);
|
| - uint32 begin = 0;
|
| - do {
|
| - pool->PostWorkerTask(
|
| - FROM_HERE,
|
| - base::Bind(&StreamingUtf8ValidatorThoroughTest::TestRange,
|
| - base::Unretained(this),
|
| - begin,
|
| - kThoroughTestChunkSize));
|
| - ++tasks_dispatched_;
|
| - begin += kThoroughTestChunkSize;
|
| - } while (begin != 0);
|
| - while (tasks_finished_ < tasks_dispatched_)
|
| - all_done_.Wait();
|
| -}
|
| -
|
| -#endif // BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST
|
| -
|
| -// These valid and invalid UTF-8 sequences are based on the tests from
|
| -// base/strings/string_util_unittest.cc
|
| -
|
| -// All of the strings in |valid| must represent a single codepoint, because
|
| -// partial sequences are constructed by taking non-empty prefixes of these
|
| -// strings.
|
| -const char* const valid[] = {"\r", "\n", "a",
|
| - "\xc2\x81", "\xe1\x80\xbf", "\xf1\x80\xa0\xbf",
|
| - "\xef\xbb\xbf", // UTF-8 BOM
|
| -};
|
| -
|
| -const char* const* const valid_end = valid + arraysize(valid);
|
| -
|
| -const char* const invalid[] = {
|
| - // always invalid bytes
|
| - "\xc0", "\xc1",
|
| - "\xf5", "\xf6", "\xf7",
|
| - "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff",
|
| - // surrogate code points
|
| - "\xed\xa0\x80", "\xed\x0a\x8f", "\xed\xbf\xbf",
|
| - //
|
| - // overlong sequences
|
| - "\xc0\x80" // U+0000
|
| - "\xc1\x80", // "A"
|
| - "\xc1\x81", // "B"
|
| - "\xe0\x80\x80", // U+0000
|
| - "\xe0\x82\x80", // U+0080
|
| - "\xe0\x9f\xbf", // U+07ff
|
| - "\xf0\x80\x80\x8D", // U+000D
|
| - "\xf0\x80\x82\x91", // U+0091
|
| - "\xf0\x80\xa0\x80", // U+0800
|
| - "\xf0\x8f\xbb\xbf", // U+FEFF (BOM)
|
| - "\xf8\x80\x80\x80\xbf", // U+003F
|
| - "\xfc\x80\x80\x80\xa0\xa5",
|
| - //
|
| - // Beyond U+10FFFF
|
| - "\xf4\x90\x80\x80", // U+110000
|
| - "\xf8\xa0\xbf\x80\xbf", // 5 bytes
|
| - "\xfc\x9c\xbf\x80\xbf\x80", // 6 bytes
|
| - //
|
| - // BOMs in UTF-16(BE|LE)
|
| - "\xfe\xff", "\xff\xfe",
|
| -};
|
| -
|
| -const char* const* const invalid_end = invalid + arraysize(invalid);
|
| -
|
| -// A ForwardIterator which returns all the non-empty prefixes of the elements of
|
| -// "valid".
|
| -class PartialIterator {
|
| - public:
|
| - // The constructor returns the first iterator, ie. it is equivalent to
|
| - // begin().
|
| - PartialIterator() : index_(0), prefix_length_(0) { Advance(); }
|
| - // The trivial destructor left intentionally undefined.
|
| - // This is a value type; the default copy constructor and assignment operator
|
| - // generated by the compiler are used.
|
| -
|
| - static PartialIterator end() { return PartialIterator(arraysize(valid), 1); }
|
| -
|
| - PartialIterator& operator++() {
|
| - Advance();
|
| - return *this;
|
| - }
|
| -
|
| - base::StringPiece operator*() const {
|
| - return base::StringPiece(valid[index_], prefix_length_);
|
| - }
|
| -
|
| - bool operator==(const PartialIterator& rhs) const {
|
| - return index_ == rhs.index_ && prefix_length_ == rhs.prefix_length_;
|
| - }
|
| -
|
| - bool operator!=(const PartialIterator& rhs) const { return !(rhs == *this); }
|
| -
|
| - private:
|
| - // This constructor is used by the end() method.
|
| - PartialIterator(size_t index, size_t prefix_length)
|
| - : index_(index), prefix_length_(prefix_length) {}
|
| -
|
| - void Advance() {
|
| - if (index_ < arraysize(valid) && prefix_length_ < strlen(valid[index_]))
|
| - ++prefix_length_;
|
| - while (index_ < arraysize(valid) &&
|
| - prefix_length_ == strlen(valid[index_])) {
|
| - ++index_;
|
| - prefix_length_ = 1;
|
| - }
|
| - }
|
| -
|
| - // The UTF-8 sequence, as an offset into the |valid| array.
|
| - size_t index_;
|
| - size_t prefix_length_;
|
| -};
|
| -
|
| -// A test fixture for tests which test one UTF-8 sequence (or invalid
|
| -// byte sequence) at a time.
|
| -class StreamingUtf8ValidatorSingleSequenceTest : public ::testing::Test {
|
| - protected:
|
| - // Iterator must be convertible when de-referenced to StringPiece.
|
| - template <typename Iterator>
|
| - void CheckRange(Iterator begin,
|
| - Iterator end,
|
| - StreamingUtf8Validator::State expected) {
|
| - for (Iterator it = begin; it != end; ++it) {
|
| - StreamingUtf8Validator validator;
|
| - base::StringPiece sequence = *it;
|
| - EXPECT_EQ(expected,
|
| - validator.AddBytes(sequence.data(), sequence.size()))
|
| - << "Failed for \"" << sequence << "\"";
|
| - }
|
| - }
|
| -
|
| - // Adding input a byte at a time should make absolutely no difference.
|
| - template <typename Iterator>
|
| - void CheckRangeByteAtATime(Iterator begin,
|
| - Iterator end,
|
| - StreamingUtf8Validator::State expected) {
|
| - for (Iterator it = begin; it != end; ++it) {
|
| - StreamingUtf8Validator validator;
|
| - base::StringPiece sequence = *it;
|
| - StreamingUtf8Validator::State state = VALID_ENDPOINT;
|
| - for (base::StringPiece::const_iterator cit = sequence.begin();
|
| - cit != sequence.end();
|
| - ++cit) {
|
| - state = validator.AddBytes(&*cit, 1);
|
| - }
|
| - EXPECT_EQ(expected, state) << "Failed for \"" << sequence << "\"";
|
| - }
|
| - }
|
| -};
|
| -
|
| -// A test fixture for tests which test the concatenation of byte sequences.
|
| -class StreamingUtf8ValidatorDoubleSequenceTest : public ::testing::Test {
|
| - protected:
|
| - // Check every possible concatenation of byte sequences from two
|
| - // ranges, and verify that the combination matches the expected
|
| - // state.
|
| - template <typename Iterator1, typename Iterator2>
|
| - void CheckCombinations(Iterator1 begin1,
|
| - Iterator1 end1,
|
| - Iterator2 begin2,
|
| - Iterator2 end2,
|
| - StreamingUtf8Validator::State expected) {
|
| - StreamingUtf8Validator validator;
|
| - for (Iterator1 it1 = begin1; it1 != end1; ++it1) {
|
| - base::StringPiece c1 = *it1;
|
| - for (Iterator2 it2 = begin2; it2 != end2; ++it2) {
|
| - base::StringPiece c2 = *it2;
|
| - validator.AddBytes(c1.data(), c1.size());
|
| - EXPECT_EQ(expected, validator.AddBytes(c2.data(), c2.size()))
|
| - << "Failed for \"" << c1 << c2 << "\"";
|
| - validator.Reset();
|
| - }
|
| - }
|
| - }
|
| -};
|
| -
|
| -TEST(StreamingUtf8ValidatorTest, NothingIsValid) {
|
| - static const char kNothing[] = "";
|
| - EXPECT_EQ(VALID_ENDPOINT, StreamingUtf8Validator().AddBytes(kNothing, 0));
|
| -}
|
| -
|
| -// Because the members of the |valid| array need to be non-zero length
|
| -// sequences and are measured with strlen(), |valid| cannot be used it
|
| -// to test the NUL character '\0', so the NUL character gets its own
|
| -// test.
|
| -TEST(StreamingUtf8ValidatorTest, NulIsValid) {
|
| - static const char kNul[] = "\x00";
|
| - EXPECT_EQ(VALID_ENDPOINT, StreamingUtf8Validator().AddBytes(kNul, 1));
|
| -}
|
| -
|
| -// Just a basic sanity test before we start getting fancy.
|
| -TEST(StreamingUtf8ValidatorTest, HelloWorld) {
|
| - static const char kHelloWorld[] = "Hello, World!";
|
| - EXPECT_EQ(
|
| - VALID_ENDPOINT,
|
| - StreamingUtf8Validator().AddBytes(kHelloWorld, strlen(kHelloWorld)));
|
| -}
|
| -
|
| -// Check that the Reset() method works.
|
| -TEST(StreamingUtf8ValidatorTest, ResetWorks) {
|
| - StreamingUtf8Validator validator;
|
| - EXPECT_EQ(INVALID, validator.AddBytes("\xC0", 1));
|
| - EXPECT_EQ(INVALID, validator.AddBytes("a", 1));
|
| - validator.Reset();
|
| - EXPECT_EQ(VALID_ENDPOINT, validator.AddBytes("a", 1));
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Valid) {
|
| - CheckRange(valid, valid_end, VALID_ENDPOINT);
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Partial) {
|
| - CheckRange(PartialIterator(), PartialIterator::end(), VALID_MIDPOINT);
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Invalid) {
|
| - CheckRange(invalid, invalid_end, INVALID);
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorSingleSequenceTest, ValidByByte) {
|
| - CheckRangeByteAtATime(valid, valid_end, VALID_ENDPOINT);
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorSingleSequenceTest, PartialByByte) {
|
| - CheckRangeByteAtATime(
|
| - PartialIterator(), PartialIterator::end(), VALID_MIDPOINT);
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorSingleSequenceTest, InvalidByByte) {
|
| - CheckRangeByteAtATime(invalid, invalid_end, INVALID);
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusValidIsValid) {
|
| - CheckCombinations(valid, valid_end, valid, valid_end, VALID_ENDPOINT);
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusPartialIsPartial) {
|
| - CheckCombinations(valid,
|
| - valid_end,
|
| - PartialIterator(),
|
| - PartialIterator::end(),
|
| - VALID_MIDPOINT);
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusValidIsInvalid) {
|
| - CheckCombinations(
|
| - PartialIterator(), PartialIterator::end(), valid, valid_end, INVALID);
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusPartialIsInvalid) {
|
| - CheckCombinations(PartialIterator(),
|
| - PartialIterator::end(),
|
| - PartialIterator(),
|
| - PartialIterator::end(),
|
| - INVALID);
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusInvalidIsInvalid) {
|
| - CheckCombinations(valid, valid_end, invalid, invalid_end, INVALID);
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusValidIsInvalid) {
|
| - CheckCombinations(invalid, invalid_end, valid, valid_end, INVALID);
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusInvalidIsInvalid) {
|
| - CheckCombinations(invalid, invalid_end, invalid, invalid_end, INVALID);
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusPartialIsInvalid) {
|
| - CheckCombinations(
|
| - invalid, invalid_end, PartialIterator(), PartialIterator::end(), INVALID);
|
| -}
|
| -
|
| -TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusInvalidIsInvalid) {
|
| - CheckCombinations(
|
| - PartialIterator(), PartialIterator::end(), invalid, invalid_end, INVALID);
|
| -}
|
| -
|
| -TEST(StreamingUtf8ValidatorValidateTest, EmptyIsValid) {
|
| - EXPECT_TRUE(StreamingUtf8Validator::Validate(std::string()));
|
| -}
|
| -
|
| -TEST(StreamingUtf8ValidatorValidateTest, SimpleValidCase) {
|
| - EXPECT_TRUE(StreamingUtf8Validator::Validate("\xc2\x81"));
|
| -}
|
| -
|
| -TEST(StreamingUtf8ValidatorValidateTest, SimpleInvalidCase) {
|
| - EXPECT_FALSE(StreamingUtf8Validator::Validate("\xc0\x80"));
|
| -}
|
| -
|
| -TEST(StreamingUtf8ValidatorValidateTest, TruncatedIsInvalid) {
|
| - EXPECT_FALSE(StreamingUtf8Validator::Validate("\xc2"));
|
| -}
|
| -
|
| -} // namespace
|
| -} // namespace base
|
|
|