base/i18n/streaming_utf8_validator_unittest.cc - Issue 1647803004: Move base to DEPS

Unified Diff: base/i18n/streaming_utf8_validator_unittest.cc

Issue 1647803004: Move base to DEPS (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: base/i18n/streaming_utf8_validator_unittest.cc

diff --git a/base/i18n/streaming_utf8_validator_unittest.cc b/base/i18n/streaming_utf8_validator_unittest.cc

deleted file mode 100644

index 20ea564c0322a68d9ffc39b6a3911f4a0feafcd2..0000000000000000000000000000000000000000

--- a/base/i18n/streaming_utf8_validator_unittest.cc

+++ /dev/null

@@ -1,412 +0,0 @@

-// Use of this source code is governed by a BSD-style license that can be

-// found in the LICENSE file.

-#include "base/i18n/streaming_utf8_validator.h"

-#include <stdio.h>

-#include <string.h>

-#include <string>

-#include "base/strings/string_piece.h"

-#include "testing/gtest/include/gtest/gtest.h"

-// Define BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST to verify that this class

-// accepts exactly the same set of 4-byte strings as ICU-based validation. This

-// tests every possible 4-byte string, so it is too slow to run routinely on

-// low-powered machines.

-//

-// #define BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

-#ifdef BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

-#include "base/basictypes.h"

-#include "base/bind.h"

-#include "base/location.h"

-#include "base/logging.h"

-#include "base/memory/ref_counted.h"

-#include "base/strings/string_util.h"

-#include "base/strings/stringprintf.h"

-#include "base/strings/utf_string_conversion_utils.h"

-#include "base/synchronization/condition_variable.h"

-#include "base/synchronization/lock.h"

-#include "base/threading/sequenced_worker_pool.h"

-#include "third_party/icu/source/common/unicode/utf8.h"

-#endif // BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

-namespace base {

-namespace {

-// Avoid having to qualify the enum values in the tests.

-const StreamingUtf8Validator::State VALID_ENDPOINT =

- StreamingUtf8Validator::VALID_ENDPOINT;

-const StreamingUtf8Validator::State VALID_MIDPOINT =

- StreamingUtf8Validator::VALID_MIDPOINT;

-const StreamingUtf8Validator::State INVALID = StreamingUtf8Validator::INVALID;

-#ifdef BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

-const uint32 kThoroughTestChunkSize = 1 << 24;

-class StreamingUtf8ValidatorThoroughTest : public ::testing::Test {

- protected:

- StreamingUtf8ValidatorThoroughTest()

- : all_done_(&lock_), tasks_dispatched_(0), tasks_finished_(0) {}

- // This uses the same logic as base::IsStringUTF8 except it considers

- // non-characters valid (and doesn't require a string as input).

- static bool IsStringUtf8(const char* src, int32 src_len) {

- int32 char_index = 0;

- while (char_index < src_len) {

- int32 code_point;

- U8_NEXT(src, char_index, src_len, code_point);

- if (!base::IsValidCodepoint(code_point))

- return false;

- }

- return true;

- }

- // Converts the passed-in integer to a 4 byte string and then

- // verifies that IsStringUtf8 and StreamingUtf8Validator agree on

- // whether it is valid UTF-8 or not.

- void TestNumber(uint32 n) const {

- char test[sizeof n];

- memcpy(test, &n, sizeof n);

- StreamingUtf8Validator validator;

- EXPECT_EQ(IsStringUtf8(test, sizeof n),

- validator.AddBytes(test, sizeof n) == VALID_ENDPOINT)

- << "Difference of opinion for \""

- << base::StringPrintf("\\x%02X\\x%02X\\x%02X\\x%02X",

- test[0] & 0xFF,

- test[1] & 0xFF,

- test[2] & 0xFF,

- test[3] & 0xFF) << "\"";

- }

- public:

- // Tests the 4-byte sequences corresponding to the |size| integers

- // starting at |begin|. This is intended to be run from a worker

- // pool. Signals |all_done_| at the end if it thinks all tasks are

- // finished.

- void TestRange(uint32 begin, uint32 size) {

- for (uint32 i = 0; i < size; ++i) {

- TestNumber(begin + i);

- }

- base::AutoLock al(lock_);

- ++tasks_finished_;

- LOG(INFO) << tasks_finished_ << " / " << tasks_dispatched_

- << " tasks done\n";

- if (tasks_finished_ >= tasks_dispatched_) {

- all_done_.Signal();

- }

- protected:

- base::Lock lock_;

- base::ConditionVariable all_done_;

- int tasks_dispatched_;

- int tasks_finished_;

-};

-TEST_F(StreamingUtf8ValidatorThoroughTest, TestEverything) {

- scoped_refptr<base::SequencedWorkerPool> pool =

- new base::SequencedWorkerPool(32, "TestEverything");

- base::AutoLock al(lock_);

- uint32 begin = 0;

- do {

- pool->PostWorkerTask(

- FROM_HERE,

- base::Bind(&StreamingUtf8ValidatorThoroughTest::TestRange,

- base::Unretained(this),

- begin,

- kThoroughTestChunkSize));

- ++tasks_dispatched_;

- begin += kThoroughTestChunkSize;

- } while (begin != 0);

- while (tasks_finished_ < tasks_dispatched_)

- all_done_.Wait();

-#endif // BASE_I18N_UTF8_VALIDATOR_THOROUGH_TEST

-// These valid and invalid UTF-8 sequences are based on the tests from

-// base/strings/string_util_unittest.cc

-// All of the strings in |valid| must represent a single codepoint, because

-// partial sequences are constructed by taking non-empty prefixes of these

-// strings.

-const char* const valid[] = {"\r", "\n", "a",

- "\xc2\x81", "\xe1\x80\xbf", "\xf1\x80\xa0\xbf",

- "\xef\xbb\xbf", // UTF-8 BOM

-};

-const char* const* const valid_end = valid + arraysize(valid);

-const char* const invalid[] = {

- // always invalid bytes

- "\xc0", "\xc1",

- "\xf5", "\xf6", "\xf7",

- "\xf8", "\xf9", "\xfa", "\xfb", "\xfc", "\xfd", "\xfe", "\xff",

- // surrogate code points

- "\xed\xa0\x80", "\xed\x0a\x8f", "\xed\xbf\xbf",

- //

- // overlong sequences

- "\xc0\x80" // U+0000

- "\xc1\x80", // "A"

- "\xc1\x81", // "B"

- "\xe0\x80\x80", // U+0000

- "\xe0\x82\x80", // U+0080

- "\xe0\x9f\xbf", // U+07ff

- "\xf0\x80\x80\x8D", // U+000D

- "\xf0\x80\x82\x91", // U+0091

- "\xf0\x80\xa0\x80", // U+0800

- "\xf0\x8f\xbb\xbf", // U+FEFF (BOM)

- "\xf8\x80\x80\x80\xbf", // U+003F

- "\xfc\x80\x80\x80\xa0\xa5",

- //

- // Beyond U+10FFFF

- "\xf4\x90\x80\x80", // U+110000

- "\xf8\xa0\xbf\x80\xbf", // 5 bytes

- "\xfc\x9c\xbf\x80\xbf\x80", // 6 bytes

- //

- // BOMs in UTF-16(BE|LE)

- "\xfe\xff", "\xff\xfe",

-};

-const char* const* const invalid_end = invalid + arraysize(invalid);

-// A ForwardIterator which returns all the non-empty prefixes of the elements of

-// "valid".

-class PartialIterator {

- public:

- // The constructor returns the first iterator, i.e. it is equivalent to

- // begin().

- PartialIterator() : index_(0), prefix_length_(0) { Advance(); }

- // The trivial destructor is intentionally left undefined.

- // This is a value type; the default copy constructor and assignment operator

- // generated by the compiler are used.

- static PartialIterator end() { return PartialIterator(arraysize(valid), 1); }

- PartialIterator& operator++() {

- Advance();

- return *this;

- }

- base::StringPiece operator*() const {

- return base::StringPiece(valid[index_], prefix_length_);

- }

- bool operator==(const PartialIterator& rhs) const {

- return index_ == rhs.index_ && prefix_length_ == rhs.prefix_length_;

- }

- bool operator!=(const PartialIterator& rhs) const { return !(rhs == *this); }

- private:

- // This constructor is used by the end() method.

- PartialIterator(size_t index, size_t prefix_length)

- : index_(index), prefix_length_(prefix_length) {}

- void Advance() {

- if (index_ < arraysize(valid) && prefix_length_ < strlen(valid[index_]))

- ++prefix_length_;

- while (index_ < arraysize(valid) &&

- prefix_length_ == strlen(valid[index_])) {

- ++index_;

- prefix_length_ = 1;

- }

- // The UTF-8 sequence, as an offset into the |valid| array.

- size_t index_;

- size_t prefix_length_;

-};

-// A test fixture for tests which test one UTF-8 sequence (or invalid

-// byte sequence) at a time.

-class StreamingUtf8ValidatorSingleSequenceTest : public ::testing::Test {

- protected:

- // Iterator must be convertible when de-referenced to StringPiece.

- template <typename Iterator>

- void CheckRange(Iterator begin,

- Iterator end,

- StreamingUtf8Validator::State expected) {

- for (Iterator it = begin; it != end; ++it) {

- StreamingUtf8Validator validator;

- base::StringPiece sequence = *it;

- EXPECT_EQ(expected,

- validator.AddBytes(sequence.data(), sequence.size()))

- << "Failed for \"" << sequence << "\"";

- }

- // Adding input a byte at a time should make absolutely no difference.

- template <typename Iterator>

- void CheckRangeByteAtATime(Iterator begin,

- Iterator end,

- StreamingUtf8Validator::State expected) {

- for (Iterator it = begin; it != end; ++it) {

- StreamingUtf8Validator validator;

- base::StringPiece sequence = *it;

- StreamingUtf8Validator::State state = VALID_ENDPOINT;

- for (base::StringPiece::const_iterator cit = sequence.begin();

- cit != sequence.end();

- ++cit) {

- state = validator.AddBytes(&*cit, 1);

- }

- EXPECT_EQ(expected, state) << "Failed for \"" << sequence << "\"";

- }

-};

-// A test fixture for tests which test the concatenation of byte sequences.

-class StreamingUtf8ValidatorDoubleSequenceTest : public ::testing::Test {

- protected:

- // Check every possible concatenation of byte sequences from two

- // ranges, and verify that the combination matches the expected

- // state.

- template <typename Iterator1, typename Iterator2>

- void CheckCombinations(Iterator1 begin1,

- Iterator1 end1,

- Iterator2 begin2,

- Iterator2 end2,

- StreamingUtf8Validator::State expected) {

- StreamingUtf8Validator validator;

- for (Iterator1 it1 = begin1; it1 != end1; ++it1) {

- base::StringPiece c1 = *it1;

- for (Iterator2 it2 = begin2; it2 != end2; ++it2) {

- base::StringPiece c2 = *it2;

- validator.AddBytes(c1.data(), c1.size());

- EXPECT_EQ(expected, validator.AddBytes(c2.data(), c2.size()))

- << "Failed for \"" << c1 << c2 << "\"";

- validator.Reset();

- }

-};

-TEST(StreamingUtf8ValidatorTest, NothingIsValid) {

- static const char kNothing[] = "";

- EXPECT_EQ(VALID_ENDPOINT, StreamingUtf8Validator().AddBytes(kNothing, 0));

-// Because the members of the |valid| array need to be non-zero length

-// sequences and are measured with strlen(), |valid| cannot be used

-// to test the NUL character '\0', so the NUL character gets its own

-// test.

-TEST(StreamingUtf8ValidatorTest, NulIsValid) {

- static const char kNul[] = "\x00";

- EXPECT_EQ(VALID_ENDPOINT, StreamingUtf8Validator().AddBytes(kNul, 1));

-// Just a basic sanity test before we start getting fancy.

-TEST(StreamingUtf8ValidatorTest, HelloWorld) {

- static const char kHelloWorld[] = "Hello, World!";

- EXPECT_EQ(

- VALID_ENDPOINT,

- StreamingUtf8Validator().AddBytes(kHelloWorld, strlen(kHelloWorld)));

-// Check that the Reset() method works.

-TEST(StreamingUtf8ValidatorTest, ResetWorks) {

- StreamingUtf8Validator validator;

- EXPECT_EQ(INVALID, validator.AddBytes("\xC0", 1));

- EXPECT_EQ(INVALID, validator.AddBytes("a", 1));

- validator.Reset();

- EXPECT_EQ(VALID_ENDPOINT, validator.AddBytes("a", 1));

-TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Valid) {

- CheckRange(valid, valid_end, VALID_ENDPOINT);

-TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Partial) {

- CheckRange(PartialIterator(), PartialIterator::end(), VALID_MIDPOINT);

-TEST_F(StreamingUtf8ValidatorSingleSequenceTest, Invalid) {

- CheckRange(invalid, invalid_end, INVALID);

-TEST_F(StreamingUtf8ValidatorSingleSequenceTest, ValidByByte) {

- CheckRangeByteAtATime(valid, valid_end, VALID_ENDPOINT);

-TEST_F(StreamingUtf8ValidatorSingleSequenceTest, PartialByByte) {

- CheckRangeByteAtATime(

- PartialIterator(), PartialIterator::end(), VALID_MIDPOINT);

-TEST_F(StreamingUtf8ValidatorSingleSequenceTest, InvalidByByte) {

- CheckRangeByteAtATime(invalid, invalid_end, INVALID);

-TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusValidIsValid) {

- CheckCombinations(valid, valid_end, valid, valid_end, VALID_ENDPOINT);

-TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusPartialIsPartial) {

- CheckCombinations(valid,

- valid_end,

- PartialIterator(),

- PartialIterator::end(),

- VALID_MIDPOINT);

-TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusValidIsInvalid) {

- CheckCombinations(

- PartialIterator(), PartialIterator::end(), valid, valid_end, INVALID);

-TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusPartialIsInvalid) {

- CheckCombinations(PartialIterator(),

- PartialIterator::end(),

- PartialIterator(),

- PartialIterator::end(),

- INVALID);

-TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, ValidPlusInvalidIsInvalid) {

- CheckCombinations(valid, valid_end, invalid, invalid_end, INVALID);

-TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusValidIsInvalid) {

- CheckCombinations(invalid, invalid_end, valid, valid_end, INVALID);

-TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusInvalidIsInvalid) {

- CheckCombinations(invalid, invalid_end, invalid, invalid_end, INVALID);

-TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, InvalidPlusPartialIsInvalid) {

- CheckCombinations(

- invalid, invalid_end, PartialIterator(), PartialIterator::end(), INVALID);

-TEST_F(StreamingUtf8ValidatorDoubleSequenceTest, PartialPlusInvalidIsInvalid) {

- CheckCombinations(

- PartialIterator(), PartialIterator::end(), invalid, invalid_end, INVALID);

-TEST(StreamingUtf8ValidatorValidateTest, EmptyIsValid) {

- EXPECT_TRUE(StreamingUtf8Validator::Validate(std::string()));

-TEST(StreamingUtf8ValidatorValidateTest, SimpleValidCase) {

- EXPECT_TRUE(StreamingUtf8Validator::Validate("\xc2\x81"));

-TEST(StreamingUtf8ValidatorValidateTest, SimpleInvalidCase) {

- EXPECT_FALSE(StreamingUtf8Validator::Validate("\xc0\x80"));

-TEST(StreamingUtf8ValidatorValidateTest, TruncatedIsInvalid) {

- EXPECT_FALSE(StreamingUtf8Validator::Validate("\xc2"));

-} // namespace

-} // namespace base

« no previous file with comments | « base/i18n/streaming_utf8_validator_perftest.cc ('k') | base/i18n/string_compare.h » ('j') | no next file with comments »