third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java - Issue 1842653006: Update //third_party/protobuf to version 3.

Unified Diff: third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java

Issue 1842653006: Update //third_party/protobuf to version 3. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: merge Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8Test.java ('k') | third_party/protobuf/java/src/test/java/com/google/protobuf/LazyFieldLiteTest.java » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java

diff --git a/third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java b/third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java

new file mode 100644

index 0000000000000000000000000000000000000000..321669f3147b2d982f02125a689039963a1ddefc

--- /dev/null

+++ b/third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java

@@ -0,0 +1,420 @@

+// Protocol Buffers - Google's data interchange format

+// https://developers.google.com/protocol-buffers/

+//

+// Redistribution and use in source and binary forms, with or without

+// modification, are permitted provided that the following conditions are

+// met:

+//

+// * Redistributions of source code must retain the above copyright

+// notice, this list of conditions and the following disclaimer.

+// * Redistributions in binary form must reproduce the above

+// copyright notice, this list of conditions and the following disclaimer

+// in the documentation and/or other materials provided with the

+// distribution.

+// * Neither the name of Google Inc. nor the names of its

+// contributors may be used to endorse or promote products derived from

+// this software without specific prior written permission.

+//

+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+package com.google.protobuf;

+import static junit.framework.Assert.*;

+import java.io.UnsupportedEncodingException;

+import java.nio.ByteBuffer;

+import java.nio.CharBuffer;

+import java.nio.charset.CharsetDecoder;

+import java.nio.charset.CharsetEncoder;

+import java.nio.charset.CoderResult;

+import java.nio.charset.CodingErrorAction;

+import java.util.ArrayList;

+import java.util.Arrays;

+import java.util.List;

+import java.util.Random;

+import java.util.logging.Logger;

+/**

+ * Shared testing code for {@link IsValidUtf8Test} and

+ * {@link IsValidUtf8FourByteTest}.

+ *

+ * @author jonp@google.com (Jon Perlow)

+ * @author martinrb@google.com (Martin Buchholz)

+ */

+class IsValidUtf8TestUtil {

+ private static Logger logger = Logger.getLogger(

+ IsValidUtf8TestUtil.class.getName());

+ // 128 - [chars 0x0000 to 0x007f]

+ static long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1;

+ // 128

+ static long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT =

+ ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;

+ // 1920 [chars 0x0080 to 0x07FF]

+ static long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1;

+ // 18,304

+ static long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =

+ // Both bytes are one byte characters

+ (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 2) +

+ // The possible number of two byte characters

+ TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS;

+ // 2048

+ static long THREE_BYTE_SURROGATES = 2 * 1024;

+ // 61,440 [chars 0x0800 to 0xFFFF, minus surrogates]

+ static long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =

+ 0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES;

+ // 2,650,112

+ static long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =

+ // All one byte characters

+ (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 3) +

+ // One two byte character and a one byte character

+ 2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *

+ ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +

+ // Three byte characters

+ THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS;

+ // 1,048,576 [chars 0x10000L to 0x10FFFF]

+ static long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1;

+ // 289,571,839

+ static long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =

+ // All one byte characters

+ (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 4) +

+ // One and three byte characters

+ 2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS *

+ ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +

+ // Two two byte characters

+ TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS +

+ // Permutations of one and two byte characters

+ 3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *

+ ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS *

+ ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +

+ // Four byte characters

+ FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS;

+ static class Shard {

+ final long index;

+ final long start;

+ final long lim;

+ final long expected;

+ public Shard(long index, long start, long lim, long expected) {

+ assertTrue(start < lim);

+ this.index = index;

+ this.start = start;

+ this.lim = lim;

+ this.expected = expected;

+ }

+ static final long[] FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES =

+ generateFourByteShardsExpectedRunnables();

+ private static long[] generateFourByteShardsExpectedRunnables() {

+ long[] expected = new long[128];

+ // 0-63 are all 5300224

+ for (int i = 0; i <= 63; i++) {

+ expected[i] = 5300224;

+ }

+ // 97-111 are all 2342912

+ for (int i = 97; i <= 111; i++) {

+ expected[i] = 2342912;

+ }

+ // 113-117 are all 1048576

+ for (int i = 113; i <= 117; i++) {

+ expected[i] = 1048576;

+ }

+ // One offs

+ expected[112] = 786432;

+ expected[118] = 786432;

+ expected[119] = 1048576;

+ expected[120] = 458752;

+ expected[121] = 524288;

+ expected[122] = 65536;

+ // Anything not assigned was the default 0.

+ return expected;

+ }

+ static final List<Shard> FOUR_BYTE_SHARDS = generateFourByteShards(

+ 128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES);

+ private static List<Shard> generateFourByteShards(

+ int numShards, long[] expected) {

+ assertEquals(numShards, expected.length);

+ List<Shard> shards = new ArrayList<Shard>(numShards);

+ long LIM = 1L << 32;

+ long increment = LIM / numShards;

+ assertTrue(LIM % numShards == 0);

+ for (int i = 0; i < numShards; i++) {

+ shards.add(new Shard(i,

+ increment * i,

+ increment * (i + 1),

+ expected[i]));

+ }

+ return shards;

+ }

+ /**

+ * Helper to run the loop to test all the permutations for the number of bytes

+ * specified.

+ *

+ * @param numBytes the number of bytes in the byte array

+ * @param expectedCount the expected number of roundtrippable permutations

+ */

+ static void testBytes(int numBytes, long expectedCount)

+ throws UnsupportedEncodingException {

+ testBytes(numBytes, expectedCount, 0, -1);

+ }

+ /**

+ * Helper to run the loop to test all the permutations for the number of bytes

+ * specified. This overload is useful for debugging to get the loop to start

+ * at a certain character.

+ *

+ * @param numBytes the number of bytes in the byte array

+ * @param expectedCount the expected number of roundtrippable permutations

+ * @param start the starting bytes encoded as a long as big-endian

+ * @param lim the limit of bytes to process encoded as a long as big-endian,

+ * or -1 to mean the max limit for numBytes

+ */

+ static void testBytes(int numBytes, long expectedCount, long start, long lim)

+ throws UnsupportedEncodingException {

+ Random rnd = new Random();

+ byte[] bytes = new byte[numBytes];

+ if (lim == -1) {

+ lim = 1L << (numBytes * 8);

+ }

+ long count = 0;

+ long countRoundTripped = 0;

+ for (long byteChar = start; byteChar < lim; byteChar++) {

+ long tmpByteChar = byteChar;

+ for (int i = 0; i < numBytes; i++) {

+ bytes[bytes.length - i - 1] = (byte) tmpByteChar;

+ tmpByteChar = tmpByteChar >> 8;

+ }

+ ByteString bs = ByteString.copyFrom(bytes);

+ boolean isRoundTrippable = bs.isValidUtf8();

+ String s = new String(bytes, Internal.UTF_8);

+ byte[] bytesReencoded = s.getBytes(Internal.UTF_8);

+ boolean bytesEqual = Arrays.equals(bytes, bytesReencoded);

+ if (bytesEqual != isRoundTrippable) {

+ outputFailure(byteChar, bytes, bytesReencoded);

+ }

+ // Check agreement with static Utf8 methods.

+ assertEquals(isRoundTrippable, Utf8.isValidUtf8(bytes));

+ assertEquals(isRoundTrippable, Utf8.isValidUtf8(bytes, 0, numBytes));

+ // Test partial sequences.

+ // Partition numBytes into three segments (not necessarily non-empty).

+ int i = rnd.nextInt(numBytes);

+ int j = rnd.nextInt(numBytes);

+ if (j < i) {

+ int tmp = i; i = j; j = tmp;

+ }

+ int state1 = Utf8.partialIsValidUtf8(Utf8.COMPLETE, bytes, 0, i);

+ int state2 = Utf8.partialIsValidUtf8(state1, bytes, i, j);

+ int state3 = Utf8.partialIsValidUtf8(state2, bytes, j, numBytes);

+ if (isRoundTrippable != (state3 == Utf8.COMPLETE)) {

+ System.out.printf("state=%04x %04x %04x i=%d j=%d%n",

+ state1, state2, state3, i, j);

+ outputFailure(byteChar, bytes, bytesReencoded);

+ }

+ assertEquals(isRoundTrippable, (state3 == Utf8.COMPLETE));

+ // Test ropes built out of small partial sequences

+ ByteString rope = RopeByteString.newInstanceForTest(

+ bs.substring(0, i),

+ RopeByteString.newInstanceForTest(

+ bs.substring(i, j),

+ bs.substring(j, numBytes)));

+ assertSame(RopeByteString.class, rope.getClass());

+ ByteString[] byteStrings = { bs, bs.substring(0, numBytes), rope };

+ for (ByteString x : byteStrings) {

+ assertEquals(isRoundTrippable,

+ x.isValidUtf8());

+ assertEquals(state3,

+ x.partialIsValidUtf8(Utf8.COMPLETE, 0, numBytes));

+ assertEquals(state1,

+ x.partialIsValidUtf8(Utf8.COMPLETE, 0, i));

+ assertEquals(state1,

+ x.substring(0, i).partialIsValidUtf8(Utf8.COMPLETE, 0, i));

+ assertEquals(state2,

+ x.partialIsValidUtf8(state1, i, j - i));

+ assertEquals(state2,

+ x.substring(i, j).partialIsValidUtf8(state1, 0, j - i));

+ assertEquals(state3,

+ x.partialIsValidUtf8(state2, j, numBytes - j));

+ assertEquals(state3,

+ x.substring(j, numBytes)

+ .partialIsValidUtf8(state2, 0, numBytes - j));

+ }

+ // ByteString reduplication should not affect its UTF-8 validity.

+ ByteString ropeADope =

+ RopeByteString.newInstanceForTest(bs, bs.substring(0, numBytes));

+ assertEquals(isRoundTrippable, ropeADope.isValidUtf8());

+ if (isRoundTrippable) {

+ countRoundTripped++;

+ }

+ count++;

+ if (byteChar != 0 && byteChar % 1000000L == 0) {

+ logger.info("Processed " + (byteChar / 1000000L) +

+ " million characters");

+ }

+ logger.info("Round tripped " + countRoundTripped + " of " + count);

+ assertEquals(expectedCount, countRoundTripped);

+ }

+ /**

+ * Variation of {@link #testBytes} that does less allocation using the

+ * low-level encoders/decoders directly. Checked in because it's useful for

+ * debugging when trying to process bytes faster, but since it doesn't use the

+ * actual String class, it's possible for incompatibilities to develop

+ * (although unlikely).

+ *

+ * @param numBytes the number of bytes in the byte array

+ * @param expectedCount the expected number of roundtrippable permutations

+ * @param start the starting bytes encoded as a long as big-endian

+ * @param lim the limit of bytes to process encoded as a long as big-endian,

+ * or -1 to mean the max limit for numBytes

+ */

+ void testBytesUsingByteBuffers(

+ int numBytes, long expectedCount, long start, long lim)

+ throws UnsupportedEncodingException {

+ CharsetDecoder decoder = Internal.UTF_8.newDecoder()

+ .onMalformedInput(CodingErrorAction.REPLACE)

+ .onUnmappableCharacter(CodingErrorAction.REPLACE);

+ CharsetEncoder encoder = Internal.UTF_8.newEncoder()

+ .onMalformedInput(CodingErrorAction.REPLACE)

+ .onUnmappableCharacter(CodingErrorAction.REPLACE);

+ byte[] bytes = new byte[numBytes];

+ int maxChars = (int) (decoder.maxCharsPerByte() * numBytes) + 1;

+ char[] charsDecoded =

+ new char[(int) (decoder.maxCharsPerByte() * numBytes) + 1];

+ int maxBytes = (int) (encoder.maxBytesPerChar() * maxChars) + 1;

+ byte[] bytesReencoded = new byte[maxBytes];

+ ByteBuffer bb = ByteBuffer.wrap(bytes);

+ CharBuffer cb = CharBuffer.wrap(charsDecoded);

+ ByteBuffer bbReencoded = ByteBuffer.wrap(bytesReencoded);

+ if (lim == -1) {

+ lim = 1L << (numBytes * 8);

+ }

+ long count = 0;

+ long countRoundTripped = 0;

+ for (long byteChar = start; byteChar < lim; byteChar++) {

+ bb.rewind();

+ bb.limit(bytes.length);

+ cb.rewind();

+ cb.limit(charsDecoded.length);

+ bbReencoded.rewind();

+ bbReencoded.limit(bytesReencoded.length);

+ encoder.reset();

+ decoder.reset();

+ long tmpByteChar = byteChar;

+ for (int i = 0; i < bytes.length; i++) {

+ bytes[bytes.length - i - 1] = (byte) tmpByteChar;

+ tmpByteChar = tmpByteChar >> 8;

+ }

+ boolean isRoundTrippable = ByteString.copyFrom(bytes).isValidUtf8();

+ CoderResult result = decoder.decode(bb, cb, true);

+ assertFalse(result.isError());

+ result = decoder.flush(cb);

+ assertFalse(result.isError());

+ int charLen = cb.position();

+ cb.rewind();

+ cb.limit(charLen);

+ result = encoder.encode(cb, bbReencoded, true);

+ assertFalse(result.isError());

+ result = encoder.flush(bbReencoded);

+ assertFalse(result.isError());

+ boolean bytesEqual = true;

+ int bytesLen = bbReencoded.position();

+ if (bytesLen != numBytes) {

+ bytesEqual = false;

+ } else {

+ for (int i = 0; i < numBytes; i++) {

+ if (bytes[i] != bytesReencoded[i]) {

+ bytesEqual = false;

+ break;

+ }

+ if (bytesEqual != isRoundTrippable) {

+ outputFailure(byteChar, bytes, bytesReencoded, bytesLen);

+ }

+ count++;

+ if (isRoundTrippable) {

+ countRoundTripped++;

+ }

+ if (byteChar != 0 && byteChar % 1000000 == 0) {

+ logger.info("Processed " + (byteChar / 1000000) +

+ " million characters");

+ }

+ logger.info("Round tripped " + countRoundTripped + " of " + count);

+ assertEquals(expectedCount, countRoundTripped);

+ }

+ private static void outputFailure(long byteChar, byte[] bytes, byte[] after) {

+ outputFailure(byteChar, bytes, after, after.length);

+ }

+ private static void outputFailure(long byteChar, byte[] bytes, byte[] after,

+ int len) {

+ fail("Failure: (" + Long.toHexString(byteChar) + ") " +

+ toHexString(bytes) + " => " + toHexString(after, len));

+ }

+ private static String toHexString(byte[] b) {

+ return toHexString(b, b.length);

+ }

+ private static String toHexString(byte[] b, int len) {

+ StringBuilder s = new StringBuilder();

+ s.append("\"");

+ for (int i = 0; i < len; i++) {

+ if (i > 0) {

+ s.append(" ");

+ }

+ s.append(String.format("%02x", b[i] & 0xFF));

+ }

+ s.append("\"");

+ return s.toString();

+ }