| Index: third_party/protobuf/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
|
| diff --git a/third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java b/third_party/protobuf/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
|
| similarity index 65%
|
| rename from third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
|
| rename to third_party/protobuf/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
|
| index 321669f3147b2d982f02125a689039963a1ddefc..16a808bf31b896feb444b8ec921759f7f7789b6e 100644
|
| --- a/third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
|
| +++ b/third_party/protobuf/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
|
| @@ -30,9 +30,13 @@
|
|
|
| package com.google.protobuf;
|
|
|
| -import static junit.framework.Assert.*;
|
| +import static org.junit.Assert.assertEquals;
|
| +import static org.junit.Assert.assertFalse;
|
| +import static org.junit.Assert.assertSame;
|
| +import static org.junit.Assert.assertTrue;
|
| +import static org.junit.Assert.fail;
|
|
|
| -import java.io.UnsupportedEncodingException;
|
| +import java.lang.ref.SoftReference;
|
| import java.nio.ByteBuffer;
|
| import java.nio.CharBuffer;
|
| import java.nio.charset.CharsetDecoder;
|
| @@ -52,64 +56,105 @@ import java.util.logging.Logger;
|
| * @author jonp@google.com (Jon Perlow)
|
| * @author martinrb@google.com (Martin Buchholz)
|
| */
|
| -class IsValidUtf8TestUtil {
|
| - private static Logger logger = Logger.getLogger(
|
| - IsValidUtf8TestUtil.class.getName());
|
| +final class IsValidUtf8TestUtil {
|
| + private static Logger logger = Logger.getLogger(IsValidUtf8TestUtil.class.getName());
|
| +
|
| + private IsValidUtf8TestUtil() {}
|
| +
|
| + static interface ByteStringFactory {
|
| + ByteString newByteString(byte[] bytes);
|
| + }
|
| +
|
| + static final ByteStringFactory LITERAL_FACTORY = new ByteStringFactory() {
|
| + @Override
|
| + public ByteString newByteString(byte[] bytes) {
|
| + return ByteString.wrap(bytes);
|
| + }
|
| + };
|
| +
|
| + static final ByteStringFactory HEAP_NIO_FACTORY = new ByteStringFactory() {
|
| + @Override
|
| + public ByteString newByteString(byte[] bytes) {
|
| + return new NioByteString(ByteBuffer.wrap(bytes));
|
| + }
|
| + };
|
| +
|
| + private static ThreadLocal<SoftReference<ByteBuffer>> directBuffer =
|
| + new ThreadLocal<SoftReference<ByteBuffer>>();
|
| +
|
| + /**
|
| +   * Factory for {@link ByteString} instances backed by a direct {@link ByteBuffer}. To reduce
|
| +   * direct memory usage, this reuses a thread-local direct buffer. This means that each call will
|
| +   * overwrite the buffer's contents from the previous call, so the calling code must be careful
|
| +   * not to continue using a {@link ByteString} returned from a previous invocation.
|
| + */
|
| + static final ByteStringFactory DIRECT_NIO_FACTORY = new ByteStringFactory() {
|
| + @Override
|
| + public ByteString newByteString(byte[] bytes) {
|
| + SoftReference<ByteBuffer> ref = directBuffer.get();
|
| + ByteBuffer buffer = ref == null ? null : ref.get();
|
| + if (buffer == null || buffer.capacity() < bytes.length) {
|
| + buffer = ByteBuffer.allocateDirect(bytes.length);
|
| + directBuffer.set(new SoftReference<ByteBuffer>(buffer));
|
| + }
|
| + buffer.clear();
|
| + buffer.put(bytes);
|
| + buffer.flip();
|
| + return new NioByteString(buffer);
|
| + }
|
| + };
|
|
|
| // 128 - [chars 0x0000 to 0x007f]
|
| - static long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1;
|
| + static final long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1;
|
|
|
| // 128
|
| - static long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT =
|
| - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
|
| + static final long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT = ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
|
|
|
| // 1920 [chars 0x0080 to 0x07FF]
|
| - static long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1;
|
| + static final long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1;
|
|
|
| // 18,304
|
| - static long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
|
| + static final long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
|
| // Both bytes are one byte characters
|
| (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 2) +
|
| // The possible number of two byte characters
|
| TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS;
|
|
|
| // 2048
|
| - static long THREE_BYTE_SURROGATES = 2 * 1024;
|
| + static final long THREE_BYTE_SURROGATES = 2 * 1024;
|
|
|
| // 61,440 [chars 0x0800 to 0xFFFF, minus surrogates]
|
| - static long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
|
| + static final long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
|
| 0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES;
|
|
|
| // 2,650,112
|
| - static long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
|
| + static final long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
|
| // All one byte characters
|
| (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 3) +
|
| // One two byte character and a one byte character
|
| - 2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
|
| - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
|
| - // Three byte characters
|
| + 2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
|
| + // Three byte characters
|
| THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
|
|
|
| // 1,048,576 [chars 0x10000L to 0x10FFFF]
|
| - static long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1;
|
| + static final long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1;
|
|
|
| // 289,571,839
|
| - static long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
|
| + static final long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
|
| // All one byte characters
|
| (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 4) +
|
| // One and three byte characters
|
| - 2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
|
| - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
|
| + 2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
|
| // Two two byte characters
|
| TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS +
|
| // Permutations of one and two byte characters
|
| - 3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
|
| - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
|
| - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
|
| + 3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
|
| + * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
|
| + +
|
| // Four byte characters
|
| FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS;
|
|
|
| - static class Shard {
|
| + static final class Shard {
|
| final long index;
|
| final long start;
|
| final long lim;
|
| @@ -138,7 +183,7 @@ class IsValidUtf8TestUtil {
|
|
|
| // 97-111 are all 2342912
|
| for (int i = 97; i <= 111; i++) {
|
| - expected[i] = 2342912;
|
| + expected[i] = 2342912;
|
| }
|
|
|
| // 113-117 are all 1048576
|
| @@ -158,22 +203,18 @@ class IsValidUtf8TestUtil {
|
| return expected;
|
| }
|
|
|
| - static final List<Shard> FOUR_BYTE_SHARDS = generateFourByteShards(
|
| - 128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES);
|
| + static final List<Shard> FOUR_BYTE_SHARDS =
|
| + generateFourByteShards(128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES);
|
|
|
|
|
| - private static List<Shard> generateFourByteShards(
|
| - int numShards, long[] expected) {
|
| + private static List<Shard> generateFourByteShards(int numShards, long[] expected) {
|
| assertEquals(numShards, expected.length);
|
| List<Shard> shards = new ArrayList<Shard>(numShards);
|
| long LIM = 1L << 32;
|
| long increment = LIM / numShards;
|
| assertTrue(LIM % numShards == 0);
|
| for (int i = 0; i < numShards; i++) {
|
| - shards.add(new Shard(i,
|
| - increment * i,
|
| - increment * (i + 1),
|
| - expected[i]));
|
| + shards.add(new Shard(i, increment * i, increment * (i + 1), expected[i]));
|
| }
|
| return shards;
|
| }
|
| @@ -182,12 +223,12 @@ class IsValidUtf8TestUtil {
|
| * Helper to run the loop to test all the permutations for the number of bytes
|
| * specified.
|
| *
|
| + * @param factory the factory for {@link ByteString} instances.
|
| * @param numBytes the number of bytes in the byte array
|
| * @param expectedCount the expected number of roundtrippable permutations
|
| */
|
| - static void testBytes(int numBytes, long expectedCount)
|
| - throws UnsupportedEncodingException {
|
| - testBytes(numBytes, expectedCount, 0, -1);
|
| + static void testBytes(ByteStringFactory factory, int numBytes, long expectedCount) {
|
| + testBytes(factory, numBytes, expectedCount, 0, -1);
|
| }
|
|
|
| /**
|
| @@ -195,14 +236,15 @@ class IsValidUtf8TestUtil {
|
| * specified. This overload is useful for debugging to get the loop to start
|
| * at a certain character.
|
| *
|
| + * @param factory the factory for {@link ByteString} instances.
|
| * @param numBytes the number of bytes in the byte array
|
| * @param expectedCount the expected number of roundtrippable permutations
|
| * @param start the starting bytes encoded as a long as big-endian
|
| * @param lim the limit of bytes to process encoded as a long as big-endian,
|
| * or -1 to mean the max limit for numBytes
|
| */
|
| - static void testBytes(int numBytes, long expectedCount, long start, long lim)
|
| - throws UnsupportedEncodingException {
|
| + static void testBytes(
|
| + ByteStringFactory factory, int numBytes, long expectedCount, long start, long lim) {
|
| Random rnd = new Random();
|
| byte[] bytes = new byte[numBytes];
|
|
|
| @@ -217,7 +259,7 @@ class IsValidUtf8TestUtil {
|
| bytes[bytes.length - i - 1] = (byte) tmpByteChar;
|
| tmpByteChar = tmpByteChar >> 8;
|
| }
|
| - ByteString bs = ByteString.copyFrom(bytes);
|
| + ByteString bs = factory.newByteString(bytes);
|
| boolean isRoundTrippable = bs.isValidUtf8();
|
| String s = new String(bytes, Internal.UTF_8);
|
| byte[] bytesReencoded = s.getBytes(Internal.UTF_8);
|
| @@ -236,14 +278,15 @@ class IsValidUtf8TestUtil {
|
| int i = rnd.nextInt(numBytes);
|
| int j = rnd.nextInt(numBytes);
|
| if (j < i) {
|
| - int tmp = i; i = j; j = tmp;
|
| + int tmp = i;
|
| + i = j;
|
| + j = tmp;
|
| }
|
| int state1 = Utf8.partialIsValidUtf8(Utf8.COMPLETE, bytes, 0, i);
|
| int state2 = Utf8.partialIsValidUtf8(state1, bytes, i, j);
|
| int state3 = Utf8.partialIsValidUtf8(state2, bytes, j, numBytes);
|
| if (isRoundTrippable != (state3 == Utf8.COMPLETE)) {
|
| - System.out.printf("state=%04x %04x %04x i=%d j=%d%n",
|
| - state1, state2, state3, i, j);
|
| + System.out.printf("state=%04x %04x %04x i=%d j=%d%n", state1, state2, state3, i, j);
|
| outputFailure(byteChar, bytes, bytesReencoded);
|
| }
|
| assertEquals(isRoundTrippable, (state3 == Utf8.COMPLETE));
|
| @@ -251,36 +294,24 @@ class IsValidUtf8TestUtil {
|
| // Test ropes built out of small partial sequences
|
| ByteString rope = RopeByteString.newInstanceForTest(
|
| bs.substring(0, i),
|
| - RopeByteString.newInstanceForTest(
|
| - bs.substring(i, j),
|
| - bs.substring(j, numBytes)));
|
| + RopeByteString.newInstanceForTest(bs.substring(i, j), bs.substring(j, numBytes)));
|
| assertSame(RopeByteString.class, rope.getClass());
|
|
|
| - ByteString[] byteStrings = { bs, bs.substring(0, numBytes), rope };
|
| + ByteString[] byteStrings = {bs, bs.substring(0, numBytes), rope};
|
| for (ByteString x : byteStrings) {
|
| - assertEquals(isRoundTrippable,
|
| - x.isValidUtf8());
|
| - assertEquals(state3,
|
| - x.partialIsValidUtf8(Utf8.COMPLETE, 0, numBytes));
|
| -
|
| - assertEquals(state1,
|
| - x.partialIsValidUtf8(Utf8.COMPLETE, 0, i));
|
| - assertEquals(state1,
|
| - x.substring(0, i).partialIsValidUtf8(Utf8.COMPLETE, 0, i));
|
| - assertEquals(state2,
|
| - x.partialIsValidUtf8(state1, i, j - i));
|
| - assertEquals(state2,
|
| - x.substring(i, j).partialIsValidUtf8(state1, 0, j - i));
|
| - assertEquals(state3,
|
| - x.partialIsValidUtf8(state2, j, numBytes - j));
|
| - assertEquals(state3,
|
| - x.substring(j, numBytes)
|
| - .partialIsValidUtf8(state2, 0, numBytes - j));
|
| + assertEquals(isRoundTrippable, x.isValidUtf8());
|
| + assertEquals(state3, x.partialIsValidUtf8(Utf8.COMPLETE, 0, numBytes));
|
| +
|
| + assertEquals(state1, x.partialIsValidUtf8(Utf8.COMPLETE, 0, i));
|
| + assertEquals(state1, x.substring(0, i).partialIsValidUtf8(Utf8.COMPLETE, 0, i));
|
| + assertEquals(state2, x.partialIsValidUtf8(state1, i, j - i));
|
| + assertEquals(state2, x.substring(i, j).partialIsValidUtf8(state1, 0, j - i));
|
| + assertEquals(state3, x.partialIsValidUtf8(state2, j, numBytes - j));
|
| + assertEquals(state3, x.substring(j, numBytes).partialIsValidUtf8(state2, 0, numBytes - j));
|
| }
|
|
|
| // ByteString reduplication should not affect its UTF-8 validity.
|
| - ByteString ropeADope =
|
| - RopeByteString.newInstanceForTest(bs, bs.substring(0, numBytes));
|
| + ByteString ropeADope = RopeByteString.newInstanceForTest(bs, bs.substring(0, numBytes));
|
| assertEquals(isRoundTrippable, ropeADope.isValidUtf8());
|
|
|
| if (isRoundTrippable) {
|
| @@ -288,8 +319,7 @@ class IsValidUtf8TestUtil {
|
| }
|
| count++;
|
| if (byteChar != 0 && byteChar % 1000000L == 0) {
|
| - logger.info("Processed " + (byteChar / 1000000L) +
|
| - " million characters");
|
| + logger.info("Processed " + (byteChar / 1000000L) + " million characters");
|
| }
|
| }
|
| logger.info("Round tripped " + countRoundTripped + " of " + count);
|
| @@ -303,25 +333,26 @@ class IsValidUtf8TestUtil {
|
| * actual String class, it's possible for incompatibilities to develop
|
| * (although unlikely).
|
| *
|
| + * @param factory the factory for {@link ByteString} instances.
|
| * @param numBytes the number of bytes in the byte array
|
| * @param expectedCount the expected number of roundtrippable permutations
|
| * @param start the starting bytes encoded as a long as big-endian
|
| * @param lim the limit of bytes to process encoded as a long as big-endian,
|
| * or -1 to mean the max limit for numBytes
|
| */
|
| - void testBytesUsingByteBuffers(
|
| - int numBytes, long expectedCount, long start, long lim)
|
| - throws UnsupportedEncodingException {
|
| - CharsetDecoder decoder = Internal.UTF_8.newDecoder()
|
| - .onMalformedInput(CodingErrorAction.REPLACE)
|
| - .onUnmappableCharacter(CodingErrorAction.REPLACE);
|
| - CharsetEncoder encoder = Internal.UTF_8.newEncoder()
|
| - .onMalformedInput(CodingErrorAction.REPLACE)
|
| - .onUnmappableCharacter(CodingErrorAction.REPLACE);
|
| + static void testBytesUsingByteBuffers(
|
| + ByteStringFactory factory, int numBytes, long expectedCount, long start, long lim) {
|
| + CharsetDecoder decoder =
|
| + Internal.UTF_8.newDecoder()
|
| + .onMalformedInput(CodingErrorAction.REPLACE)
|
| + .onUnmappableCharacter(CodingErrorAction.REPLACE);
|
| + CharsetEncoder encoder =
|
| + Internal.UTF_8.newEncoder()
|
| + .onMalformedInput(CodingErrorAction.REPLACE)
|
| + .onUnmappableCharacter(CodingErrorAction.REPLACE);
|
| byte[] bytes = new byte[numBytes];
|
| int maxChars = (int) (decoder.maxCharsPerByte() * numBytes) + 1;
|
| - char[] charsDecoded =
|
| - new char[(int) (decoder.maxCharsPerByte() * numBytes) + 1];
|
| + char[] charsDecoded = new char[(int) (decoder.maxCharsPerByte() * numBytes) + 1];
|
| int maxBytes = (int) (encoder.maxBytesPerChar() * maxChars) + 1;
|
| byte[] bytesReencoded = new byte[maxBytes];
|
|
|
| @@ -347,7 +378,7 @@ class IsValidUtf8TestUtil {
|
| bytes[bytes.length - i - 1] = (byte) tmpByteChar;
|
| tmpByteChar = tmpByteChar >> 8;
|
| }
|
| - boolean isRoundTrippable = ByteString.copyFrom(bytes).isValidUtf8();
|
| + boolean isRoundTrippable = factory.newByteString(bytes).isValidUtf8();
|
| CoderResult result = decoder.decode(bb, cb, true);
|
| assertFalse(result.isError());
|
| result = decoder.flush(cb);
|
| @@ -382,8 +413,7 @@ class IsValidUtf8TestUtil {
|
| countRoundTripped++;
|
| }
|
| if (byteChar != 0 && byteChar % 1000000 == 0) {
|
| - logger.info("Processed " + (byteChar / 1000000) +
|
| - " million characters");
|
| + logger.info("Processed " + (byteChar / 1000000) + " million characters");
|
| }
|
| }
|
| logger.info("Round tripped " + countRoundTripped + " of " + count);
|
| @@ -394,10 +424,9 @@ class IsValidUtf8TestUtil {
|
| outputFailure(byteChar, bytes, after, after.length);
|
| }
|
|
|
| - private static void outputFailure(long byteChar, byte[] bytes, byte[] after,
|
| - int len) {
|
| - fail("Failure: (" + Long.toHexString(byteChar) + ") " +
|
| - toHexString(bytes) + " => " + toHexString(after, len));
|
| + private static void outputFailure(long byteChar, byte[] bytes, byte[] after, int len) {
|
| + fail("Failure: (" + Long.toHexString(byteChar) + ") " + toHexString(bytes) + " => "
|
| + + toHexString(after, len));
|
| }
|
|
|
| private static String toHexString(byte[] b) {
|
| @@ -416,5 +445,4 @@ class IsValidUtf8TestUtil {
|
| s.append("\"");
|
| return s.toString();
|
| }
|
| -
|
| }
|
|
|