Index: third_party/protobuf/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java |
diff --git a/third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java b/third_party/protobuf/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java |
similarity index 65% |
rename from third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java |
rename to third_party/protobuf/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java |
index 321669f3147b2d982f02125a689039963a1ddefc..16a808bf31b896feb444b8ec921759f7f7789b6e 100644 |
--- a/third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java |
+++ b/third_party/protobuf/java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java |
@@ -30,9 +30,13 @@ |
package com.google.protobuf; |
-import static junit.framework.Assert.*; |
+import static org.junit.Assert.assertEquals; |
+import static org.junit.Assert.assertFalse; |
+import static org.junit.Assert.assertSame; |
+import static org.junit.Assert.assertTrue; |
+import static org.junit.Assert.fail; |
-import java.io.UnsupportedEncodingException; |
+import java.lang.ref.SoftReference; |
import java.nio.ByteBuffer; |
import java.nio.CharBuffer; |
import java.nio.charset.CharsetDecoder; |
@@ -52,64 +56,105 @@ import java.util.logging.Logger; |
* @author jonp@google.com (Jon Perlow) |
* @author martinrb@google.com (Martin Buchholz) |
*/ |
-class IsValidUtf8TestUtil { |
- private static Logger logger = Logger.getLogger( |
- IsValidUtf8TestUtil.class.getName()); |
+final class IsValidUtf8TestUtil { |
+ private static Logger logger = Logger.getLogger(IsValidUtf8TestUtil.class.getName()); |
+ |
+ private IsValidUtf8TestUtil() {} |
+ |
+ static interface ByteStringFactory { |
+ ByteString newByteString(byte[] bytes); |
+ } |
+ |
+ static final ByteStringFactory LITERAL_FACTORY = new ByteStringFactory() { |
+ @Override |
+ public ByteString newByteString(byte[] bytes) { |
+ return ByteString.wrap(bytes); |
+ } |
+ }; |
+ |
+ static final ByteStringFactory HEAP_NIO_FACTORY = new ByteStringFactory() { |
+ @Override |
+ public ByteString newByteString(byte[] bytes) { |
+ return new NioByteString(ByteBuffer.wrap(bytes)); |
+ } |
+ }; |
+ |
+ private static ThreadLocal<SoftReference<ByteBuffer>> directBuffer = |
+ new ThreadLocal<SoftReference<ByteBuffer>>(); |
+ |
+ /** |
+ * Factory for {@link ByteString} instances backed by a direct {@link ByteBuffer}. To reduce |
+ * direct memory usage, this uses a thread-local direct buffer. This means that each call will |
+ * overwrite the buffer's contents from the previous call, so the calling code must be careful |
+ * not to continue using a {@link ByteString} returned from a previous invocation. |
+ */ |
+ static final ByteStringFactory DIRECT_NIO_FACTORY = new ByteStringFactory() { |
+ @Override |
+ public ByteString newByteString(byte[] bytes) { |
+ SoftReference<ByteBuffer> ref = directBuffer.get(); |
+ ByteBuffer buffer = ref == null ? null : ref.get(); |
+ if (buffer == null || buffer.capacity() < bytes.length) { |
+ buffer = ByteBuffer.allocateDirect(bytes.length); |
+ directBuffer.set(new SoftReference<ByteBuffer>(buffer)); |
+ } |
+ buffer.clear(); |
+ buffer.put(bytes); |
+ buffer.flip(); |
+ return new NioByteString(buffer); |
+ } |
+ }; |
// 128 - [chars 0x0000 to 0x007f] |
- static long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1; |
+ static final long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1; |
// 128 |
- static long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT = |
- ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS; |
+ static final long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT = ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS; |
// 1920 [chars 0x0080 to 0x07FF] |
- static long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1; |
+ static final long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1; |
// 18,304 |
- static long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT = |
+ static final long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT = |
// Both bytes are one byte characters |
(long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 2) + |
// The possible number of two byte characters |
TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS; |
// 2048 |
- static long THREE_BYTE_SURROGATES = 2 * 1024; |
+ static final long THREE_BYTE_SURROGATES = 2 * 1024; |
// 61,440 [chars 0x0800 to 0xFFFF, minus surrogates] |
- static long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS = |
+ static final long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS = |
0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES; |
// 2,650,112 |
- static long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT = |
+ static final long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT = |
// All one byte characters |
(long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 3) + |
// One two byte character and a one byte character |
- 2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * |
- ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS + |
- // Three byte characters |
+ 2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS + |
+ // Three byte characters |
THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS; |
// 1,048,576 [chars 0x10000L to 0x10FFFF] |
- static long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1; |
+ static final long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1; |
// 289,571,839 |
- static long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT = |
+ static final long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT = |
// All one byte characters |
(long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 4) + |
// One and three byte characters |
- 2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS * |
- ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS + |
+ 2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS + |
// Two two byte characters |
TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS + |
// Permutations of one and two byte characters |
- 3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * |
- ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS * |
- ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS + |
+ 3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS |
+ * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS |
+ + |
// Four byte characters |
FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS; |
- static class Shard { |
+ static final class Shard { |
final long index; |
final long start; |
final long lim; |
@@ -138,7 +183,7 @@ class IsValidUtf8TestUtil { |
// 97-111 are all 2342912 |
for (int i = 97; i <= 111; i++) { |
- expected[i] = 2342912; |
+ expected[i] = 2342912; |
} |
// 113-117 are all 1048576 |
@@ -158,22 +203,18 @@ class IsValidUtf8TestUtil { |
return expected; |
} |
- static final List<Shard> FOUR_BYTE_SHARDS = generateFourByteShards( |
- 128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES); |
+ static final List<Shard> FOUR_BYTE_SHARDS = |
+ generateFourByteShards(128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES); |
- private static List<Shard> generateFourByteShards( |
- int numShards, long[] expected) { |
+ private static List<Shard> generateFourByteShards(int numShards, long[] expected) { |
assertEquals(numShards, expected.length); |
List<Shard> shards = new ArrayList<Shard>(numShards); |
long LIM = 1L << 32; |
long increment = LIM / numShards; |
assertTrue(LIM % numShards == 0); |
for (int i = 0; i < numShards; i++) { |
- shards.add(new Shard(i, |
- increment * i, |
- increment * (i + 1), |
- expected[i])); |
+ shards.add(new Shard(i, increment * i, increment * (i + 1), expected[i])); |
} |
return shards; |
} |
@@ -182,12 +223,12 @@ class IsValidUtf8TestUtil { |
* Helper to run the loop to test all the permutations for the number of bytes |
* specified. |
* |
+ * @param factory the factory for {@link ByteString} instances. |
* @param numBytes the number of bytes in the byte array |
* @param expectedCount the expected number of roundtrippable permutations |
*/ |
- static void testBytes(int numBytes, long expectedCount) |
- throws UnsupportedEncodingException { |
- testBytes(numBytes, expectedCount, 0, -1); |
+ static void testBytes(ByteStringFactory factory, int numBytes, long expectedCount) { |
+ testBytes(factory, numBytes, expectedCount, 0, -1); |
} |
/** |
@@ -195,14 +236,15 @@ class IsValidUtf8TestUtil { |
* specified. This overload is useful for debugging to get the loop to start |
* at a certain character. |
* |
+ * @param factory the factory for {@link ByteString} instances. |
* @param numBytes the number of bytes in the byte array |
* @param expectedCount the expected number of roundtrippable permutations |
* @param start the starting bytes encoded as a long as big-endian |
* @param lim the limit of bytes to process encoded as a long as big-endian, |
* or -1 to mean the max limit for numBytes |
*/ |
- static void testBytes(int numBytes, long expectedCount, long start, long lim) |
- throws UnsupportedEncodingException { |
+ static void testBytes( |
+ ByteStringFactory factory, int numBytes, long expectedCount, long start, long lim) { |
Random rnd = new Random(); |
byte[] bytes = new byte[numBytes]; |
@@ -217,7 +259,7 @@ class IsValidUtf8TestUtil { |
bytes[bytes.length - i - 1] = (byte) tmpByteChar; |
tmpByteChar = tmpByteChar >> 8; |
} |
- ByteString bs = ByteString.copyFrom(bytes); |
+ ByteString bs = factory.newByteString(bytes); |
boolean isRoundTrippable = bs.isValidUtf8(); |
String s = new String(bytes, Internal.UTF_8); |
byte[] bytesReencoded = s.getBytes(Internal.UTF_8); |
@@ -236,14 +278,15 @@ class IsValidUtf8TestUtil { |
int i = rnd.nextInt(numBytes); |
int j = rnd.nextInt(numBytes); |
if (j < i) { |
- int tmp = i; i = j; j = tmp; |
+ int tmp = i; |
+ i = j; |
+ j = tmp; |
} |
int state1 = Utf8.partialIsValidUtf8(Utf8.COMPLETE, bytes, 0, i); |
int state2 = Utf8.partialIsValidUtf8(state1, bytes, i, j); |
int state3 = Utf8.partialIsValidUtf8(state2, bytes, j, numBytes); |
if (isRoundTrippable != (state3 == Utf8.COMPLETE)) { |
- System.out.printf("state=%04x %04x %04x i=%d j=%d%n", |
- state1, state2, state3, i, j); |
+ System.out.printf("state=%04x %04x %04x i=%d j=%d%n", state1, state2, state3, i, j); |
outputFailure(byteChar, bytes, bytesReencoded); |
} |
assertEquals(isRoundTrippable, (state3 == Utf8.COMPLETE)); |
@@ -251,36 +294,24 @@ class IsValidUtf8TestUtil { |
// Test ropes built out of small partial sequences |
ByteString rope = RopeByteString.newInstanceForTest( |
bs.substring(0, i), |
- RopeByteString.newInstanceForTest( |
- bs.substring(i, j), |
- bs.substring(j, numBytes))); |
+ RopeByteString.newInstanceForTest(bs.substring(i, j), bs.substring(j, numBytes))); |
assertSame(RopeByteString.class, rope.getClass()); |
- ByteString[] byteStrings = { bs, bs.substring(0, numBytes), rope }; |
+ ByteString[] byteStrings = {bs, bs.substring(0, numBytes), rope}; |
for (ByteString x : byteStrings) { |
- assertEquals(isRoundTrippable, |
- x.isValidUtf8()); |
- assertEquals(state3, |
- x.partialIsValidUtf8(Utf8.COMPLETE, 0, numBytes)); |
- |
- assertEquals(state1, |
- x.partialIsValidUtf8(Utf8.COMPLETE, 0, i)); |
- assertEquals(state1, |
- x.substring(0, i).partialIsValidUtf8(Utf8.COMPLETE, 0, i)); |
- assertEquals(state2, |
- x.partialIsValidUtf8(state1, i, j - i)); |
- assertEquals(state2, |
- x.substring(i, j).partialIsValidUtf8(state1, 0, j - i)); |
- assertEquals(state3, |
- x.partialIsValidUtf8(state2, j, numBytes - j)); |
- assertEquals(state3, |
- x.substring(j, numBytes) |
- .partialIsValidUtf8(state2, 0, numBytes - j)); |
+ assertEquals(isRoundTrippable, x.isValidUtf8()); |
+ assertEquals(state3, x.partialIsValidUtf8(Utf8.COMPLETE, 0, numBytes)); |
+ |
+ assertEquals(state1, x.partialIsValidUtf8(Utf8.COMPLETE, 0, i)); |
+ assertEquals(state1, x.substring(0, i).partialIsValidUtf8(Utf8.COMPLETE, 0, i)); |
+ assertEquals(state2, x.partialIsValidUtf8(state1, i, j - i)); |
+ assertEquals(state2, x.substring(i, j).partialIsValidUtf8(state1, 0, j - i)); |
+ assertEquals(state3, x.partialIsValidUtf8(state2, j, numBytes - j)); |
+ assertEquals(state3, x.substring(j, numBytes).partialIsValidUtf8(state2, 0, numBytes - j)); |
} |
// ByteString reduplication should not affect its UTF-8 validity. |
- ByteString ropeADope = |
- RopeByteString.newInstanceForTest(bs, bs.substring(0, numBytes)); |
+ ByteString ropeADope = RopeByteString.newInstanceForTest(bs, bs.substring(0, numBytes)); |
assertEquals(isRoundTrippable, ropeADope.isValidUtf8()); |
if (isRoundTrippable) { |
@@ -288,8 +319,7 @@ class IsValidUtf8TestUtil { |
} |
count++; |
if (byteChar != 0 && byteChar % 1000000L == 0) { |
- logger.info("Processed " + (byteChar / 1000000L) + |
- " million characters"); |
+ logger.info("Processed " + (byteChar / 1000000L) + " million characters"); |
} |
} |
logger.info("Round tripped " + countRoundTripped + " of " + count); |
@@ -303,25 +333,26 @@ class IsValidUtf8TestUtil { |
* actual String class, it's possible for incompatibilities to develop |
* (although unlikely). |
* |
+ * @param factory the factory for {@link ByteString} instances. |
* @param numBytes the number of bytes in the byte array |
* @param expectedCount the expected number of roundtrippable permutations |
* @param start the starting bytes encoded as a long as big-endian |
* @param lim the limit of bytes to process encoded as a long as big-endian, |
* or -1 to mean the max limit for numBytes |
*/ |
- void testBytesUsingByteBuffers( |
- int numBytes, long expectedCount, long start, long lim) |
- throws UnsupportedEncodingException { |
- CharsetDecoder decoder = Internal.UTF_8.newDecoder() |
- .onMalformedInput(CodingErrorAction.REPLACE) |
- .onUnmappableCharacter(CodingErrorAction.REPLACE); |
- CharsetEncoder encoder = Internal.UTF_8.newEncoder() |
- .onMalformedInput(CodingErrorAction.REPLACE) |
- .onUnmappableCharacter(CodingErrorAction.REPLACE); |
+ static void testBytesUsingByteBuffers( |
+ ByteStringFactory factory, int numBytes, long expectedCount, long start, long lim) { |
+ CharsetDecoder decoder = |
+ Internal.UTF_8.newDecoder() |
+ .onMalformedInput(CodingErrorAction.REPLACE) |
+ .onUnmappableCharacter(CodingErrorAction.REPLACE); |
+ CharsetEncoder encoder = |
+ Internal.UTF_8.newEncoder() |
+ .onMalformedInput(CodingErrorAction.REPLACE) |
+ .onUnmappableCharacter(CodingErrorAction.REPLACE); |
byte[] bytes = new byte[numBytes]; |
int maxChars = (int) (decoder.maxCharsPerByte() * numBytes) + 1; |
- char[] charsDecoded = |
- new char[(int) (decoder.maxCharsPerByte() * numBytes) + 1]; |
+ char[] charsDecoded = new char[(int) (decoder.maxCharsPerByte() * numBytes) + 1]; |
int maxBytes = (int) (encoder.maxBytesPerChar() * maxChars) + 1; |
byte[] bytesReencoded = new byte[maxBytes]; |
@@ -347,7 +378,7 @@ class IsValidUtf8TestUtil { |
bytes[bytes.length - i - 1] = (byte) tmpByteChar; |
tmpByteChar = tmpByteChar >> 8; |
} |
- boolean isRoundTrippable = ByteString.copyFrom(bytes).isValidUtf8(); |
+ boolean isRoundTrippable = factory.newByteString(bytes).isValidUtf8(); |
CoderResult result = decoder.decode(bb, cb, true); |
assertFalse(result.isError()); |
result = decoder.flush(cb); |
@@ -382,8 +413,7 @@ class IsValidUtf8TestUtil { |
countRoundTripped++; |
} |
if (byteChar != 0 && byteChar % 1000000 == 0) { |
- logger.info("Processed " + (byteChar / 1000000) + |
- " million characters"); |
+ logger.info("Processed " + (byteChar / 1000000) + " million characters"); |
} |
} |
logger.info("Round tripped " + countRoundTripped + " of " + count); |
@@ -394,10 +424,9 @@ class IsValidUtf8TestUtil { |
outputFailure(byteChar, bytes, after, after.length); |
} |
- private static void outputFailure(long byteChar, byte[] bytes, byte[] after, |
- int len) { |
- fail("Failure: (" + Long.toHexString(byteChar) + ") " + |
- toHexString(bytes) + " => " + toHexString(after, len)); |
+ private static void outputFailure(long byteChar, byte[] bytes, byte[] after, int len) { |
+ fail("Failure: (" + Long.toHexString(byteChar) + ") " + toHexString(bytes) + " => " |
+ + toHexString(after, len)); |
} |
private static String toHexString(byte[] b) { |
@@ -416,5 +445,4 @@ class IsValidUtf8TestUtil { |
s.append("\""); |
return s.toString(); |
} |
- |
} |