| Index: third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
|
| diff --git a/third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java b/third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
|
| deleted file mode 100644
|
| index 4cb3d5b91ef33dcfc0a3b77a4b87940bddf08884..0000000000000000000000000000000000000000
|
| --- a/third_party/protobuf/java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
|
| +++ /dev/null
|
| @@ -1,421 +0,0 @@
|
| -// Protocol Buffers - Google's data interchange format
|
| -// Copyright 2008 Google Inc. All rights reserved.
|
| -// http://code.google.com/p/protobuf/
|
| -//
|
| -// Redistribution and use in source and binary forms, with or without
|
| -// modification, are permitted provided that the following conditions are
|
| -// met:
|
| -//
|
| -// * Redistributions of source code must retain the above copyright
|
| -// notice, this list of conditions and the following disclaimer.
|
| -// * Redistributions in binary form must reproduce the above
|
| -// copyright notice, this list of conditions and the following disclaimer
|
| -// in the documentation and/or other materials provided with the
|
| -// distribution.
|
| -// * Neither the name of Google Inc. nor the names of its
|
| -// contributors may be used to endorse or promote products derived from
|
| -// this software without specific prior written permission.
|
| -//
|
| -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| -
|
| -package com.google.protobuf;
|
| -
|
| -import static junit.framework.Assert.*;
|
| -
|
| -import java.io.UnsupportedEncodingException;
|
| -import java.util.ArrayList;
|
| -import java.util.Arrays;
|
| -import java.util.List;
|
| -import java.util.Random;
|
| -import java.util.logging.Logger;
|
| -import java.nio.charset.CharsetDecoder;
|
| -import java.nio.charset.Charset;
|
| -import java.nio.charset.CodingErrorAction;
|
| -import java.nio.charset.CharsetEncoder;
|
| -import java.nio.charset.CoderResult;
|
| -import java.nio.ByteBuffer;
|
| -import java.nio.CharBuffer;
|
| -
|
| -/**
|
| - * Shared testing code for {@link IsValidUtf8Test} and
|
| - * {@link IsValidUtf8FourByteTest}.
|
| - *
|
| - * @author jonp@google.com (Jon Perlow)
|
| - * @author martinrb@google.com (Martin Buchholz)
|
| - */
|
| -class IsValidUtf8TestUtil {
|
| - private static Logger logger = Logger.getLogger(
|
| - IsValidUtf8TestUtil.class.getName());
|
| -
|
| - // 128 - [chars 0x0000 to 0x007f]
|
| - static long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1;
|
| -
|
| - // 128
|
| - static long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT =
|
| - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
|
| -
|
| - // 1920 [chars 0x0080 to 0x07FF]
|
| - static long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1;
|
| -
|
| - // 18,304
|
| - static long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
|
| - // Both bytes are one byte characters
|
| - (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 2) +
|
| - // The possible number of two byte characters
|
| - TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS;
|
| -
|
| - // 2048
|
| - static long THREE_BYTE_SURROGATES = 2 * 1024;
|
| -
|
| - // 61,440 [chars 0x0800 to 0xFFFF, minus surrogates]
|
| - static long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
|
| - 0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES;
|
| -
|
| - // 2,650,112
|
| - static long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
|
| - // All one byte characters
|
| - (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 3) +
|
| - // One two byte character and a one byte character
|
| - 2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
|
| - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
|
| - // Three byte characters
|
| - THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
|
| -
|
| - // 1,048,576 [chars 0x10000L to 0x10FFFF]
|
| - static long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1;
|
| -
|
| - // 289,571,839
|
| - static long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
|
| - // All one byte characters
|
| - (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 4) +
|
| - // One and three byte characters
|
| - 2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
|
| - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
|
| - // Two two byte characters
|
| - TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS +
|
| - // Permutations of one and two byte characters
|
| - 3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
|
| - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
|
| - ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
|
| - // Four byte characters
|
| - FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS;
|
| -
|
| - static class Shard {
|
| - final long index;
|
| - final long start;
|
| - final long lim;
|
| - final long expected;
|
| -
|
| -
|
| - public Shard(long index, long start, long lim, long expected) {
|
| - assertTrue(start < lim);
|
| - this.index = index;
|
| - this.start = start;
|
| - this.lim = lim;
|
| - this.expected = expected;
|
| - }
|
| - }
|
| -
|
| - static final long[] FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES =
|
| - generateFourByteShardsExpectedRunnables();
|
| -
|
| - private static long[] generateFourByteShardsExpectedRunnables() {
|
| - long[] expected = new long[128];
|
| -
|
| - // 0-63 are all 5300224
|
| - for (int i = 0; i <= 63; i++) {
|
| - expected[i] = 5300224;
|
| - }
|
| -
|
| - // 97-111 are all 2342912
|
| - for (int i = 97; i <= 111; i++) {
|
| - expected[i] = 2342912;
|
| - }
|
| -
|
| - // 113-117 are all 1048576
|
| - for (int i = 113; i <= 117; i++) {
|
| - expected[i] = 1048576;
|
| - }
|
| -
|
| - // One offs
|
| - expected[112] = 786432;
|
| - expected[118] = 786432;
|
| - expected[119] = 1048576;
|
| - expected[120] = 458752;
|
| - expected[121] = 524288;
|
| - expected[122] = 65536;
|
| -
|
| - // Anything not assigned was the default 0.
|
| - return expected;
|
| - }
|
| -
|
| - static final List<Shard> FOUR_BYTE_SHARDS = generateFourByteShards(
|
| - 128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES);
|
| -
|
| -
|
| - private static List<Shard> generateFourByteShards(
|
| - int numShards, long[] expected) {
|
| - assertEquals(numShards, expected.length);
|
| - List<Shard> shards = new ArrayList<Shard>(numShards);
|
| - long LIM = 1L << 32;
|
| - long increment = LIM / numShards;
|
| - assertTrue(LIM % numShards == 0);
|
| - for (int i = 0; i < numShards; i++) {
|
| - shards.add(new Shard(i,
|
| - increment * i,
|
| - increment * (i + 1),
|
| - expected[i]));
|
| - }
|
| - return shards;
|
| - }
|
| -
|
| - /**
|
| - * Helper to run the loop to test all the permutations for the number of bytes
|
| - * specified.
|
| - *
|
| - * @param numBytes the number of bytes in the byte array
|
| - * @param expectedCount the expected number of roundtrippable permutations
|
| - */
|
| - static void testBytes(int numBytes, long expectedCount)
|
| - throws UnsupportedEncodingException {
|
| - testBytes(numBytes, expectedCount, 0, -1);
|
| - }
|
| -
|
| - /**
|
| - * Helper to run the loop to test all the permutations for the number of bytes
|
| - * specified. This overload is useful for debugging to get the loop to start
|
| - * at a certain character.
|
| - *
|
| - * @param numBytes the number of bytes in the byte array
|
| - * @param expectedCount the expected number of roundtrippable permutations
|
| - * @param start the starting bytes encoded as a long as big-endian
|
| - * @param lim the limit of bytes to process encoded as a long as big-endian,
|
| - * or -1 to mean the max limit for numBytes
|
| - */
|
| - static void testBytes(int numBytes, long expectedCount, long start, long lim)
|
| - throws UnsupportedEncodingException {
|
| - Random rnd = new Random();
|
| - byte[] bytes = new byte[numBytes];
|
| -
|
| - if (lim == -1) {
|
| - lim = 1L << (numBytes * 8);
|
| - }
|
| - long count = 0;
|
| - long countRoundTripped = 0;
|
| - for (long byteChar = start; byteChar < lim; byteChar++) {
|
| - long tmpByteChar = byteChar;
|
| - for (int i = 0; i < numBytes; i++) {
|
| - bytes[bytes.length - i - 1] = (byte) tmpByteChar;
|
| - tmpByteChar = tmpByteChar >> 8;
|
| - }
|
| - ByteString bs = ByteString.copyFrom(bytes);
|
| - boolean isRoundTrippable = bs.isValidUtf8();
|
| - String s = new String(bytes, "UTF-8");
|
| - byte[] bytesReencoded = s.getBytes("UTF-8");
|
| - boolean bytesEqual = Arrays.equals(bytes, bytesReencoded);
|
| -
|
| - if (bytesEqual != isRoundTrippable) {
|
| - outputFailure(byteChar, bytes, bytesReencoded);
|
| - }
|
| -
|
| - // Check agreement with static Utf8 methods.
|
| - assertEquals(isRoundTrippable, Utf8.isValidUtf8(bytes));
|
| - assertEquals(isRoundTrippable, Utf8.isValidUtf8(bytes, 0, numBytes));
|
| -
|
| - // Test partial sequences.
|
| - // Partition numBytes into three segments (not necessarily non-empty).
|
| - int i = rnd.nextInt(numBytes);
|
| - int j = rnd.nextInt(numBytes);
|
| - if (j < i) {
|
| - int tmp = i; i = j; j = tmp;
|
| - }
|
| - int state1 = Utf8.partialIsValidUtf8(Utf8.COMPLETE, bytes, 0, i);
|
| - int state2 = Utf8.partialIsValidUtf8(state1, bytes, i, j);
|
| - int state3 = Utf8.partialIsValidUtf8(state2, bytes, j, numBytes);
|
| - if (isRoundTrippable != (state3 == Utf8.COMPLETE)) {
|
| - System.out.printf("state=%04x %04x %04x i=%d j=%d%n",
|
| - state1, state2, state3, i, j);
|
| - outputFailure(byteChar, bytes, bytesReencoded);
|
| - }
|
| - assertEquals(isRoundTrippable, (state3 == Utf8.COMPLETE));
|
| -
|
| - // Test ropes built out of small partial sequences
|
| - ByteString rope = RopeByteString.newInstanceForTest(
|
| - bs.substring(0, i),
|
| - RopeByteString.newInstanceForTest(
|
| - bs.substring(i, j),
|
| - bs.substring(j, numBytes)));
|
| - assertSame(RopeByteString.class, rope.getClass());
|
| -
|
| - ByteString[] byteStrings = { bs, bs.substring(0, numBytes), rope };
|
| - for (ByteString x : byteStrings) {
|
| - assertEquals(isRoundTrippable,
|
| - x.isValidUtf8());
|
| - assertEquals(state3,
|
| - x.partialIsValidUtf8(Utf8.COMPLETE, 0, numBytes));
|
| -
|
| - assertEquals(state1,
|
| - x.partialIsValidUtf8(Utf8.COMPLETE, 0, i));
|
| - assertEquals(state1,
|
| - x.substring(0, i).partialIsValidUtf8(Utf8.COMPLETE, 0, i));
|
| - assertEquals(state2,
|
| - x.partialIsValidUtf8(state1, i, j - i));
|
| - assertEquals(state2,
|
| - x.substring(i, j).partialIsValidUtf8(state1, 0, j - i));
|
| - assertEquals(state3,
|
| - x.partialIsValidUtf8(state2, j, numBytes - j));
|
| - assertEquals(state3,
|
| - x.substring(j, numBytes)
|
| - .partialIsValidUtf8(state2, 0, numBytes - j));
|
| - }
|
| -
|
| - // ByteString reduplication should not affect its UTF-8 validity.
|
| - ByteString ropeADope =
|
| - RopeByteString.newInstanceForTest(bs, bs.substring(0, numBytes));
|
| - assertEquals(isRoundTrippable, ropeADope.isValidUtf8());
|
| -
|
| - if (isRoundTrippable) {
|
| - countRoundTripped++;
|
| - }
|
| - count++;
|
| - if (byteChar != 0 && byteChar % 1000000L == 0) {
|
| - logger.info("Processed " + (byteChar / 1000000L) +
|
| - " million characters");
|
| - }
|
| - }
|
| - logger.info("Round tripped " + countRoundTripped + " of " + count);
|
| - assertEquals(expectedCount, countRoundTripped);
|
| - }
|
| -
|
| - /**
|
| - * Variation of {@link #testBytes} that does less allocation using the
|
| - * low-level encoders/decoders directly. Checked in because it's useful for
|
| - * debugging when trying to process bytes faster, but since it doesn't use the
|
| - * actual String class, it's possible for incompatibilities to develop
|
| - * (although unlikely).
|
| - *
|
| - * @param numBytes the number of bytes in the byte array
|
| - * @param expectedCount the expected number of roundtrippable permutations
|
| - * @param start the starting bytes encoded as a long as big-endian
|
| - * @param lim the limit of bytes to process encoded as a long as big-endian,
|
| - * or -1 to mean the max limit for numBytes
|
| - */
|
| - void testBytesUsingByteBuffers(
|
| - int numBytes, long expectedCount, long start, long lim)
|
| - throws UnsupportedEncodingException {
|
| - CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
|
| - .onMalformedInput(CodingErrorAction.REPLACE)
|
| - .onUnmappableCharacter(CodingErrorAction.REPLACE);
|
| - CharsetEncoder encoder = Charset.forName("UTF-8").newEncoder()
|
| - .onMalformedInput(CodingErrorAction.REPLACE)
|
| - .onUnmappableCharacter(CodingErrorAction.REPLACE);
|
| - byte[] bytes = new byte[numBytes];
|
| - int maxChars = (int) (decoder.maxCharsPerByte() * numBytes) + 1;
|
| - char[] charsDecoded =
|
| - new char[(int) (decoder.maxCharsPerByte() * numBytes) + 1];
|
| - int maxBytes = (int) (encoder.maxBytesPerChar() * maxChars) + 1;
|
| - byte[] bytesReencoded = new byte[maxBytes];
|
| -
|
| - ByteBuffer bb = ByteBuffer.wrap(bytes);
|
| - CharBuffer cb = CharBuffer.wrap(charsDecoded);
|
| - ByteBuffer bbReencoded = ByteBuffer.wrap(bytesReencoded);
|
| - if (lim == -1) {
|
| - lim = 1L << (numBytes * 8);
|
| - }
|
| - long count = 0;
|
| - long countRoundTripped = 0;
|
| - for (long byteChar = start; byteChar < lim; byteChar++) {
|
| - bb.rewind();
|
| - bb.limit(bytes.length);
|
| - cb.rewind();
|
| - cb.limit(charsDecoded.length);
|
| - bbReencoded.rewind();
|
| - bbReencoded.limit(bytesReencoded.length);
|
| - encoder.reset();
|
| - decoder.reset();
|
| - long tmpByteChar = byteChar;
|
| - for (int i = 0; i < bytes.length; i++) {
|
| - bytes[bytes.length - i - 1] = (byte) tmpByteChar;
|
| - tmpByteChar = tmpByteChar >> 8;
|
| - }
|
| - boolean isRoundTrippable = ByteString.copyFrom(bytes).isValidUtf8();
|
| - CoderResult result = decoder.decode(bb, cb, true);
|
| - assertFalse(result.isError());
|
| - result = decoder.flush(cb);
|
| - assertFalse(result.isError());
|
| -
|
| - int charLen = cb.position();
|
| - cb.rewind();
|
| - cb.limit(charLen);
|
| - result = encoder.encode(cb, bbReencoded, true);
|
| - assertFalse(result.isError());
|
| - result = encoder.flush(bbReencoded);
|
| - assertFalse(result.isError());
|
| -
|
| - boolean bytesEqual = true;
|
| - int bytesLen = bbReencoded.position();
|
| - if (bytesLen != numBytes) {
|
| - bytesEqual = false;
|
| - } else {
|
| - for (int i = 0; i < numBytes; i++) {
|
| - if (bytes[i] != bytesReencoded[i]) {
|
| - bytesEqual = false;
|
| - break;
|
| - }
|
| - }
|
| - }
|
| - if (bytesEqual != isRoundTrippable) {
|
| - outputFailure(byteChar, bytes, bytesReencoded, bytesLen);
|
| - }
|
| -
|
| - count++;
|
| - if (isRoundTrippable) {
|
| - countRoundTripped++;
|
| - }
|
| - if (byteChar != 0 && byteChar % 1000000 == 0) {
|
| - logger.info("Processed " + (byteChar / 1000000) +
|
| - " million characters");
|
| - }
|
| - }
|
| - logger.info("Round tripped " + countRoundTripped + " of " + count);
|
| - assertEquals(expectedCount, countRoundTripped);
|
| - }
|
| -
|
| - private static void outputFailure(long byteChar, byte[] bytes, byte[] after) {
|
| - outputFailure(byteChar, bytes, after, after.length);
|
| - }
|
| -
|
| - private static void outputFailure(long byteChar, byte[] bytes, byte[] after,
|
| - int len) {
|
| - fail("Failure: (" + Long.toHexString(byteChar) + ") " +
|
| - toHexString(bytes) + " => " + toHexString(after, len));
|
| - }
|
| -
|
| - private static String toHexString(byte[] b) {
|
| - return toHexString(b, b.length);
|
| - }
|
| -
|
| - private static String toHexString(byte[] b, int len) {
|
| - StringBuilder s = new StringBuilder();
|
| - s.append("\"");
|
| - for (int i = 0; i < len; i++) {
|
| - if (i > 0) {
|
| - s.append(" ");
|
| - }
|
| - s.append(String.format("%02x", b[i] & 0xFF));
|
| - }
|
| - s.append("\"");
|
| - return s.toString();
|
| - }
|
| -
|
| -}
|
|
|