| Index: third_party/protobuf/java/src/main/java/com/google/protobuf/ByteString.java
|
| ===================================================================
|
| --- third_party/protobuf/java/src/main/java/com/google/protobuf/ByteString.java (revision 216642)
|
| +++ third_party/protobuf/java/src/main/java/com/google/protobuf/ByteString.java (working copy)
|
| @@ -30,140 +30,413 @@
|
|
|
| package com.google.protobuf;
|
|
|
| +import java.io.ByteArrayOutputStream;
|
| +import java.io.IOException;
|
| import java.io.InputStream;
|
| -import java.io.ByteArrayInputStream;
|
| -import java.io.ByteArrayOutputStream;
|
| -import java.io.FilterOutputStream;
|
| +import java.io.OutputStream;
|
| import java.io.UnsupportedEncodingException;
|
| import java.nio.ByteBuffer;
|
| +import java.util.ArrayList;
|
| +import java.util.Arrays;
|
| +import java.util.Collection;
|
| +import java.util.Iterator;
|
| import java.util.List;
|
| +import java.util.NoSuchElementException;
|
|
|
| /**
|
| - * Immutable array of bytes.
|
| + * Immutable sequence of bytes. Substring is supported by sharing the reference
|
| + * to the immutable underlying bytes, as with {@link String}. Concatenation is
|
| + * likewise supported without copying (long strings) by building a tree of
|
| + * pieces in {@link RopeByteString}.
|
| + * <p>
|
| + * Like {@link String}, the contents of a {@link ByteString} can never be
|
| + * observed to change, not even in the presence of a data race or incorrect
|
| + * API usage in the client code.
|
| *
|
| * @author crazybob@google.com Bob Lee
|
| * @author kenton@google.com Kenton Varda
|
| + * @author carlanton@google.com Carl Haverl
|
| + * @author martinrb@google.com Martin Buchholz
|
| */
|
| -public final class ByteString {
|
| - private final byte[] bytes;
|
| +public abstract class ByteString implements Iterable<Byte> {
|
|
|
| - private ByteString(final byte[] bytes) {
|
| - this.bytes = bytes;
|
| - }
|
| + /**
|
| + * When two strings to be concatenated have a combined length shorter than
|
| + * this, we just copy their bytes on {@link #concat(ByteString)}.
|
| + * The trade-off is copy size versus the overhead of creating tree nodes
|
| + * in {@link RopeByteString}.
|
| + */
|
| + static final int CONCATENATE_BY_COPY_SIZE = 128;
|
|
|
| /**
|
| - * Gets the byte at the given index.
|
| + * When copying an InputStream into a ByteString with .readFrom(),
|
| + * the chunks in the underlying rope start at 256 bytes, but double
|
| + * each iteration up to 8192 bytes.
|
| + */
|
| + static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b
|
| + static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k
|
| +
|
| + /**
|
| + * Empty {@code ByteString}.
|
| + */
|
| + public static final ByteString EMPTY = new LiteralByteString(new byte[0]);
|
| +
|
| + // This constructor is here to prevent subclassing outside of this package,
|
| + ByteString() {}
|
| +
|
| + /**
|
| + * Gets the byte at the given index. This method should be used only for
|
| + * random access to individual bytes. To access bytes sequentially, use the
|
| + * {@link ByteIterator} returned by {@link #iterator()}, and call {@link
|
| + * #substring(int, int)} first if necessary.
|
| *
|
| + * @param index index of byte
|
| + * @return the value
|
| * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
|
| */
|
| - public byte byteAt(final int index) {
|
| - return bytes[index];
|
| + public abstract byte byteAt(int index);
|
| +
|
| + /**
|
| + * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString.
|
| + * To avoid auto-boxing, you may get the iterator manually and call
|
| + * {@link ByteIterator#nextByte()}.
|
| + *
|
| + * @return the iterator
|
| + */
|
| + public abstract ByteIterator iterator();
|
| +
|
| + /**
|
| + * This interface extends {@code Iterator<Byte>}, so that we can return an
|
| + * unboxed {@code byte}.
|
| + */
|
| + public interface ByteIterator extends Iterator<Byte> {
|
| + /**
|
| + * An alternative to {@link Iterator#next()} that returns an
|
| + * unboxed primitive {@code byte}.
|
| + *
|
| + * @return the next {@code byte} in the iteration
|
| + * @throws NoSuchElementException if the iteration has no more elements
|
| + */
|
| + byte nextByte();
|
| }
|
|
|
| /**
|
| * Gets the number of bytes.
|
| + *
|
| + * @return size in bytes
|
| */
|
| - public int size() {
|
| - return bytes.length;
|
| - }
|
| + public abstract int size();
|
|
|
| /**
|
| * Returns {@code true} if the size is {@code 0}, {@code false} otherwise.
|
| + *
|
| + * @return true if this is zero bytes long
|
| */
|
| public boolean isEmpty() {
|
| - return bytes.length == 0;
|
| + return size() == 0;
|
| }
|
|
|
| // =================================================================
|
| - // byte[] -> ByteString
|
| + // ByteString -> substring
|
|
|
| /**
|
| - * Empty ByteString.
|
| + * Return the substring from {@code beginIndex}, inclusive, to the end of the
|
| + * string.
|
| + *
|
| + * @param beginIndex start at this index
|
| + * @return substring sharing underlying data
|
| + * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or
|
| + * {@code beginIndex > size()}.
|
| */
|
| - public static final ByteString EMPTY = new ByteString(new byte[0]);
|
| + public ByteString substring(int beginIndex) {
|
| + return substring(beginIndex, size());
|
| + }
|
|
|
| /**
|
| + * Return the substring from {@code beginIndex}, inclusive, to {@code
|
| + * endIndex}, exclusive.
|
| + *
|
| + * @param beginIndex start at this index
|
| + * @param endIndex the last character is the one before this index
|
| + * @return substring sharing underlying data
|
| + * @throws IndexOutOfBoundsException if {@code beginIndex < 0},
|
| + * {@code endIndex > size()}, or {@code beginIndex > endIndex}.
|
| + */
|
| + public abstract ByteString substring(int beginIndex, int endIndex);
|
| +
|
| + /**
|
| + * Tests if this bytestring starts with the specified prefix.
|
| + * Similar to {@link String#startsWith(String)}
|
| + *
|
| + * @param prefix the prefix.
|
| + * @return <code>true</code> if the byte sequence represented by the
|
| + * argument is a prefix of the byte sequence represented by
|
| + * this string; <code>false</code> otherwise.
|
| + */
|
| + public boolean startsWith(ByteString prefix) {
|
| + return size() >= prefix.size() &&
|
| + substring(0, prefix.size()).equals(prefix);
|
| + }
|
| +
|
| + // =================================================================
|
| + // byte[] -> ByteString
|
| +
|
| + /**
|
| * Copies the given bytes into a {@code ByteString}.
|
| + *
|
| + * @param bytes source array
|
| + * @param offset offset in source array
|
| + * @param size number of bytes to copy
|
| + * @return new {@code ByteString}
|
| */
|
| - public static ByteString copyFrom(final byte[] bytes, final int offset,
|
| - final int size) {
|
| - final byte[] copy = new byte[size];
|
| + public static ByteString copyFrom(byte[] bytes, int offset, int size) {
|
| + byte[] copy = new byte[size];
|
| System.arraycopy(bytes, offset, copy, 0, size);
|
| - return new ByteString(copy);
|
| + return new LiteralByteString(copy);
|
| }
|
|
|
| /**
|
| * Copies the given bytes into a {@code ByteString}.
|
| + *
|
| + * @param bytes to copy
|
| + * @return new {@code ByteString}
|
| */
|
| - public static ByteString copyFrom(final byte[] bytes) {
|
| + public static ByteString copyFrom(byte[] bytes) {
|
| return copyFrom(bytes, 0, bytes.length);
|
| }
|
|
|
| /**
|
| - * Copies {@code size} bytes from a {@code java.nio.ByteBuffer} into
|
| + * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into
|
| * a {@code ByteString}.
|
| + *
|
| + * @param bytes source buffer
|
| + * @param size number of bytes to copy
|
| + * @return new {@code ByteString}
|
| */
|
| - public static ByteString copyFrom(final ByteBuffer bytes, final int size) {
|
| - final byte[] copy = new byte[size];
|
| + public static ByteString copyFrom(ByteBuffer bytes, int size) {
|
| + byte[] copy = new byte[size];
|
| bytes.get(copy);
|
| - return new ByteString(copy);
|
| + return new LiteralByteString(copy);
|
| }
|
|
|
| /**
|
| * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into
|
| * a {@code ByteString}.
|
| + *
|
| + * @param bytes sourceBuffer
|
| + * @return new {@code ByteString}
|
| */
|
| - public static ByteString copyFrom(final ByteBuffer bytes) {
|
| + public static ByteString copyFrom(ByteBuffer bytes) {
|
| return copyFrom(bytes, bytes.remaining());
|
| }
|
|
|
| /**
|
| * Encodes {@code text} into a sequence of bytes using the named charset
|
| * and returns the result as a {@code ByteString}.
|
| + *
|
| + * @param text source string
|
| + * @param charsetName encoding to use
|
| + * @return new {@code ByteString}
|
| + * @throws UnsupportedEncodingException if the encoding isn't found
|
| */
|
| - public static ByteString copyFrom(final String text, final String charsetName)
|
| + public static ByteString copyFrom(String text, String charsetName)
|
| throws UnsupportedEncodingException {
|
| - return new ByteString(text.getBytes(charsetName));
|
| + return new LiteralByteString(text.getBytes(charsetName));
|
| }
|
|
|
| /**
|
| * Encodes {@code text} into a sequence of UTF-8 bytes and returns the
|
| * result as a {@code ByteString}.
|
| + *
|
| + * @param text source string
|
| + * @return new {@code ByteString}
|
| */
|
| - public static ByteString copyFromUtf8(final String text) {
|
| + public static ByteString copyFromUtf8(String text) {
|
| try {
|
| - return new ByteString(text.getBytes("UTF-8"));
|
| + return new LiteralByteString(text.getBytes("UTF-8"));
|
| } catch (UnsupportedEncodingException e) {
|
| throw new RuntimeException("UTF-8 not supported?", e);
|
| }
|
| }
|
|
|
| + // =================================================================
|
| + // InputStream -> ByteString
|
| +
|
| /**
|
| - * Concatenates all byte strings in the list and returns the result.
|
| + * Completely reads the given stream's bytes into a
|
| + * {@code ByteString}, blocking if necessary until all bytes are
|
| + * read through to the end of the stream.
|
| *
|
| + * <b>Performance notes:</b> The returned {@code ByteString} is an
|
| + * immutable tree of byte arrays ("chunks") of the stream data. The
|
| + * first chunk is small, with subsequent chunks each being double
|
| + * the size, up to 8K. If the caller knows the precise length of
|
| + * the stream and wishes to avoid all unnecessary copies and
|
| + * allocations, consider using the two-argument version of this
|
| + * method, below.
|
| + *
|
| + * @param streamToDrain The source stream, which is read completely
|
| + * but not closed.
|
| + * @return A new {@code ByteString} which is made up of chunks of
|
| + * various sizes, depending on the behavior of the underlying
|
| + * stream.
|
| + * @throws IOException IOException is thrown if there is a problem
|
| + * reading the underlying stream.
|
| + */
|
| + public static ByteString readFrom(InputStream streamToDrain)
|
| + throws IOException {
|
| + return readFrom(
|
| + streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE);
|
| + }
|
| +
|
| + /**
|
| + * Completely reads the given stream's bytes into a
|
| + * {@code ByteString}, blocking if necessary until all bytes are
|
| + * read through to the end of the stream.
|
| + *
|
| + * <b>Performance notes:</b> The returned {@code ByteString} is an
|
| + * immutable tree of byte arrays ("chunks") of the stream data. The
|
| + * chunkSize parameter sets the size of these byte arrays. In
|
| + * particular, if the chunkSize is precisely the same as the length
|
| + * of the stream, unnecessary allocations and copies will be
|
| + * avoided. Otherwise, the chunks will be of the given size, except
|
| + * for the last chunk, which will be resized (via a reallocation and
|
| + * copy) to contain the remainder of the stream.
|
| + *
|
| + * @param streamToDrain The source stream, which is read completely
|
| + * but not closed.
|
| + * @param chunkSize The size of the chunks in which to read the
|
| + * stream.
|
| + * @return A new {@code ByteString} which is made up of chunks of
|
| + * the given size.
|
| + * @throws IOException IOException is thrown if there is a problem
|
| + * reading the underlying stream.
|
| + */
|
| + public static ByteString readFrom(InputStream streamToDrain, int chunkSize)
|
| + throws IOException {
|
| + return readFrom(streamToDrain, chunkSize, chunkSize);
|
| + }
|
| +
|
| + // Helper method that takes the chunk size range as a parameter.
|
| + public static ByteString readFrom(InputStream streamToDrain, int minChunkSize,
|
| + int maxChunkSize) throws IOException {
|
| + Collection<ByteString> results = new ArrayList<ByteString>();
|
| +
|
| + // copy the inbound bytes into a list of chunks; the chunk size
|
| + // grows exponentially to support both short and long streams.
|
| + int chunkSize = minChunkSize;
|
| + while (true) {
|
| + ByteString chunk = readChunk(streamToDrain, chunkSize);
|
| + if (chunk == null) {
|
| + break;
|
| + }
|
| + results.add(chunk);
|
| + chunkSize = Math.min(chunkSize * 2, maxChunkSize);
|
| + }
|
| +
|
| + return ByteString.copyFrom(results);
|
| + }
|
| +
|
| + /**
|
| + * Blocks until a chunk of the given size can be made from the
|
| + * stream, or EOF is reached. Calls read() repeatedly in case the
|
| + * given stream implementation doesn't completely fill the given
|
| + * buffer in one read() call.
|
| + *
|
| + * @return A chunk of the desired size, or else a chunk as large as
|
| + * was available when end of stream was reached. Returns null if the
|
| + * given stream had no more data in it.
|
| + */
|
| + private static ByteString readChunk(InputStream in, final int chunkSize)
|
| + throws IOException {
|
| + final byte[] buf = new byte[chunkSize];
|
| + int bytesRead = 0;
|
| + while (bytesRead < chunkSize) {
|
| + final int count = in.read(buf, bytesRead, chunkSize - bytesRead);
|
| + if (count == -1) {
|
| + break;
|
| + }
|
| + bytesRead += count;
|
| + }
|
| +
|
| + if (bytesRead == 0) {
|
| + return null;
|
| + } else {
|
| + return ByteString.copyFrom(buf, 0, bytesRead);
|
| + }
|
| + }
|
| +
|
| + // =================================================================
|
| + // Multiple ByteStrings -> One ByteString
|
| +
|
| + /**
|
| + * Concatenate the given {@code ByteString} to this one. Short concatenations,
|
| + * of total size smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are
|
| + * produced by copying the underlying bytes (as per Rope.java, <a
|
| + * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
|
| + * BAP95 </a>. In general, the concatenate involves no copying.
|
| + *
|
| + * @param other string to concatenate
|
| + * @return a new {@code ByteString} instance
|
| + */
|
| + public ByteString concat(ByteString other) {
|
| + int thisSize = size();
|
| + int otherSize = other.size();
|
| + if ((long) thisSize + otherSize >= Integer.MAX_VALUE) {
|
| + throw new IllegalArgumentException("ByteString would be too long: " +
|
| + thisSize + "+" + otherSize);
|
| + }
|
| +
|
| + return RopeByteString.concatenate(this, other);
|
| + }
|
| +
|
| + /**
|
| + * Concatenates all byte strings in the iterable and returns the result.
|
| + * This is designed to run in O(list size), not O(total bytes).
|
| + *
|
| * <p>The returned {@code ByteString} is not necessarily a unique object.
|
| * If the list is empty, the returned object is the singleton empty
|
| * {@code ByteString}. If the list has only one element, that
|
| * {@code ByteString} will be returned without copying.
|
| + *
|
| + * @param byteStrings strings to be concatenated
|
| + * @return new {@code ByteString}
|
| */
|
| - public static ByteString copyFrom(List<ByteString> list) {
|
| - if (list.size() == 0) {
|
| - return EMPTY;
|
| - } else if (list.size() == 1) {
|
| - return list.get(0);
|
| + public static ByteString copyFrom(Iterable<ByteString> byteStrings) {
|
| + Collection<ByteString> collection;
|
| + if (!(byteStrings instanceof Collection)) {
|
| + collection = new ArrayList<ByteString>();
|
| + for (ByteString byteString : byteStrings) {
|
| + collection.add(byteString);
|
| + }
|
| + } else {
|
| + collection = (Collection<ByteString>) byteStrings;
|
| }
|
| + ByteString result;
|
| + if (collection.isEmpty()) {
|
| + result = EMPTY;
|
| + } else {
|
| + result = balancedConcat(collection.iterator(), collection.size());
|
| + }
|
| + return result;
|
| + }
|
|
|
| - int size = 0;
|
| - for (ByteString str : list) {
|
| - size += str.size();
|
| + // Internal function used by copyFrom(Iterable<ByteString>).
|
| + // Create a balanced concatenation of the next "length" elements from the
|
| + // iterable.
|
| + private static ByteString balancedConcat(Iterator<ByteString> iterator,
|
| + int length) {
|
| + assert length >= 1;
|
| + ByteString result;
|
| + if (length == 1) {
|
| + result = iterator.next();
|
| + } else {
|
| + int halfLength = length >>> 1;
|
| + ByteString left = balancedConcat(iterator, halfLength);
|
| + ByteString right = balancedConcat(iterator, length - halfLength);
|
| + result = left.concat(right);
|
| }
|
| - byte[] bytes = new byte[size];
|
| - int pos = 0;
|
| - for (ByteString str : list) {
|
| - System.arraycopy(str.bytes, 0, bytes, pos, str.size());
|
| - pos += str.size();
|
| - }
|
| - return new ByteString(bytes);
|
| + return result;
|
| }
|
|
|
| // =================================================================
|
| @@ -174,206 +447,446 @@
|
| *
|
| * @param target buffer to copy into
|
| * @param offset in the target buffer
|
| + * @throws IndexOutOfBoundsException if the offset is negative or too large
|
| */
|
| - public void copyTo(final byte[] target, final int offset) {
|
| - System.arraycopy(bytes, 0, target, offset, bytes.length);
|
| + public void copyTo(byte[] target, int offset) {
|
| + copyTo(target, 0, offset, size());
|
| }
|
|
|
| /**
|
| * Copies bytes into a buffer.
|
| *
|
| - * @param target buffer to copy into
|
| + * @param target buffer to copy into
|
| * @param sourceOffset offset within these bytes
|
| * @param targetOffset offset within the target buffer
|
| - * @param size number of bytes to copy
|
| + * @param numberToCopy number of bytes to copy
|
| + * @throws IndexOutOfBoundsException if an offset or size is negative or too
|
| + * large
|
| */
|
| - public void copyTo(final byte[] target, final int sourceOffset,
|
| - final int targetOffset,
|
| - final int size) {
|
| - System.arraycopy(bytes, sourceOffset, target, targetOffset, size);
|
| + public void copyTo(byte[] target, int sourceOffset, int targetOffset,
|
| + int numberToCopy) {
|
| + if (sourceOffset < 0) {
|
| + throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset);
|
| + }
|
| + if (targetOffset < 0) {
|
| + throw new IndexOutOfBoundsException("Target offset < 0: " + targetOffset);
|
| + }
|
| + if (numberToCopy < 0) {
|
| + throw new IndexOutOfBoundsException("Length < 0: " + numberToCopy);
|
| + }
|
| + if (sourceOffset + numberToCopy > size()) {
|
| + throw new IndexOutOfBoundsException(
|
| + "Source end offset < 0: " + (sourceOffset + numberToCopy));
|
| + }
|
| + if (targetOffset + numberToCopy > target.length) {
|
| + throw new IndexOutOfBoundsException(
|
| + "Target end offset < 0: " + (targetOffset + numberToCopy));
|
| + }
|
| + if (numberToCopy > 0) {
|
| + copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
|
| + }
|
| }
|
|
|
| /**
|
| + * Internal (package private) implementation of
|
| + * @link{#copyTo(byte[],int,int,int}.
|
| + * It assumes that all error checking has already been performed and that
|
| + * @code{numberToCopy > 0}.
|
| + */
|
| + protected abstract void copyToInternal(byte[] target, int sourceOffset,
|
| + int targetOffset, int numberToCopy);
|
| +
|
| + /**
|
| * Copies bytes into a ByteBuffer.
|
| *
|
| * @param target ByteBuffer to copy into.
|
| - * @throws ReadOnlyBufferException if the {@code target} is read-only
|
| - * @throws BufferOverflowException if the {@code target}'s remaining()
|
| - * space is not large enough to hold the data.
|
| + * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only
|
| + * @throws java.nio.BufferOverflowException if the {@code target}'s
|
| + * remaining() space is not large enough to hold the data.
|
| */
|
| - public void copyTo(ByteBuffer target) {
|
| - target.put(bytes, 0, bytes.length);
|
| - }
|
| + public abstract void copyTo(ByteBuffer target);
|
|
|
| /**
|
| * Copies bytes to a {@code byte[]}.
|
| + *
|
| + * @return copied bytes
|
| */
|
| public byte[] toByteArray() {
|
| - final int size = bytes.length;
|
| - final byte[] copy = new byte[size];
|
| - System.arraycopy(bytes, 0, copy, 0, size);
|
| - return copy;
|
| + int size = size();
|
| + byte[] result = new byte[size];
|
| + copyToInternal(result, 0, 0, size);
|
| + return result;
|
| }
|
|
|
| /**
|
| - * Constructs a new read-only {@code java.nio.ByteBuffer} with the
|
| - * same backing byte array.
|
| + * Writes the complete contents of this byte string to
|
| + * the specified output stream argument.
|
| + *
|
| + * @param out the output stream to which to write the data.
|
| + * @throws IOException if an I/O error occurs.
|
| */
|
| - public ByteBuffer asReadOnlyByteBuffer() {
|
| - final ByteBuffer byteBuffer = ByteBuffer.wrap(bytes);
|
| - return byteBuffer.asReadOnlyBuffer();
|
| - }
|
| + public abstract void writeTo(OutputStream out) throws IOException;
|
|
|
| /**
|
| + * Constructs a read-only {@code java.nio.ByteBuffer} whose content
|
| + * is equal to the contents of this byte string.
|
| + * The result uses the same backing array as the byte string, if possible.
|
| + *
|
| + * @return wrapped bytes
|
| + */
|
| + public abstract ByteBuffer asReadOnlyByteBuffer();
|
| +
|
| + /**
|
| + * Constructs a list of read-only {@code java.nio.ByteBuffer} objects
|
| + * such that the concatenation of their contents is equal to the contents
|
| + * of this byte string. The result uses the same backing arrays as the
|
| + * byte string.
|
| + * <p>
|
| + * By returning a list, implementations of this method may be able to avoid
|
| + * copying even when there are multiple backing arrays.
|
| + *
|
| + * @return a list of wrapped bytes
|
| + */
|
| + public abstract List<ByteBuffer> asReadOnlyByteBufferList();
|
| +
|
| + /**
|
| * Constructs a new {@code String} by decoding the bytes using the
|
| * specified charset.
|
| + *
|
| + * @param charsetName encode using this charset
|
| + * @return new string
|
| + * @throws UnsupportedEncodingException if charset isn't recognized
|
| */
|
| - public String toString(final String charsetName)
|
| - throws UnsupportedEncodingException {
|
| - return new String(bytes, charsetName);
|
| - }
|
| + public abstract String toString(String charsetName)
|
| + throws UnsupportedEncodingException;
|
|
|
| + // =================================================================
|
| + // UTF-8 decoding
|
| +
|
| /**
|
| * Constructs a new {@code String} by decoding the bytes as UTF-8.
|
| + *
|
| + * @return new string using UTF-8 encoding
|
| */
|
| public String toStringUtf8() {
|
| try {
|
| - return new String(bytes, "UTF-8");
|
| + return toString("UTF-8");
|
| } catch (UnsupportedEncodingException e) {
|
| throw new RuntimeException("UTF-8 not supported?", e);
|
| }
|
| }
|
|
|
| + /**
|
| + * Tells whether this {@code ByteString} represents a well-formed UTF-8
|
| + * byte sequence, such that the original bytes can be converted to a
|
| + * String object and then round tripped back to bytes without loss.
|
| + *
|
| + * <p>More precisely, returns {@code true} whenever: <pre> {@code
|
| + * Arrays.equals(byteString.toByteArray(),
|
| + * new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
|
| + * }</pre>
|
| + *
|
| + * <p>This method returns {@code false} for "overlong" byte sequences,
|
| + * as well as for 3-byte sequences that would map to a surrogate
|
| + * character, in accordance with the restricted definition of UTF-8
|
| + * introduced in Unicode 3.1. Note that the UTF-8 decoder included in
|
| + * Oracle's JDK has been modified to also reject "overlong" byte
|
| + * sequences, but (as of 2011) still accepts 3-byte surrogate
|
| + * character byte sequences.
|
| + *
|
| + * <p>See the Unicode Standard,</br>
|
| + * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br>
|
| + * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
|
| + *
|
| + * @return whether the bytes in this {@code ByteString} are a
|
| + * well-formed UTF-8 byte sequence
|
| + */
|
| + public abstract boolean isValidUtf8();
|
| +
|
| + /**
|
| + * Tells whether the given byte sequence is a well-formed, malformed, or
|
| + * incomplete UTF-8 byte sequence. This method accepts and returns a partial
|
| + * state result, allowing the bytes for a complete UTF-8 byte sequence to be
|
| + * composed from multiple {@code ByteString} segments.
|
| + *
|
| + * @param state either {@code 0} (if this is the initial decoding operation)
|
| + * or the value returned from a call to a partial decoding method for the
|
| + * previous bytes
|
| + * @param offset offset of the first byte to check
|
| + * @param length number of bytes to check
|
| + *
|
| + * @return {@code -1} if the partial byte sequence is definitely malformed,
|
| + * {@code 0} if it is well-formed (no additional input needed), or, if the
|
| + * byte sequence is "incomplete", i.e. apparently terminated in the middle of
|
| + * a character, an opaque integer "state" value containing enough information
|
| + * to decode the character when passed to a subsequent invocation of a
|
| + * partial decoding method.
|
| + */
|
| + protected abstract int partialIsValidUtf8(int state, int offset, int length);
|
| +
|
| // =================================================================
|
| // equals() and hashCode()
|
|
|
| @Override
|
| - public boolean equals(final Object o) {
|
| - if (o == this) {
|
| - return true;
|
| - }
|
| + public abstract boolean equals(Object o);
|
|
|
| - if (!(o instanceof ByteString)) {
|
| - return false;
|
| - }
|
| -
|
| - final ByteString other = (ByteString) o;
|
| - final int size = bytes.length;
|
| - if (size != other.bytes.length) {
|
| - return false;
|
| - }
|
| -
|
| - final byte[] thisBytes = bytes;
|
| - final byte[] otherBytes = other.bytes;
|
| - for (int i = 0; i < size; i++) {
|
| - if (thisBytes[i] != otherBytes[i]) {
|
| - return false;
|
| - }
|
| - }
|
| -
|
| - return true;
|
| - }
|
| -
|
| - private volatile int hash = 0;
|
| -
|
| + /**
|
| + * Return a non-zero hashCode depending only on the sequence of bytes
|
| + * in this ByteString.
|
| + *
|
| + * @return hashCode value for this object
|
| + */
|
| @Override
|
| - public int hashCode() {
|
| - int h = hash;
|
| + public abstract int hashCode();
|
|
|
| - if (h == 0) {
|
| - final byte[] thisBytes = bytes;
|
| - final int size = bytes.length;
|
| -
|
| - h = size;
|
| - for (int i = 0; i < size; i++) {
|
| - h = h * 31 + thisBytes[i];
|
| - }
|
| - if (h == 0) {
|
| - h = 1;
|
| - }
|
| -
|
| - hash = h;
|
| - }
|
| -
|
| - return h;
|
| - }
|
| -
|
| // =================================================================
|
| // Input stream
|
|
|
| /**
|
| * Creates an {@code InputStream} which can be used to read the bytes.
|
| + * <p>
|
| + * The {@link InputStream} returned by this method is guaranteed to be
|
| + * completely non-blocking. The method {@link InputStream#available()}
|
| + * returns the number of bytes remaining in the stream. The methods
|
| + * {@link InputStream#read(byte[]), {@link InputStream#read(byte[],int,int)}
|
| + * and {@link InputStream#skip(long)} will read/skip as many bytes as are
|
| + * available.
|
| + * <p>
|
| + * The methods in the returned {@link InputStream} might <b>not</b> be
|
| + * thread safe.
|
| + *
|
| + * @return an input stream that returns the bytes of this byte string.
|
| */
|
| - public InputStream newInput() {
|
| - return new ByteArrayInputStream(bytes);
|
| - }
|
| + public abstract InputStream newInput();
|
|
|
| /**
|
| * Creates a {@link CodedInputStream} which can be used to read the bytes.
|
| - * Using this is more efficient than creating a {@link CodedInputStream}
|
| - * wrapping the result of {@link #newInput()}.
|
| + * Using this is often more efficient than creating a {@link CodedInputStream}
|
| + * that wraps the result of {@link #newInput()}.
|
| + *
|
| + * @return stream based on wrapped data
|
| */
|
| - public CodedInputStream newCodedInput() {
|
| - // We trust CodedInputStream not to modify the bytes, or to give anyone
|
| - // else access to them.
|
| - return CodedInputStream.newInstance(bytes);
|
| - }
|
| + public abstract CodedInputStream newCodedInput();
|
|
|
| // =================================================================
|
| // Output stream
|
|
|
| /**
|
| - * Creates a new {@link Output} with the given initial capacity.
|
| + * Creates a new {@link Output} with the given initial capacity. Call {@link
|
| + * Output#toByteString()} to create the {@code ByteString} instance.
|
| + * <p>
|
| + * A {@link ByteString.Output} offers the same functionality as a
|
| + * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
|
| + * rather than a {@code byte} array.
|
| + *
|
| + * @param initialCapacity estimate of number of bytes to be written
|
| + * @return {@code OutputStream} for building a {@code ByteString}
|
| */
|
| - public static Output newOutput(final int initialCapacity) {
|
| - return new Output(new ByteArrayOutputStream(initialCapacity));
|
| + public static Output newOutput(int initialCapacity) {
|
| + return new Output(initialCapacity);
|
| }
|
|
|
| /**
|
| - * Creates a new {@link Output}.
|
| + * Creates a new {@link Output}. Call {@link Output#toByteString()} to create
|
| + * the {@code ByteString} instance.
|
| + * <p>
|
| + * A {@link ByteString.Output} offers the same functionality as a
|
| + * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
|
| + * rather than a {@code byte array}.
|
| + *
|
| + * @return {@code OutputStream} for building a {@code ByteString}
|
| */
|
| public static Output newOutput() {
|
| - return newOutput(32);
|
| + return new Output(CONCATENATE_BY_COPY_SIZE);
|
| }
|
|
|
| /**
|
| * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to
|
| * create the {@code ByteString} instance.
|
| */
|
| - public static final class Output extends FilterOutputStream {
|
| - private final ByteArrayOutputStream bout;
|
| + public static final class Output extends OutputStream {
|
| + // Implementation note.
|
| + // The public methods of this class must be synchronized. ByteStrings
|
| + // are guaranteed to be immutable. Without some sort of locking, it could
|
| + // be possible for one thread to call toByteSring(), while another thread
|
| + // is still modifying the underlying byte array.
|
|
|
| + private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
|
| + // argument passed by user, indicating initial capacity.
|
| + private final int initialCapacity;
|
| + // ByteStrings to be concatenated to create the result
|
| + private final ArrayList<ByteString> flushedBuffers;
|
| + // Total number of bytes in the ByteStrings of flushedBuffers
|
| + private int flushedBuffersTotalBytes;
|
| + // Current buffer to which we are writing
|
| + private byte[] buffer;
|
| + // Location in buffer[] to which we write the next byte.
|
| + private int bufferPos;
|
| +
|
| /**
|
| - * Constructs a new output with the given initial capacity.
|
| + * Creates a new ByteString output stream with the specified
|
| + * initial capacity.
|
| + *
|
| + * @param initialCapacity the initial capacity of the output stream.
|
| */
|
| - private Output(final ByteArrayOutputStream bout) {
|
| - super(bout);
|
| - this.bout = bout;
|
| + Output(int initialCapacity) {
|
| + if (initialCapacity < 0) {
|
| + throw new IllegalArgumentException("Buffer size < 0");
|
| + }
|
| + this.initialCapacity = initialCapacity;
|
| + this.flushedBuffers = new ArrayList<ByteString>();
|
| + this.buffer = new byte[initialCapacity];
|
| }
|
|
|
| + @Override
|
| + public synchronized void write(int b) {
|
| + if (bufferPos == buffer.length) {
|
| + flushFullBuffer(1);
|
| + }
|
| + buffer[bufferPos++] = (byte)b;
|
| + }
|
| +
|
| + @Override
|
| + public synchronized void write(byte[] b, int offset, int length) {
|
| + if (length <= buffer.length - bufferPos) {
|
| + // The bytes can fit into the current buffer.
|
| + System.arraycopy(b, offset, buffer, bufferPos, length);
|
| + bufferPos += length;
|
| + } else {
|
| + // Use up the current buffer
|
| + int copySize = buffer.length - bufferPos;
|
| + System.arraycopy(b, offset, buffer, bufferPos, copySize);
|
| + offset += copySize;
|
| + length -= copySize;
|
| + // Flush the buffer, and get a new buffer at least big enough to cover
|
| + // what we still need to output
|
| + flushFullBuffer(length);
|
| + System.arraycopy(b, offset, buffer, 0 /* count */, length);
|
| + bufferPos = length;
|
| + }
|
| + }
|
| +
|
| /**
|
| - * Creates a {@code ByteString} instance from this {@code Output}.
|
| + * Creates a byte string. Its size is the current size of this output
|
| + * stream and its output has been copied to it.
|
| + *
|
| + * @return the current contents of this output stream, as a byte string.
|
| */
|
| - public ByteString toByteString() {
|
| - final byte[] byteArray = bout.toByteArray();
|
| - return new ByteString(byteArray);
|
| + public synchronized ByteString toByteString() {
|
| + flushLastBuffer();
|
| + return ByteString.copyFrom(flushedBuffers);
|
| }
|
| +
|
| + /**
|
| + * Writes the complete contents of this byte array output stream to
|
| + * the specified output stream argument.
|
| + *
|
| + * @param out the output stream to which to write the data.
|
| + * @throws IOException if an I/O error occurs.
|
| + */
|
| + public void writeTo(OutputStream out) throws IOException {
|
| + ByteString[] cachedFlushBuffers;
|
| + byte[] cachedBuffer;
|
| + int cachedBufferPos;
|
| + synchronized (this) {
|
| + // Copy the information we need into local variables so as to hold
|
| + // the lock for as short a time as possible.
|
| + cachedFlushBuffers =
|
| + flushedBuffers.toArray(new ByteString[flushedBuffers.size()]);
|
| + cachedBuffer = buffer;
|
| + cachedBufferPos = bufferPos;
|
| + }
|
| + for (ByteString byteString : cachedFlushBuffers) {
|
| + byteString.writeTo(out);
|
| + }
|
| +
|
| + out.write(Arrays.copyOf(cachedBuffer, cachedBufferPos));
|
| + }
|
| +
|
| + /**
|
| + * Returns the current size of the output stream.
|
| + *
|
| + * @return the current size of the output stream
|
| + */
|
| + public synchronized int size() {
|
| + return flushedBuffersTotalBytes + bufferPos;
|
| + }
|
| +
|
| + /**
|
| + * Resets this stream, so that all currently accumulated output in the
|
| + * output stream is discarded. The output stream can be used again,
|
| + * reusing the already allocated buffer space.
|
| + */
|
| + public synchronized void reset() {
|
| + flushedBuffers.clear();
|
| + flushedBuffersTotalBytes = 0;
|
| + bufferPos = 0;
|
| + }
|
| +
|
| + @Override
|
| + public String toString() {
|
| + return String.format("<ByteString.Output@%s size=%d>",
|
| + Integer.toHexString(System.identityHashCode(this)), size());
|
| + }
|
| +
|
| + /**
|
| + * Internal function used by writers. The current buffer is full, and the
|
| + * writer needs a new buffer whose size is at least the specified minimum
|
| + * size.
|
| + */
|
| + private void flushFullBuffer(int minSize) {
|
| + flushedBuffers.add(new LiteralByteString(buffer));
|
| + flushedBuffersTotalBytes += buffer.length;
|
| + // We want to increase our total capacity by 50%, but as a minimum,
|
| + // the new buffer should also at least be >= minSize and
|
| + // >= initial Capacity.
|
| + int newSize = Math.max(initialCapacity,
|
| + Math.max(minSize, flushedBuffersTotalBytes >>> 1));
|
| + buffer = new byte[newSize];
|
| + bufferPos = 0;
|
| + }
|
| +
|
| + /**
|
| + * Internal function used by {@link #toByteString()}. The current buffer may
|
| + * or may not be full, but it needs to be flushed.
|
| + */
|
| + private void flushLastBuffer() {
|
| + if (bufferPos < buffer.length) {
|
| + if (bufferPos > 0) {
|
| + byte[] bufferCopy = Arrays.copyOf(buffer, bufferPos);
|
| + flushedBuffers.add(new LiteralByteString(bufferCopy));
|
| + }
|
| + // We reuse this buffer for further writes.
|
| + } else {
|
| + // Buffer is completely full. Huzzah.
|
| + flushedBuffers.add(new LiteralByteString(buffer));
|
| + // 99% of the time, we're not going to use this OutputStream again.
|
| + // We set buffer to an empty byte stream so that we're handling this
|
| + // case without wasting space. In the rare case that more writes
|
| + // *do* occur, this empty buffer will be flushed and an appropriately
|
| + // sized new buffer will be created.
|
| + buffer = EMPTY_BYTE_ARRAY;
|
| + }
|
| + flushedBuffersTotalBytes += bufferPos;
|
| + bufferPos = 0;
|
| + }
|
| }
|
|
|
| /**
|
| - * Constructs a new ByteString builder, which allows you to efficiently
|
| - * construct a {@code ByteString} by writing to a {@link CodedOutputStream}.
|
| - * Using this is much more efficient than calling {@code newOutput()} and
|
| - * wrapping that in a {@code CodedOutputStream}.
|
| + * Constructs a new {@code ByteString} builder, which allows you to
|
| + * efficiently construct a {@code ByteString} by writing to a {@link
|
| + * CodedOutputStream}. Using this is much more efficient than calling {@code
|
| + * newOutput()} and wrapping that in a {@code CodedOutputStream}.
|
| *
|
| * <p>This is package-private because it's a somewhat confusing interface.
|
| * Users can call {@link Message#toByteString()} instead of calling this
|
| * directly.
|
| *
|
| - * @param size The target byte size of the {@code ByteString}. You must
|
| - * write exactly this many bytes before building the result.
|
| + * @param size The target byte size of the {@code ByteString}. You must write
|
| + * exactly this many bytes before building the result.
|
| + * @return the builder
|
| */
|
| - static CodedBuilder newCodedBuilder(final int size) {
|
| + static CodedBuilder newCodedBuilder(int size) {
|
| return new CodedBuilder(size);
|
| }
|
|
|
| @@ -382,7 +895,7 @@
|
| private final CodedOutputStream output;
|
| private final byte[] buffer;
|
|
|
| - private CodedBuilder(final int size) {
|
| + private CodedBuilder(int size) {
|
| buffer = new byte[size];
|
| output = CodedOutputStream.newInstance(buffer);
|
| }
|
| @@ -393,11 +906,57 @@
|
| // We can be confident that the CodedOutputStream will not modify the
|
| // underlying bytes anymore because it already wrote all of them. So,
|
| // no need to make a copy.
|
| - return new ByteString(buffer);
|
| + return new LiteralByteString(buffer);
|
| }
|
|
|
| public CodedOutputStream getCodedOutput() {
|
| return output;
|
| }
|
| }
|
| +
|
| + // =================================================================
|
| + // Methods {@link RopeByteString} needs on instances, which aren't part of the
|
| + // public API.
|
| +
|
| + /**
|
| + * Return the depth of the tree representing this {@code ByteString}, if any,
|
| + * whose root is this node. If this is a leaf node, return 0.
|
| + *
|
| + * @return tree depth or zero
|
| + */
|
| + protected abstract int getTreeDepth();
|
| +
|
| + /**
|
| + * Return {@code true} if this ByteString is literal (a leaf node) or a
|
| + * flat-enough tree in the sense of {@link RopeByteString}.
|
| + *
|
| + * @return true if the tree is flat enough
|
| + */
|
| + protected abstract boolean isBalanced();
|
| +
|
| + /**
|
| + * Return the cached hash code if available.
|
| + *
|
| + * @return value of cached hash code or 0 if not computed yet
|
| + */
|
| + protected abstract int peekCachedHashCode();
|
| +
|
| + /**
|
| + * Compute the hash across the value bytes starting with the given hash, and
|
| + * return the result. This is used to compute the hash across strings
|
| + * represented as a set of pieces by allowing the hash computation to be
|
| + * continued from piece to piece.
|
| + *
|
| + * @param h starting hash value
|
| + * @param offset offset into this value to start looking at data values
|
| + * @param length number of data values to include in the hash computation
|
| + * @return ending hash value
|
| + */
|
| + protected abstract int partialHash(int h, int offset, int length);
|
| +
|
| + @Override
|
| + public String toString() {
|
| + return String.format("<ByteString@%s size=%d>",
|
| + Integer.toHexString(System.identityHashCode(this)), size());
|
| + }
|
| }
|
|
|