Index: third_party/protobuf/java/src/main/java/com/google/protobuf/ByteString.java |
=================================================================== |
--- third_party/protobuf/java/src/main/java/com/google/protobuf/ByteString.java (revision 216642) |
+++ third_party/protobuf/java/src/main/java/com/google/protobuf/ByteString.java (working copy) |
@@ -30,140 +30,413 @@ |
package com.google.protobuf; |
+import java.io.ByteArrayOutputStream; |
+import java.io.IOException; |
import java.io.InputStream; |
-import java.io.ByteArrayInputStream; |
-import java.io.ByteArrayOutputStream; |
-import java.io.FilterOutputStream; |
+import java.io.OutputStream; |
import java.io.UnsupportedEncodingException; |
import java.nio.ByteBuffer; |
+import java.util.ArrayList; |
+import java.util.Arrays; |
+import java.util.Collection; |
+import java.util.Iterator; |
import java.util.List; |
+import java.util.NoSuchElementException; |
/** |
- * Immutable array of bytes. |
+ * Immutable sequence of bytes. Substring is supported by sharing the reference |
+ * to the immutable underlying bytes, as with {@link String}. Concatenation is |
+ * likewise supported without copying (long strings) by building a tree of |
+ * pieces in {@link RopeByteString}. |
+ * <p> |
+ * Like {@link String}, the contents of a {@link ByteString} can never be |
+ * observed to change, not even in the presence of a data race or incorrect |
+ * API usage in the client code. |
* |
* @author crazybob@google.com Bob Lee |
* @author kenton@google.com Kenton Varda |
+ * @author carlanton@google.com Carl Haverl |
+ * @author martinrb@google.com Martin Buchholz |
*/ |
-public final class ByteString { |
- private final byte[] bytes; |
+public abstract class ByteString implements Iterable<Byte> { |
- private ByteString(final byte[] bytes) { |
- this.bytes = bytes; |
- } |
+ /** |
+ * When two strings to be concatenated have a combined length shorter than |
+ * this, we just copy their bytes on {@link #concat(ByteString)}. |
+ * The trade-off is copy size versus the overhead of creating tree nodes |
+ * in {@link RopeByteString}. |
+ */ |
+ static final int CONCATENATE_BY_COPY_SIZE = 128; |
/** |
- * Gets the byte at the given index. |
+ * When copying an InputStream into a ByteString with .readFrom(), |
+ * the chunks in the underlying rope start at 256 bytes, but double |
+ * each iteration up to 8192 bytes. |
+ */ |
+ static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b |
+ static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k |
+ |
+ /** |
+ * Empty {@code ByteString}. |
+ */ |
+ public static final ByteString EMPTY = new LiteralByteString(new byte[0]); |
+ |
+ // This constructor is here to prevent subclassing outside of this package. |
+ ByteString() {} |
+ |
+ /** |
+ * Gets the byte at the given index. This method should be used only for |
+ * random access to individual bytes. To access bytes sequentially, use the |
+ * {@link ByteIterator} returned by {@link #iterator()}, and call {@link |
+ * #substring(int, int)} first if necessary. |
* |
+ * @param index index of byte |
+ * @return the value |
* @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size |
*/ |
- public byte byteAt(final int index) { |
- return bytes[index]; |
+ public abstract byte byteAt(int index); |
+ |
+ /** |
+ * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString. |
+ * To avoid auto-boxing, you may get the iterator manually and call |
+ * {@link ByteIterator#nextByte()}. |
+ * |
+ * @return the iterator |
+ */ |
+ public abstract ByteIterator iterator(); |
+ |
+ /** |
+ * This interface extends {@code Iterator<Byte>}, so that we can return an |
+ * unboxed {@code byte}. |
+ */ |
+ public interface ByteIterator extends Iterator<Byte> { |
+ /** |
+ * An alternative to {@link Iterator#next()} that returns an |
+ * unboxed primitive {@code byte}. |
+ * |
+ * @return the next {@code byte} in the iteration |
+ * @throws NoSuchElementException if the iteration has no more elements |
+ */ |
+ byte nextByte(); |
} |
/** |
* Gets the number of bytes. |
+ * |
+ * @return size in bytes |
*/ |
- public int size() { |
- return bytes.length; |
- } |
+ public abstract int size(); |
/** |
* Returns {@code true} if the size is {@code 0}, {@code false} otherwise. |
+ * |
+ * @return true if this is zero bytes long |
*/ |
public boolean isEmpty() { |
- return bytes.length == 0; |
+ return size() == 0; |
} |
// ================================================================= |
- // byte[] -> ByteString |
+ // ByteString -> substring |
/** |
- * Empty ByteString. |
+ * Return the substring from {@code beginIndex}, inclusive, to the end of the |
+ * string. |
+ * |
+ * @param beginIndex start at this index |
+ * @return substring sharing underlying data |
+ * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or |
+ * {@code beginIndex > size()}. |
*/ |
- public static final ByteString EMPTY = new ByteString(new byte[0]); |
+ public ByteString substring(int beginIndex) { |
+ return substring(beginIndex, size()); |
+ } |
/** |
+ * Return the substring from {@code beginIndex}, inclusive, to {@code |
+ * endIndex}, exclusive. |
+ * |
+ * @param beginIndex start at this index |
+ * @param endIndex the last character is the one before this index |
+ * @return substring sharing underlying data |
+ * @throws IndexOutOfBoundsException if {@code beginIndex < 0}, |
+ * {@code endIndex > size()}, or {@code beginIndex > endIndex}. |
+ */ |
+ public abstract ByteString substring(int beginIndex, int endIndex); |
+ |
+ /** |
+ * Tests if this bytestring starts with the specified prefix. |
+ * Similar to {@link String#startsWith(String)} |
+ * |
+ * @param prefix the prefix. |
+ * @return <code>true</code> if the byte sequence represented by the |
+ * argument is a prefix of the byte sequence represented by |
+ * this string; <code>false</code> otherwise. |
+ */ |
+ public boolean startsWith(ByteString prefix) { |
+ return size() >= prefix.size() && |
+ substring(0, prefix.size()).equals(prefix); |
+ } |
+ |
+ // ================================================================= |
+ // byte[] -> ByteString |
+ |
+ /** |
* Copies the given bytes into a {@code ByteString}. |
+ * |
+ * @param bytes source array |
+ * @param offset offset in source array |
+ * @param size number of bytes to copy |
+ * @return new {@code ByteString} |
*/ |
- public static ByteString copyFrom(final byte[] bytes, final int offset, |
- final int size) { |
- final byte[] copy = new byte[size]; |
+ public static ByteString copyFrom(byte[] bytes, int offset, int size) { |
+ byte[] copy = new byte[size]; |
System.arraycopy(bytes, offset, copy, 0, size); |
- return new ByteString(copy); |
+ return new LiteralByteString(copy); |
} |
/** |
* Copies the given bytes into a {@code ByteString}. |
+ * |
+ * @param bytes to copy |
+ * @return new {@code ByteString} |
*/ |
- public static ByteString copyFrom(final byte[] bytes) { |
+ public static ByteString copyFrom(byte[] bytes) { |
return copyFrom(bytes, 0, bytes.length); |
} |
/** |
- * Copies {@code size} bytes from a {@code java.nio.ByteBuffer} into |
+ * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into |
* a {@code ByteString}. |
+ * |
+ * @param bytes source buffer |
+ * @param size number of bytes to copy |
+ * @return new {@code ByteString} |
*/ |
- public static ByteString copyFrom(final ByteBuffer bytes, final int size) { |
- final byte[] copy = new byte[size]; |
+ public static ByteString copyFrom(ByteBuffer bytes, int size) { |
+ byte[] copy = new byte[size]; |
bytes.get(copy); |
- return new ByteString(copy); |
+ return new LiteralByteString(copy); |
} |
/** |
* Copies the remaining bytes from a {@code java.nio.ByteBuffer} into |
* a {@code ByteString}. |
+ * |
+ * @param bytes source buffer |
+ * @return new {@code ByteString} |
*/ |
- public static ByteString copyFrom(final ByteBuffer bytes) { |
+ public static ByteString copyFrom(ByteBuffer bytes) { |
return copyFrom(bytes, bytes.remaining()); |
} |
/** |
* Encodes {@code text} into a sequence of bytes using the named charset |
* and returns the result as a {@code ByteString}. |
+ * |
+ * @param text source string |
+ * @param charsetName encoding to use |
+ * @return new {@code ByteString} |
+ * @throws UnsupportedEncodingException if the encoding isn't found |
*/ |
- public static ByteString copyFrom(final String text, final String charsetName) |
+ public static ByteString copyFrom(String text, String charsetName) |
throws UnsupportedEncodingException { |
- return new ByteString(text.getBytes(charsetName)); |
+ return new LiteralByteString(text.getBytes(charsetName)); |
} |
/** |
* Encodes {@code text} into a sequence of UTF-8 bytes and returns the |
* result as a {@code ByteString}. |
+ * |
+ * @param text source string |
+ * @return new {@code ByteString} |
*/ |
- public static ByteString copyFromUtf8(final String text) { |
+ public static ByteString copyFromUtf8(String text) { |
try { |
- return new ByteString(text.getBytes("UTF-8")); |
+ return new LiteralByteString(text.getBytes("UTF-8")); |
} catch (UnsupportedEncodingException e) { |
throw new RuntimeException("UTF-8 not supported?", e); |
} |
} |
+ // ================================================================= |
+ // InputStream -> ByteString |
+ |
/** |
- * Concatenates all byte strings in the list and returns the result. |
+ * Completely reads the given stream's bytes into a |
+ * {@code ByteString}, blocking if necessary until all bytes are |
+ * read through to the end of the stream. |
* |
+ * <b>Performance notes:</b> The returned {@code ByteString} is an |
+ * immutable tree of byte arrays ("chunks") of the stream data. The |
+ * first chunk is small, with subsequent chunks each being double |
+ * the size, up to 8K. If the caller knows the precise length of |
+ * the stream and wishes to avoid all unnecessary copies and |
+ * allocations, consider using the two-argument version of this |
+ * method, below. |
+ * |
+ * @param streamToDrain The source stream, which is read completely |
+ * but not closed. |
+ * @return A new {@code ByteString} which is made up of chunks of |
+ * various sizes, depending on the behavior of the underlying |
+ * stream. |
+ * @throws IOException IOException is thrown if there is a problem |
+ * reading the underlying stream. |
+ */ |
+ public static ByteString readFrom(InputStream streamToDrain) |
+ throws IOException { |
+ return readFrom( |
+ streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE); |
+ } |
+ |
+ /** |
+ * Completely reads the given stream's bytes into a |
+ * {@code ByteString}, blocking if necessary until all bytes are |
+ * read through to the end of the stream. |
+ * |
+ * <b>Performance notes:</b> The returned {@code ByteString} is an |
+ * immutable tree of byte arrays ("chunks") of the stream data. The |
+ * chunkSize parameter sets the size of these byte arrays. In |
+ * particular, if the chunkSize is precisely the same as the length |
+ * of the stream, unnecessary allocations and copies will be |
+ * avoided. Otherwise, the chunks will be of the given size, except |
+ * for the last chunk, which will be resized (via a reallocation and |
+ * copy) to contain the remainder of the stream. |
+ * |
+ * @param streamToDrain The source stream, which is read completely |
+ * but not closed. |
+ * @param chunkSize The size of the chunks in which to read the |
+ * stream. |
+ * @return A new {@code ByteString} which is made up of chunks of |
+ * the given size. |
+ * @throws IOException IOException is thrown if there is a problem |
+ * reading the underlying stream. |
+ */ |
+ public static ByteString readFrom(InputStream streamToDrain, int chunkSize) |
+ throws IOException { |
+ return readFrom(streamToDrain, chunkSize, chunkSize); |
+ } |
+ |
+ // Helper that reads in chunks growing from minChunkSize up to maxChunkSize. |
+ public static ByteString readFrom(InputStream streamToDrain, int minChunkSize, |
+ int maxChunkSize) throws IOException { |
+ Collection<ByteString> results = new ArrayList<ByteString>(); |
+ |
+ // copy the inbound bytes into a list of chunks; the chunk size doubles |
+ // each round (computed as a long so that it cannot overflow int). |
+ int chunkSize = minChunkSize; |
+ while (true) { |
+ ByteString chunk = readChunk(streamToDrain, chunkSize); |
+ if (chunk == null) { |
+ break; |
+ } |
+ results.add(chunk); |
+ chunkSize = (int) Math.min(chunkSize * 2L, maxChunkSize); |
+ } |
+ |
+ return ByteString.copyFrom(results); |
+ } |
+ |
+ /** |
+ * Blocks until a chunk of the given size can be made from the |
+ * stream, or EOF is reached. Calls read() repeatedly in case the |
+ * given stream implementation doesn't completely fill the given |
+ * buffer in one read() call. |
+ * |
+ * @return A chunk of the desired size, or else a chunk as large as |
+ * was available when end of stream was reached. Returns null if the |
+ * given stream had no more data in it. |
+ */ |
+ private static ByteString readChunk(InputStream in, final int chunkSize) |
+ throws IOException { |
+ final byte[] buf = new byte[chunkSize]; |
+ int bytesRead = 0; |
+ while (bytesRead < chunkSize) { |
+ final int count = in.read(buf, bytesRead, chunkSize - bytesRead); |
+ if (count == -1) { |
+ break; |
+ } |
+ bytesRead += count; |
+ } |
+ |
+ if (bytesRead == 0) { |
+ return null; |
+ } else { |
+ return ByteString.copyFrom(buf, 0, bytesRead); |
+ } |
+ } |
+ |
+ // ================================================================= |
+ // Multiple ByteStrings -> One ByteString |
+ |
+ /** |
+ * Concatenate the given {@code ByteString} to this one. Short concatenations, |
+ * of total size smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are |
+ * produced by copying the underlying bytes (as per Rope.java, <a |
+ * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf"> |
+ * BAP95 </a>). In general, the concatenation involves no copying. |
+ * |
+ * @param other string to concatenate |
+ * @return a new {@code ByteString} instance |
+ */ |
+ public ByteString concat(ByteString other) { |
+ int thisSize = size(); |
+ int otherSize = other.size(); |
+ if ((long) thisSize + otherSize >= Integer.MAX_VALUE) { |
+ throw new IllegalArgumentException("ByteString would be too long: " + |
+ thisSize + "+" + otherSize); |
+ } |
+ |
+ return RopeByteString.concatenate(this, other); |
+ } |
+ |
+ /** |
+ * Concatenates all byte strings in the iterable and returns the result. |
+ * This is designed to run in O(list size), not O(total bytes). |
+ * |
* <p>The returned {@code ByteString} is not necessarily a unique object. |
* If the list is empty, the returned object is the singleton empty |
* {@code ByteString}. If the list has only one element, that |
* {@code ByteString} will be returned without copying. |
+ * |
+ * @param byteStrings strings to be concatenated |
+ * @return new {@code ByteString} |
*/ |
- public static ByteString copyFrom(List<ByteString> list) { |
- if (list.size() == 0) { |
- return EMPTY; |
- } else if (list.size() == 1) { |
- return list.get(0); |
+ public static ByteString copyFrom(Iterable<ByteString> byteStrings) { |
+ Collection<ByteString> collection; |
+ if (!(byteStrings instanceof Collection)) { |
+ collection = new ArrayList<ByteString>(); |
+ for (ByteString byteString : byteStrings) { |
+ collection.add(byteString); |
+ } |
+ } else { |
+ collection = (Collection<ByteString>) byteStrings; |
} |
+ ByteString result; |
+ if (collection.isEmpty()) { |
+ result = EMPTY; |
+ } else { |
+ result = balancedConcat(collection.iterator(), collection.size()); |
+ } |
+ return result; |
+ } |
- int size = 0; |
- for (ByteString str : list) { |
- size += str.size(); |
+ // Internal function used by copyFrom(Iterable<ByteString>). |
+ // Create a balanced concatenation of the next "length" elements from the |
+ // iterable. |
+ private static ByteString balancedConcat(Iterator<ByteString> iterator, |
+ int length) { |
+ assert length >= 1; |
+ ByteString result; |
+ if (length == 1) { |
+ result = iterator.next(); |
+ } else { |
+ int halfLength = length >>> 1; |
+ ByteString left = balancedConcat(iterator, halfLength); |
+ ByteString right = balancedConcat(iterator, length - halfLength); |
+ result = left.concat(right); |
} |
- byte[] bytes = new byte[size]; |
- int pos = 0; |
- for (ByteString str : list) { |
- System.arraycopy(str.bytes, 0, bytes, pos, str.size()); |
- pos += str.size(); |
- } |
- return new ByteString(bytes); |
+ return result; |
} |
// ================================================================= |
@@ -174,206 +447,446 @@ |
* |
* @param target buffer to copy into |
* @param offset in the target buffer |
+ * @throws IndexOutOfBoundsException if the offset is negative or too large |
*/ |
- public void copyTo(final byte[] target, final int offset) { |
- System.arraycopy(bytes, 0, target, offset, bytes.length); |
+ public void copyTo(byte[] target, int offset) { |
+ copyTo(target, 0, offset, size()); |
} |
/** |
* Copies bytes into a buffer. |
* |
- * @param target buffer to copy into |
+ * @param target buffer to copy into |
* @param sourceOffset offset within these bytes |
* @param targetOffset offset within the target buffer |
- * @param size number of bytes to copy |
+ * @param numberToCopy number of bytes to copy |
+ * @throws IndexOutOfBoundsException if an offset or size is negative or too |
+ * large |
*/ |
- public void copyTo(final byte[] target, final int sourceOffset, |
- final int targetOffset, |
- final int size) { |
- System.arraycopy(bytes, sourceOffset, target, targetOffset, size); |
+ public void copyTo(byte[] target, int sourceOffset, int targetOffset, |
+ int numberToCopy) { |
+ if (sourceOffset < 0) { |
+ throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset); |
+ } |
+ if (targetOffset < 0) { |
+ throw new IndexOutOfBoundsException("Target offset < 0: " + targetOffset); |
+ } |
+ if (numberToCopy < 0) { |
+ throw new IndexOutOfBoundsException("Length < 0: " + numberToCopy); |
+ } |
+ // Compare against the remaining room instead of summing: offset + length |
+ // can overflow int and would then slip past a "sum > limit" check. |
+ if (numberToCopy > size() - sourceOffset) { |
+ throw new IndexOutOfBoundsException( |
+ "Source end offset > size: " + ((long) sourceOffset + numberToCopy)); |
+ } |
+ if (numberToCopy > target.length - targetOffset) { |
+ throw new IndexOutOfBoundsException("Target end offset > target length: " |
+ + ((long) targetOffset + numberToCopy)); |
+ } |
+ if (numberToCopy > 0) { |
+ copyToInternal(target, sourceOffset, targetOffset, numberToCopy); |
+ } |
+ } |
/** |
+ * Internal (package private) implementation of |
+ * {@link #copyTo(byte[],int,int,int)}. |
+ * It assumes that all error checking has already been performed and that |
+ * {@code numberToCopy > 0}. |
+ */ |
+ protected abstract void copyToInternal(byte[] target, int sourceOffset, |
+ int targetOffset, int numberToCopy); |
+ |
+ /** |
* Copies bytes into a ByteBuffer. |
* |
* @param target ByteBuffer to copy into. |
- * @throws ReadOnlyBufferException if the {@code target} is read-only |
- * @throws BufferOverflowException if the {@code target}'s remaining() |
- * space is not large enough to hold the data. |
+ * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only |
+ * @throws java.nio.BufferOverflowException if the {@code target}'s |
+ * remaining() space is not large enough to hold the data. |
*/ |
- public void copyTo(ByteBuffer target) { |
- target.put(bytes, 0, bytes.length); |
- } |
+ public abstract void copyTo(ByteBuffer target); |
/** |
* Copies bytes to a {@code byte[]}. |
+ * |
+ * @return copied bytes |
*/ |
public byte[] toByteArray() { |
- final int size = bytes.length; |
- final byte[] copy = new byte[size]; |
- System.arraycopy(bytes, 0, copy, 0, size); |
- return copy; |
+ int size = size(); |
+ byte[] result = new byte[size]; |
+ // Go through copyTo(): it only calls copyToInternal() when size > 0. |
+ copyTo(result, 0, 0, size); |
+ return result; |
} |
/** |
- * Constructs a new read-only {@code java.nio.ByteBuffer} with the |
- * same backing byte array. |
+ * Writes the complete contents of this byte string to |
+ * the specified output stream argument. |
+ * |
+ * @param out the output stream to which to write the data. |
+ * @throws IOException if an I/O error occurs. |
*/ |
- public ByteBuffer asReadOnlyByteBuffer() { |
- final ByteBuffer byteBuffer = ByteBuffer.wrap(bytes); |
- return byteBuffer.asReadOnlyBuffer(); |
- } |
+ public abstract void writeTo(OutputStream out) throws IOException; |
/** |
+ * Constructs a read-only {@code java.nio.ByteBuffer} whose content |
+ * is equal to the contents of this byte string. |
+ * The result uses the same backing array as the byte string, if possible. |
+ * |
+ * @return wrapped bytes |
+ */ |
+ public abstract ByteBuffer asReadOnlyByteBuffer(); |
+ |
+ /** |
+ * Constructs a list of read-only {@code java.nio.ByteBuffer} objects |
+ * such that the concatenation of their contents is equal to the contents |
+ * of this byte string. The result uses the same backing arrays as the |
+ * byte string. |
+ * <p> |
+ * By returning a list, implementations of this method may be able to avoid |
+ * copying even when there are multiple backing arrays. |
+ * |
+ * @return a list of wrapped bytes |
+ */ |
+ public abstract List<ByteBuffer> asReadOnlyByteBufferList(); |
+ |
+ /** |
* Constructs a new {@code String} by decoding the bytes using the |
* specified charset. |
+ * |
+ * @param charsetName decode using this charset |
+ * @return new string |
+ * @throws UnsupportedEncodingException if charset isn't recognized |
*/ |
- public String toString(final String charsetName) |
- throws UnsupportedEncodingException { |
- return new String(bytes, charsetName); |
- } |
+ public abstract String toString(String charsetName) |
+ throws UnsupportedEncodingException; |
+ // ================================================================= |
+ // UTF-8 decoding |
+ |
/** |
* Constructs a new {@code String} by decoding the bytes as UTF-8. |
+ * |
+ * @return new string using UTF-8 encoding |
*/ |
public String toStringUtf8() { |
try { |
- return new String(bytes, "UTF-8"); |
+ return toString("UTF-8"); |
} catch (UnsupportedEncodingException e) { |
throw new RuntimeException("UTF-8 not supported?", e); |
} |
} |
+ /** |
+ * Tells whether this {@code ByteString} represents a well-formed UTF-8 |
+ * byte sequence, such that the original bytes can be converted to a |
+ * String object and then round tripped back to bytes without loss. |
+ * |
+ * <p>More precisely, returns {@code true} whenever: <pre> {@code |
+ * Arrays.equals(byteString.toByteArray(), |
+ * new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8")) |
+ * }</pre> |
+ * |
+ * <p>This method returns {@code false} for "overlong" byte sequences, |
+ * as well as for 3-byte sequences that would map to a surrogate |
+ * character, in accordance with the restricted definition of UTF-8 |
+ * introduced in Unicode 3.1. Note that the UTF-8 decoder included in |
+ * Oracle's JDK has been modified to also reject "overlong" byte |
+ * sequences, but (as of 2011) still accepts 3-byte surrogate |
+ * character byte sequences. |
+ * |
+ * <p>See the Unicode Standard,<br> |
+ * Table 3-6. <em>UTF-8 Bit Distribution</em>,<br> |
+ * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>. |
+ * |
+ * @return whether the bytes in this {@code ByteString} are a |
+ * well-formed UTF-8 byte sequence |
+ */ |
+ public abstract boolean isValidUtf8(); |
+ |
+ /** |
+ * Tells whether the given byte sequence is a well-formed, malformed, or |
+ * incomplete UTF-8 byte sequence. This method accepts and returns a partial |
+ * state result, allowing the bytes for a complete UTF-8 byte sequence to be |
+ * composed from multiple {@code ByteString} segments. |
+ * |
+ * @param state either {@code 0} (if this is the initial decoding operation) |
+ * or the value returned from a call to a partial decoding method for the |
+ * previous bytes |
+ * @param offset offset of the first byte to check |
+ * @param length number of bytes to check |
+ * |
+ * @return {@code -1} if the partial byte sequence is definitely malformed, |
+ * {@code 0} if it is well-formed (no additional input needed), or, if the |
+ * byte sequence is "incomplete", i.e. apparently terminated in the middle of |
+ * a character, an opaque integer "state" value containing enough information |
+ * to decode the character when passed to a subsequent invocation of a |
+ * partial decoding method. |
+ */ |
+ protected abstract int partialIsValidUtf8(int state, int offset, int length); |
+ |
// ================================================================= |
// equals() and hashCode() |
@Override |
- public boolean equals(final Object o) { |
- if (o == this) { |
- return true; |
- } |
+ public abstract boolean equals(Object o); |
- if (!(o instanceof ByteString)) { |
- return false; |
- } |
- |
- final ByteString other = (ByteString) o; |
- final int size = bytes.length; |
- if (size != other.bytes.length) { |
- return false; |
- } |
- |
- final byte[] thisBytes = bytes; |
- final byte[] otherBytes = other.bytes; |
- for (int i = 0; i < size; i++) { |
- if (thisBytes[i] != otherBytes[i]) { |
- return false; |
- } |
- } |
- |
- return true; |
- } |
- |
- private volatile int hash = 0; |
- |
+ /** |
+ * Return a non-zero hashCode depending only on the sequence of bytes |
+ * in this ByteString. |
+ * |
+ * @return hashCode value for this object |
+ */ |
@Override |
- public int hashCode() { |
- int h = hash; |
+ public abstract int hashCode(); |
- if (h == 0) { |
- final byte[] thisBytes = bytes; |
- final int size = bytes.length; |
- |
- h = size; |
- for (int i = 0; i < size; i++) { |
- h = h * 31 + thisBytes[i]; |
- } |
- if (h == 0) { |
- h = 1; |
- } |
- |
- hash = h; |
- } |
- |
- return h; |
- } |
- |
// ================================================================= |
// Input stream |
/** |
* Creates an {@code InputStream} which can be used to read the bytes. |
+ * <p> |
+ * The {@link InputStream} returned by this method is guaranteed to be |
+ * completely non-blocking. The method {@link InputStream#available()} |
+ * returns the number of bytes remaining in the stream. The methods |
+ * {@link InputStream#read(byte[])}, {@link InputStream#read(byte[],int,int)} |
+ * and {@link InputStream#skip(long)} will read/skip as many bytes as are |
+ * available. |
+ * <p> |
+ * The methods in the returned {@link InputStream} might <b>not</b> be |
+ * thread safe. |
+ * |
+ * @return an input stream that returns the bytes of this byte string. |
*/ |
- public InputStream newInput() { |
- return new ByteArrayInputStream(bytes); |
- } |
+ public abstract InputStream newInput(); |
/** |
* Creates a {@link CodedInputStream} which can be used to read the bytes. |
- * Using this is more efficient than creating a {@link CodedInputStream} |
- * wrapping the result of {@link #newInput()}. |
+ * Using this is often more efficient than creating a {@link CodedInputStream} |
+ * that wraps the result of {@link #newInput()}. |
+ * |
+ * @return stream based on wrapped data |
*/ |
- public CodedInputStream newCodedInput() { |
- // We trust CodedInputStream not to modify the bytes, or to give anyone |
- // else access to them. |
- return CodedInputStream.newInstance(bytes); |
- } |
+ public abstract CodedInputStream newCodedInput(); |
// ================================================================= |
// Output stream |
/** |
- * Creates a new {@link Output} with the given initial capacity. |
+ * Creates a new {@link Output} with the given initial capacity. Call {@link |
+ * Output#toByteString()} to create the {@code ByteString} instance. |
+ * <p> |
+ * A {@link ByteString.Output} offers the same functionality as a |
+ * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString} |
+ * rather than a {@code byte} array. |
+ * |
+ * @param initialCapacity estimate of number of bytes to be written |
+ * @return {@code OutputStream} for building a {@code ByteString} |
*/ |
- public static Output newOutput(final int initialCapacity) { |
- return new Output(new ByteArrayOutputStream(initialCapacity)); |
+ public static Output newOutput(int initialCapacity) { |
+ return new Output(initialCapacity); |
} |
/** |
- * Creates a new {@link Output}. |
+ * Creates a new {@link Output}. Call {@link Output#toByteString()} to create |
+ * the {@code ByteString} instance. |
+ * <p> |
+ * A {@link ByteString.Output} offers the same functionality as a |
+ * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString} |
+ * rather than a {@code byte} array. |
+ * |
+ * @return {@code OutputStream} for building a {@code ByteString} |
*/ |
public static Output newOutput() { |
- return newOutput(32); |
+ return new Output(CONCATENATE_BY_COPY_SIZE); |
} |
/** |
* Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to |
* create the {@code ByteString} instance. |
*/ |
- public static final class Output extends FilterOutputStream { |
- private final ByteArrayOutputStream bout; |
+ public static final class Output extends OutputStream { |
+ // Implementation note. |
+ // The public methods of this class must be synchronized. ByteStrings |
+ // are guaranteed to be immutable. Without some sort of locking, it could |
+ // be possible for one thread to call toByteString(), while another thread |
+ // is still modifying the underlying byte array. |
+ private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; |
+ // argument passed by user, indicating initial capacity. |
+ private final int initialCapacity; |
+ // ByteStrings to be concatenated to create the result |
+ private final ArrayList<ByteString> flushedBuffers; |
+ // Total number of bytes in the ByteStrings of flushedBuffers |
+ private int flushedBuffersTotalBytes; |
+ // Current buffer to which we are writing |
+ private byte[] buffer; |
+ // Location in buffer[] to which we write the next byte. |
+ private int bufferPos; |
+ |
/** |
- * Constructs a new output with the given initial capacity. |
+ * Creates a new ByteString output stream with the specified |
+ * initial capacity. |
+ * |
+ * @param initialCapacity the initial capacity of the output stream. |
*/ |
- private Output(final ByteArrayOutputStream bout) { |
- super(bout); |
- this.bout = bout; |
+ Output(int initialCapacity) { |
+ if (initialCapacity < 0) { |
+ throw new IllegalArgumentException("Buffer size < 0"); |
+ } |
+ this.initialCapacity = initialCapacity; |
+ this.flushedBuffers = new ArrayList<ByteString>(); |
+ this.buffer = new byte[initialCapacity]; |
} |
+ @Override |
+ public synchronized void write(int b) { |
+ if (bufferPos == buffer.length) { |
+ flushFullBuffer(1); |
+ } |
+ buffer[bufferPos++] = (byte)b; |
+ } |
+ |
+ @Override |
+ public synchronized void write(byte[] b, int offset, int length) { |
+ if (length <= buffer.length - bufferPos) { |
+ // The bytes can fit into the current buffer. |
+ System.arraycopy(b, offset, buffer, bufferPos, length); |
+ bufferPos += length; |
+ } else { |
+ // Use up the current buffer |
+ int copySize = buffer.length - bufferPos; |
+ System.arraycopy(b, offset, buffer, bufferPos, copySize); |
+ offset += copySize; |
+ length -= copySize; |
+ // Flush the buffer, and get a new buffer at least big enough to cover |
+ // what we still need to output |
+ flushFullBuffer(length); |
+ System.arraycopy(b, offset, buffer, 0 /* count */, length); |
+ bufferPos = length; |
+ } |
+ } |
+ |
/** |
- * Creates a {@code ByteString} instance from this {@code Output}. |
+ * Creates a byte string. Its size is the current size of this output |
+ * stream and its output has been copied to it. |
+ * |
+ * @return the current contents of this output stream, as a byte string. |
*/ |
- public ByteString toByteString() { |
- final byte[] byteArray = bout.toByteArray(); |
- return new ByteString(byteArray); |
+ public synchronized ByteString toByteString() { |
+ flushLastBuffer(); |
+ return ByteString.copyFrom(flushedBuffers); |
} |
+ |
+ /** |
+ * Writes the complete contents of this ByteString output stream to |
+ * the specified output stream argument. |
+ * |
+ * @param out the output stream to which to write the data. |
+ * @throws IOException if an I/O error occurs. |
+ */ |
+ public void writeTo(OutputStream out) throws IOException { |
+ ByteString[] cachedFlushBuffers; |
+ byte[] cachedBuffer; |
+ int cachedBufferPos; |
+ synchronized (this) { |
+ // Copy the information we need into local variables so as to hold |
+ // the lock for as short a time as possible. |
+ cachedFlushBuffers = |
+ flushedBuffers.toArray(new ByteString[flushedBuffers.size()]); |
+ cachedBuffer = buffer; |
+ cachedBufferPos = bufferPos; |
+ } |
+ for (ByteString byteString : cachedFlushBuffers) { |
+ byteString.writeTo(out); |
+ } |
+ |
+ out.write(Arrays.copyOf(cachedBuffer, cachedBufferPos)); |
+ } |
+ |
+ /** |
+ * Returns the current size of the output stream. |
+ * |
+ * @return the current size of the output stream |
+ */ |
+ public synchronized int size() { |
+ return flushedBuffersTotalBytes + bufferPos; |
+ } |
+ |
+ /** |
+ * Resets this stream, so that all currently accumulated output in the |
+ * output stream is discarded. The output stream can be used again, |
+ * reusing the already allocated buffer space. |
+ */ |
+ public synchronized void reset() { |
+ flushedBuffers.clear(); |
+ flushedBuffersTotalBytes = 0; |
+ bufferPos = 0; |
+ } |
+ |
+ @Override |
+ public String toString() { |
+ return String.format("<ByteString.Output@%s size=%d>", |
+ Integer.toHexString(System.identityHashCode(this)), size()); |
+ } |
+ |
+ /** |
+ * Internal function used by writers. The current buffer is full, and the |
+ * writer needs a new buffer whose size is at least the specified minimum |
+ * size. |
+ */ |
+ private void flushFullBuffer(int minSize) { |
+ flushedBuffers.add(new LiteralByteString(buffer)); |
+ flushedBuffersTotalBytes += buffer.length; |
+ // We want to increase our total capacity by 50%, but as a minimum, |
+ // the new buffer should also at least be >= minSize and |
+ // >= initialCapacity. |
+ int newSize = Math.max(initialCapacity, |
+ Math.max(minSize, flushedBuffersTotalBytes >>> 1)); |
+ buffer = new byte[newSize]; |
+ bufferPos = 0; |
+ } |
+ |
+ /** |
+ * Internal function used by {@link #toByteString()}. The current buffer may |
+ * or may not be full, but it needs to be flushed. |
+ */ |
+ private void flushLastBuffer() { |
+ if (bufferPos < buffer.length) { |
+ if (bufferPos > 0) { |
+ byte[] bufferCopy = Arrays.copyOf(buffer, bufferPos); |
+ flushedBuffers.add(new LiteralByteString(bufferCopy)); |
+ } |
+ // We reuse this buffer for further writes. |
+ } else { |
+ // Buffer is completely full. Huzzah. |
+ flushedBuffers.add(new LiteralByteString(buffer)); |
+ // 99% of the time, we're not going to use this OutputStream again. |
+ // We set buffer to an empty byte array so that we're handling this |
+ // case without wasting space. In the rare case that more writes |
+ // *do* occur, this empty buffer will be flushed and an appropriately |
+ // sized new buffer will be created. |
+ buffer = EMPTY_BYTE_ARRAY; |
+ } |
+ flushedBuffersTotalBytes += bufferPos; |
+ bufferPos = 0; |
+ } |
} |
/** |
- * Constructs a new ByteString builder, which allows you to efficiently |
- * construct a {@code ByteString} by writing to a {@link CodedOutputStream}. |
- * Using this is much more efficient than calling {@code newOutput()} and |
- * wrapping that in a {@code CodedOutputStream}. |
+ * Constructs a new {@code ByteString} builder, which allows you to |
+ * efficiently construct a {@code ByteString} by writing to a {@link |
+ * CodedOutputStream}. Using this is much more efficient than calling {@code |
+ * newOutput()} and wrapping that in a {@code CodedOutputStream}. |
* |
* <p>This is package-private because it's a somewhat confusing interface. |
* Users can call {@link Message#toByteString()} instead of calling this |
* directly. |
* |
- * @param size The target byte size of the {@code ByteString}. You must |
- * write exactly this many bytes before building the result. |
+ * @param size The target byte size of the {@code ByteString}. You must write |
+ * exactly this many bytes before building the result. |
+ * @return the builder |
*/ |
- static CodedBuilder newCodedBuilder(final int size) { |
+ static CodedBuilder newCodedBuilder(int size) { |
return new CodedBuilder(size); |
} |
@@ -382,7 +895,7 @@ |
private final CodedOutputStream output; |
private final byte[] buffer; |
- private CodedBuilder(final int size) { |
+ private CodedBuilder(int size) { |
buffer = new byte[size]; |
output = CodedOutputStream.newInstance(buffer); |
} |
@@ -393,11 +906,57 @@ |
// We can be confident that the CodedOutputStream will not modify the |
// underlying bytes anymore because it already wrote all of them. So, |
// no need to make a copy. |
- return new ByteString(buffer); |
+ return new LiteralByteString(buffer); |
} |
public CodedOutputStream getCodedOutput() { |
return output; |
} |
} |
+ |
+ // ================================================================= |
+ // Methods {@link RopeByteString} needs on instances, which aren't part of the |
+ // public API. |
+ |
+ /** |
+ * Return the depth of the tree representing this {@code ByteString}, if any, |
+ * whose root is this node. If this is a leaf node, return 0. |
+ * |
+ * @return tree depth or zero |
+ */ |
+ protected abstract int getTreeDepth(); |
+ |
+ /** |
+ * Return {@code true} if this ByteString is literal (a leaf node) or a |
+ * flat-enough tree in the sense of {@link RopeByteString}. |
+ * |
+ * @return true if the tree is flat enough |
+ */ |
+ protected abstract boolean isBalanced(); |
+ |
+ /** |
+ * Return the cached hash code if available. |
+ * |
+ * @return value of cached hash code or 0 if not computed yet |
+ */ |
+ protected abstract int peekCachedHashCode(); |
+ |
+ /** |
+ * Compute the hash across the value bytes starting with the given hash, and |
+ * return the result. This is used to compute the hash across strings |
+ * represented as a set of pieces by allowing the hash computation to be |
+ * continued from piece to piece. |
+ * |
+ * @param h starting hash value |
+ * @param offset offset into this value to start looking at data values |
+ * @param length number of data values to include in the hash computation |
+ * @return ending hash value |
+ */ |
+ protected abstract int partialHash(int h, int offset, int length); |
+ |
+ @Override |
+ public String toString() { |
+ return String.format("<ByteString@%s size=%d>", |
+ Integer.toHexString(System.identityHashCode(this)), size()); |
+ } |
} |