| Index: third_party/protobuf/java/src/main/java/com/google/protobuf/Internal.java
|
| diff --git a/third_party/protobuf/java/src/main/java/com/google/protobuf/Internal.java b/third_party/protobuf/java/src/main/java/com/google/protobuf/Internal.java
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..05eab57ad4e9ce31abbb276c9adfc925c805fede
|
| --- /dev/null
|
| +++ b/third_party/protobuf/java/src/main/java/com/google/protobuf/Internal.java
|
| @@ -0,0 +1,206 @@
|
| +// Protocol Buffers - Google's data interchange format
|
| +// Copyright 2008 Google Inc. All rights reserved.
|
| +// http://code.google.com/p/protobuf/
|
| +//
|
| +// Redistribution and use in source and binary forms, with or without
|
| +// modification, are permitted provided that the following conditions are
|
| +// met:
|
| +//
|
| +// * Redistributions of source code must retain the above copyright
|
| +// notice, this list of conditions and the following disclaimer.
|
| +// * Redistributions in binary form must reproduce the above
|
| +// copyright notice, this list of conditions and the following disclaimer
|
| +// in the documentation and/or other materials provided with the
|
| +// distribution.
|
| +// * Neither the name of Google Inc. nor the names of its
|
| +// contributors may be used to endorse or promote products derived from
|
| +// this software without specific prior written permission.
|
| +//
|
| +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| +
|
| +package com.google.protobuf;
|
| +
|
| +import java.io.UnsupportedEncodingException;
|
| +
|
| +/**
|
| + * The classes contained within are used internally by the Protocol Buffer
|
| + * library and generated message implementations. They are public only because
|
| + * those generated messages do not reside in the {@code protobuf} package.
|
| + * Others should not use this class directly.
|
| + *
|
| + * @author kenton@google.com (Kenton Varda)
|
| + */
|
| +public class Internal {
|
| + /**
|
| + * Helper called by generated code to construct default values for string
|
| + * fields.
|
| + * <p>
|
| + * The protocol compiler does not actually contain a UTF-8 decoder -- it
|
| + * just pushes UTF-8-encoded text around without touching it. The one place
|
| + * where this presents a problem is when generating Java string literals:
|
| + * non-ASCII characters would normally have to be written as Unicode escape
|
| + * sequences, which would require decoding them. To get around this, protoc
|
| + * embeds the UTF-8 bytes into the generated code and leaves it to the
|
| + * runtime library to decode them.
|
| + * <p>
|
| + * It gets worse, though. If protoc just generated a byte array, like
|
| + * {@code new byte[] {0x12, 0x34, 0x56, 0x78}}, Java would actually generate
|
| + * *code* which allocates an array and then fills in each value, which is
|
| + * much less efficient than embedding the bytes directly into the bytecode.
|
| + * String literals are embedded directly, so protoc instead generates a
|
| + * string literal in which each character stands for one byte, i.e. the
|
| + * bytes read as ISO-8859-1 (the first 256 characters of the Unicode
|
| + * range). Protoc can then use good old CEscape to generate the string.
|
| + * <p>
|
| + * So we have a string literal which represents a set of bytes which
|
| + * represents another string; this function recovers the string we want.
|
| + *
|
| + * @param bytes the UTF-8 bytes of the value, one byte per character
|
| + * @return the string obtained by decoding those bytes as UTF-8
|
| + * @throws IllegalStateException if the JVM lacks either character set
|
| + */
|
| + public static String stringDefaultValue(String bytes) {
|
| + try {
|
| + return new String(bytes.getBytes("ISO-8859-1"), "UTF-8");
|
| + } catch (UnsupportedEncodingException e) {
|
| + // This should never happen since all JVMs are required to implement
|
| + // both of the above character sets.
|
| + throw new IllegalStateException(
|
| + "Java VM does not support a standard character set.", e);
|
| + }
|
| + }
|
| +
|
| + /**
|
| + * Helper called by generated code to construct default values for bytes
|
| + * fields. Like {@link #stringDefaultValue}, but only the second hack is
|
| + * needed: raw bytes embedded as a string literal, one byte per character.
|
| + * @param bytes the default value's bytes, encoded as ISO-8859-1 characters
|
| + * @return a {@code ByteString} created from those bytes
|
| + * @throws IllegalStateException if the JVM lacks the ISO-8859-1 charset
|
| + */
|
| + public static ByteString bytesDefaultValue(String bytes) {
|
| + try {
|
| + return ByteString.copyFrom(bytes.getBytes("ISO-8859-1"));
|
| + } catch (UnsupportedEncodingException e) {
|
| + // This should never happen since all JVMs are required to implement
|
| + // ISO-8859-1.
|
| + throw new IllegalStateException(
|
| + "Java VM does not support a standard character set.", e);
|
| + }
|
| + }
|
| +
|
| + /**
|
| + * Helper called by generated code to determine if a byte string is valid
|
| + * UTF-8, such that the original bytes can be converted to a String object
|
| + * and then back to bytes, round-tripping without loss. Overlong forms,
|
| + * surrogates (ED A0..BF) and lead bytes above F4 are rejected.
|
| + * <p>
|
| + * This is inspired by UTF_8.java in sun.nio.cs.
|
| + *
|
| + * @param byteString the bytes to check
|
| + * @return whether the bytes are valid UTF-8 and hence round-trippable
|
| + */
|
| + public static boolean isValidUtf8(ByteString byteString) {
|
| + int index = 0;
|
| + int size = byteString.size();
|
| + // Each byte is masked with 0xFF so it compares as an unsigned value.
|
| + // The masking could be avoided by comparing signed bytes directly;
|
| + // e.g. X < 0x80 would turn into X >= 0, etc.
|
| +
|
| + while (index < size) {
|
| + int byte1 = byteString.byteAt(index++) & 0xFF;
|
| + if (byte1 < 0x80) {
|
| + // single-byte (ASCII) form: nothing more to check
|
| + continue;
|
| +
|
| + // from this point on byte1 >= 0x80, so only 2-4 byte forms remain
|
| + } else if (byte1 < 0xC2 || byte1 > 0xF4) {
|
| + // reject illegal lead bytes: 80..C1 (trail or overlong) or > F4
|
| + return false;
|
| + }
|
| + if (index >= size) {
|
| + // truncated sequence: a second byte is required
|
| + return false;
|
| + }
|
| + int byte2 = byteString.byteAt(index++) & 0xFF;
|
| + if (byte2 < 0x80 || byte2 > 0xBF) {
|
| + // trail bytes must lie in 80..BF
|
| + return false;
|
| + }
|
| + if (byte1 <= 0xDF) {
|
| + // two-byte form; general trail-byte test is sufficient
|
| + continue;
|
| + }
|
| +
|
| + // we know from this point on that we have 3 or 4 byte forms
|
| + if (index >= size) {
|
| + // truncated sequence: a third byte is required
|
| + return false;
|
| + }
|
| + int byte3 = byteString.byteAt(index++) & 0xFF;
|
| + if (byte3 < 0x80 || byte3 > 0xBF) {
|
| + // trail bytes must lie in 80..BF
|
| + return false;
|
| + }
|
| + if (byte1 <= 0xEF) {
|
| + // three-byte form. Vastly more frequent than four-byte forms
|
| + // The following has an extra test, but not worth restructuring
|
| + if (byte1 == 0xE0 && byte2 < 0xA0 ||
|
| + byte1 == 0xED && byte2 > 0x9F) {
|
| + // E0 80..9F is an overlong form; ED A0..BF is a UTF-16 surrogate
|
| + return false;
|
| + }
|
| +
|
| + } else {
|
| + // four-byte form
|
| +
|
| + if (index >= size) {
|
| + // truncated sequence: a fourth byte is required
|
| + return false;
|
| + }
|
| + int byte4 = byteString.byteAt(index++) & 0xFF;
|
| + if (byte4 < 0x80 || byte4 > 0xBF) {
|
| + // trail bytes must lie in 80..BF
|
| + return false;
|
| + }
|
| + // The following has an extra test, but not worth restructuring
|
| + if (byte1 == 0xF0 && byte2 < 0x90 ||
|
| + byte1 == 0xF4 && byte2 > 0x8F) {
|
| + // F0 80..8F is an overlong form; F4 90..BF is above U+10FFFF
|
| + return false;
|
| + }
|
| + }
|
| + }
|
| + return true;
|
| + }
|
| +
|
| + /**
|
| + * Interface for an enum value or value descriptor, to be used in FieldSet.
|
| + * The lite library stores enum values directly in FieldSets, but the full
|
| + * library stores EnumValueDescriptors in order to better support reflection.
|
| + */
|
| + public interface EnumLite {
|
| + int getNumber(); // presumably the key used by EnumLiteMap -- confirm
|
| + }
|
| +
|
| + /**
|
| + * Interface for an object which maps integers to {@link EnumLite}s.
|
| + * {@link Descriptors.EnumDescriptor} implements this interface by mapping
|
| + * numbers to {@link Descriptors.EnumValueDescriptor}s. Additionally, every
|
| + * generated enum type has a static method {@code internalGetValueMap()}
|
| + * which returns an implementation that maps numbers to enum values.
|
| + */
|
| + public interface EnumLiteMap<T extends EnumLite> {
|
| + T findValueByNumber(int number); // NOTE(review): result for an unmapped number is not specified here
|
| + }
|
| +}
|
|
|