Index: third_party/protobuf/python/google/protobuf/pyext/message.cc |
diff --git a/third_party/protobuf/python/google/protobuf/pyext/message.cc b/third_party/protobuf/python/google/protobuf/pyext/message.cc |
index 863cde0132f2fae4ea9c011bd8d12e80209d7277..83c151ff626aabab287e8bfe39b4b8b80e99791f 100644 |
--- a/third_party/protobuf/python/google/protobuf/pyext/message.cc |
+++ b/third_party/protobuf/python/google/protobuf/pyext/message.cc |
@@ -98,31 +98,6 @@ static PyObject* PythonMessage_class; |
static PyObject* kEmptyWeakref; |
static PyObject* WKT_classes = NULL; |
-// Defines the Metaclass of all Message classes. |
-// It allows us to cache some C++ pointers in the class object itself, they are |
-// faster to extract than from the type's dictionary. |
- |
-struct PyMessageMeta { |
- // This is how CPython subclasses C structures: the base structure must be |
- // the first member of the object. |
- PyHeapTypeObject super; |
- |
- // C++ descriptor of this message. |
- const Descriptor* message_descriptor; |
- |
- // Owned reference, used to keep the pointer above alive. |
- PyObject* py_message_descriptor; |
- |
- // The Python DescriptorPool used to create the class. It is needed to resolve |
- // fields descriptors, including extensions fields; its C++ MessageFactory is |
- // used to instantiate submessages. |
- // This can be different from DESCRIPTOR.file.pool, in the case of a custom |
- // DescriptorPool which defines new extensions. |
- // We own the reference, because it's important to keep the descriptors and |
- // factory alive. |
- PyDescriptorPool* py_descriptor_pool; |
-}; |
- |
namespace message_meta { |
static int InsertEmptyWeakref(PyTypeObject* base); |
@@ -173,10 +148,6 @@ static int AddDescriptors(PyObject* cls, const Descriptor* descriptor) { |
} |
// For each enum set cls.<enum name> = EnumTypeWrapper(<enum descriptor>). |
- // |
- // The enum descriptor we get from |
- // <messagedescriptor>.enum_types_by_name[name] |
- // which was built previously. |
for (int i = 0; i < descriptor->enum_type_count(); ++i) { |
const EnumDescriptor* enum_descriptor = descriptor->enum_type(i); |
ScopedPyObjectPtr enum_type( |
@@ -309,7 +280,7 @@ static PyObject* New(PyTypeObject* type, |
if (result == NULL) { |
return NULL; |
} |
- PyMessageMeta* newtype = reinterpret_cast<PyMessageMeta*>(result.get()); |
+ CMessageClass* newtype = reinterpret_cast<CMessageClass*>(result.get()); |
// Insert the empty weakref into the base classes. |
if (InsertEmptyWeakref( |
@@ -338,7 +309,7 @@ static PyObject* New(PyTypeObject* type, |
// Add the message to the DescriptorPool. |
if (cdescriptor_pool::RegisterMessageClass(newtype->py_descriptor_pool, |
- descriptor, result.get()) < 0) { |
+ descriptor, newtype) < 0) { |
return NULL; |
} |
@@ -349,7 +320,7 @@ static PyObject* New(PyTypeObject* type, |
return result.release(); |
} |
-static void Dealloc(PyMessageMeta *self) { |
+static void Dealloc(CMessageClass *self) { |
Py_DECREF(self->py_message_descriptor); |
Py_DECREF(self->py_descriptor_pool); |
Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self)); |
@@ -378,10 +349,10 @@ static int InsertEmptyWeakref(PyTypeObject *base_type) { |
} // namespace message_meta |
-PyTypeObject PyMessageMeta_Type = { |
+PyTypeObject CMessageClass_Type = { |
PyVarObject_HEAD_INIT(&PyType_Type, 0) |
FULL_MODULE_NAME ".MessageMeta", // tp_name |
- sizeof(PyMessageMeta), // tp_basicsize |
+ sizeof(CMessageClass), // tp_basicsize |
0, // tp_itemsize |
(destructor)message_meta::Dealloc, // tp_dealloc |
0, // tp_print |
@@ -419,16 +390,16 @@ PyTypeObject PyMessageMeta_Type = { |
message_meta::New, // tp_new |
}; |
-static PyMessageMeta* CheckMessageClass(PyTypeObject* cls) { |
- if (!PyObject_TypeCheck(cls, &PyMessageMeta_Type)) { |
+static CMessageClass* CheckMessageClass(PyTypeObject* cls) { |
+ if (!PyObject_TypeCheck(cls, &CMessageClass_Type)) { |
PyErr_Format(PyExc_TypeError, "Class %s is not a Message", cls->tp_name); |
return NULL; |
} |
- return reinterpret_cast<PyMessageMeta*>(cls); |
+ return reinterpret_cast<CMessageClass*>(cls); |
} |
static const Descriptor* GetMessageDescriptor(PyTypeObject* cls) { |
- PyMessageMeta* type = CheckMessageClass(cls); |
+ CMessageClass* type = CheckMessageClass(cls); |
if (type == NULL) { |
return NULL; |
} |
@@ -783,9 +754,9 @@ namespace cmessage { |
PyDescriptorPool* GetDescriptorPoolForMessage(CMessage* message) { |
// No need to check the type: the type of instances of CMessage is always |
- // an instance of PyMessageMeta. Let's prove it with a debug-only check. |
+ // an instance of CMessageClass. Let's prove it with a debug-only check. |
GOOGLE_DCHECK(PyObject_TypeCheck(message, &CMessage_Type)); |
- return reinterpret_cast<PyMessageMeta*>(Py_TYPE(message))->py_descriptor_pool; |
+ return reinterpret_cast<CMessageClass*>(Py_TYPE(message))->py_descriptor_pool; |
} |
MessageFactory* GetFactoryForMessage(CMessage* message) { |
@@ -1090,6 +1061,10 @@ int InitAttributes(CMessage* self, PyObject* kwargs) { |
PyString_AsString(name)); |
return -1; |
} |
+ if (value == Py_None) { |
+ // field=None is the same as no field at all. |
+ continue; |
+ } |
if (descriptor->is_map()) { |
ScopedPyObjectPtr map(GetAttr(self, name)); |
const FieldDescriptor* value_descriptor = |
@@ -1220,9 +1195,9 @@ int InitAttributes(CMessage* self, PyObject* kwargs) { |
// Allocates an incomplete Python Message: the caller must fill self->message, |
// self->owner and eventually self->parent. |
-CMessage* NewEmptyMessage(PyObject* type, const Descriptor *descriptor) { |
+CMessage* NewEmptyMessage(CMessageClass* type) { |
CMessage* self = reinterpret_cast<CMessage*>( |
- PyType_GenericAlloc(reinterpret_cast<PyTypeObject*>(type), 0)); |
+ PyType_GenericAlloc(&type->super.ht_type, 0)); |
if (self == NULL) { |
return NULL; |
} |
@@ -1242,7 +1217,7 @@ CMessage* NewEmptyMessage(PyObject* type, const Descriptor *descriptor) { |
// Creates a new C++ message and takes ownership. |
static PyObject* New(PyTypeObject* cls, |
PyObject* unused_args, PyObject* unused_kwargs) { |
- PyMessageMeta* type = CheckMessageClass(cls); |
+ CMessageClass* type = CheckMessageClass(cls); |
if (type == NULL) { |
return NULL; |
} |
@@ -1258,8 +1233,7 @@ static PyObject* New(PyTypeObject* cls, |
return NULL; |
} |
- CMessage* self = NewEmptyMessage(reinterpret_cast<PyObject*>(type), |
- message_descriptor); |
+ CMessage* self = NewEmptyMessage(type); |
if (self == NULL) { |
return NULL; |
} |
@@ -1851,8 +1825,12 @@ static PyObject* ToStr(CMessage* self) { |
PyObject* MergeFrom(CMessage* self, PyObject* arg) { |
CMessage* other_message; |
- if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), &CMessage_Type)) { |
- PyErr_SetString(PyExc_TypeError, "Must be a message"); |
+ if (!PyObject_TypeCheck(arg, &CMessage_Type)) { |
+ PyErr_Format(PyExc_TypeError, |
+ "Parameter to MergeFrom() must be instance of same class: " |
+ "expected %s got %s.", |
+ self->message->GetDescriptor()->full_name().c_str(), |
+ Py_TYPE(arg)->tp_name); |
return NULL; |
} |
@@ -1860,8 +1838,8 @@ PyObject* MergeFrom(CMessage* self, PyObject* arg) { |
if (other_message->message->GetDescriptor() != |
self->message->GetDescriptor()) { |
PyErr_Format(PyExc_TypeError, |
- "Tried to merge from a message with a different type. " |
- "to: %s, from: %s", |
+ "Parameter to MergeFrom() must be instance of same class: " |
+ "expected %s got %s.", |
self->message->GetDescriptor()->full_name().c_str(), |
other_message->message->GetDescriptor()->full_name().c_str()); |
return NULL; |
@@ -1879,8 +1857,12 @@ PyObject* MergeFrom(CMessage* self, PyObject* arg) { |
static PyObject* CopyFrom(CMessage* self, PyObject* arg) { |
CMessage* other_message; |
- if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), &CMessage_Type)) { |
- PyErr_SetString(PyExc_TypeError, "Must be a message"); |
+ if (!PyObject_TypeCheck(arg, &CMessage_Type)) { |
+ PyErr_Format(PyExc_TypeError, |
+ "Parameter to CopyFrom() must be instance of same class: " |
+ "expected %s got %s.", |
+ self->message->GetDescriptor()->full_name().c_str(), |
+ Py_TYPE(arg)->tp_name); |
return NULL; |
} |
@@ -1893,8 +1875,8 @@ static PyObject* CopyFrom(CMessage* self, PyObject* arg) { |
if (other_message->message->GetDescriptor() != |
self->message->GetDescriptor()) { |
PyErr_Format(PyExc_TypeError, |
- "Tried to copy from a message with a different type. " |
- "to: %s, from: %s", |
+ "Parameter to CopyFrom() must be instance of same class: " |
+ "expected %s got %s.", |
self->message->GetDescriptor()->full_name().c_str(), |
other_message->message->GetDescriptor()->full_name().c_str()); |
return NULL; |
@@ -1911,6 +1893,30 @@ static PyObject* CopyFrom(CMessage* self, PyObject* arg) { |
Py_RETURN_NONE; |
} |
+// Protobuf has a 64MB limit built in; this variable will override it. Please |
+// do not enable this unless you fully understand the implications: protobufs |
+// must all be kept in memory at the same time, so if they grow too big you may |
+// get OOM errors. The protobuf APIs do not provide any tools for processing |
+// protobufs in chunks. If you have protos this big you should break them up if |
+// it is at all convenient to do so. |
+static bool allow_oversize_protos = false; |
+ |
+// Provide a method in the module to set allow_oversize_protos to a boolean |
+// value. This method returns the new value of allow_oversize_protos. |
+static PyObject* SetAllowOversizeProtos(PyObject* m, PyObject* arg) { |
+ if (!arg || !PyBool_Check(arg)) { |
+ PyErr_SetString(PyExc_TypeError, |
+ "Argument to SetAllowOversizeProtos must be boolean"); |
+ return NULL; |
+ } |
+ allow_oversize_protos = PyObject_IsTrue(arg); |
+ if (allow_oversize_protos) { |
+ Py_RETURN_TRUE; |
+ } else { |
+ Py_RETURN_FALSE; |
+ } |
+} |
+ |
static PyObject* MergeFromString(CMessage* self, PyObject* arg) { |
const void* data; |
Py_ssize_t data_length; |
@@ -1921,6 +1927,9 @@ static PyObject* MergeFromString(CMessage* self, PyObject* arg) { |
AssureWritable(self); |
io::CodedInputStream input( |
reinterpret_cast<const uint8*>(data), data_length); |
+ if (allow_oversize_protos) { |
+ input.SetTotalBytesLimit(INT_MAX, INT_MAX); |
+ } |
PyDescriptorPool* pool = GetDescriptorPoolForMessage(self); |
input.SetExtensionRegistry(pool->pool, pool->message_factory); |
bool success = self->message->MergePartialFromCodedStream(&input); |
@@ -1988,10 +1997,34 @@ static PyObject* RegisterExtension(PyObject* cls, |
PyErr_SetString(PyExc_TypeError, "no extensions_by_number on class"); |
return NULL; |
} |
+ |
ScopedPyObjectPtr number(PyObject_GetAttrString(extension_handle, "number")); |
if (number == NULL) { |
return NULL; |
} |
+ |
+ // If the extension was already registered by number, check that it is the |
+ // same. |
+ existing_extension = PyDict_GetItem(extensions_by_number.get(), number.get()); |
+ if (existing_extension != NULL) { |
+ const FieldDescriptor* existing_extension_descriptor = |
+ GetExtensionDescriptor(existing_extension); |
+ if (existing_extension_descriptor != descriptor) { |
+ const Descriptor* msg_desc = GetMessageDescriptor( |
+ reinterpret_cast<PyTypeObject*>(cls)); |
+ PyErr_Format( |
+ PyExc_ValueError, |
+ "Extensions \"%s\" and \"%s\" both try to extend message type " |
+ "\"%s\" with field number %ld.", |
+ existing_extension_descriptor->full_name().c_str(), |
+ descriptor->full_name().c_str(), |
+ msg_desc->full_name().c_str(), |
+ PyInt_AsLong(number.get())); |
+ return NULL; |
+ } |
+ // Nothing else to do. |
+ Py_RETURN_NONE; |
+ } |
if (PyDict_SetItem(extensions_by_number.get(), number.get(), |
extension_handle) < 0) { |
return NULL; |
@@ -2123,10 +2156,20 @@ static PyObject* ListFields(CMessage* self) { |
PyList_SET_ITEM(all_fields.get(), actual_size, t.release()); |
++actual_size; |
} |
- Py_SIZE(all_fields.get()) = actual_size; |
+ if (static_cast<size_t>(actual_size) != fields.size() && |
+ (PyList_SetSlice(all_fields.get(), actual_size, fields.size(), NULL) < |
+ 0)) { |
+ return NULL; |
+ } |
return all_fields.release(); |
} |
+static PyObject* DiscardUnknownFields(CMessage* self) { |
+ AssureWritable(self); |
+ self->message->DiscardUnknownFields(); |
+ Py_RETURN_NONE; |
+} |
+ |
PyObject* FindInitializationErrors(CMessage* self) { |
Message* message = self->message; |
vector<string> errors; |
@@ -2270,14 +2313,13 @@ PyObject* InternalGetSubMessage( |
const Message& sub_message = reflection->GetMessage( |
*self->message, field_descriptor, pool->message_factory); |
- PyObject *message_class = cdescriptor_pool::GetMessageClass( |
+ CMessageClass* message_class = cdescriptor_pool::GetMessageClass( |
pool, field_descriptor->message_type()); |
if (message_class == NULL) { |
return NULL; |
} |
- CMessage* cmsg = cmessage::NewEmptyMessage(message_class, |
- sub_message.GetDescriptor()); |
+ CMessage* cmsg = cmessage::NewEmptyMessage(message_class); |
if (cmsg == NULL) { |
return NULL; |
} |
@@ -2546,6 +2588,8 @@ static PyMethodDef Methods[] = { |
"Clears a message field." }, |
{ "CopyFrom", (PyCFunction)CopyFrom, METH_O, |
"Copies a protocol message into the current message." }, |
+ { "DiscardUnknownFields", (PyCFunction)DiscardUnknownFields, METH_NOARGS, |
+ "Discards the unknown fields." }, |
{ "FindInitializationErrors", (PyCFunction)FindInitializationErrors, |
METH_NOARGS, |
"Finds unset required fields." }, |
@@ -2615,7 +2659,7 @@ PyObject* GetAttr(CMessage* self, PyObject* name) { |
const Descriptor* entry_type = field_descriptor->message_type(); |
const FieldDescriptor* value_type = entry_type->FindFieldByName("value"); |
if (value_type->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { |
- PyObject* value_class = cdescriptor_pool::GetMessageClass( |
+ CMessageClass* value_class = cdescriptor_pool::GetMessageClass( |
GetDescriptorPoolForMessage(self), value_type->message_type()); |
if (value_class == NULL) { |
return NULL; |
@@ -2638,7 +2682,7 @@ PyObject* GetAttr(CMessage* self, PyObject* name) { |
if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) { |
PyObject* py_container = NULL; |
if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { |
- PyObject *message_class = cdescriptor_pool::GetMessageClass( |
+ CMessageClass* message_class = cdescriptor_pool::GetMessageClass( |
GetDescriptorPoolForMessage(self), field_descriptor->message_type()); |
if (message_class == NULL) { |
return NULL; |
@@ -2702,7 +2746,7 @@ int SetAttr(CMessage* self, PyObject* name, PyObject* value) { |
PyErr_Format(PyExc_AttributeError, |
"Assignment not allowed " |
- "(no field \"%s\"in protocol message object).", |
+ "(no field \"%s\" in protocol message object).", |
PyString_AsString(name)); |
return -1; |
} |
@@ -2710,7 +2754,7 @@ int SetAttr(CMessage* self, PyObject* name, PyObject* value) { |
} // namespace cmessage |
PyTypeObject CMessage_Type = { |
- PyVarObject_HEAD_INIT(&PyMessageMeta_Type, 0) |
+ PyVarObject_HEAD_INIT(&CMessageClass_Type, 0) |
FULL_MODULE_NAME ".CMessage", // tp_name |
sizeof(CMessage), // tp_basicsize |
0, // tp_itemsize |
@@ -2719,7 +2763,7 @@ PyTypeObject CMessage_Type = { |
0, // tp_getattr |
0, // tp_setattr |
0, // tp_compare |
- 0, // tp_repr |
+ (reprfunc)cmessage::ToStr, // tp_repr |
0, // tp_as_number |
0, // tp_as_sequence |
0, // tp_as_mapping |
@@ -2825,12 +2869,12 @@ bool InitProto2MessageModule(PyObject *m) { |
// Initialize constants defined in this file. |
InitGlobals(); |
- PyMessageMeta_Type.tp_base = &PyType_Type; |
- if (PyType_Ready(&PyMessageMeta_Type) < 0) { |
+ CMessageClass_Type.tp_base = &PyType_Type; |
+ if (PyType_Ready(&CMessageClass_Type) < 0) { |
return false; |
} |
PyModule_AddObject(m, "MessageMeta", |
- reinterpret_cast<PyObject*>(&PyMessageMeta_Type)); |
+ reinterpret_cast<PyObject*>(&CMessageClass_Type)); |
if (PyType_Ready(&CMessage_Type) < 0) { |
return false; |
@@ -3037,6 +3081,12 @@ bool InitProto2MessageModule(PyObject *m) { |
} // namespace python |
} // namespace protobuf |
+static PyMethodDef ModuleMethods[] = { |
+ {"SetAllowOversizeProtos", |
+ (PyCFunction)google::protobuf::python::cmessage::SetAllowOversizeProtos, |
+ METH_O, "Enable/disable oversize proto parsing."}, |
+ { NULL, NULL} |
+}; |
#if PY_MAJOR_VERSION >= 3 |
static struct PyModuleDef _module = { |
@@ -3044,7 +3094,7 @@ static struct PyModuleDef _module = { |
"_message", |
google::protobuf::python::module_docstring, |
-1, |
- NULL, |
+ ModuleMethods, /* m_methods */ |
NULL, |
NULL, |
NULL, |
@@ -3063,7 +3113,8 @@ extern "C" { |
#if PY_MAJOR_VERSION >= 3 |
m = PyModule_Create(&_module); |
#else |
- m = Py_InitModule3("_message", NULL, google::protobuf::python::module_docstring); |
+ m = Py_InitModule3("_message", ModuleMethods, |
+ google::protobuf::python::module_docstring); |
#endif |
if (m == NULL) { |
return INITFUNC_ERRORVAL; |