| OLD | NEW |
| 1 # Protocol Buffers - Google's data interchange format | 1 # Protocol Buffers - Google's data interchange format |
| 2 # Copyright 2008 Google Inc. All rights reserved. | 2 # Copyright 2008 Google Inc. All rights reserved. |
| 3 # https://developers.google.com/protocol-buffers/ | 3 # https://developers.google.com/protocol-buffers/ |
| 4 # | 4 # |
| 5 # Redistribution and use in source and binary forms, with or without | 5 # Redistribution and use in source and binary forms, with or without |
| 6 # modification, are permitted provided that the following conditions are | 6 # modification, are permitted provided that the following conditions are |
| 7 # met: | 7 # met: |
| 8 # | 8 # |
| 9 # * Redistributions of source code must retain the above copyright | 9 # * Redistributions of source code must retain the above copyright |
| 10 # notice, this list of conditions and the following disclaimer. | 10 # notice, this list of conditions and the following disclaimer. |
| (...skipping 30 matching lines...) Expand all Loading... |
| 41 """ | 41 """ |
| 42 | 42 |
| 43 __author__ = 'kenton@google.com (Kenton Varda)' | 43 __author__ = 'kenton@google.com (Kenton Varda)' |
| 44 | 44 |
| 45 import io | 45 import io |
| 46 import re | 46 import re |
| 47 | 47 |
| 48 import six | 48 import six |
| 49 | 49 |
| 50 if six.PY3: | 50 if six.PY3: |
| 51 long = int | 51 long = int # pylint: disable=redefined-builtin,invalid-name |
| 52 | 52 |
| 53 # pylint: disable=g-import-not-at-top |
| 53 from google.protobuf.internal import type_checkers | 54 from google.protobuf.internal import type_checkers |
| 54 from google.protobuf import descriptor | 55 from google.protobuf import descriptor |
| 55 from google.protobuf import text_encoding | 56 from google.protobuf import text_encoding |
| 56 | 57 |
| 57 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', | 58 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', 'PrintFieldValue', |
| 58 'PrintFieldValue', 'Merge'] | 59 'Merge'] |
| 59 | |
| 60 | 60 |
| 61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), | 61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), |
| 62 type_checkers.Int32ValueChecker(), | 62 type_checkers.Int32ValueChecker(), |
| 63 type_checkers.Uint64ValueChecker(), | 63 type_checkers.Uint64ValueChecker(), |
| 64 type_checkers.Int64ValueChecker()) | 64 type_checkers.Int64ValueChecker()) |
| 65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE) | 65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE) |
| 66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE) | 66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE) |
| 67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT, | 67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT, |
| 68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE]) | 68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE]) |
| 69 _QUOTES = frozenset(("'", '"')) | 69 _QUOTES = frozenset(("'", '"')) |
| 70 _ANY_FULL_TYPE_NAME = 'google.protobuf.Any' |
| 70 | 71 |
| 71 | 72 |
| 72 class Error(Exception): | 73 class Error(Exception): |
| 73 """Top-level module error for text_format.""" | 74 """Top-level module error for text_format.""" |
| 74 | 75 |
| 75 | 76 |
| 76 class ParseError(Error): | 77 class ParseError(Error): |
| 77 """Thrown in case of text parsing error.""" | 78 """Thrown in case of text parsing or tokenizing error.""" |
| 79 |
| 80 def __init__(self, message=None, line=None, column=None): |
| 81 if message is not None and line is not None: |
| 82 loc = str(line) |
| 83 if column is not None: |
| 84 loc += ':{0}'.format(column) |
| 85 message = '{0} : {1}'.format(loc, message) |
| 86 if message is not None: |
| 87 super(ParseError, self).__init__(message) |
| 88 else: |
| 89 super(ParseError, self).__init__() |
| 90 self._line = line |
| 91 self._column = column |
| 92 |
| 93 def GetLine(self): |
| 94 return self._line |
| 95 |
| 96 def GetColumn(self): |
| 97 return self._column |
| 78 | 98 |
| 79 | 99 |
| 80 class TextWriter(object): | 100 class TextWriter(object): |
| 101 |
| 81 def __init__(self, as_utf8): | 102 def __init__(self, as_utf8): |
| 82 if six.PY2: | 103 if six.PY2: |
| 83 self._writer = io.BytesIO() | 104 self._writer = io.BytesIO() |
| 84 else: | 105 else: |
| 85 self._writer = io.StringIO() | 106 self._writer = io.StringIO() |
| 86 | 107 |
| 87 def write(self, val): | 108 def write(self, val): |
| 88 if six.PY2: | 109 if six.PY2: |
| 89 if isinstance(val, six.text_type): | 110 if isinstance(val, six.text_type): |
| 90 val = val.encode('utf-8') | 111 val = val.encode('utf-8') |
| 91 return self._writer.write(val) | 112 return self._writer.write(val) |
| 92 | 113 |
| 93 def close(self): | 114 def close(self): |
| 94 return self._writer.close() | 115 return self._writer.close() |
| 95 | 116 |
| 96 def getvalue(self): | 117 def getvalue(self): |
| 97 return self._writer.getvalue() | 118 return self._writer.getvalue() |
| 98 | 119 |
| 99 | 120 |
| 100 def MessageToString(message, as_utf8=False, as_one_line=False, | 121 def MessageToString(message, |
| 101 pointy_brackets=False, use_index_order=False, | 122 as_utf8=False, |
| 102 float_format=None, use_field_number=False): | 123 as_one_line=False, |
| 124 pointy_brackets=False, |
| 125 use_index_order=False, |
| 126 float_format=None, |
| 127 use_field_number=False, |
| 128 descriptor_pool=None, |
| 129 indent=0): |
| 103 """Convert protobuf message to text format. | 130 """Convert protobuf message to text format. |
| 104 | 131 |
| 105 Floating point values can be formatted compactly with 15 digits of | 132 Floating point values can be formatted compactly with 15 digits of |
| 106 precision (which is the most that IEEE 754 "double" can guarantee) | 133 precision (which is the most that IEEE 754 "double" can guarantee) |
| 107 using float_format='.15g'. To ensure that converting to text and back to a | 134 using float_format='.15g'. To ensure that converting to text and back to a |
| 108 proto will result in an identical value, float_format='.17g' should be used. | 135 proto will result in an identical value, float_format='.17g' should be used. |
| 109 | 136 |
| 110 Args: | 137 Args: |
| 111 message: The protocol buffers message. | 138 message: The protocol buffers message. |
| 112 as_utf8: Produce text output in UTF8 format. | 139 as_utf8: Produce text output in UTF8 format. |
| 113 as_one_line: Don't introduce newlines between fields. | 140 as_one_line: Don't introduce newlines between fields. |
| 114 pointy_brackets: If True, use angle brackets instead of curly braces for | 141 pointy_brackets: If True, use angle brackets instead of curly braces for |
| 115 nesting. | 142 nesting. |
| 116 use_index_order: If True, print fields of a proto message using the order | 143 use_index_order: If True, print fields of a proto message using the order |
| 117 defined in source code instead of the field number. By default, use the | 144 defined in source code instead of the field number. By default, use the |
| 118 field number order. | 145 field number order. |
| 119 float_format: If set, use this to specify floating point number formatting | 146 float_format: If set, use this to specify floating point number formatting |
| 120 (per the "Format Specification Mini-Language"); otherwise, str() is used. | 147 (per the "Format Specification Mini-Language"); otherwise, str() is used. |
| 121 use_field_number: If True, print field numbers instead of names. | 148 use_field_number: If True, print field numbers instead of names. |
| 149 descriptor_pool: A DescriptorPool used to resolve Any types. |
| 150 indent: The indent level, in terms of spaces, for pretty print. |
| 122 | 151 |
| 123 Returns: | 152 Returns: |
| 124 A string of the text formatted protocol buffer message. | 153 A string of the text formatted protocol buffer message. |
| 125 """ | 154 """ |
| 126 out = TextWriter(as_utf8) | 155 out = TextWriter(as_utf8) |
| 127 printer = _Printer(out, 0, as_utf8, as_one_line, | 156 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
| 128 pointy_brackets, use_index_order, float_format, | 157 use_index_order, float_format, use_field_number, |
| 129 use_field_number) | 158 descriptor_pool) |
| 130 printer.PrintMessage(message) | 159 printer.PrintMessage(message) |
| 131 result = out.getvalue() | 160 result = out.getvalue() |
| 132 out.close() | 161 out.close() |
| 133 if as_one_line: | 162 if as_one_line: |
| 134 return result.rstrip() | 163 return result.rstrip() |
| 135 return result | 164 return result |
| 136 | 165 |
| 137 | 166 |
| 138 def _IsMapEntry(field): | 167 def _IsMapEntry(field): |
| 139 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and | 168 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and |
| 140 field.message_type.has_options and | 169 field.message_type.has_options and |
| 141 field.message_type.GetOptions().map_entry) | 170 field.message_type.GetOptions().map_entry) |
| 142 | 171 |
| 143 | 172 |
| 144 def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False, | 173 def PrintMessage(message, |
| 145 pointy_brackets=False, use_index_order=False, | 174 out, |
| 146 float_format=None, use_field_number=False): | 175 indent=0, |
| 147 printer = _Printer(out, indent, as_utf8, as_one_line, | 176 as_utf8=False, |
| 148 pointy_brackets, use_index_order, float_format, | 177 as_one_line=False, |
| 149 use_field_number) | 178 pointy_brackets=False, |
| 179 use_index_order=False, |
| 180 float_format=None, |
| 181 use_field_number=False, |
| 182 descriptor_pool=None): |
| 183 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
| 184 use_index_order, float_format, use_field_number, |
| 185 descriptor_pool) |
| 150 printer.PrintMessage(message) | 186 printer.PrintMessage(message) |
| 151 | 187 |
| 152 | 188 |
| 153 def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False, | 189 def PrintField(field, |
| 154 pointy_brackets=False, use_index_order=False, float_format=None): | 190 value, |
| 191 out, |
| 192 indent=0, |
| 193 as_utf8=False, |
| 194 as_one_line=False, |
| 195 pointy_brackets=False, |
| 196 use_index_order=False, |
| 197 float_format=None): |
| 155 """Print a single field name/value pair.""" | 198 """Print a single field name/value pair.""" |
| 156 printer = _Printer(out, indent, as_utf8, as_one_line, | 199 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
| 157 pointy_brackets, use_index_order, float_format) | 200 use_index_order, float_format) |
| 158 printer.PrintField(field, value) | 201 printer.PrintField(field, value) |
| 159 | 202 |
| 160 | 203 |
| 161 def PrintFieldValue(field, value, out, indent=0, as_utf8=False, | 204 def PrintFieldValue(field, |
| 162 as_one_line=False, pointy_brackets=False, | 205 value, |
| 206 out, |
| 207 indent=0, |
| 208 as_utf8=False, |
| 209 as_one_line=False, |
| 210 pointy_brackets=False, |
| 163 use_index_order=False, | 211 use_index_order=False, |
| 164 float_format=None): | 212 float_format=None): |
| 165 """Print a single field value (not including name).""" | 213 """Print a single field value (not including name).""" |
| 166 printer = _Printer(out, indent, as_utf8, as_one_line, | 214 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
| 167 pointy_brackets, use_index_order, float_format) | 215 use_index_order, float_format) |
| 168 printer.PrintFieldValue(field, value) | 216 printer.PrintFieldValue(field, value) |
| 169 | 217 |
| 170 | 218 |
| 219 def _BuildMessageFromTypeName(type_name, descriptor_pool): |
| 220 """Returns a protobuf message instance. |
| 221 |
| 222 Args: |
| 223 type_name: Fully-qualified protobuf message type name string. |
| 224 descriptor_pool: DescriptorPool instance. |
| 225 |
| 226 Returns: |
| 227 A Message instance of type matching type_name, or None if a Descriptor |
| 228 wasn't found matching type_name. |
| 229 """ |
| 230 # pylint: disable=g-import-not-at-top |
| 231 from google.protobuf import message_factory |
| 232 factory = message_factory.MessageFactory(descriptor_pool) |
| 233 try: |
| 234 message_descriptor = descriptor_pool.FindMessageTypeByName(type_name) |
| 235 except KeyError: |
| 236 return None |
| 237 message_type = factory.GetPrototype(message_descriptor) |
| 238 return message_type() |
| 239 |
| 240 |
| 171 class _Printer(object): | 241 class _Printer(object): |
| 172 """Text format printer for protocol message.""" | 242 """Text format printer for protocol message.""" |
| 173 | 243 |
| 174 def __init__(self, out, indent=0, as_utf8=False, as_one_line=False, | 244 def __init__(self, |
| 175 pointy_brackets=False, use_index_order=False, float_format=None, | 245 out, |
| 176 use_field_number=False): | 246 indent=0, |
| 247 as_utf8=False, |
| 248 as_one_line=False, |
| 249 pointy_brackets=False, |
| 250 use_index_order=False, |
| 251 float_format=None, |
| 252 use_field_number=False, |
| 253 descriptor_pool=None): |
| 177 """Initialize the Printer. | 254 """Initialize the Printer. |
| 178 | 255 |
| 179 Floating point values can be formatted compactly with 15 digits of | 256 Floating point values can be formatted compactly with 15 digits of |
| 180 precision (which is the most that IEEE 754 "double" can guarantee) | 257 precision (which is the most that IEEE 754 "double" can guarantee) |
| 181 using float_format='.15g'. To ensure that converting to text and back to a | 258 using float_format='.15g'. To ensure that converting to text and back to a |
| 182 proto will result in an identical value, float_format='.17g' should be used. | 259 proto will result in an identical value, float_format='.17g' should be used. |
| 183 | 260 |
| 184 Args: | 261 Args: |
| 185 out: To record the text format result. | 262 out: To record the text format result. |
| 186 indent: The indent level for pretty print. | 263 indent: The indent level for pretty print. |
| 187 as_utf8: Produce text output in UTF8 format. | 264 as_utf8: Produce text output in UTF8 format. |
| 188 as_one_line: Don't introduce newlines between fields. | 265 as_one_line: Don't introduce newlines between fields. |
| 189 pointy_brackets: If True, use angle brackets instead of curly braces for | 266 pointy_brackets: If True, use angle brackets instead of curly braces for |
| 190 nesting. | 267 nesting. |
| 191 use_index_order: If True, print fields of a proto message using the order | 268 use_index_order: If True, print fields of a proto message using the order |
| 192 defined in source code instead of the field number. By default, use the | 269 defined in source code instead of the field number. By default, use the |
| 193 field number order. | 270 field number order. |
| 194 float_format: If set, use this to specify floating point number formatting | 271 float_format: If set, use this to specify floating point number formatting |
| 195 (per the "Format Specification Mini-Language"); otherwise, str() is | 272 (per the "Format Specification Mini-Language"); otherwise, str() is |
| 196 used. | 273 used. |
| 197 use_field_number: If True, print field numbers instead of names. | 274 use_field_number: If True, print field numbers instead of names. |
| 275 descriptor_pool: A DescriptorPool used to resolve Any types. |
| 198 """ | 276 """ |
| 199 self.out = out | 277 self.out = out |
| 200 self.indent = indent | 278 self.indent = indent |
| 201 self.as_utf8 = as_utf8 | 279 self.as_utf8 = as_utf8 |
| 202 self.as_one_line = as_one_line | 280 self.as_one_line = as_one_line |
| 203 self.pointy_brackets = pointy_brackets | 281 self.pointy_brackets = pointy_brackets |
| 204 self.use_index_order = use_index_order | 282 self.use_index_order = use_index_order |
| 205 self.float_format = float_format | 283 self.float_format = float_format |
| 206 self.use_field_number = use_field_number | 284 self.use_field_number = use_field_number |
| 285 self.descriptor_pool = descriptor_pool |
| 286 |
| 287 def _TryPrintAsAnyMessage(self, message): |
| 288 """Serializes if message is a google.protobuf.Any field.""" |
| 289 packed_message = _BuildMessageFromTypeName(message.TypeName(), |
| 290 self.descriptor_pool) |
| 291 if packed_message: |
| 292 packed_message.MergeFromString(message.value) |
| 293 self.out.write('%s[%s]' % (self.indent * ' ', message.type_url)) |
| 294 self._PrintMessageFieldValue(packed_message) |
| 295 self.out.write(' ' if self.as_one_line else '\n') |
| 296 return True |
| 297 else: |
| 298 return False |
| 207 | 299 |
| 208 def PrintMessage(self, message): | 300 def PrintMessage(self, message): |
| 209 """Convert protobuf message to text format. | 301 """Convert protobuf message to text format. |
| 210 | 302 |
| 211 Args: | 303 Args: |
| 212 message: The protocol buffers message. | 304 message: The protocol buffers message. |
| 213 """ | 305 """ |
| 306 if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and |
| 307 self.descriptor_pool and self._TryPrintAsAnyMessage(message)): |
| 308 return |
| 214 fields = message.ListFields() | 309 fields = message.ListFields() |
| 215 if self.use_index_order: | 310 if self.use_index_order: |
| 216 fields.sort(key=lambda x: x[0].index) | 311 fields.sort(key=lambda x: x[0].index) |
| 217 for field, value in fields: | 312 for field, value in fields: |
| 218 if _IsMapEntry(field): | 313 if _IsMapEntry(field): |
| 219 for key in sorted(value): | 314 for key in sorted(value): |
| 220 # This is slow for maps with submessage entries because it copies the | 315 # This is slow for maps with submessage entries because it copies the |
| 221 # entire tree. Unfortunately this would take significant refactoring | 316 # entire tree. Unfortunately this would take significant refactoring |
| 222 # of this file to work around. | 317 # of this file to work around. |
| 223 # | 318 # |
| 224 # TODO(haberman): refactor and optimize if this becomes an issue. | 319 # TODO(haberman): refactor and optimize if this becomes an issue. |
| 225 entry_submsg = field.message_type._concrete_class( | 320 entry_submsg = field.message_type._concrete_class(key=key, |
| 226 key=key, value=value[key]) | 321 value=value[key]) |
| 227 self.PrintField(field, entry_submsg) | 322 self.PrintField(field, entry_submsg) |
| 228 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | 323 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
| 229 for element in value: | 324 for element in value: |
| 230 self.PrintField(field, element) | 325 self.PrintField(field, element) |
| 231 else: | 326 else: |
| 232 self.PrintField(field, value) | 327 self.PrintField(field, value) |
| 233 | 328 |
| 234 def PrintField(self, field, value): | 329 def PrintField(self, field, value): |
| 235 """Print a single field name/value pair.""" | 330 """Print a single field name/value pair.""" |
| 236 out = self.out | 331 out = self.out |
| (...skipping 20 matching lines...) Expand all Loading... |
| 257 # The colon is optional in this case, but our cross-language golden files | 352 # The colon is optional in this case, but our cross-language golden files |
| 258 # don't include it. | 353 # don't include it. |
| 259 out.write(': ') | 354 out.write(': ') |
| 260 | 355 |
| 261 self.PrintFieldValue(field, value) | 356 self.PrintFieldValue(field, value) |
| 262 if self.as_one_line: | 357 if self.as_one_line: |
| 263 out.write(' ') | 358 out.write(' ') |
| 264 else: | 359 else: |
| 265 out.write('\n') | 360 out.write('\n') |
| 266 | 361 |
| 362 def _PrintMessageFieldValue(self, value): |
| 363 if self.pointy_brackets: |
| 364 openb = '<' |
| 365 closeb = '>' |
| 366 else: |
| 367 openb = '{' |
| 368 closeb = '}' |
| 369 |
| 370 if self.as_one_line: |
| 371 self.out.write(' %s ' % openb) |
| 372 self.PrintMessage(value) |
| 373 self.out.write(closeb) |
| 374 else: |
| 375 self.out.write(' %s\n' % openb) |
| 376 self.indent += 2 |
| 377 self.PrintMessage(value) |
| 378 self.indent -= 2 |
| 379 self.out.write(' ' * self.indent + closeb) |
| 380 |
| 267 def PrintFieldValue(self, field, value): | 381 def PrintFieldValue(self, field, value): |
| 268 """Print a single field value (not including name). | 382 """Print a single field value (not including name). |
| 269 | 383 |
| 270 For repeated fields, the value should be a single element. | 384 For repeated fields, the value should be a single element. |
| 271 | 385 |
| 272 Args: | 386 Args: |
| 273 field: The descriptor of the field to be printed. | 387 field: The descriptor of the field to be printed. |
| 274 value: The value of the field. | 388 value: The value of the field. |
| 275 """ | 389 """ |
| 276 out = self.out | 390 out = self.out |
| 277 if self.pointy_brackets: | |
| 278 openb = '<' | |
| 279 closeb = '>' | |
| 280 else: | |
| 281 openb = '{' | |
| 282 closeb = '}' | |
| 283 | |
| 284 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: | 391 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: |
| 285 if self.as_one_line: | 392 self._PrintMessageFieldValue(value) |
| 286 out.write(' %s ' % openb) | |
| 287 self.PrintMessage(value) | |
| 288 out.write(closeb) | |
| 289 else: | |
| 290 out.write(' %s\n' % openb) | |
| 291 self.indent += 2 | |
| 292 self.PrintMessage(value) | |
| 293 self.indent -= 2 | |
| 294 out.write(' ' * self.indent + closeb) | |
| 295 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: | 393 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: |
| 296 enum_value = field.enum_type.values_by_number.get(value, None) | 394 enum_value = field.enum_type.values_by_number.get(value, None) |
| 297 if enum_value is not None: | 395 if enum_value is not None: |
| 298 out.write(enum_value.name) | 396 out.write(enum_value.name) |
| 299 else: | 397 else: |
| 300 out.write(str(value)) | 398 out.write(str(value)) |
| 301 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: | 399 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: |
| 302 out.write('\"') | 400 out.write('\"') |
| 303 if isinstance(value, six.text_type): | 401 if isinstance(value, six.text_type): |
| 304 out_value = value.encode('utf-8') | 402 out_value = value.encode('utf-8') |
| (...skipping 10 matching lines...) Expand all Loading... |
| 315 if value: | 413 if value: |
| 316 out.write('true') | 414 out.write('true') |
| 317 else: | 415 else: |
| 318 out.write('false') | 416 out.write('false') |
| 319 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: | 417 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: |
| 320 out.write('{1:{0}}'.format(self.float_format, value)) | 418 out.write('{1:{0}}'.format(self.float_format, value)) |
| 321 else: | 419 else: |
| 322 out.write(str(value)) | 420 out.write(str(value)) |
| 323 | 421 |
| 324 | 422 |
| 325 def Parse(text, message, | 423 def Parse(text, |
| 326 allow_unknown_extension=False, allow_field_number=False): | 424 message, |
| 327 """Parses an text representation of a protocol message into a message. | 425 allow_unknown_extension=False, |
| 426 allow_field_number=False): |
| 427 """Parses a text representation of a protocol message into a message. |
| 328 | 428 |
| 329 Args: | 429 Args: |
| 330 text: Message text representation. | 430 text: Message text representation. |
| 331 message: A protocol buffer message to merge into. | 431 message: A protocol buffer message to merge into. |
| 332 allow_unknown_extension: if True, skip over missing extensions and keep | 432 allow_unknown_extension: if True, skip over missing extensions and keep |
| 333 parsing | 433 parsing |
| 334 allow_field_number: if True, both field number and field name are allowed. | 434 allow_field_number: if True, both field number and field name are allowed. |
| 335 | 435 |
| 336 Returns: | 436 Returns: |
| 337 The same message passed as argument. | 437 The same message passed as argument. |
| 338 | 438 |
| 339 Raises: | 439 Raises: |
| 340 ParseError: On text parsing problems. | 440 ParseError: On text parsing problems. |
| 341 """ | 441 """ |
| 342 if not isinstance(text, str): | 442 if not isinstance(text, str): |
| 343 text = text.decode('utf-8') | 443 text = text.decode('utf-8') |
| 344 return ParseLines(text.split('\n'), message, allow_unknown_extension, | 444 return ParseLines( |
| 345 allow_field_number) | 445 text.split('\n'), message, allow_unknown_extension, allow_field_number) |
| 346 | 446 |
| 347 | 447 |
| 348 def Merge(text, message, allow_unknown_extension=False, | 448 def Merge(text, |
| 349 allow_field_number=False): | 449 message, |
| 350 """Parses an text representation of a protocol message into a message. | 450 allow_unknown_extension=False, |
| 451 allow_field_number=False, |
| 452 descriptor_pool=None): |
| 453 """Parses a text representation of a protocol message into a message. |
| 351 | 454 |
| 352 Like Parse(), but allows repeated values for a non-repeated field, and uses | 455 Like Parse(), but allows repeated values for a non-repeated field, and uses |
| 353 the last one. | 456 the last one. |
| 354 | 457 |
| 355 Args: | 458 Args: |
| 356 text: Message text representation. | 459 text: Message text representation. |
| 357 message: A protocol buffer message to merge into. | 460 message: A protocol buffer message to merge into. |
| 358 allow_unknown_extension: if True, skip over missing extensions and keep | 461 allow_unknown_extension: if True, skip over missing extensions and keep |
| 359 parsing | 462 parsing |
| 360 allow_field_number: if True, both field number and field name are allowed. | 463 allow_field_number: if True, both field number and field name are allowed. |
| 464 descriptor_pool: A DescriptorPool used to resolve Any types. |
| 361 | 465 |
| 362 Returns: | 466 Returns: |
| 363 The same message passed as argument. | 467 The same message passed as argument. |
| 364 | 468 |
| 365 Raises: | 469 Raises: |
| 366 ParseError: On text parsing problems. | 470 ParseError: On text parsing problems. |
| 367 """ | 471 """ |
| 368 return MergeLines(text.split('\n'), message, allow_unknown_extension, | 472 return MergeLines( |
| 369 allow_field_number) | 473 text.split('\n'), |
| 474 message, |
| 475 allow_unknown_extension, |
| 476 allow_field_number, |
| 477 descriptor_pool=descriptor_pool) |
| 370 | 478 |
| 371 | 479 |
| 372 def ParseLines(lines, message, allow_unknown_extension=False, | 480 def ParseLines(lines, |
| 481 message, |
| 482 allow_unknown_extension=False, |
| 373 allow_field_number=False): | 483 allow_field_number=False): |
| 374 """Parses an text representation of a protocol message into a message. | 484 """Parses a text representation of a protocol message into a message. |
| 375 | 485 |
| 376 Args: | 486 Args: |
| 377 lines: An iterable of lines of a message's text representation. | 487 lines: An iterable of lines of a message's text representation. |
| 378 message: A protocol buffer message to merge into. | 488 message: A protocol buffer message to merge into. |
| 379 allow_unknown_extension: if True, skip over missing extensions and keep | 489 allow_unknown_extension: if True, skip over missing extensions and keep |
| 380 parsing | 490 parsing |
| 381 allow_field_number: if True, both field number and field name are allowed. | 491 allow_field_number: if True, both field number and field name are allowed. |
| 382 | 493 |
| 383 Returns: | 494 Returns: |
| 384 The same message passed as argument. | 495 The same message passed as argument. |
| 385 | 496 |
| 386 Raises: | 497 Raises: |
| 387 ParseError: On text parsing problems. | 498 ParseError: On text parsing problems. |
| 388 """ | 499 """ |
| 389 parser = _Parser(allow_unknown_extension, allow_field_number) | 500 parser = _Parser(allow_unknown_extension, allow_field_number) |
| 390 return parser.ParseLines(lines, message) | 501 return parser.ParseLines(lines, message) |
| 391 | 502 |
| 392 | 503 |
| 393 def MergeLines(lines, message, allow_unknown_extension=False, | 504 def MergeLines(lines, |
| 394 allow_field_number=False): | 505 message, |
| 395 """Parses an text representation of a protocol message into a message. | 506 allow_unknown_extension=False, |
| 507 allow_field_number=False, |
| 508 descriptor_pool=None): |
| 509 """Parses a text representation of a protocol message into a message. |
| 396 | 510 |
| 397 Args: | 511 Args: |
| 398 lines: An iterable of lines of a message's text representation. | 512 lines: An iterable of lines of a message's text representation. |
| 399 message: A protocol buffer message to merge into. | 513 message: A protocol buffer message to merge into. |
| 400 allow_unknown_extension: if True, skip over missing extensions and keep | 514 allow_unknown_extension: if True, skip over missing extensions and keep |
| 401 parsing | 515 parsing |
| 402 allow_field_number: if True, both field number and field name are allowed. | 516 allow_field_number: if True, both field number and field name are allowed. |
| 403 | 517 |
| 404 Returns: | 518 Returns: |
| 405 The same message passed as argument. | 519 The same message passed as argument. |
| 406 | 520 |
| 407 Raises: | 521 Raises: |
| 408 ParseError: On text parsing problems. | 522 ParseError: On text parsing problems. |
| 409 """ | 523 """ |
| 410 parser = _Parser(allow_unknown_extension, allow_field_number) | 524 parser = _Parser(allow_unknown_extension, |
| 525 allow_field_number, |
| 526 descriptor_pool=descriptor_pool) |
| 411 return parser.MergeLines(lines, message) | 527 return parser.MergeLines(lines, message) |
| 412 | 528 |
| 413 | 529 |
| 414 class _Parser(object): | 530 class _Parser(object): |
| 415 """Text format parser for protocol message.""" | 531 """Text format parser for protocol message.""" |
| 416 | 532 |
| 417 def __init__(self, allow_unknown_extension=False, allow_field_number=False): | 533 def __init__(self, |
| 534 allow_unknown_extension=False, |
| 535 allow_field_number=False, |
| 536 descriptor_pool=None): |
| 418 self.allow_unknown_extension = allow_unknown_extension | 537 self.allow_unknown_extension = allow_unknown_extension |
| 419 self.allow_field_number = allow_field_number | 538 self.allow_field_number = allow_field_number |
| 539 self.descriptor_pool = descriptor_pool |
| 420 | 540 |
| 421 def ParseFromString(self, text, message): | 541 def ParseFromString(self, text, message): |
| 422 """Parses an text representation of a protocol message into a message.""" | 542 """Parses a text representation of a protocol message into a message.""" |
| 423 if not isinstance(text, str): | 543 if not isinstance(text, str): |
| 424 text = text.decode('utf-8') | 544 text = text.decode('utf-8') |
| 425 return self.ParseLines(text.split('\n'), message) | 545 return self.ParseLines(text.split('\n'), message) |
| 426 | 546 |
| 427 def ParseLines(self, lines, message): | 547 def ParseLines(self, lines, message): |
| 428 """Parses an text representation of a protocol message into a message.""" | 548 """Parses a text representation of a protocol message into a message.""" |
| 429 self._allow_multiple_scalars = False | 549 self._allow_multiple_scalars = False |
| 430 self._ParseOrMerge(lines, message) | 550 self._ParseOrMerge(lines, message) |
| 431 return message | 551 return message |
| 432 | 552 |
| 433 def MergeFromString(self, text, message): | 553 def MergeFromString(self, text, message): |
| 434 """Merges an text representation of a protocol message into a message.""" | 554 """Merges a text representation of a protocol message into a message.""" |
| 435 return self._MergeLines(text.split('\n'), message) | 555 return self._MergeLines(text.split('\n'), message) |
| 436 | 556 |
| 437 def MergeLines(self, lines, message): | 557 def MergeLines(self, lines, message): |
| 438 """Merges an text representation of a protocol message into a message.""" | 558 """Merges a text representation of a protocol message into a message.""" |
| 439 self._allow_multiple_scalars = True | 559 self._allow_multiple_scalars = True |
| 440 self._ParseOrMerge(lines, message) | 560 self._ParseOrMerge(lines, message) |
| 441 return message | 561 return message |
| 442 | 562 |
| 443 def _ParseOrMerge(self, lines, message): | 563 def _ParseOrMerge(self, lines, message): |
| 444 """Converts an text representation of a protocol message into a message. | 564 """Converts a text representation of a protocol message into a message. |
| 445 | 565 |
| 446 Args: | 566 Args: |
| 447 lines: Lines of a message's text representation. | 567 lines: Lines of a message's text representation. |
| 448 message: A protocol buffer message to merge into. | 568 message: A protocol buffer message to merge into. |
| 449 | 569 |
| 450 Raises: | 570 Raises: |
| 451 ParseError: On text parsing problems. | 571 ParseError: On text parsing problems. |
| 452 """ | 572 """ |
| 453 tokenizer = _Tokenizer(lines) | 573 tokenizer = Tokenizer(lines) |
| 454 while not tokenizer.AtEnd(): | 574 while not tokenizer.AtEnd(): |
| 455 self._MergeField(tokenizer, message) | 575 self._MergeField(tokenizer, message) |
| 456 | 576 |
| 457 def _MergeField(self, tokenizer, message): | 577 def _MergeField(self, tokenizer, message): |
| 458 """Merges a single protocol message field into a message. | 578 """Merges a single protocol message field into a message. |
| 459 | 579 |
| 460 Args: | 580 Args: |
| 461 tokenizer: A tokenizer to parse the field name and values. | 581 tokenizer: A tokenizer to parse the field name and values. |
| 462 message: A protocol message to record the data. | 582 message: A protocol message to record the data. |
| 463 | 583 |
| (...skipping 20 matching lines...) Expand all Loading... |
| 484 field = message.Extensions._FindExtensionByName(name) | 604 field = message.Extensions._FindExtensionByName(name) |
| 485 # pylint: enable=protected-access | 605 # pylint: enable=protected-access |
| 486 if not field: | 606 if not field: |
| 487 if self.allow_unknown_extension: | 607 if self.allow_unknown_extension: |
| 488 field = None | 608 field = None |
| 489 else: | 609 else: |
| 490 raise tokenizer.ParseErrorPreviousToken( | 610 raise tokenizer.ParseErrorPreviousToken( |
| 491 'Extension "%s" not registered.' % name) | 611 'Extension "%s" not registered.' % name) |
| 492 elif message_descriptor != field.containing_type: | 612 elif message_descriptor != field.containing_type: |
| 493 raise tokenizer.ParseErrorPreviousToken( | 613 raise tokenizer.ParseErrorPreviousToken( |
| 494 'Extension "%s" does not extend message type "%s".' % ( | 614 'Extension "%s" does not extend message type "%s".' % |
| 495 name, message_descriptor.full_name)) | 615 (name, message_descriptor.full_name)) |
| 496 | 616 |
| 497 tokenizer.Consume(']') | 617 tokenizer.Consume(']') |
| 498 | 618 |
| 499 else: | 619 else: |
| 500 name = tokenizer.ConsumeIdentifier() | 620 name = tokenizer.ConsumeIdentifierOrNumber() |
| 501 if self.allow_field_number and name.isdigit(): | 621 if self.allow_field_number and name.isdigit(): |
| 502 number = ParseInteger(name, True, True) | 622 number = ParseInteger(name, True, True) |
| 503 field = message_descriptor.fields_by_number.get(number, None) | 623 field = message_descriptor.fields_by_number.get(number, None) |
| 504 if not field and message_descriptor.is_extendable: | 624 if not field and message_descriptor.is_extendable: |
| 505 field = message.Extensions._FindExtensionByNumber(number) | 625 field = message.Extensions._FindExtensionByNumber(number) |
| 506 else: | 626 else: |
| 507 field = message_descriptor.fields_by_name.get(name, None) | 627 field = message_descriptor.fields_by_name.get(name, None) |
| 508 | 628 |
| 509 # Group names are expected to be capitalized as they appear in the | 629 # Group names are expected to be capitalized as they appear in the |
| 510 # .proto file, which actually matches their type names, not their field | 630 # .proto file, which actually matches their type names, not their field |
| 511 # names. | 631 # names. |
| 512 if not field: | 632 if not field: |
| 513 field = message_descriptor.fields_by_name.get(name.lower(), None) | 633 field = message_descriptor.fields_by_name.get(name.lower(), None) |
| 514 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: | 634 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: |
| 515 field = None | 635 field = None |
| 516 | 636 |
| 517 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and | 637 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and |
| 518 field.message_type.name != name): | 638 field.message_type.name != name): |
| 519 field = None | 639 field = None |
| 520 | 640 |
| 521 if not field: | 641 if not field: |
| 522 raise tokenizer.ParseErrorPreviousToken( | 642 raise tokenizer.ParseErrorPreviousToken( |
| 523 'Message type "%s" has no field named "%s".' % ( | 643 'Message type "%s" has no field named "%s".' % |
| 524 message_descriptor.full_name, name)) | 644 (message_descriptor.full_name, name)) |
| 525 | 645 |
| 526 if field: | 646 if field: |
| 527 if not self._allow_multiple_scalars and field.containing_oneof: | 647 if not self._allow_multiple_scalars and field.containing_oneof: |
| 528 # Check if there's a different field set in this oneof. | 648 # Check if there's a different field set in this oneof. |
| 529 # Note that we ignore the case if the same field was set before, and we | 649 # Note that we ignore the case if the same field was set before, and we |
| 530 # apply _allow_multiple_scalars to non-scalar fields as well. | 650 # apply _allow_multiple_scalars to non-scalar fields as well. |
| 531 which_oneof = message.WhichOneof(field.containing_oneof.name) | 651 which_oneof = message.WhichOneof(field.containing_oneof.name) |
| 532 if which_oneof is not None and which_oneof != field.name: | 652 if which_oneof is not None and which_oneof != field.name: |
| 533 raise tokenizer.ParseErrorPreviousToken( | 653 raise tokenizer.ParseErrorPreviousToken( |
| 534 'Field "%s" is specified along with field "%s", another member ' | 654 'Field "%s" is specified along with field "%s", another member ' |
| 535 'of oneof "%s" for message type "%s".' % ( | 655 'of oneof "%s" for message type "%s".' % |
| 536 field.name, which_oneof, field.containing_oneof.name, | 656 (field.name, which_oneof, field.containing_oneof.name, |
| 537 message_descriptor.full_name)) | 657 message_descriptor.full_name)) |
| 538 | 658 |
| 539 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: | 659 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: |
| 540 tokenizer.TryConsume(':') | 660 tokenizer.TryConsume(':') |
| 541 merger = self._MergeMessageField | 661 merger = self._MergeMessageField |
| 542 else: | 662 else: |
| 543 tokenizer.Consume(':') | 663 tokenizer.Consume(':') |
| 544 merger = self._MergeScalarField | 664 merger = self._MergeScalarField |
| 545 | 665 |
| 546 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED | 666 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and |
| 547 and tokenizer.TryConsume('[')): | 667 tokenizer.TryConsume('[')): |
| 548 # Short repeated format, e.g. "foo: [1, 2, 3]" | 668 # Short repeated format, e.g. "foo: [1, 2, 3]" |
| 549 while True: | 669 while True: |
| 550 merger(tokenizer, message, field) | 670 merger(tokenizer, message, field) |
| 551 if tokenizer.TryConsume(']'): break | 671 if tokenizer.TryConsume(']'): |
| 672 break |
| 552 tokenizer.Consume(',') | 673 tokenizer.Consume(',') |
| 553 | 674 |
| 554 else: | 675 else: |
| 555 merger(tokenizer, message, field) | 676 merger(tokenizer, message, field) |
| 556 | 677 |
| 557 else: # Proto field is unknown. | 678 else: # Proto field is unknown. |
| 558 assert self.allow_unknown_extension | 679 assert self.allow_unknown_extension |
| 559 _SkipFieldContents(tokenizer) | 680 _SkipFieldContents(tokenizer) |
| 560 | 681 |
| 561 # For historical reasons, fields may optionally be separated by commas or | 682 # For historical reasons, fields may optionally be separated by commas or |
| 562 # semicolons. | 683 # semicolons. |
| 563 if not tokenizer.TryConsume(','): | 684 if not tokenizer.TryConsume(','): |
| 564 tokenizer.TryConsume(';') | 685 tokenizer.TryConsume(';') |
| 565 | 686 |
| 687 def _ConsumeAnyTypeUrl(self, tokenizer): |
| 688 """Consumes a google.protobuf.Any type URL and returns the type name.""" |
| 689 # Consume "type.googleapis.com/". |
| 690 tokenizer.ConsumeIdentifier() |
| 691 tokenizer.Consume('.') |
| 692 tokenizer.ConsumeIdentifier() |
| 693 tokenizer.Consume('.') |
| 694 tokenizer.ConsumeIdentifier() |
| 695 tokenizer.Consume('/') |
| 696 # Consume the fully-qualified type name. |
| 697 name = [tokenizer.ConsumeIdentifier()] |
| 698 while tokenizer.TryConsume('.'): |
| 699 name.append(tokenizer.ConsumeIdentifier()) |
| 700 return '.'.join(name) |
| 701 |
| 566 def _MergeMessageField(self, tokenizer, message, field): | 702 def _MergeMessageField(self, tokenizer, message, field): |
| 567 """Merges a single scalar field into a message. | 703 """Merges a single scalar field into a message. |
| 568 | 704 |
| 569 Args: | 705 Args: |
| 570 tokenizer: A tokenizer to parse the field value. | 706 tokenizer: A tokenizer to parse the field value. |
| 571 message: The message of which field is a member. | 707 message: The message of which field is a member. |
| 572 field: The descriptor of the field to be merged. | 708 field: The descriptor of the field to be merged. |
| 573 | 709 |
| 574 Raises: | 710 Raises: |
| 575 ParseError: In case of text parsing problems. | 711 ParseError: In case of text parsing problems. |
| 576 """ | 712 """ |
| 577 is_map_entry = _IsMapEntry(field) | 713 is_map_entry = _IsMapEntry(field) |
| 578 | 714 |
| 579 if tokenizer.TryConsume('<'): | 715 if tokenizer.TryConsume('<'): |
| 580 end_token = '>' | 716 end_token = '>' |
| 581 else: | 717 else: |
| 582 tokenizer.Consume('{') | 718 tokenizer.Consume('{') |
| 583 end_token = '}' | 719 end_token = '}' |
| 584 | 720 |
| 585 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | 721 if (field.message_type.full_name == _ANY_FULL_TYPE_NAME and |
| 722 tokenizer.TryConsume('[')): |
| 723 packed_type_name = self._ConsumeAnyTypeUrl(tokenizer) |
| 724 tokenizer.Consume(']') |
| 725 tokenizer.TryConsume(':') |
| 726 if tokenizer.TryConsume('<'): |
| 727 expanded_any_end_token = '>' |
| 728 else: |
| 729 tokenizer.Consume('{') |
| 730 expanded_any_end_token = '}' |
| 731 if not self.descriptor_pool: |
| 732 raise ParseError('Descriptor pool required to parse expanded Any field') |
| 733 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name, |
| 734 self.descriptor_pool) |
| 735 if not expanded_any_sub_message: |
| 736 raise ParseError('Type %s not found in descriptor pool' % |
| 737 packed_type_name) |
| 738 while not tokenizer.TryConsume(expanded_any_end_token): |
| 739 if tokenizer.AtEnd(): |
| 740 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % |
| 741 (expanded_any_end_token,)) |
| 742 self._MergeField(tokenizer, expanded_any_sub_message) |
| 743 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
| 744 any_message = getattr(message, field.name).add() |
| 745 else: |
| 746 any_message = getattr(message, field.name) |
| 747 any_message.Pack(expanded_any_sub_message) |
| 748 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
| 586 if field.is_extension: | 749 if field.is_extension: |
| 587 sub_message = message.Extensions[field].add() | 750 sub_message = message.Extensions[field].add() |
| 588 elif is_map_entry: | 751 elif is_map_entry: |
| 589 # pylint: disable=protected-access | 752 # pylint: disable=protected-access |
| 590 sub_message = field.message_type._concrete_class() | 753 sub_message = field.message_type._concrete_class() |
| 591 else: | 754 else: |
| 592 sub_message = getattr(message, field.name).add() | 755 sub_message = getattr(message, field.name).add() |
| 593 else: | 756 else: |
| 594 if field.is_extension: | 757 if field.is_extension: |
| 595 sub_message = message.Extensions[field] | 758 sub_message = message.Extensions[field] |
| (...skipping 25 matching lines...) Expand all Loading... |
| 621 Raises: | 784 Raises: |
| 622 ParseError: In case of text parsing problems. | 785 ParseError: In case of text parsing problems. |
| 623 RuntimeError: On runtime errors. | 786 RuntimeError: On runtime errors. |
| 624 """ | 787 """ |
| 625 _ = self.allow_unknown_extension | 788 _ = self.allow_unknown_extension |
| 626 value = None | 789 value = None |
| 627 | 790 |
| 628 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, | 791 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, |
| 629 descriptor.FieldDescriptor.TYPE_SINT32, | 792 descriptor.FieldDescriptor.TYPE_SINT32, |
| 630 descriptor.FieldDescriptor.TYPE_SFIXED32): | 793 descriptor.FieldDescriptor.TYPE_SFIXED32): |
| 631 value = tokenizer.ConsumeInt32() | 794 value = _ConsumeInt32(tokenizer) |
| 632 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, | 795 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, |
| 633 descriptor.FieldDescriptor.TYPE_SINT64, | 796 descriptor.FieldDescriptor.TYPE_SINT64, |
| 634 descriptor.FieldDescriptor.TYPE_SFIXED64): | 797 descriptor.FieldDescriptor.TYPE_SFIXED64): |
| 635 value = tokenizer.ConsumeInt64() | 798 value = _ConsumeInt64(tokenizer) |
| 636 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, | 799 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, |
| 637 descriptor.FieldDescriptor.TYPE_FIXED32): | 800 descriptor.FieldDescriptor.TYPE_FIXED32): |
| 638 value = tokenizer.ConsumeUint32() | 801 value = _ConsumeUint32(tokenizer) |
| 639 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, | 802 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, |
| 640 descriptor.FieldDescriptor.TYPE_FIXED64): | 803 descriptor.FieldDescriptor.TYPE_FIXED64): |
| 641 value = tokenizer.ConsumeUint64() | 804 value = _ConsumeUint64(tokenizer) |
| 642 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, | 805 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, |
| 643 descriptor.FieldDescriptor.TYPE_DOUBLE): | 806 descriptor.FieldDescriptor.TYPE_DOUBLE): |
| 644 value = tokenizer.ConsumeFloat() | 807 value = tokenizer.ConsumeFloat() |
| 645 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: | 808 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: |
| 646 value = tokenizer.ConsumeBool() | 809 value = tokenizer.ConsumeBool() |
| 647 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: | 810 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: |
| 648 value = tokenizer.ConsumeString() | 811 value = tokenizer.ConsumeString() |
| 649 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: | 812 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: |
| 650 value = tokenizer.ConsumeByteString() | 813 value = tokenizer.ConsumeByteString() |
| 651 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: | 814 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 746 ParseError: In case an invalid field value is found. | 909 ParseError: In case an invalid field value is found. |
| 747 """ | 910 """ |
| 748 # String/bytes tokens can come in multiple adjacent string literals. | 911 # String/bytes tokens can come in multiple adjacent string literals. |
| 749 # If we can consume one, consume as many as we can. | 912 # If we can consume one, consume as many as we can. |
| 750 if tokenizer.TryConsumeByteString(): | 913 if tokenizer.TryConsumeByteString(): |
| 751 while tokenizer.TryConsumeByteString(): | 914 while tokenizer.TryConsumeByteString(): |
| 752 pass | 915 pass |
| 753 return | 916 return |
| 754 | 917 |
| 755 if (not tokenizer.TryConsumeIdentifier() and | 918 if (not tokenizer.TryConsumeIdentifier() and |
| 756 not tokenizer.TryConsumeInt64() and | 919 not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and |
| 757 not tokenizer.TryConsumeUint64() and | |
| 758 not tokenizer.TryConsumeFloat()): | 920 not tokenizer.TryConsumeFloat()): |
| 759 raise ParseError('Invalid field value: ' + tokenizer.token) | 921 raise ParseError('Invalid field value: ' + tokenizer.token) |
| 760 | 922 |
| 761 | 923 |
| 762 class _Tokenizer(object): | 924 class Tokenizer(object): |
| 763 """Protocol buffer text representation tokenizer. | 925 """Protocol buffer text representation tokenizer. |
| 764 | 926 |
| 765 This class handles the lower level string parsing by splitting it into | 927 This class handles the lower level string parsing by splitting it into |
| 766 meaningful tokens. | 928 meaningful tokens. |
| 767 | 929 |
| 768 It was directly ported from the Java protocol buffer API. | 930 It was directly ported from the Java protocol buffer API. |
| 769 """ | 931 """ |
| 770 | 932 |
| 771 _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE) | 933 _WHITESPACE = re.compile(r'\s+') |
| 934 _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE) |
| 935 _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE) |
| 772 _TOKEN = re.compile('|'.join([ | 936 _TOKEN = re.compile('|'.join([ |
| 773 r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier | 937 r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier |
| 774 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number | 938 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number |
| 775 ] + [ # quoted str for each quote mark | 939 ] + [ # quoted str for each quote mark |
| 776 r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES | 940 r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES |
| 777 ])) | 941 ])) |
| 778 | 942 |
| 779 _IDENTIFIER = re.compile(r'\w+') | 943 _IDENTIFIER = re.compile(r'[^\d\W]\w*') |
| 944 _IDENTIFIER_OR_NUMBER = re.compile(r'\w+') |
| 780 | 945 |
| 781 def __init__(self, lines): | 946 def __init__(self, lines, skip_comments=True): |
| 782 self._position = 0 | 947 self._position = 0 |
| 783 self._line = -1 | 948 self._line = -1 |
| 784 self._column = 0 | 949 self._column = 0 |
| 785 self._token_start = None | 950 self._token_start = None |
| 786 self.token = '' | 951 self.token = '' |
| 787 self._lines = iter(lines) | 952 self._lines = iter(lines) |
| 788 self._current_line = '' | 953 self._current_line = '' |
| 789 self._previous_line = 0 | 954 self._previous_line = 0 |
| 790 self._previous_column = 0 | 955 self._previous_column = 0 |
| 791 self._more_lines = True | 956 self._more_lines = True |
| 957 self._skip_comments = skip_comments |
| 958 self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT |
| 959 or self._WHITESPACE) |
| 792 self._SkipWhitespace() | 960 self._SkipWhitespace() |
| 793 self.NextToken() | 961 self.NextToken() |
| 794 | 962 |
| 795 def LookingAt(self, token): | 963 def LookingAt(self, token): |
| 796 return self.token == token | 964 return self.token == token |
| 797 | 965 |
| 798 def AtEnd(self): | 966 def AtEnd(self): |
| 799 """Checks the end of the text was reached. | 967 """Checks the end of the text was reached. |
| 800 | 968 |
| 801 Returns: | 969 Returns: |
| 802 True iff the end was reached. | 970 True iff the end was reached. |
| 803 """ | 971 """ |
| 804 return not self.token | 972 return not self.token |
| 805 | 973 |
| 806 def _PopLine(self): | 974 def _PopLine(self): |
| 807 while len(self._current_line) <= self._column: | 975 while len(self._current_line) <= self._column: |
| 808 try: | 976 try: |
| 809 self._current_line = next(self._lines) | 977 self._current_line = next(self._lines) |
| 810 except StopIteration: | 978 except StopIteration: |
| 811 self._current_line = '' | 979 self._current_line = '' |
| 812 self._more_lines = False | 980 self._more_lines = False |
| 813 return | 981 return |
| 814 else: | 982 else: |
| 815 self._line += 1 | 983 self._line += 1 |
| 816 self._column = 0 | 984 self._column = 0 |
| 817 | 985 |
| 818 def _SkipWhitespace(self): | 986 def _SkipWhitespace(self): |
| 819 while True: | 987 while True: |
| 820 self._PopLine() | 988 self._PopLine() |
| 821 match = self._WHITESPACE.match(self._current_line, self._column) | 989 match = self._whitespace_pattern.match(self._current_line, self._column) |
| 822 if not match: | 990 if not match: |
| 823 break | 991 break |
| 824 length = len(match.group(0)) | 992 length = len(match.group(0)) |
| 825 self._column += length | 993 self._column += length |
| 826 | 994 |
| 827 def TryConsume(self, token): | 995 def TryConsume(self, token): |
| 828 """Tries to consume a given piece of text. | 996 """Tries to consume a given piece of text. |
| 829 | 997 |
| 830 Args: | 998 Args: |
| 831 token: Text to consume. | 999 token: Text to consume. |
| 832 | 1000 |
| 833 Returns: | 1001 Returns: |
| 834 True iff the text was consumed. | 1002 True iff the text was consumed. |
| 835 """ | 1003 """ |
| 836 if self.token == token: | 1004 if self.token == token: |
| 837 self.NextToken() | 1005 self.NextToken() |
| 838 return True | 1006 return True |
| 839 return False | 1007 return False |
| 840 | 1008 |
| 841 def Consume(self, token): | 1009 def Consume(self, token): |
| 842 """Consumes a piece of text. | 1010 """Consumes a piece of text. |
| 843 | 1011 |
| 844 Args: | 1012 Args: |
| 845 token: Text to consume. | 1013 token: Text to consume. |
| 846 | 1014 |
| 847 Raises: | 1015 Raises: |
| 848 ParseError: If the text couldn't be consumed. | 1016 ParseError: If the text couldn't be consumed. |
| 849 """ | 1017 """ |
| 850 if not self.TryConsume(token): | 1018 if not self.TryConsume(token): |
| 851 raise self._ParseError('Expected "%s".' % token) | 1019 raise self.ParseError('Expected "%s".' % token) |
| 1020 |
| 1021 def ConsumeComment(self): |
| 1022 result = self.token |
| 1023 if not self._COMMENT.match(result): |
| 1024 raise self.ParseError('Expected comment.') |
| 1025 self.NextToken() |
| 1026 return result |
| 852 | 1027 |
| 853 def TryConsumeIdentifier(self): | 1028 def TryConsumeIdentifier(self): |
| 854 try: | 1029 try: |
| 855 self.ConsumeIdentifier() | 1030 self.ConsumeIdentifier() |
| 856 return True | 1031 return True |
| 857 except ParseError: | 1032 except ParseError: |
| 858 return False | 1033 return False |
| 859 | 1034 |
| 860 def ConsumeIdentifier(self): | 1035 def ConsumeIdentifier(self): |
| 861 """Consumes protocol message field identifier. | 1036 """Consumes protocol message field identifier. |
| 862 | 1037 |
| 863 Returns: | 1038 Returns: |
| 864 Identifier string. | 1039 Identifier string. |
| 865 | 1040 |
| 866 Raises: | 1041 Raises: |
| 867 ParseError: If an identifier couldn't be consumed. | 1042 ParseError: If an identifier couldn't be consumed. |
| 868 """ | 1043 """ |
| 869 result = self.token | 1044 result = self.token |
| 870 if not self._IDENTIFIER.match(result): | 1045 if not self._IDENTIFIER.match(result): |
| 871 raise self._ParseError('Expected identifier.') | 1046 raise self.ParseError('Expected identifier.') |
| 872 self.NextToken() | 1047 self.NextToken() |
| 873 return result | 1048 return result |
| 874 | 1049 |
| 875 def ConsumeInt32(self): | 1050 def TryConsumeIdentifierOrNumber(self): |
| 876 """Consumes a signed 32bit integer number. | 1051 try: |
| 1052 self.ConsumeIdentifierOrNumber() |
| 1053 return True |
| 1054 except ParseError: |
| 1055 return False |
| 877 | 1056 |
| 1057 def ConsumeIdentifierOrNumber(self): |
| 1058 """Consumes protocol message field identifier. |
| 1059 |
| 1060 Returns: |
| 1061 Identifier string. |
| 1062 |
| 1063 Raises: |
| 1064 ParseError: If an identifier couldn't be consumed. |
| 1065 """ |
| 1066 result = self.token |
| 1067 if not self._IDENTIFIER_OR_NUMBER.match(result): |
| 1068 raise self.ParseError('Expected identifier or number.') |
| 1069 self.NextToken() |
| 1070 return result |
| 1071 |
| 1072 def TryConsumeInteger(self): |
| 1073 try: |
| 1074 # Note: is_long only affects value type, not whether an error is raised. |
| 1075 self.ConsumeInteger() |
| 1076 return True |
| 1077 except ParseError: |
| 1078 return False |
| 1079 |
| 1080 def ConsumeInteger(self, is_long=False): |
| 1081 """Consumes an integer number. |
| 1082 |
| 1083 Args: |
| 1084 is_long: True if the value should be returned as a long integer. |
| 878 Returns: | 1085 Returns: |
| 879 The integer parsed. | 1086 The integer parsed. |
| 880 | 1087 |
| 881 Raises: | 1088 Raises: |
| 882 ParseError: If a signed 32bit integer couldn't be consumed. | 1089 ParseError: If an integer couldn't be consumed. |
| 883 """ | 1090 """ |
| 884 try: | 1091 try: |
| 885 result = ParseInteger(self.token, is_signed=True, is_long=False) | 1092 result = _ParseAbstractInteger(self.token, is_long=is_long) |
| 886 except ValueError as e: | 1093 except ValueError as e: |
| 887 raise self._ParseError(str(e)) | 1094 raise self.ParseError(str(e)) |
| 888 self.NextToken() | 1095 self.NextToken() |
| 889 return result | 1096 return result |
| 890 | 1097 |
| 891 def ConsumeUint32(self): | |
| 892 """Consumes an unsigned 32bit integer number. | |
| 893 | |
| 894 Returns: | |
| 895 The integer parsed. | |
| 896 | |
| 897 Raises: | |
| 898 ParseError: If an unsigned 32bit integer couldn't be consumed. | |
| 899 """ | |
| 900 try: | |
| 901 result = ParseInteger(self.token, is_signed=False, is_long=False) | |
| 902 except ValueError as e: | |
| 903 raise self._ParseError(str(e)) | |
| 904 self.NextToken() | |
| 905 return result | |
| 906 | |
| 907 def TryConsumeInt64(self): | |
| 908 try: | |
| 909 self.ConsumeInt64() | |
| 910 return True | |
| 911 except ParseError: | |
| 912 return False | |
| 913 | |
| 914 def ConsumeInt64(self): | |
| 915 """Consumes a signed 64bit integer number. | |
| 916 | |
| 917 Returns: | |
| 918 The integer parsed. | |
| 919 | |
| 920 Raises: | |
| 921 ParseError: If a signed 64bit integer couldn't be consumed. | |
| 922 """ | |
| 923 try: | |
| 924 result = ParseInteger(self.token, is_signed=True, is_long=True) | |
| 925 except ValueError as e: | |
| 926 raise self._ParseError(str(e)) | |
| 927 self.NextToken() | |
| 928 return result | |
| 929 | |
| 930 def TryConsumeUint64(self): | |
| 931 try: | |
| 932 self.ConsumeUint64() | |
| 933 return True | |
| 934 except ParseError: | |
| 935 return False | |
| 936 | |
| 937 def ConsumeUint64(self): | |
| 938 """Consumes an unsigned 64bit integer number. | |
| 939 | |
| 940 Returns: | |
| 941 The integer parsed. | |
| 942 | |
| 943 Raises: | |
| 944 ParseError: If an unsigned 64bit integer couldn't be consumed. | |
| 945 """ | |
| 946 try: | |
| 947 result = ParseInteger(self.token, is_signed=False, is_long=True) | |
| 948 except ValueError as e: | |
| 949 raise self._ParseError(str(e)) | |
| 950 self.NextToken() | |
| 951 return result | |
| 952 | |
| 953 def TryConsumeFloat(self): | 1098 def TryConsumeFloat(self): |
| 954 try: | 1099 try: |
| 955 self.ConsumeFloat() | 1100 self.ConsumeFloat() |
| 956 return True | 1101 return True |
| 957 except ParseError: | 1102 except ParseError: |
| 958 return False | 1103 return False |
| 959 | 1104 |
| 960 def ConsumeFloat(self): | 1105 def ConsumeFloat(self): |
| 961 """Consumes an floating point number. | 1106 """Consumes an floating point number. |
| 962 | 1107 |
| 963 Returns: | 1108 Returns: |
| 964 The number parsed. | 1109 The number parsed. |
| 965 | 1110 |
| 966 Raises: | 1111 Raises: |
| 967 ParseError: If a floating point number couldn't be consumed. | 1112 ParseError: If a floating point number couldn't be consumed. |
| 968 """ | 1113 """ |
| 969 try: | 1114 try: |
| 970 result = ParseFloat(self.token) | 1115 result = ParseFloat(self.token) |
| 971 except ValueError as e: | 1116 except ValueError as e: |
| 972 raise self._ParseError(str(e)) | 1117 raise self.ParseError(str(e)) |
| 973 self.NextToken() | 1118 self.NextToken() |
| 974 return result | 1119 return result |
| 975 | 1120 |
| 976 def ConsumeBool(self): | 1121 def ConsumeBool(self): |
| 977 """Consumes a boolean value. | 1122 """Consumes a boolean value. |
| 978 | 1123 |
| 979 Returns: | 1124 Returns: |
| 980 The bool parsed. | 1125 The bool parsed. |
| 981 | 1126 |
| 982 Raises: | 1127 Raises: |
| 983 ParseError: If a boolean value couldn't be consumed. | 1128 ParseError: If a boolean value couldn't be consumed. |
| 984 """ | 1129 """ |
| 985 try: | 1130 try: |
| 986 result = ParseBool(self.token) | 1131 result = ParseBool(self.token) |
| 987 except ValueError as e: | 1132 except ValueError as e: |
| 988 raise self._ParseError(str(e)) | 1133 raise self.ParseError(str(e)) |
| 989 self.NextToken() | 1134 self.NextToken() |
| 990 return result | 1135 return result |
| 991 | 1136 |
| 992 def TryConsumeByteString(self): | 1137 def TryConsumeByteString(self): |
| 993 try: | 1138 try: |
| 994 self.ConsumeByteString() | 1139 self.ConsumeByteString() |
| 995 return True | 1140 return True |
| 996 except ParseError: | 1141 except ParseError: |
| 997 return False | 1142 return False |
| 998 | 1143 |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1032 tokens which are automatically concatenated, like in C or Python. This | 1177 tokens which are automatically concatenated, like in C or Python. This |
| 1033 method only consumes one token. | 1178 method only consumes one token. |
| 1034 | 1179 |
| 1035 Returns: | 1180 Returns: |
| 1036 The token parsed. | 1181 The token parsed. |
| 1037 Raises: | 1182 Raises: |
| 1038 ParseError: When the wrong format data is found. | 1183 ParseError: When the wrong format data is found. |
| 1039 """ | 1184 """ |
| 1040 text = self.token | 1185 text = self.token |
| 1041 if len(text) < 1 or text[0] not in _QUOTES: | 1186 if len(text) < 1 or text[0] not in _QUOTES: |
| 1042 raise self._ParseError('Expected string but found: %r' % (text,)) | 1187 raise self.ParseError('Expected string but found: %r' % (text,)) |
| 1043 | 1188 |
| 1044 if len(text) < 2 or text[-1] != text[0]: | 1189 if len(text) < 2 or text[-1] != text[0]: |
| 1045 raise self._ParseError('String missing ending quote: %r' % (text,)) | 1190 raise self.ParseError('String missing ending quote: %r' % (text,)) |
| 1046 | 1191 |
| 1047 try: | 1192 try: |
| 1048 result = text_encoding.CUnescape(text[1:-1]) | 1193 result = text_encoding.CUnescape(text[1:-1]) |
| 1049 except ValueError as e: | 1194 except ValueError as e: |
| 1050 raise self._ParseError(str(e)) | 1195 raise self.ParseError(str(e)) |
| 1051 self.NextToken() | 1196 self.NextToken() |
| 1052 return result | 1197 return result |
| 1053 | 1198 |
| 1054 def ConsumeEnum(self, field): | 1199 def ConsumeEnum(self, field): |
| 1055 try: | 1200 try: |
| 1056 result = ParseEnum(field, self.token) | 1201 result = ParseEnum(field, self.token) |
| 1057 except ValueError as e: | 1202 except ValueError as e: |
| 1058 raise self._ParseError(str(e)) | 1203 raise self.ParseError(str(e)) |
| 1059 self.NextToken() | 1204 self.NextToken() |
| 1060 return result | 1205 return result |
| 1061 | 1206 |
| 1062 def ParseErrorPreviousToken(self, message): | 1207 def ParseErrorPreviousToken(self, message): |
| 1063 """Creates and *returns* a ParseError for the previously read token. | 1208 """Creates and *returns* a ParseError for the previously read token. |
| 1064 | 1209 |
| 1065 Args: | 1210 Args: |
| 1066 message: A message to set for the exception. | 1211 message: A message to set for the exception. |
| 1067 | 1212 |
| 1068 Returns: | 1213 Returns: |
| 1069 A ParseError instance. | 1214 A ParseError instance. |
| 1070 """ | 1215 """ |
| 1071 return ParseError('%d:%d : %s' % ( | 1216 return ParseError(message, self._previous_line + 1, |
| 1072 self._previous_line + 1, self._previous_column + 1, message)) | 1217 self._previous_column + 1) |
| 1073 | 1218 |
| 1074 def _ParseError(self, message): | 1219 def ParseError(self, message): |
| 1075 """Creates and *returns* a ParseError for the current token.""" | 1220 """Creates and *returns* a ParseError for the current token.""" |
| 1076 return ParseError('%d:%d : %s' % ( | 1221 return ParseError(message, self._line + 1, self._column + 1) |
| 1077 self._line + 1, self._column + 1, message)) | |
| 1078 | 1222 |
| 1079 def _StringParseError(self, e): | 1223 def _StringParseError(self, e): |
| 1080 return self._ParseError('Couldn\'t parse string: ' + str(e)) | 1224 return self.ParseError('Couldn\'t parse string: ' + str(e)) |
| 1081 | 1225 |
| 1082 def NextToken(self): | 1226 def NextToken(self): |
| 1083 """Reads the next meaningful token.""" | 1227 """Reads the next meaningful token.""" |
| 1084 self._previous_line = self._line | 1228 self._previous_line = self._line |
| 1085 self._previous_column = self._column | 1229 self._previous_column = self._column |
| 1086 | 1230 |
| 1087 self._column += len(self.token) | 1231 self._column += len(self.token) |
| 1088 self._SkipWhitespace() | 1232 self._SkipWhitespace() |
| 1089 | 1233 |
| 1090 if not self._more_lines: | 1234 if not self._more_lines: |
| 1091 self.token = '' | 1235 self.token = '' |
| 1092 return | 1236 return |
| 1093 | 1237 |
| 1094 match = self._TOKEN.match(self._current_line, self._column) | 1238 match = self._TOKEN.match(self._current_line, self._column) |
| 1239 if not match and not self._skip_comments: |
| 1240 match = self._COMMENT.match(self._current_line, self._column) |
| 1095 if match: | 1241 if match: |
| 1096 token = match.group(0) | 1242 token = match.group(0) |
| 1097 self.token = token | 1243 self.token = token |
| 1098 else: | 1244 else: |
| 1099 self.token = self._current_line[self._column] | 1245 self.token = self._current_line[self._column] |
| 1100 | 1246 |
# Aliased so it can still be accessed by current visibility violators.
# TODO(dbarnett): Migrate violators to textformat_tokenizer.
_Tokenizer = Tokenizer  # pylint: disable=invalid-name
| 1250 |
| 1251 |
def _ConsumeInt32(tokenizer):
  """Consumes a signed 32bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 32bit integer couldn't be consumed.
  """
  # Delegate to the generic consumer with int32 signedness/width flags.
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=False)
| 1265 |
| 1266 |
def _ConsumeUint32(tokenizer):
  """Consumes an unsigned 32bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 32bit integer couldn't be consumed.
  """
  # Delegate to the generic consumer with uint32 signedness/width flags.
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=False)
| 1280 |
| 1281 |
def _TryConsumeInt64(tokenizer):
  """Attempts to consume a signed 64bit integer; returns True on success."""
  try:
    _ConsumeInt64(tokenizer)
  except ParseError:
    return False
  return True
| 1288 |
| 1289 |
def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  # Fix: the original docstring said "32bit", but is_long=True selects the
  # 64-bit range check.
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)
| 1303 |
| 1304 |
def _TryConsumeUint64(tokenizer):
  """Attempts to consume an unsigned 64bit integer; returns True on success."""
  try:
    _ConsumeUint64(tokenizer)
  except ParseError:
    return False
  return True
| 1311 |
| 1312 |
def _ConsumeUint64(tokenizer):
  """Consumes an unsigned 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 64bit integer couldn't be consumed.
  """
  # Delegate to the generic consumer with uint64 signedness/width flags.
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=True)
| 1326 |
| 1327 |
def _TryConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Attempts to consume an integer with the given traits; returns success."""
  try:
    _ConsumeInteger(tokenizer, is_signed=is_signed, is_long=is_long)
  except ParseError:
    return False
  return True
| 1334 |
| 1335 |
def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  try:
    value = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long)
  except ValueError as e:
    # Re-raise as a ParseError carrying the tokenizer's current position.
    raise tokenizer.ParseError(str(e))
  tokenizer.NextToken()
  return value
| 1356 |
| 1101 | 1357 |
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Do the actual parsing. Exception handling is propagated to caller.
  value = _ParseAbstractInteger(text, is_long=is_long)

  # Range-check with the matching checker (indexed by width then signedness);
  # it raises ValueError if the value is out of range.
  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
  checker.CheckValue(value)
  return value
| 1379 |
| 1380 |
| 1381 def _ParseAbstractInteger(text, is_long=False): |
| 1382 """Parses an integer without checking size/signedness. |
| 1383 |
| 1384 Args: |
| 1385 text: The text to parse. |
| 1386 is_long: True if the value should be returned as a long integer. |
| 1387 |
| 1388 Returns: |
| 1389 The integer value. |
| 1390 |
| 1391 Raises: |
| 1392 ValueError: Thrown Iff the text is not a valid integer. |
| 1393 """ |
| 1394 # Do the actual parsing. Exception handling is propagated to caller. |
| 1117 try: | 1395 try: |
| 1118 # We force 32-bit values to int and 64-bit values to long to make | 1396 # We force 32-bit values to int and 64-bit values to long to make |
| 1119 # alternate implementations where the distinction is more significant | 1397 # alternate implementations where the distinction is more significant |
| 1120 # (e.g. the C++ implementation) simpler. | 1398 # (e.g. the C++ implementation) simpler. |
| 1121 if is_long: | 1399 if is_long: |
| 1122 result = long(text, 0) | 1400 return long(text, 0) |
| 1123 else: | 1401 else: |
| 1124 result = int(text, 0) | 1402 return int(text, 0) |
| 1125 except ValueError: | 1403 except ValueError: |
| 1126 raise ValueError('Couldn\'t parse integer: %s' % text) | 1404 raise ValueError('Couldn\'t parse integer: %s' % text) |
| 1127 | 1405 |
| 1128 # Check if the integer is sane. Exceptions handled by callers. | |
| 1129 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)] | |
| 1130 checker.CheckValue(result) | |
| 1131 return result | |
| 1132 | |
| 1133 | 1406 |
| 1134 def ParseFloat(text): | 1407 def ParseFloat(text): |
| 1135 """Parse a floating point number. | 1408 """Parse a floating point number. |
| 1136 | 1409 |
| 1137 Args: | 1410 Args: |
| 1138 text: Text to parse. | 1411 text: Text to parse. |
| 1139 | 1412 |
| 1140 Returns: | 1413 Returns: |
| 1141 The number parsed. | 1414 The number parsed. |
| 1142 | 1415 |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1199 Raises: | 1472 Raises: |
| 1200 ValueError: If the enum value could not be parsed. | 1473 ValueError: If the enum value could not be parsed. |
| 1201 """ | 1474 """ |
| 1202 enum_descriptor = field.enum_type | 1475 enum_descriptor = field.enum_type |
| 1203 try: | 1476 try: |
| 1204 number = int(value, 0) | 1477 number = int(value, 0) |
| 1205 except ValueError: | 1478 except ValueError: |
| 1206 # Identifier. | 1479 # Identifier. |
| 1207 enum_value = enum_descriptor.values_by_name.get(value, None) | 1480 enum_value = enum_descriptor.values_by_name.get(value, None) |
| 1208 if enum_value is None: | 1481 if enum_value is None: |
| 1209 raise ValueError( | 1482 raise ValueError('Enum type "%s" has no value named %s.' % |
| 1210 'Enum type "%s" has no value named %s.' % ( | 1483 (enum_descriptor.full_name, value)) |
| 1211 enum_descriptor.full_name, value)) | |
| 1212 else: | 1484 else: |
| 1213 # Numeric value. | 1485 # Numeric value. |
| 1214 enum_value = enum_descriptor.values_by_number.get(number, None) | 1486 enum_value = enum_descriptor.values_by_number.get(number, None) |
| 1215 if enum_value is None: | 1487 if enum_value is None: |
| 1216 raise ValueError( | 1488 raise ValueError('Enum type "%s" has no value with number %d.' % |
| 1217 'Enum type "%s" has no value with number %d.' % ( | 1489 (enum_descriptor.full_name, number)) |
| 1218 enum_descriptor.full_name, number)) | |
| 1219 return enum_value.number | 1490 return enum_value.number |
| OLD | NEW |