| OLD | NEW |
| 1 # Protocol Buffers - Google's data interchange format | 1 # Protocol Buffers - Google's data interchange format |
| 2 # Copyright 2008 Google Inc. All rights reserved. | 2 # Copyright 2008 Google Inc. All rights reserved. |
| 3 # https://developers.google.com/protocol-buffers/ | 3 # https://developers.google.com/protocol-buffers/ |
| 4 # | 4 # |
| 5 # Redistribution and use in source and binary forms, with or without | 5 # Redistribution and use in source and binary forms, with or without |
| 6 # modification, are permitted provided that the following conditions are | 6 # modification, are permitted provided that the following conditions are |
| 7 # met: | 7 # met: |
| 8 # | 8 # |
| 9 # * Redistributions of source code must retain the above copyright | 9 # * Redistributions of source code must retain the above copyright |
| 10 # notice, this list of conditions and the following disclaimer. | 10 # notice, this list of conditions and the following disclaimer. |
| (...skipping 30 matching lines...) |
| 41 """ | 41 """ |
| 42 | 42 |
| 43 __author__ = 'kenton@google.com (Kenton Varda)' | 43 __author__ = 'kenton@google.com (Kenton Varda)' |
| 44 | 44 |
| 45 import io | 45 import io |
| 46 import re | 46 import re |
| 47 | 47 |
| 48 import six | 48 import six |
| 49 | 49 |
| 50 if six.PY3: | 50 if six.PY3: |
| 51 long = int | 51 long = int # pylint: disable=redefined-builtin,invalid-name |
| 52 | 52 |
| 53 # pylint: disable=g-import-not-at-top |
| 53 from google.protobuf.internal import type_checkers | 54 from google.protobuf.internal import type_checkers |
| 54 from google.protobuf import descriptor | 55 from google.protobuf import descriptor |
| 55 from google.protobuf import text_encoding | 56 from google.protobuf import text_encoding |
| 56 | 57 |
| 57 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', | 58 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', 'PrintFieldValue', |
| 58 'PrintFieldValue', 'Merge'] | 59 'Merge'] |
| 59 | |
| 60 | 60 |
| 61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), | 61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), |
| 62 type_checkers.Int32ValueChecker(), | 62 type_checkers.Int32ValueChecker(), |
| 63 type_checkers.Uint64ValueChecker(), | 63 type_checkers.Uint64ValueChecker(), |
| 64 type_checkers.Int64ValueChecker()) | 64 type_checkers.Int64ValueChecker()) |
| 65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE) | 65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE) |
| 66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE) | 66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE) |
| 67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT, | 67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT, |
| 68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE]) | 68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE]) |
| 69 _QUOTES = frozenset(("'", '"')) | 69 _QUOTES = frozenset(("'", '"')) |
| 70 _ANY_FULL_TYPE_NAME = 'google.protobuf.Any' |
| 70 | 71 |
| 71 | 72 |
| 72 class Error(Exception): | 73 class Error(Exception): |
| 73 """Top-level module error for text_format.""" | 74 """Top-level module error for text_format.""" |
| 74 | 75 |
| 75 | 76 |
| 76 class ParseError(Error): | 77 class ParseError(Error): |
| 77 """Thrown in case of text parsing error.""" | 78 """Thrown in case of text parsing or tokenizing error.""" |
| 79 |
| 80 def __init__(self, message=None, line=None, column=None): |
| 81 if message is not None and line is not None: |
| 82 loc = str(line) |
| 83 if column is not None: |
| 84 loc += ':{0}'.format(column) |
| 85 message = '{0} : {1}'.format(loc, message) |
| 86 if message is not None: |
| 87 super(ParseError, self).__init__(message) |
| 88 else: |
| 89 super(ParseError, self).__init__() |
| 90 self._line = line |
| 91 self._column = column |
| 92 |
| 93 def GetLine(self): |
| 94 return self._line |
| 95 |
| 96 def GetColumn(self): |
| 97 return self._column |
| 78 | 98 |
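A quick illustration of the location prefix added by the new ParseError constructor; the values are made up:

```python
from google.protobuf import text_format

err = text_format.ParseError('unexpected token', line=3, column=14)
print(str(err))         # 3:14 : unexpected token
print(err.GetLine())    # 3
print(err.GetColumn())  # 14
```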
| 79 | 99 |
| 80 class TextWriter(object): | 100 class TextWriter(object): |
| 101 |
| 81 def __init__(self, as_utf8): | 102 def __init__(self, as_utf8): |
| 82 if six.PY2: | 103 if six.PY2: |
| 83 self._writer = io.BytesIO() | 104 self._writer = io.BytesIO() |
| 84 else: | 105 else: |
| 85 self._writer = io.StringIO() | 106 self._writer = io.StringIO() |
| 86 | 107 |
| 87 def write(self, val): | 108 def write(self, val): |
| 88 if six.PY2: | 109 if six.PY2: |
| 89 if isinstance(val, six.text_type): | 110 if isinstance(val, six.text_type): |
| 90 val = val.encode('utf-8') | 111 val = val.encode('utf-8') |
| 91 return self._writer.write(val) | 112 return self._writer.write(val) |
| 92 | 113 |
| 93 def close(self): | 114 def close(self): |
| 94 return self._writer.close() | 115 return self._writer.close() |
| 95 | 116 |
| 96 def getvalue(self): | 117 def getvalue(self): |
| 97 return self._writer.getvalue() | 118 return self._writer.getvalue() |
| 98 | 119 |
| 99 | 120 |
| 100 def MessageToString(message, as_utf8=False, as_one_line=False, | 121 def MessageToString(message, |
| 101 pointy_brackets=False, use_index_order=False, | 122 as_utf8=False, |
| 102 float_format=None, use_field_number=False): | 123 as_one_line=False, |
| 124 pointy_brackets=False, |
| 125 use_index_order=False, |
| 126 float_format=None, |
| 127 use_field_number=False, |
| 128 descriptor_pool=None, |
| 129 indent=0): |
| 103 """Convert protobuf message to text format. | 130 """Convert protobuf message to text format. |
| 104 | 131 |
| 105 Floating point values can be formatted compactly with 15 digits of | 132 Floating point values can be formatted compactly with 15 digits of |
| 106 precision (which is the most that IEEE 754 "double" can guarantee) | 133 precision (which is the most that IEEE 754 "double" can guarantee) |
| 107 using float_format='.15g'. To ensure that converting to text and back to a | 134 using float_format='.15g'. To ensure that converting to text and back to a |
| 108 proto will result in an identical value, float_format='.17g' should be used. | 135 proto will result in an identical value, float_format='.17g' should be used. |
| 109 | 136 |
| 110 Args: | 137 Args: |
| 111 message: The protocol buffers message. | 138 message: The protocol buffers message. |
| 112 as_utf8: Produce text output in UTF8 format. | 139 as_utf8: Produce text output in UTF8 format. |
| 113 as_one_line: Don't introduce newlines between fields. | 140 as_one_line: Don't introduce newlines between fields. |
| 114 pointy_brackets: If True, use angle brackets instead of curly braces for | 141 pointy_brackets: If True, use angle brackets instead of curly braces for |
| 115 nesting. | 142 nesting. |
| 116 use_index_order: If True, print fields of a proto message using the order | 143 use_index_order: If True, print fields of a proto message using the order |
| 117 defined in source code instead of the field number. By default, use the | 144 defined in source code instead of the field number. By default, use the |
| 118 field number order. | 145 field number order. |
| 119 float_format: If set, use this to specify floating point number formatting | 146 float_format: If set, use this to specify floating point number formatting |
| 120 (per the "Format Specification Mini-Language"); otherwise, str() is used. | 147 (per the "Format Specification Mini-Language"); otherwise, str() is used. |
| 121 use_field_number: If True, print field numbers instead of names. | 148 use_field_number: If True, print field numbers instead of names. |
| 149 descriptor_pool: A DescriptorPool used to resolve Any types. |
| 150 indent: The indent level, in terms of spaces, for pretty print. |
| 122 | 151 |
| 123 Returns: | 152 Returns: |
| 124 A string of the text formatted protocol buffer message. | 153 A string of the text formatted protocol buffer message. |
| 125 """ | 154 """ |
| 126 out = TextWriter(as_utf8) | 155 out = TextWriter(as_utf8) |
| 127 printer = _Printer(out, 0, as_utf8, as_one_line, | 156 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
| 128 pointy_brackets, use_index_order, float_format, | 157 use_index_order, float_format, use_field_number, |
| 129 use_field_number) | 158 descriptor_pool) |
| 130 printer.PrintMessage(message) | 159 printer.PrintMessage(message) |
| 131 result = out.getvalue() | 160 result = out.getvalue() |
| 132 out.close() | 161 out.close() |
| 133 if as_one_line: | 162 if as_one_line: |
| 134 return result.rstrip() | 163 return result.rstrip() |
| 135 return result | 164 return result |
| 136 | 165 |
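A hedged usage sketch of the float_format guidance in the MessageToString docstring; `my_pb2` and its `Measurement` message (with a double field `ratio`) are hypothetical stand-ins for any generated type:

```python
from google.protobuf import text_format
from my_project import my_pb2  # hypothetical generated module

msg = my_pb2.Measurement(ratio=1.0 / 3.0)  # 'ratio' is a double field

# Compact output: 15 significant digits, easiest on human readers.
print(text_format.MessageToString(msg, float_format='.15g'))

# Round-trip safe: 17 digits survive text -> proto -> text unchanged.
one_line = text_format.MessageToString(msg, float_format='.17g', as_one_line=True)
```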
| 137 | 166 |
| 138 def _IsMapEntry(field): | 167 def _IsMapEntry(field): |
| 139 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and | 168 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and |
| 140 field.message_type.has_options and | 169 field.message_type.has_options and |
| 141 field.message_type.GetOptions().map_entry) | 170 field.message_type.GetOptions().map_entry) |
| 142 | 171 |
| 143 | 172 |
| 144 def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False, | 173 def PrintMessage(message, |
| 145 pointy_brackets=False, use_index_order=False, | 174 out, |
| 146 float_format=None, use_field_number=False): | 175 indent=0, |
| 147 printer = _Printer(out, indent, as_utf8, as_one_line, | 176 as_utf8=False, |
| 148 pointy_brackets, use_index_order, float_format, | 177 as_one_line=False, |
| 149 use_field_number) | 178 pointy_brackets=False, |
| 179 use_index_order=False, |
| 180 float_format=None, |
| 181 use_field_number=False, |
| 182 descriptor_pool=None): |
| 183 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
| 184 use_index_order, float_format, use_field_number, |
| 185 descriptor_pool) |
| 150 printer.PrintMessage(message) | 186 printer.PrintMessage(message) |
| 151 | 187 |
| 152 | 188 |
| 153 def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False, | 189 def PrintField(field, |
| 154 pointy_brackets=False, use_index_order=False, float_format=None): | 190 value, |
| 191 out, |
| 192 indent=0, |
| 193 as_utf8=False, |
| 194 as_one_line=False, |
| 195 pointy_brackets=False, |
| 196 use_index_order=False, |
| 197 float_format=None): |
| 155 """Print a single field name/value pair.""" | 198 """Print a single field name/value pair.""" |
| 156 printer = _Printer(out, indent, as_utf8, as_one_line, | 199 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
| 157 pointy_brackets, use_index_order, float_format) | 200 use_index_order, float_format) |
| 158 printer.PrintField(field, value) | 201 printer.PrintField(field, value) |
| 159 | 202 |
| 160 | 203 |
| 161 def PrintFieldValue(field, value, out, indent=0, as_utf8=False, | 204 def PrintFieldValue(field, |
| 162 as_one_line=False, pointy_brackets=False, | 205 value, |
| 206 out, |
| 207 indent=0, |
| 208 as_utf8=False, |
| 209 as_one_line=False, |
| 210 pointy_brackets=False, |
| 163 use_index_order=False, | 211 use_index_order=False, |
| 164 float_format=None): | 212 float_format=None): |
| 165 """Print a single field value (not including name).""" | 213 """Print a single field value (not including name).""" |
| 166 printer = _Printer(out, indent, as_utf8, as_one_line, | 214 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
| 167 pointy_brackets, use_index_order, float_format) | 215 use_index_order, float_format) |
| 168 printer.PrintFieldValue(field, value) | 216 printer.PrintFieldValue(field, value) |
| 169 | 217 |
| 170 | 218 |
| 219 def _BuildMessageFromTypeName(type_name, descriptor_pool): |
| 220 """Returns a protobuf message instance. |
| 221 |
| 222 Args: |
| 223 type_name: Fully-qualified protobuf message type name string. |
| 224 descriptor_pool: DescriptorPool instance. |
| 225 |
| 226 Returns: |
| 227 A Message instance of type matching type_name, or None if a Descriptor |
| 228 wasn't found matching type_name. |
| 229 """ |
| 230 # pylint: disable=g-import-not-at-top |
| 231 from google.protobuf import symbol_database |
| 232 database = symbol_database.Default() |
| 233 try: |
| 234 message_descriptor = descriptor_pool.FindMessageTypeByName(type_name) |
| 235 except KeyError: |
| 236 return None |
| 237 message_type = database.GetPrototype(message_descriptor) |
| 238 return message_type() |
| 239 |
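A sketch of how the new descriptor_pool plumbing is exercised when printing a packed google.protobuf.Any (the printer below expands it via _BuildMessageFromTypeName); it assumes the packed type, Duration here, is already registered in the default pool:

```python
from google.protobuf import any_pb2, descriptor_pool, duration_pb2, text_format

any_msg = any_pb2.Any()
any_msg.Pack(duration_pb2.Duration(seconds=3))

# Without descriptor_pool the Any prints as raw type_url/value fields; with a
# pool that knows the packed type it prints in the expanded form:
#   [type.googleapis.com/google.protobuf.Duration] {
#     seconds: 3
#   }
print(text_format.MessageToString(any_msg,
                                  descriptor_pool=descriptor_pool.Default()))
```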
| 240 |
| 171 class _Printer(object): | 241 class _Printer(object): |
| 172 """Text format printer for protocol message.""" | 242 """Text format printer for protocol message.""" |
| 173 | 243 |
| 174 def __init__(self, out, indent=0, as_utf8=False, as_one_line=False, | 244 def __init__(self, |
| 175 pointy_brackets=False, use_index_order=False, float_format=None, | 245 out, |
| 176 use_field_number=False): | 246 indent=0, |
| 247 as_utf8=False, |
| 248 as_one_line=False, |
| 249 pointy_brackets=False, |
| 250 use_index_order=False, |
| 251 float_format=None, |
| 252 use_field_number=False, |
| 253 descriptor_pool=None): |
| 177 """Initialize the Printer. | 254 """Initialize the Printer. |
| 178 | 255 |
| 179 Floating point values can be formatted compactly with 15 digits of | 256 Floating point values can be formatted compactly with 15 digits of |
| 180 precision (which is the most that IEEE 754 "double" can guarantee) | 257 precision (which is the most that IEEE 754 "double" can guarantee) |
| 181 using float_format='.15g'. To ensure that converting to text and back to a | 258 using float_format='.15g'. To ensure that converting to text and back to a |
| 182 proto will result in an identical value, float_format='.17g' should be used. | 259 proto will result in an identical value, float_format='.17g' should be used. |
| 183 | 260 |
| 184 Args: | 261 Args: |
| 185 out: To record the text format result. | 262 out: To record the text format result. |
| 186 indent: The indent level for pretty print. | 263 indent: The indent level for pretty print. |
| 187 as_utf8: Produce text output in UTF8 format. | 264 as_utf8: Produce text output in UTF8 format. |
| 188 as_one_line: Don't introduce newlines between fields. | 265 as_one_line: Don't introduce newlines between fields. |
| 189 pointy_brackets: If True, use angle brackets instead of curly braces for | 266 pointy_brackets: If True, use angle brackets instead of curly braces for |
| 190 nesting. | 267 nesting. |
| 191 use_index_order: If True, print fields of a proto message using the order | 268 use_index_order: If True, print fields of a proto message using the order |
| 192 defined in source code instead of the field number. By default, use the | 269 defined in source code instead of the field number. By default, use the |
| 193 field number order. | 270 field number order. |
| 194 float_format: If set, use this to specify floating point number formatting | 271 float_format: If set, use this to specify floating point number formatting |
| 195 (per the "Format Specification Mini-Language"); otherwise, str() is | 272 (per the "Format Specification Mini-Language"); otherwise, str() is |
| 196 used. | 273 used. |
| 197 use_field_number: If True, print field numbers instead of names. | 274 use_field_number: If True, print field numbers instead of names. |
| 275 descriptor_pool: A DescriptorPool used to resolve Any types. |
| 198 """ | 276 """ |
| 199 self.out = out | 277 self.out = out |
| 200 self.indent = indent | 278 self.indent = indent |
| 201 self.as_utf8 = as_utf8 | 279 self.as_utf8 = as_utf8 |
| 202 self.as_one_line = as_one_line | 280 self.as_one_line = as_one_line |
| 203 self.pointy_brackets = pointy_brackets | 281 self.pointy_brackets = pointy_brackets |
| 204 self.use_index_order = use_index_order | 282 self.use_index_order = use_index_order |
| 205 self.float_format = float_format | 283 self.float_format = float_format |
| 206 self.use_field_number = use_field_number | 284 self.use_field_number = use_field_number |
| 285 self.descriptor_pool = descriptor_pool |
| 286 |
| 287 def _TryPrintAsAnyMessage(self, message): |
| 288 """Serializes if message is a google.protobuf.Any field.""" |
| 289 packed_message = _BuildMessageFromTypeName(message.TypeName(), |
| 290 self.descriptor_pool) |
| 291 if packed_message: |
| 292 packed_message.MergeFromString(message.value) |
| 293 self.out.write('%s[%s]' % (self.indent * ' ', message.type_url)) |
| 294 self._PrintMessageFieldValue(packed_message) |
| 295 self.out.write(' ' if self.as_one_line else '\n') |
| 296 return True |
| 297 else: |
| 298 return False |
| 207 | 299 |
| 208 def PrintMessage(self, message): | 300 def PrintMessage(self, message): |
| 209 """Convert protobuf message to text format. | 301 """Convert protobuf message to text format. |
| 210 | 302 |
| 211 Args: | 303 Args: |
| 212 message: The protocol buffers message. | 304 message: The protocol buffers message. |
| 213 """ | 305 """ |
| 306 if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and |
| 307 self.descriptor_pool and self._TryPrintAsAnyMessage(message)): |
| 308 return |
| 214 fields = message.ListFields() | 309 fields = message.ListFields() |
| 215 if self.use_index_order: | 310 if self.use_index_order: |
| 216 fields.sort(key=lambda x: x[0].index) | 311 fields.sort(key=lambda x: x[0].index) |
| 217 for field, value in fields: | 312 for field, value in fields: |
| 218 if _IsMapEntry(field): | 313 if _IsMapEntry(field): |
| 219 for key in sorted(value): | 314 for key in sorted(value): |
| 220 # This is slow for maps with submessage entries because it copies the | 315 # This is slow for maps with submessage entries because it copies the |
| 221 # entire tree. Unfortunately this would take significant refactoring | 316 # entire tree. Unfortunately this would take significant refactoring |
| 222 # of this file to work around. | 317 # of this file to work around. |
| 223 # | 318 # |
| 224 # TODO(haberman): refactor and optimize if this becomes an issue. | 319 # TODO(haberman): refactor and optimize if this becomes an issue. |
| 225 entry_submsg = field.message_type._concrete_class( | 320 entry_submsg = value.GetEntryClass()(key=key, value=value[key]) |
| 226 key=key, value=value[key]) | |
| 227 self.PrintField(field, entry_submsg) | 321 self.PrintField(field, entry_submsg) |
| 228 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | 322 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
| 229 for element in value: | 323 for element in value: |
| 230 self.PrintField(field, element) | 324 self.PrintField(field, element) |
| 231 else: | 325 else: |
| 232 self.PrintField(field, value) | 326 self.PrintField(field, value) |
| 233 | 327 |
| 234 def PrintField(self, field, value): | 328 def PrintField(self, field, value): |
| 235 """Print a single field name/value pair.""" | 329 """Print a single field name/value pair.""" |
| 236 out = self.out | 330 out = self.out |
| (...skipping 20 matching lines...) |
| 257 # The colon is optional in this case, but our cross-language golden files | 351 # The colon is optional in this case, but our cross-language golden files |
| 258 # don't include it. | 352 # don't include it. |
| 259 out.write(': ') | 353 out.write(': ') |
| 260 | 354 |
| 261 self.PrintFieldValue(field, value) | 355 self.PrintFieldValue(field, value) |
| 262 if self.as_one_line: | 356 if self.as_one_line: |
| 263 out.write(' ') | 357 out.write(' ') |
| 264 else: | 358 else: |
| 265 out.write('\n') | 359 out.write('\n') |
| 266 | 360 |
| 361 def _PrintMessageFieldValue(self, value): |
| 362 if self.pointy_brackets: |
| 363 openb = '<' |
| 364 closeb = '>' |
| 365 else: |
| 366 openb = '{' |
| 367 closeb = '}' |
| 368 |
| 369 if self.as_one_line: |
| 370 self.out.write(' %s ' % openb) |
| 371 self.PrintMessage(value) |
| 372 self.out.write(closeb) |
| 373 else: |
| 374 self.out.write(' %s\n' % openb) |
| 375 self.indent += 2 |
| 376 self.PrintMessage(value) |
| 377 self.indent -= 2 |
| 378 self.out.write(' ' * self.indent + closeb) |
| 379 |
| 267 def PrintFieldValue(self, field, value): | 380 def PrintFieldValue(self, field, value): |
| 268 """Print a single field value (not including name). | 381 """Print a single field value (not including name). |
| 269 | 382 |
| 270 For repeated fields, the value should be a single element. | 383 For repeated fields, the value should be a single element. |
| 271 | 384 |
| 272 Args: | 385 Args: |
| 273 field: The descriptor of the field to be printed. | 386 field: The descriptor of the field to be printed. |
| 274 value: The value of the field. | 387 value: The value of the field. |
| 275 """ | 388 """ |
| 276 out = self.out | 389 out = self.out |
| 277 if self.pointy_brackets: | |
| 278 openb = '<' | |
| 279 closeb = '>' | |
| 280 else: | |
| 281 openb = '{' | |
| 282 closeb = '}' | |
| 283 | |
| 284 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: | 390 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: |
| 285 if self.as_one_line: | 391 self._PrintMessageFieldValue(value) |
| 286 out.write(' %s ' % openb) | |
| 287 self.PrintMessage(value) | |
| 288 out.write(closeb) | |
| 289 else: | |
| 290 out.write(' %s\n' % openb) | |
| 291 self.indent += 2 | |
| 292 self.PrintMessage(value) | |
| 293 self.indent -= 2 | |
| 294 out.write(' ' * self.indent + closeb) | |
| 295 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: | 392 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: |
| 296 enum_value = field.enum_type.values_by_number.get(value, None) | 393 enum_value = field.enum_type.values_by_number.get(value, None) |
| 297 if enum_value is not None: | 394 if enum_value is not None: |
| 298 out.write(enum_value.name) | 395 out.write(enum_value.name) |
| 299 else: | 396 else: |
| 300 out.write(str(value)) | 397 out.write(str(value)) |
| 301 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: | 398 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: |
| 302 out.write('\"') | 399 out.write('\"') |
| 303 if isinstance(value, six.text_type): | 400 if isinstance(value, six.text_type): |
| 304 out_value = value.encode('utf-8') | 401 out_value = value.encode('utf-8') |
| (...skipping 10 matching lines...) |
| 315 if value: | 412 if value: |
| 316 out.write('true') | 413 out.write('true') |
| 317 else: | 414 else: |
| 318 out.write('false') | 415 out.write('false') |
| 319 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: | 416 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: |
| 320 out.write('{1:{0}}'.format(self.float_format, value)) | 417 out.write('{1:{0}}'.format(self.float_format, value)) |
| 321 else: | 418 else: |
| 322 out.write(str(value)) | 419 out.write(str(value)) |
| 323 | 420 |
| 324 | 421 |
| 325 def Parse(text, message, | 422 def Parse(text, |
| 326 allow_unknown_extension=False, allow_field_number=False): | 423 message, |
| 327 """Parses an text representation of a protocol message into a message. | 424 allow_unknown_extension=False, |
| 425 allow_field_number=False): |
| 426 """Parses a text representation of a protocol message into a message. |
| 328 | 427 |
| 329 Args: | 428 Args: |
| 330 text: Message text representation. | 429 text: Message text representation. |
| 331 message: A protocol buffer message to merge into. | 430 message: A protocol buffer message to merge into. |
| 332 allow_unknown_extension: if True, skip over missing extensions and keep | 431 allow_unknown_extension: if True, skip over missing extensions and keep |
| 333 parsing | 432 parsing |
| 334 allow_field_number: if True, both field number and field name are allowed. | 433 allow_field_number: if True, both field number and field name are allowed. |
| 335 | 434 |
| 336 Returns: | 435 Returns: |
| 337 The same message passed as argument. | 436 The same message passed as argument. |
| 338 | 437 |
| 339 Raises: | 438 Raises: |
| 340 ParseError: On text parsing problems. | 439 ParseError: On text parsing problems. |
| 341 """ | 440 """ |
| 342 if not isinstance(text, str): | 441 if not isinstance(text, str): |
| 343 text = text.decode('utf-8') | 442 text = text.decode('utf-8') |
| 344 return ParseLines(text.split('\n'), message, allow_unknown_extension, | 443 return ParseLines( |
| 345 allow_field_number) | 444 text.split('\n'), message, allow_unknown_extension, allow_field_number) |
| 346 | 445 |
| 347 | 446 |
| 348 def Merge(text, message, allow_unknown_extension=False, | 447 def Merge(text, |
| 349 allow_field_number=False): | 448 message, |
| 350 """Parses an text representation of a protocol message into a message. | 449 allow_unknown_extension=False, |
| 450 allow_field_number=False, |
| 451 descriptor_pool=None): |
| 452 """Parses a text representation of a protocol message into a message. |
| 351 | 453 |
| 352 Like Parse(), but allows repeated values for a non-repeated field, and uses | 454 Like Parse(), but allows repeated values for a non-repeated field, and uses |
| 353 the last one. | 455 the last one. |
| 354 | 456 |
| 355 Args: | 457 Args: |
| 356 text: Message text representation. | 458 text: Message text representation. |
| 357 message: A protocol buffer message to merge into. | 459 message: A protocol buffer message to merge into. |
| 358 allow_unknown_extension: if True, skip over missing extensions and keep | 460 allow_unknown_extension: if True, skip over missing extensions and keep |
| 359 parsing | 461 parsing |
| 360 allow_field_number: if True, both field number and field name are allowed. | 462 allow_field_number: if True, both field number and field name are allowed. |
| 463 descriptor_pool: A DescriptorPool used to resolve Any types. |
| 361 | 464 |
| 362 Returns: | 465 Returns: |
| 363 The same message passed as argument. | 466 The same message passed as argument. |
| 364 | 467 |
| 365 Raises: | 468 Raises: |
| 366 ParseError: On text parsing problems. | 469 ParseError: On text parsing problems. |
| 367 """ | 470 """ |
| 368 return MergeLines(text.split('\n'), message, allow_unknown_extension, | 471 return MergeLines( |
| 369 allow_field_number) | 472 text.split('\n'), |
| 473 message, |
| 474 allow_unknown_extension, |
| 475 allow_field_number, |
| 476 descriptor_pool=descriptor_pool) |
| 370 | 477 |
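A sketch of parsing the expanded Any syntax through the descriptor_pool argument this change adds to Merge; `my_pb2.Container` is a hypothetical message with a google.protobuf.Any field named `payload`, and the packed type must already be registered in the pool:

```python
from google.protobuf import descriptor_pool, text_format
from google.protobuf import duration_pb2  # registers Duration in the default pool
from my_project import my_pb2  # hypothetical: Container has an Any field 'payload'

text = """
payload {
  [type.googleapis.com/google.protobuf.Duration] {
    seconds: 3
  }
}
"""
msg = my_pb2.Container()
# Unlike Parse(), Merge() accepts descriptor_pool here; without a pool the
# expanded-Any form raises ParseError.
text_format.Merge(text, msg, descriptor_pool=descriptor_pool.Default())
```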
| 371 | 478 |
| 372 def ParseLines(lines, message, allow_unknown_extension=False, | 479 def ParseLines(lines, |
| 480 message, |
| 481 allow_unknown_extension=False, |
| 373 allow_field_number=False): | 482 allow_field_number=False): |
| 374 """Parses an text representation of a protocol message into a message. | 483 """Parses a text representation of a protocol message into a message. |
| 375 | 484 |
| 376 Args: | 485 Args: |
| 377 lines: An iterable of lines of a message's text representation. | 486 lines: An iterable of lines of a message's text representation. |
| 378 message: A protocol buffer message to merge into. | 487 message: A protocol buffer message to merge into. |
| 379 allow_unknown_extension: if True, skip over missing extensions and keep | 488 allow_unknown_extension: if True, skip over missing extensions and keep |
| 380 parsing | 489 parsing |
| 381 allow_field_number: if True, both field number and field name are allowed. | 490 allow_field_number: if True, both field number and field name are allowed. |
| 491 descriptor_pool: A DescriptorPool used to resolve Any types. |
| 382 | 492 |
| 383 Returns: | 493 Returns: |
| 384 The same message passed as argument. | 494 The same message passed as argument. |
| 385 | 495 |
| 386 Raises: | 496 Raises: |
| 387 ParseError: On text parsing problems. | 497 ParseError: On text parsing problems. |
| 388 """ | 498 """ |
| 389 parser = _Parser(allow_unknown_extension, allow_field_number) | 499 parser = _Parser(allow_unknown_extension, allow_field_number) |
| 390 return parser.ParseLines(lines, message) | 500 return parser.ParseLines(lines, message) |
| 391 | 501 |
| 392 | 502 |
| 393 def MergeLines(lines, message, allow_unknown_extension=False, | 503 def MergeLines(lines, |
| 394 allow_field_number=False): | 504 message, |
| 395 """Parses an text representation of a protocol message into a message. | 505 allow_unknown_extension=False, |
| 506 allow_field_number=False, |
| 507 descriptor_pool=None): |
| 508 """Parses a text representation of a protocol message into a message. |
| 396 | 509 |
| 397 Args: | 510 Args: |
| 398 lines: An iterable of lines of a message's text representation. | 511 lines: An iterable of lines of a message's text representation. |
| 399 message: A protocol buffer message to merge into. | 512 message: A protocol buffer message to merge into. |
| 400 allow_unknown_extension: if True, skip over missing extensions and keep | 513 allow_unknown_extension: if True, skip over missing extensions and keep |
| 401 parsing | 514 parsing |
| 402 allow_field_number: if True, both field number and field name are allowed. | 515 allow_field_number: if True, both field number and field name are allowed. |
| 403 | 516 |
| 404 Returns: | 517 Returns: |
| 405 The same message passed as argument. | 518 The same message passed as argument. |
| 406 | 519 |
| 407 Raises: | 520 Raises: |
| 408 ParseError: On text parsing problems. | 521 ParseError: On text parsing problems. |
| 409 """ | 522 """ |
| 410 parser = _Parser(allow_unknown_extension, allow_field_number) | 523 parser = _Parser(allow_unknown_extension, |
| 524 allow_field_number, |
| 525 descriptor_pool=descriptor_pool) |
| 411 return parser.MergeLines(lines, message) | 526 return parser.MergeLines(lines, message) |
| 412 | 527 |
| 413 | 528 |
| 414 class _Parser(object): | 529 class _Parser(object): |
| 415 """Text format parser for protocol message.""" | 530 """Text format parser for protocol message.""" |
| 416 | 531 |
| 417 def __init__(self, allow_unknown_extension=False, allow_field_number=False): | 532 def __init__(self, |
| 533 allow_unknown_extension=False, |
| 534 allow_field_number=False, |
| 535 descriptor_pool=None): |
| 418 self.allow_unknown_extension = allow_unknown_extension | 536 self.allow_unknown_extension = allow_unknown_extension |
| 419 self.allow_field_number = allow_field_number | 537 self.allow_field_number = allow_field_number |
| 538 self.descriptor_pool = descriptor_pool |
| 420 | 539 |
| 421 def ParseFromString(self, text, message): | 540 def ParseFromString(self, text, message): |
| 422 """Parses an text representation of a protocol message into a message.""" | 541 """Parses a text representation of a protocol message into a message.""" |
| 423 if not isinstance(text, str): | 542 if not isinstance(text, str): |
| 424 text = text.decode('utf-8') | 543 text = text.decode('utf-8') |
| 425 return self.ParseLines(text.split('\n'), message) | 544 return self.ParseLines(text.split('\n'), message) |
| 426 | 545 |
| 427 def ParseLines(self, lines, message): | 546 def ParseLines(self, lines, message): |
| 428 """Parses an text representation of a protocol message into a message.""" | 547 """Parses a text representation of a protocol message into a message.""" |
| 429 self._allow_multiple_scalars = False | 548 self._allow_multiple_scalars = False |
| 430 self._ParseOrMerge(lines, message) | 549 self._ParseOrMerge(lines, message) |
| 431 return message | 550 return message |
| 432 | 551 |
| 433 def MergeFromString(self, text, message): | 552 def MergeFromString(self, text, message): |
| 434 """Merges an text representation of a protocol message into a message.""" | 553 """Merges a text representation of a protocol message into a message.""" |
| 435 return self._MergeLines(text.split('\n'), message) | 554 return self._MergeLines(text.split('\n'), message) |
| 436 | 555 |
| 437 def MergeLines(self, lines, message): | 556 def MergeLines(self, lines, message): |
| 438 """Merges an text representation of a protocol message into a message.""" | 557 """Merges a text representation of a protocol message into a message.""" |
| 439 self._allow_multiple_scalars = True | 558 self._allow_multiple_scalars = True |
| 440 self._ParseOrMerge(lines, message) | 559 self._ParseOrMerge(lines, message) |
| 441 return message | 560 return message |
| 442 | 561 |
| 443 def _ParseOrMerge(self, lines, message): | 562 def _ParseOrMerge(self, lines, message): |
| 444 """Converts an text representation of a protocol message into a message. | 563 """Converts a text representation of a protocol message into a message. |
| 445 | 564 |
| 446 Args: | 565 Args: |
| 447 lines: Lines of a message's text representation. | 566 lines: Lines of a message's text representation. |
| 448 message: A protocol buffer message to merge into. | 567 message: A protocol buffer message to merge into. |
| 449 | 568 |
| 450 Raises: | 569 Raises: |
| 451 ParseError: On text parsing problems. | 570 ParseError: On text parsing problems. |
| 452 """ | 571 """ |
| 453 tokenizer = _Tokenizer(lines) | 572 tokenizer = Tokenizer(lines) |
| 454 while not tokenizer.AtEnd(): | 573 while not tokenizer.AtEnd(): |
| 455 self._MergeField(tokenizer, message) | 574 self._MergeField(tokenizer, message) |
| 456 | 575 |
| 457 def _MergeField(self, tokenizer, message): | 576 def _MergeField(self, tokenizer, message): |
| 458 """Merges a single protocol message field into a message. | 577 """Merges a single protocol message field into a message. |
| 459 | 578 |
| 460 Args: | 579 Args: |
| 461 tokenizer: A tokenizer to parse the field name and values. | 580 tokenizer: A tokenizer to parse the field name and values. |
| 462 message: A protocol message to record the data. | 581 message: A protocol message to record the data. |
| 463 | 582 |
| (...skipping 20 matching lines...) |
| 484 field = message.Extensions._FindExtensionByName(name) | 603 field = message.Extensions._FindExtensionByName(name) |
| 485 # pylint: enable=protected-access | 604 # pylint: enable=protected-access |
| 486 if not field: | 605 if not field: |
| 487 if self.allow_unknown_extension: | 606 if self.allow_unknown_extension: |
| 488 field = None | 607 field = None |
| 489 else: | 608 else: |
| 490 raise tokenizer.ParseErrorPreviousToken( | 609 raise tokenizer.ParseErrorPreviousToken( |
| 491 'Extension "%s" not registered.' % name) | 610 'Extension "%s" not registered.' % name) |
| 492 elif message_descriptor != field.containing_type: | 611 elif message_descriptor != field.containing_type: |
| 493 raise tokenizer.ParseErrorPreviousToken( | 612 raise tokenizer.ParseErrorPreviousToken( |
| 494 'Extension "%s" does not extend message type "%s".' % ( | 613 'Extension "%s" does not extend message type "%s".' % |
| 495 name, message_descriptor.full_name)) | 614 (name, message_descriptor.full_name)) |
| 496 | 615 |
| 497 tokenizer.Consume(']') | 616 tokenizer.Consume(']') |
| 498 | 617 |
| 499 else: | 618 else: |
| 500 name = tokenizer.ConsumeIdentifier() | 619 name = tokenizer.ConsumeIdentifierOrNumber() |
| 501 if self.allow_field_number and name.isdigit(): | 620 if self.allow_field_number and name.isdigit(): |
| 502 number = ParseInteger(name, True, True) | 621 number = ParseInteger(name, True, True) |
| 503 field = message_descriptor.fields_by_number.get(number, None) | 622 field = message_descriptor.fields_by_number.get(number, None) |
| 504 if not field and message_descriptor.is_extendable: | 623 if not field and message_descriptor.is_extendable: |
| 505 field = message.Extensions._FindExtensionByNumber(number) | 624 field = message.Extensions._FindExtensionByNumber(number) |
| 506 else: | 625 else: |
| 507 field = message_descriptor.fields_by_name.get(name, None) | 626 field = message_descriptor.fields_by_name.get(name, None) |
| 508 | 627 |
| 509 # Group names are expected to be capitalized as they appear in the | 628 # Group names are expected to be capitalized as they appear in the |
| 510 # .proto file, which actually matches their type names, not their field | 629 # .proto file, which actually matches their type names, not their field |
| 511 # names. | 630 # names. |
| 512 if not field: | 631 if not field: |
| 513 field = message_descriptor.fields_by_name.get(name.lower(), None) | 632 field = message_descriptor.fields_by_name.get(name.lower(), None) |
| 514 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: | 633 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: |
| 515 field = None | 634 field = None |
| 516 | 635 |
| 517 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and | 636 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and |
| 518 field.message_type.name != name): | 637 field.message_type.name != name): |
| 519 field = None | 638 field = None |
| 520 | 639 |
| 521 if not field: | 640 if not field: |
| 522 raise tokenizer.ParseErrorPreviousToken( | 641 raise tokenizer.ParseErrorPreviousToken( |
| 523 'Message type "%s" has no field named "%s".' % ( | 642 'Message type "%s" has no field named "%s".' % |
| 524 message_descriptor.full_name, name)) | 643 (message_descriptor.full_name, name)) |
| 525 | 644 |
| 526 if field: | 645 if field: |
| 527 if not self._allow_multiple_scalars and field.containing_oneof: | 646 if not self._allow_multiple_scalars and field.containing_oneof: |
| 528 # Check if there's a different field set in this oneof. | 647 # Check if there's a different field set in this oneof. |
| 529 # Note that we ignore the case if the same field was set before, and we | 648 # Note that we ignore the case if the same field was set before, and we |
| 530 # apply _allow_multiple_scalars to non-scalar fields as well. | 649 # apply _allow_multiple_scalars to non-scalar fields as well. |
| 531 which_oneof = message.WhichOneof(field.containing_oneof.name) | 650 which_oneof = message.WhichOneof(field.containing_oneof.name) |
| 532 if which_oneof is not None and which_oneof != field.name: | 651 if which_oneof is not None and which_oneof != field.name: |
| 533 raise tokenizer.ParseErrorPreviousToken( | 652 raise tokenizer.ParseErrorPreviousToken( |
| 534 'Field "%s" is specified along with field "%s", another member ' | 653 'Field "%s" is specified along with field "%s", another member ' |
| 535 'of oneof "%s" for message type "%s".' % ( | 654 'of oneof "%s" for message type "%s".' % |
| 536 field.name, which_oneof, field.containing_oneof.name, | 655 (field.name, which_oneof, field.containing_oneof.name, |
| 537 message_descriptor.full_name)) | 656 message_descriptor.full_name)) |
| 538 | 657 |
| 539 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: | 658 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: |
| 540 tokenizer.TryConsume(':') | 659 tokenizer.TryConsume(':') |
| 541 merger = self._MergeMessageField | 660 merger = self._MergeMessageField |
| 542 else: | 661 else: |
| 543 tokenizer.Consume(':') | 662 tokenizer.Consume(':') |
| 544 merger = self._MergeScalarField | 663 merger = self._MergeScalarField |
| 545 | 664 |
| 546 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED | 665 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and |
| 547 and tokenizer.TryConsume('[')): | 666 tokenizer.TryConsume('[')): |
| 548 # Short repeated format, e.g. "foo: [1, 2, 3]" | 667 # Short repeated format, e.g. "foo: [1, 2, 3]" |
| 549 while True: | 668 while True: |
| 550 merger(tokenizer, message, field) | 669 merger(tokenizer, message, field) |
| 551 if tokenizer.TryConsume(']'): break | 670 if tokenizer.TryConsume(']'): |
| 671 break |
| 552 tokenizer.Consume(',') | 672 tokenizer.Consume(',') |
| 553 | 673 |
| 554 else: | 674 else: |
| 555 merger(tokenizer, message, field) | 675 merger(tokenizer, message, field) |
| 556 | 676 |
| 557 else: # Proto field is unknown. | 677 else: # Proto field is unknown. |
| 558 assert self.allow_unknown_extension | 678 assert self.allow_unknown_extension |
| 559 _SkipFieldContents(tokenizer) | 679 _SkipFieldContents(tokenizer) |
| 560 | 680 |
| 561 # For historical reasons, fields may optionally be separated by commas or | 681 # For historical reasons, fields may optionally be separated by commas or |
| 562 # semicolons. | 682 # semicolons. |
| 563 if not tokenizer.TryConsume(','): | 683 if not tokenizer.TryConsume(','): |
| 564 tokenizer.TryConsume(';') | 684 tokenizer.TryConsume(';') |
| 565 | 685 |
| 686 def _ConsumeAnyTypeUrl(self, tokenizer): |
| 687 """Consumes a google.protobuf.Any type URL and returns the type name.""" |
| 688 # Consume "type.googleapis.com/". |
| 689 tokenizer.ConsumeIdentifier() |
| 690 tokenizer.Consume('.') |
| 691 tokenizer.ConsumeIdentifier() |
| 692 tokenizer.Consume('.') |
| 693 tokenizer.ConsumeIdentifier() |
| 694 tokenizer.Consume('/') |
| 695 # Consume the fully-qualified type name. |
| 696 name = [tokenizer.ConsumeIdentifier()] |
| 697 while tokenizer.TryConsume('.'): |
| 698 name.append(tokenizer.ConsumeIdentifier()) |
| 699 return '.'.join(name) |
| 700 |
| 566 def _MergeMessageField(self, tokenizer, message, field): | 701 def _MergeMessageField(self, tokenizer, message, field): |
| 567 """Merges a single scalar field into a message. | 702 """Merges a single scalar field into a message. |
| 568 | 703 |
| 569 Args: | 704 Args: |
| 570 tokenizer: A tokenizer to parse the field value. | 705 tokenizer: A tokenizer to parse the field value. |
| 571 message: The message of which field is a member. | 706 message: The message of which field is a member. |
| 572 field: The descriptor of the field to be merged. | 707 field: The descriptor of the field to be merged. |
| 573 | 708 |
| 574 Raises: | 709 Raises: |
| 575 ParseError: In case of text parsing problems. | 710 ParseError: In case of text parsing problems. |
| 576 """ | 711 """ |
| 577 is_map_entry = _IsMapEntry(field) | 712 is_map_entry = _IsMapEntry(field) |
| 578 | 713 |
| 579 if tokenizer.TryConsume('<'): | 714 if tokenizer.TryConsume('<'): |
| 580 end_token = '>' | 715 end_token = '>' |
| 581 else: | 716 else: |
| 582 tokenizer.Consume('{') | 717 tokenizer.Consume('{') |
| 583 end_token = '}' | 718 end_token = '}' |
| 584 | 719 |
| 585 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | 720 if (field.message_type.full_name == _ANY_FULL_TYPE_NAME and |
| 721 tokenizer.TryConsume('[')): |
| 722 packed_type_name = self._ConsumeAnyTypeUrl(tokenizer) |
| 723 tokenizer.Consume(']') |
| 724 tokenizer.TryConsume(':') |
| 725 if tokenizer.TryConsume('<'): |
| 726 expanded_any_end_token = '>' |
| 727 else: |
| 728 tokenizer.Consume('{') |
| 729 expanded_any_end_token = '}' |
| 730 if not self.descriptor_pool: |
| 731 raise ParseError('Descriptor pool required to parse expanded Any field') |
| 732 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name, |
| 733 self.descriptor_pool) |
| 734 if not expanded_any_sub_message: |
| 735 raise ParseError('Type %s not found in descriptor pool' % |
| 736 packed_type_name) |
| 737 while not tokenizer.TryConsume(expanded_any_end_token): |
| 738 if tokenizer.AtEnd(): |
| 739 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % |
| 740 (expanded_any_end_token,)) |
| 741 self._MergeField(tokenizer, expanded_any_sub_message) |
| 742 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
| 743 any_message = getattr(message, field.name).add() |
| 744 else: |
| 745 any_message = getattr(message, field.name) |
| 746 any_message.Pack(expanded_any_sub_message) |
| 747 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
| 586 if field.is_extension: | 748 if field.is_extension: |
| 587 sub_message = message.Extensions[field].add() | 749 sub_message = message.Extensions[field].add() |
| 588 elif is_map_entry: | 750 elif is_map_entry: |
| 589 # pylint: disable=protected-access | 751 sub_message = getattr(message, field.name).GetEntryClass()() |
| 590 sub_message = field.message_type._concrete_class() | |
| 591 else: | 752 else: |
| 592 sub_message = getattr(message, field.name).add() | 753 sub_message = getattr(message, field.name).add() |
| 593 else: | 754 else: |
| 594 if field.is_extension: | 755 if field.is_extension: |
| 595 sub_message = message.Extensions[field] | 756 sub_message = message.Extensions[field] |
| 596 else: | 757 else: |
| 597 sub_message = getattr(message, field.name) | 758 sub_message = getattr(message, field.name) |
| 598 sub_message.SetInParent() | 759 sub_message.SetInParent() |
| 599 | 760 |
| 600 while not tokenizer.TryConsume(end_token): | 761 while not tokenizer.TryConsume(end_token): |
| (...skipping 20 matching lines...) |
| 621 Raises: | 782 Raises: |
| 622 ParseError: In case of text parsing problems. | 783 ParseError: In case of text parsing problems. |
| 623 RuntimeError: On runtime errors. | 784 RuntimeError: On runtime errors. |
| 624 """ | 785 """ |
| 625 _ = self.allow_unknown_extension | 786 _ = self.allow_unknown_extension |
| 626 value = None | 787 value = None |
| 627 | 788 |
| 628 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, | 789 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, |
| 629 descriptor.FieldDescriptor.TYPE_SINT32, | 790 descriptor.FieldDescriptor.TYPE_SINT32, |
| 630 descriptor.FieldDescriptor.TYPE_SFIXED32): | 791 descriptor.FieldDescriptor.TYPE_SFIXED32): |
| 631 value = tokenizer.ConsumeInt32() | 792 value = _ConsumeInt32(tokenizer) |
| 632 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, | 793 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, |
| 633 descriptor.FieldDescriptor.TYPE_SINT64, | 794 descriptor.FieldDescriptor.TYPE_SINT64, |
| 634 descriptor.FieldDescriptor.TYPE_SFIXED64): | 795 descriptor.FieldDescriptor.TYPE_SFIXED64): |
| 635 value = tokenizer.ConsumeInt64() | 796 value = _ConsumeInt64(tokenizer) |
| 636 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, | 797 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, |
| 637 descriptor.FieldDescriptor.TYPE_FIXED32): | 798 descriptor.FieldDescriptor.TYPE_FIXED32): |
| 638 value = tokenizer.ConsumeUint32() | 799 value = _ConsumeUint32(tokenizer) |
| 639 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, | 800 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, |
| 640 descriptor.FieldDescriptor.TYPE_FIXED64): | 801 descriptor.FieldDescriptor.TYPE_FIXED64): |
| 641 value = tokenizer.ConsumeUint64() | 802 value = _ConsumeUint64(tokenizer) |
| 642 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, | 803 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, |
| 643 descriptor.FieldDescriptor.TYPE_DOUBLE): | 804 descriptor.FieldDescriptor.TYPE_DOUBLE): |
| 644 value = tokenizer.ConsumeFloat() | 805 value = tokenizer.ConsumeFloat() |
| 645 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: | 806 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: |
| 646 value = tokenizer.ConsumeBool() | 807 value = tokenizer.ConsumeBool() |
| 647 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: | 808 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: |
| 648 value = tokenizer.ConsumeString() | 809 value = tokenizer.ConsumeString() |
| 649 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: | 810 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: |
| 650 value = tokenizer.ConsumeByteString() | 811 value = tokenizer.ConsumeByteString() |
| 651 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: | 812 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: |
| (...skipping 94 matching lines...) |
| 746 ParseError: In case an invalid field value is found. | 907 ParseError: In case an invalid field value is found. |
| 747 """ | 908 """ |
| 748 # String/bytes tokens can come in multiple adjacent string literals. | 909 # String/bytes tokens can come in multiple adjacent string literals. |
| 749 # If we can consume one, consume as many as we can. | 910 # If we can consume one, consume as many as we can. |
| 750 if tokenizer.TryConsumeByteString(): | 911 if tokenizer.TryConsumeByteString(): |
| 751 while tokenizer.TryConsumeByteString(): | 912 while tokenizer.TryConsumeByteString(): |
| 752 pass | 913 pass |
| 753 return | 914 return |
| 754 | 915 |
| 755 if (not tokenizer.TryConsumeIdentifier() and | 916 if (not tokenizer.TryConsumeIdentifier() and |
| 756 not tokenizer.TryConsumeInt64() and | 917 not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and |
| 757 not tokenizer.TryConsumeUint64() and | |
| 758 not tokenizer.TryConsumeFloat()): | 918 not tokenizer.TryConsumeFloat()): |
| 759 raise ParseError('Invalid field value: ' + tokenizer.token) | 919 raise ParseError('Invalid field value: ' + tokenizer.token) |
| 760 | 920 |
| 761 | 921 |
| 762 class _Tokenizer(object): | 922 class Tokenizer(object): |
| 763 """Protocol buffer text representation tokenizer. | 923 """Protocol buffer text representation tokenizer. |
| 764 | 924 |
| 765 This class handles the lower level string parsing by splitting it into | 925 This class handles the lower level string parsing by splitting it into |
| 766 meaningful tokens. | 926 meaningful tokens. |
| 767 | 927 |
| 768 It was directly ported from the Java protocol buffer API. | 928 It was directly ported from the Java protocol buffer API. |
| 769 """ | 929 """ |
| 770 | 930 |
| 771 _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE) | 931 _WHITESPACE = re.compile(r'\s+') |
| 932 _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE) |
| 933 _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE) |
| 772 _TOKEN = re.compile('|'.join([ | 934 _TOKEN = re.compile('|'.join([ |
| 773 r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier | 935 r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier |
| 774 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number | 936 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number |
| 775 ] + [ # quoted str for each quote mark | 937 ] + [ # quoted str for each quote mark |
| 776 r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES | 938 r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES |
| 777 ])) | 939 ])) |
| 778 | 940 |
| 779 _IDENTIFIER = re.compile(r'\w+') | 941 _IDENTIFIER = re.compile(r'[^\d\W]\w*') |
| 942 _IDENTIFIER_OR_NUMBER = re.compile(r'\w+') |
| 780 | 943 |
| 781 def __init__(self, lines): | 944 def __init__(self, lines, skip_comments=True): |
| 782 self._position = 0 | 945 self._position = 0 |
| 783 self._line = -1 | 946 self._line = -1 |
| 784 self._column = 0 | 947 self._column = 0 |
| 785 self._token_start = None | 948 self._token_start = None |
| 786 self.token = '' | 949 self.token = '' |
| 787 self._lines = iter(lines) | 950 self._lines = iter(lines) |
| 788 self._current_line = '' | 951 self._current_line = '' |
| 789 self._previous_line = 0 | 952 self._previous_line = 0 |
| 790 self._previous_column = 0 | 953 self._previous_column = 0 |
| 791 self._more_lines = True | 954 self._more_lines = True |
| 955 self._skip_comments = skip_comments |
| 956 self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT |
| 957 or self._WHITESPACE) |
| 792 self._SkipWhitespace() | 958 self._SkipWhitespace() |
| 793 self.NextToken() | 959 self.NextToken() |
| 794 | 960 |
| 795 def LookingAt(self, token): | 961 def LookingAt(self, token): |
| 796 return self.token == token | 962 return self.token == token |
| 797 | 963 |
| 798 def AtEnd(self): | 964 def AtEnd(self): |
| 799 """Checks the end of the text was reached. | 965 """Checks the end of the text was reached. |
| 800 | 966 |
| 801 Returns: | 967 Returns: |
| 802 True iff the end was reached. | 968 True iff the end was reached. |
| 803 """ | 969 """ |
| 804 return not self.token | 970 return not self.token |
| 805 | 971 |
| 806 def _PopLine(self): | 972 def _PopLine(self): |
| 807 while len(self._current_line) <= self._column: | 973 while len(self._current_line) <= self._column: |
| 808 try: | 974 try: |
| 809 self._current_line = next(self._lines) | 975 self._current_line = next(self._lines) |
| 810 except StopIteration: | 976 except StopIteration: |
| 811 self._current_line = '' | 977 self._current_line = '' |
| 812 self._more_lines = False | 978 self._more_lines = False |
| 813 return | 979 return |
| 814 else: | 980 else: |
| 815 self._line += 1 | 981 self._line += 1 |
| 816 self._column = 0 | 982 self._column = 0 |
| 817 | 983 |
| 818 def _SkipWhitespace(self): | 984 def _SkipWhitespace(self): |
| 819 while True: | 985 while True: |
| 820 self._PopLine() | 986 self._PopLine() |
| 821 match = self._WHITESPACE.match(self._current_line, self._column) | 987 match = self._whitespace_pattern.match(self._current_line, self._column) |
| 822 if not match: | 988 if not match: |
| 823 break | 989 break |
| 824 length = len(match.group(0)) | 990 length = len(match.group(0)) |
| 825 self._column += length | 991 self._column += length |
| 826 | 992 |
| 827 def TryConsume(self, token): | 993 def TryConsume(self, token): |
| 828 """Tries to consume a given piece of text. | 994 """Tries to consume a given piece of text. |
| 829 | 995 |
| 830 Args: | 996 Args: |
| 831 token: Text to consume. | 997 token: Text to consume. |
| 832 | 998 |
| 833 Returns: | 999 Returns: |
| 834 True iff the text was consumed. | 1000 True iff the text was consumed. |
| 835 """ | 1001 """ |
| 836 if self.token == token: | 1002 if self.token == token: |
| 837 self.NextToken() | 1003 self.NextToken() |
| 838 return True | 1004 return True |
| 839 return False | 1005 return False |
| 840 | 1006 |
| 841 def Consume(self, token): | 1007 def Consume(self, token): |
| 842 """Consumes a piece of text. | 1008 """Consumes a piece of text. |
| 843 | 1009 |
| 844 Args: | 1010 Args: |
| 845 token: Text to consume. | 1011 token: Text to consume. |
| 846 | 1012 |
| 847 Raises: | 1013 Raises: |
| 848 ParseError: If the text couldn't be consumed. | 1014 ParseError: If the text couldn't be consumed. |
| 849 """ | 1015 """ |
| 850 if not self.TryConsume(token): | 1016 if not self.TryConsume(token): |
| 851 raise self._ParseError('Expected "%s".' % token) | 1017 raise self.ParseError('Expected "%s".' % token) |
| 1018 |
| 1019 def ConsumeComment(self): |
| 1020 result = self.token |
| 1021 if not self._COMMENT.match(result): |
| 1022 raise self.ParseError('Expected comment.') |
| 1023 self.NextToken() |
| 1024 return result |
| 852 | 1025 |
| 853 def TryConsumeIdentifier(self): | 1026 def TryConsumeIdentifier(self): |
| 854 try: | 1027 try: |
| 855 self.ConsumeIdentifier() | 1028 self.ConsumeIdentifier() |
| 856 return True | 1029 return True |
| 857 except ParseError: | 1030 except ParseError: |
| 858 return False | 1031 return False |
| 859 | 1032 |
| 860 def ConsumeIdentifier(self): | 1033 def ConsumeIdentifier(self): |
| 861 """Consumes protocol message field identifier. | 1034 """Consumes protocol message field identifier. |
| 862 | 1035 |
| 863 Returns: | 1036 Returns: |
| 864 Identifier string. | 1037 Identifier string. |
| 865 | 1038 |
| 866 Raises: | 1039 Raises: |
| 867 ParseError: If an identifier couldn't be consumed. | 1040 ParseError: If an identifier couldn't be consumed. |
| 868 """ | 1041 """ |
| 869 result = self.token | 1042 result = self.token |
| 870 if not self._IDENTIFIER.match(result): | 1043 if not self._IDENTIFIER.match(result): |
| 871 raise self._ParseError('Expected identifier.') | 1044 raise self.ParseError('Expected identifier.') |
| 872 self.NextToken() | 1045 self.NextToken() |
| 873 return result | 1046 return result |
| 874 | 1047 |
| 875 def ConsumeInt32(self): | 1048 def TryConsumeIdentifierOrNumber(self): |
| 876 """Consumes a signed 32bit integer number. | 1049 try: |
| 1050 self.ConsumeIdentifierOrNumber() |
| 1051 return True |
| 1052 except ParseError: |
| 1053 return False |
| 877 | 1054 |
| 1055 def ConsumeIdentifierOrNumber(self): |
| 1056 """Consumes protocol message field identifier. |
| 1057 |
| 1058 Returns: |
| 1059 Identifier string. |
| 1060 |
| 1061 Raises: |
| 1062 ParseError: If an identifier couldn't be consumed. |
| 1063 """ |
| 1064 result = self.token |
| 1065 if not self._IDENTIFIER_OR_NUMBER.match(result): |
| 1066 raise self.ParseError('Expected identifier or number.') |
| 1067 self.NextToken() |
| 1068 return result |
| 1069 |
| 1070 def TryConsumeInteger(self): |
| 1071 try: |
| 1072 # Note: is_long only affects value type, not whether an error is raised. |
| 1073 self.ConsumeInteger() |
| 1074 return True |
| 1075 except ParseError: |
| 1076 return False |
| 1077 |
| 1078 def ConsumeInteger(self, is_long=False): |
| 1079 """Consumes an integer number. |
| 1080 |
| 1081 Args: |
| 1082 is_long: True if the value should be returned as a long integer. |
| 878 Returns: | 1083 Returns: |
| 879 The integer parsed. | 1084 The integer parsed. |
| 880 | 1085 |
| 881 Raises: | 1086 Raises: |
| 882 ParseError: If a signed 32bit integer couldn't be consumed. | 1087 ParseError: If an integer couldn't be consumed. |
| 883 """ | 1088 """ |
| 884 try: | 1089 try: |
| 885 result = ParseInteger(self.token, is_signed=True, is_long=False) | 1090 result = _ParseAbstractInteger(self.token, is_long=is_long) |
| 886 except ValueError as e: | 1091 except ValueError as e: |
| 887 raise self._ParseError(str(e)) | 1092 raise self.ParseError(str(e)) |
| 888 self.NextToken() | 1093 self.NextToken() |
| 889 return result | 1094 return result |
| 890 | 1095 |
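As the note above says, the reworked ConsumeInteger defers to _ParseAbstractInteger, so the tokenizer-level call no longer enforces signedness or width; is_long only changes the Python type returned (visible only on Python 2). A sketch, again with the construction assumed:

    tok = Tokenizer(['4294967296'])   # assumed constructor; token is 2**32
    value = tok.ConsumeInteger()      # 4294967296 -- no 32/64-bit or sign
                                      # check happens at this level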
| 891 def ConsumeUint32(self): | |
| 892 """Consumes an unsigned 32bit integer number. | |
| 893 | |
| 894 Returns: | |
| 895 The integer parsed. | |
| 896 | |
| 897 Raises: | |
| 898 ParseError: If an unsigned 32bit integer couldn't be consumed. | |
| 899 """ | |
| 900 try: | |
| 901 result = ParseInteger(self.token, is_signed=False, is_long=False) | |
| 902 except ValueError as e: | |
| 903 raise self._ParseError(str(e)) | |
| 904 self.NextToken() | |
| 905 return result | |
| 906 | |
| 907 def TryConsumeInt64(self): | |
| 908 try: | |
| 909 self.ConsumeInt64() | |
| 910 return True | |
| 911 except ParseError: | |
| 912 return False | |
| 913 | |
| 914 def ConsumeInt64(self): | |
| 915 """Consumes a signed 64bit integer number. | |
| 916 | |
| 917 Returns: | |
| 918 The integer parsed. | |
| 919 | |
| 920 Raises: | |
| 921 ParseError: If a signed 64bit integer couldn't be consumed. | |
| 922 """ | |
| 923 try: | |
| 924 result = ParseInteger(self.token, is_signed=True, is_long=True) | |
| 925 except ValueError as e: | |
| 926 raise self._ParseError(str(e)) | |
| 927 self.NextToken() | |
| 928 return result | |
| 929 | |
| 930 def TryConsumeUint64(self): | |
| 931 try: | |
| 932 self.ConsumeUint64() | |
| 933 return True | |
| 934 except ParseError: | |
| 935 return False | |
| 936 | |
| 937 def ConsumeUint64(self): | |
| 938 """Consumes an unsigned 64bit integer number. | |
| 939 | |
| 940 Returns: | |
| 941 The integer parsed. | |
| 942 | |
| 943 Raises: | |
| 944 ParseError: If an unsigned 64bit integer couldn't be consumed. | |
| 945 """ | |
| 946 try: | |
| 947 result = ParseInteger(self.token, is_signed=False, is_long=True) | |
| 948 except ValueError as e: | |
| 949 raise self._ParseError(str(e)) | |
| 950 self.NextToken() | |
| 951 return result | |
| 952 | |
| 953 def TryConsumeFloat(self): | 1096 def TryConsumeFloat(self): |
| 954 try: | 1097 try: |
| 955 self.ConsumeFloat() | 1098 self.ConsumeFloat() |
| 956 return True | 1099 return True |
| 957 except ParseError: | 1100 except ParseError: |
| 958 return False | 1101 return False |
| 959 | 1102 |
| 960 def ConsumeFloat(self): | 1103 def ConsumeFloat(self): |
| 961 """Consumes an floating point number. | 1104 """Consumes an floating point number. |
| 962 | 1105 |
| 963 Returns: | 1106 Returns: |
| 964 The number parsed. | 1107 The number parsed. |
| 965 | 1108 |
| 966 Raises: | 1109 Raises: |
| 967 ParseError: If a floating point number couldn't be consumed. | 1110 ParseError: If a floating point number couldn't be consumed. |
| 968 """ | 1111 """ |
| 969 try: | 1112 try: |
| 970 result = ParseFloat(self.token) | 1113 result = ParseFloat(self.token) |
| 971 except ValueError as e: | 1114 except ValueError as e: |
| 972 raise self._ParseError(str(e)) | 1115 raise self.ParseError(str(e)) |
| 973 self.NextToken() | 1116 self.NextToken() |
| 974 return result | 1117 return result |
| 975 | 1118 |
| 976 def ConsumeBool(self): | 1119 def ConsumeBool(self): |
| 977 """Consumes a boolean value. | 1120 """Consumes a boolean value. |
| 978 | 1121 |
| 979 Returns: | 1122 Returns: |
| 980 The bool parsed. | 1123 The bool parsed. |
| 981 | 1124 |
| 982 Raises: | 1125 Raises: |
| 983 ParseError: If a boolean value couldn't be consumed. | 1126 ParseError: If a boolean value couldn't be consumed. |
| 984 """ | 1127 """ |
| 985 try: | 1128 try: |
| 986 result = ParseBool(self.token) | 1129 result = ParseBool(self.token) |
| 987 except ValueError as e: | 1130 except ValueError as e: |
| 988 raise self._ParseError(str(e)) | 1131 raise self.ParseError(str(e)) |
| 989 self.NextToken() | 1132 self.NextToken() |
| 990 return result | 1133 return result |
| 991 | 1134 |
| 992 def TryConsumeByteString(self): | 1135 def TryConsumeByteString(self): |
| 993 try: | 1136 try: |
| 994 self.ConsumeByteString() | 1137 self.ConsumeByteString() |
| 995 return True | 1138 return True |
| 996 except ParseError: | 1139 except ParseError: |
| 997 return False | 1140 return False |
| 998 | 1141 |
| (...skipping 33 matching lines...) |
| 1032 tokens which are automatically concatenated, like in C or Python. This | 1175 tokens which are automatically concatenated, like in C or Python. This |
| 1033 method only consumes one token. | 1176 method only consumes one token. |
| 1034 | 1177 |
| 1035 Returns: | 1178 Returns: |
| 1036 The token parsed. | 1179 The token parsed. |
| 1037 Raises: | 1180 Raises: |
| 1038 ParseError: If a malformed string is found. | 1181 ParseError: If a malformed string is found. |
| 1039 """ | 1182 """ |
| 1040 text = self.token | 1183 text = self.token |
| 1041 if len(text) < 1 or text[0] not in _QUOTES: | 1184 if len(text) < 1 or text[0] not in _QUOTES: |
| 1042 raise self._ParseError('Expected string but found: %r' % (text,)) | 1185 raise self.ParseError('Expected string but found: %r' % (text,)) |
| 1043 | 1186 |
| 1044 if len(text) < 2 or text[-1] != text[0]: | 1187 if len(text) < 2 or text[-1] != text[0]: |
| 1045 raise self._ParseError('String missing ending quote: %r' % (text,)) | 1188 raise self.ParseError('String missing ending quote: %r' % (text,)) |
| 1046 | 1189 |
| 1047 try: | 1190 try: |
| 1048 result = text_encoding.CUnescape(text[1:-1]) | 1191 result = text_encoding.CUnescape(text[1:-1]) |
| 1049 except ValueError as e: | 1192 except ValueError as e: |
| 1050 raise self._ParseError(str(e)) | 1193 raise self.ParseError(str(e)) |
| 1051 self.NextToken() | 1194 self.NextToken() |
| 1052 return result | 1195 return result |
| 1053 | 1196 |
| 1054 def ConsumeEnum(self, field): | 1197 def ConsumeEnum(self, field): |
| 1055 try: | 1198 try: |
| 1056 result = ParseEnum(field, self.token) | 1199 result = ParseEnum(field, self.token) |
| 1057 except ValueError as e: | 1200 except ValueError as e: |
| 1058 raise self._ParseError(str(e)) | 1201 raise self.ParseError(str(e)) |
| 1059 self.NextToken() | 1202 self.NextToken() |
| 1060 return result | 1203 return result |
| 1061 | 1204 |
| 1062 def ParseErrorPreviousToken(self, message): | 1205 def ParseErrorPreviousToken(self, message): |
| 1063 """Creates and *returns* a ParseError for the previously read token. | 1206 """Creates and *returns* a ParseError for the previously read token. |
| 1064 | 1207 |
| 1065 Args: | 1208 Args: |
| 1066 message: A message to set for the exception. | 1209 message: A message to set for the exception. |
| 1067 | 1210 |
| 1068 Returns: | 1211 Returns: |
| 1069 A ParseError instance. | 1212 A ParseError instance. |
| 1070 """ | 1213 """ |
| 1071 return ParseError('%d:%d : %s' % ( | 1214 return ParseError(message, self._previous_line + 1, |
| 1072 self._previous_line + 1, self._previous_column + 1, message)) | 1215 self._previous_column + 1) |
| 1073 | 1216 |
| 1074 def _ParseError(self, message): | 1217 def ParseError(self, message): |
| 1075 """Creates and *returns* a ParseError for the current token.""" | 1218 """Creates and *returns* a ParseError for the current token.""" |
| 1076 return ParseError('%d:%d : %s' % ( | 1219 return ParseError(message, self._line + 1, self._column + 1) |
| 1077 self._line + 1, self._column + 1, message)) | |
| 1078 | 1220 |
| 1079 def _StringParseError(self, e): | 1221 def _StringParseError(self, e): |
| 1080 return self._ParseError('Couldn\'t parse string: ' + str(e)) | 1222 return self.ParseError('Couldn\'t parse string: ' + str(e)) |
| 1081 | 1223 |
| 1082 def NextToken(self): | 1224 def NextToken(self): |
| 1083 """Reads the next meaningful token.""" | 1225 """Reads the next meaningful token.""" |
| 1084 self._previous_line = self._line | 1226 self._previous_line = self._line |
| 1085 self._previous_column = self._column | 1227 self._previous_column = self._column |
| 1086 | 1228 |
| 1087 self._column += len(self.token) | 1229 self._column += len(self.token) |
| 1088 self._SkipWhitespace() | 1230 self._SkipWhitespace() |
| 1089 | 1231 |
| 1090 if not self._more_lines: | 1232 if not self._more_lines: |
| 1091 self.token = '' | 1233 self.token = '' |
| 1092 return | 1234 return |
| 1093 | 1235 |
| 1094 match = self._TOKEN.match(self._current_line, self._column) | 1236 match = self._TOKEN.match(self._current_line, self._column) |
| 1237 if not match and not self._skip_comments: |
| 1238 match = self._COMMENT.match(self._current_line, self._column) |
| 1095 if match: | 1239 if match: |
| 1096 token = match.group(0) | 1240 token = match.group(0) |
| 1097 self.token = token | 1241 self.token = token |
| 1098 else: | 1242 else: |
| 1099 self.token = self._current_line[self._column] | 1243 self.token = self._current_line[self._column] |
| 1100 | 1244 |
| 1245 # Aliased so it can still be accessed by current visibility violators. |
| 1246 # TODO(dbarnett): Migrate violators to textformat_tokenizer. |
| 1247 _Tokenizer = Tokenizer # pylint: disable=invalid-name |
| 1248 |
| 1249 |
| 1250 def _ConsumeInt32(tokenizer): |
| 1251 """Consumes a signed 32bit integer number from tokenizer. |
| 1252 |
| 1253 Args: |
| 1254 tokenizer: A tokenizer used to parse the number. |
| 1255 |
| 1256 Returns: |
| 1257 The integer parsed. |
| 1258 |
| 1259 Raises: |
| 1260 ParseError: If a signed 32bit integer couldn't be consumed. |
| 1261 """ |
| 1262 return _ConsumeInteger(tokenizer, is_signed=True, is_long=False) |
| 1263 |
| 1264 |
| 1265 def _ConsumeUint32(tokenizer): |
| 1266 """Consumes an unsigned 32bit integer number from tokenizer. |
| 1267 |
| 1268 Args: |
| 1269 tokenizer: A tokenizer used to parse the number. |
| 1270 |
| 1271 Returns: |
| 1272 The integer parsed. |
| 1273 |
| 1274 Raises: |
| 1275 ParseError: If an unsigned 32bit integer couldn't be consumed. |
| 1276 """ |
| 1277 return _ConsumeInteger(tokenizer, is_signed=False, is_long=False) |
| 1278 |
| 1279 |
| 1280 def _TryConsumeInt64(tokenizer): |
| 1281 try: |
| 1282 _ConsumeInt64(tokenizer) |
| 1283 return True |
| 1284 except ParseError: |
| 1285 return False |
| 1286 |
| 1287 |
| 1288 def _ConsumeInt64(tokenizer): |
| 1289 """Consumes a signed 32bit integer number from tokenizer. |
| 1290 |
| 1291 Args: |
| 1292 tokenizer: A tokenizer used to parse the number. |
| 1293 |
| 1294 Returns: |
| 1295 The integer parsed. |
| 1296 |
| 1297 Raises: |
| 1298 ParseError: If a signed 32bit integer couldn't be consumed. |
| 1299 """ |
| 1300 return _ConsumeInteger(tokenizer, is_signed=True, is_long=True) |
| 1301 |
| 1302 |
| 1303 def _TryConsumeUint64(tokenizer): |
| 1304 try: |
| 1305 _ConsumeUint64(tokenizer) |
| 1306 return True |
| 1307 except ParseError: |
| 1308 return False |
| 1309 |
| 1310 |
| 1311 def _ConsumeUint64(tokenizer): |
| 1312 """Consumes an unsigned 64bit integer number from tokenizer. |
| 1313 |
| 1314 Args: |
| 1315 tokenizer: A tokenizer used to parse the number. |
| 1316 |
| 1317 Returns: |
| 1318 The integer parsed. |
| 1319 |
| 1320 Raises: |
| 1321 ParseError: If an unsigned 64bit integer couldn't be consumed. |
| 1322 """ |
| 1323 return _ConsumeInteger(tokenizer, is_signed=False, is_long=True) |
| 1324 |
| 1325 |
| 1326 def _TryConsumeInteger(tokenizer, is_signed=False, is_long=False): |
| 1327 try: |
| 1328 _ConsumeInteger(tokenizer, is_signed=is_signed, is_long=is_long) |
| 1329 return True |
| 1330 except ParseError: |
| 1331 return False |
| 1332 |
| 1333 |
| 1334 def _ConsumeInteger(tokenizer, is_signed=False, is_long=False): |
| 1335 """Consumes an integer number from tokenizer. |
| 1336 |
| 1337 Args: |
| 1338 tokenizer: A tokenizer used to parse the number. |
| 1339 is_signed: True if a signed integer must be parsed. |
| 1340 is_long: True if a long integer must be parsed. |
| 1341 |
| 1342 Returns: |
| 1343 The integer parsed. |
| 1344 |
| 1345 Raises: |
| 1346 ParseError: If an integer with given characteristics couldn't be consumed. |
| 1347 """ |
| 1348 try: |
| 1349 result = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long) |
| 1350 except ValueError as e: |
| 1351 raise tokenizer.ParseError(str(e)) |
| 1352 tokenizer.NextToken() |
| 1353 return result |
| 1354 |
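The width- and sign-specific consumers now live at module level and simply wrap _ConsumeInteger with the right flags, replacing the removed Tokenizer.ConsumeInt32/Uint32/Int64/Uint64 methods. A usage sketch (tokenizer construction assumed as before):

    tok = Tokenizer(['7', '18446744073709551615'])
    a = _ConsumeInt32(tok)     # 7, validated against the int32 range
    b = _ConsumeUint64(tok)    # 18446744073709551615 (2**64 - 1), uint64 max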
| 1101 | 1355 |
| 1102 def ParseInteger(text, is_signed=False, is_long=False): | 1356 def ParseInteger(text, is_signed=False, is_long=False): |
| 1103 """Parses an integer. | 1357 """Parses an integer. |
| 1104 | 1358 |
| 1105 Args: | 1359 Args: |
| 1106 text: The text to parse. | 1360 text: The text to parse. |
| 1107 is_signed: True if a signed integer must be parsed. | 1361 is_signed: True if a signed integer must be parsed. |
| 1108 is_long: True if a long integer must be parsed. | 1362 is_long: True if a long integer must be parsed. |
| 1109 | 1363 |
| 1110 Returns: | 1364 Returns: |
| 1111 The integer value. | 1365 The integer value. |
| 1112 | 1366 |
| 1113 Raises: | 1367 Raises: |
| 1114 ValueError: Thrown iff the text is not a valid integer. | 1368 ValueError: Thrown iff the text is not a valid integer. |
| 1115 """ | 1369 """ |
| 1116 # Do the actual parsing. Exception handling is propagated to caller. | 1370 # Do the actual parsing. Exception handling is propagated to caller. |
| 1371 result = _ParseAbstractInteger(text, is_long=is_long) |
| 1372 |
| 1373 # Check if the integer is sane. Exceptions handled by callers. |
| 1374 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)] |
| 1375 checker.CheckValue(result) |
| 1376 return result |
| 1377 |
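ParseInteger now splits the work: _ParseAbstractInteger does the raw base-0 conversion, and the checker picked from _INTEGER_CHECKERS (indexed by 2 * is_long + is_signed) enforces the range. Illustrative calls:

    ParseInteger('0x7fffffff', is_signed=True, is_long=False)   # 2147483647
    ParseInteger('-1', is_signed=True, is_long=False)           # -1
    ParseInteger('-1', is_signed=False, is_long=False)          # raises ValueError
                                                                # (fails uint32 check)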
| 1378 |
| 1379 def _ParseAbstractInteger(text, is_long=False): |
| 1380 """Parses an integer without checking size/signedness. |
| 1381 |
| 1382 Args: |
| 1383 text: The text to parse. |
| 1384 is_long: True if the value should be returned as a long integer. |
| 1385 |
| 1386 Returns: |
| 1387 The integer value. |
| 1388 |
| 1389 Raises: |
| 1390 ValueError: Thrown iff the text is not a valid integer. |
| 1391 """ |
| 1392 # Do the actual parsing. Exception handling is propagated to caller. |
| 1117 try: | 1393 try: |
| 1118 # We force 32-bit values to int and 64-bit values to long to make | 1394 # We force 32-bit values to int and 64-bit values to long to make |
| 1119 # alternate implementations where the distinction is more significant | 1395 # alternate implementations where the distinction is more significant |
| 1120 # (e.g. the C++ implementation) simpler. | 1396 # (e.g. the C++ implementation) simpler. |
| 1121 if is_long: | 1397 if is_long: |
| 1122 result = long(text, 0) | 1398 return long(text, 0) |
| 1123 else: | 1399 else: |
| 1124 result = int(text, 0) | 1400 return int(text, 0) |
| 1125 except ValueError: | 1401 except ValueError: |
| 1126 raise ValueError('Couldn\'t parse integer: %s' % text) | 1402 raise ValueError('Couldn\'t parse integer: %s' % text) |
| 1127 | 1403 |
| 1128 # Check if the integer is sane. Exceptions handled by callers. | |
| 1129 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)] | |
| 1130 checker.CheckValue(result) | |
| 1131 return result | |
| 1132 | |
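_ParseAbstractInteger accepts any literal that int(text, 0) (or long on Python 2) understands, without any width or sign restriction; range enforcement stays in ParseInteger. For example:

    _ParseAbstractInteger('0x10')          # 16 -- base-0 parsing accepts hex
    _ParseAbstractInteger('4294967296')    # 2**32, no width check here
    _ParseAbstractInteger('ten')           # raises ValueError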
| 1133 | 1404 |
| 1134 def ParseFloat(text): | 1405 def ParseFloat(text): |
| 1135 """Parse a floating point number. | 1406 """Parse a floating point number. |
| 1136 | 1407 |
| 1137 Args: | 1408 Args: |
| 1138 text: Text to parse. | 1409 text: Text to parse. |
| 1139 | 1410 |
| 1140 Returns: | 1411 Returns: |
| 1141 The number parsed. | 1412 The number parsed. |
| 1142 | 1413 |
| (...skipping 25 matching lines...) |
| 1168 | 1439 |
| 1169 Args: | 1440 Args: |
| 1170 text: Text to parse. | 1441 text: Text to parse. |
| 1171 | 1442 |
| 1172 Returns: | 1443 Returns: |
| 1173 Boolean value parsed. | 1444 Boolean value parsed. |
| 1174 | 1445 |
| 1175 Raises: | 1446 Raises: |
| 1176 ValueError: If text is not a valid boolean. | 1447 ValueError: If text is not a valid boolean. |
| 1177 """ | 1448 """ |
| 1178 if text in ('true', 't', '1'): | 1449 if text in ('true', 't', '1', 'True'): |
| 1179 return True | 1450 return True |
| 1180 elif text in ('false', 'f', '0'): | 1451 elif text in ('false', 'f', '0', 'False'): |
| 1181 return False | 1452 return False |
| 1182 else: | 1453 else: |
| 1183 raise ValueError('Expected "true" or "false".') | 1454 raise ValueError('Expected "true" or "false".') |
| 1184 | 1455 |
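ParseBool now also accepts the capitalized Python-style literals alongside the existing lowercase forms. For example:

    ParseBool('True')   # True  (newly accepted)
    ParseBool('t')      # True
    ParseBool('0')      # False
    ParseBool('yes')    # raises ValueError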
| 1185 | 1456 |
| 1186 def ParseEnum(field, value): | 1457 def ParseEnum(field, value): |
| 1187 """Parse an enum value. | 1458 """Parse an enum value. |
| 1188 | 1459 |
| 1189 The value can be specified by a number (the enum value), or by | 1460 The value can be specified by a number (the enum value), or by |
| 1190 a string literal (the enum name). | 1461 a string literal (the enum name). |
| 1191 | 1462 |
| 1192 Args: | 1463 Args: |
| 1193 field: Enum field descriptor. | 1464 field: Enum field descriptor. |
| 1194 value: String value. | 1465 value: String value. |
| 1195 | 1466 |
| 1196 Returns: | 1467 Returns: |
| 1197 Enum value number. | 1468 Enum value number. |
| 1198 | 1469 |
| 1199 Raises: | 1470 Raises: |
| 1200 ValueError: If the enum value could not be parsed. | 1471 ValueError: If the enum value could not be parsed. |
| 1201 """ | 1472 """ |
| 1202 enum_descriptor = field.enum_type | 1473 enum_descriptor = field.enum_type |
| 1203 try: | 1474 try: |
| 1204 number = int(value, 0) | 1475 number = int(value, 0) |
| 1205 except ValueError: | 1476 except ValueError: |
| 1206 # Identifier. | 1477 # Identifier. |
| 1207 enum_value = enum_descriptor.values_by_name.get(value, None) | 1478 enum_value = enum_descriptor.values_by_name.get(value, None) |
| 1208 if enum_value is None: | 1479 if enum_value is None: |
| 1209 raise ValueError( | 1480 raise ValueError('Enum type "%s" has no value named %s.' % |
| 1210 'Enum type "%s" has no value named %s.' % ( | 1481 (enum_descriptor.full_name, value)) |
| 1211 enum_descriptor.full_name, value)) | |
| 1212 else: | 1482 else: |
| 1213 # Numeric value. | 1483 # Numeric value. |
| 1214 enum_value = enum_descriptor.values_by_number.get(number, None) | 1484 enum_value = enum_descriptor.values_by_number.get(number, None) |
| 1215 if enum_value is None: | 1485 if enum_value is None: |
| 1216 raise ValueError( | 1486 raise ValueError('Enum type "%s" has no value with number %d.' % |
| 1217 'Enum type "%s" has no value with number %d.' % ( | 1487 (enum_descriptor.full_name, number)) |
| 1218 enum_descriptor.full_name, number)) | |
| 1219 return enum_value.number | 1488 return enum_value.number |
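ParseEnum resolves the value either by name or by base-0 number against the field's enum descriptor. A sketch using a hypothetical generated module my_pb2 whose Sample message has an enum field color defining RED = 1 (these names are illustrative only):

    field = my_pb2.Sample.DESCRIPTOR.fields_by_name['color']
    ParseEnum(field, 'RED')    # 1, resolved via values_by_name
    ParseEnum(field, '1')      # 1, resolved via values_by_number
    ParseEnum(field, 'TEAL')   # raises ValueError if TEAL is not defined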