| OLD | NEW |
| 1 # Protocol Buffers - Google's data interchange format | 1 # Protocol Buffers - Google's data interchange format |
| 2 # Copyright 2008 Google Inc. All rights reserved. | 2 # Copyright 2008 Google Inc. All rights reserved. |
| 3 # https://developers.google.com/protocol-buffers/ | 3 # https://developers.google.com/protocol-buffers/ |
| 4 # | 4 # |
| 5 # Redistribution and use in source and binary forms, with or without | 5 # Redistribution and use in source and binary forms, with or without |
| 6 # modification, are permitted provided that the following conditions are | 6 # modification, are permitted provided that the following conditions are |
| 7 # met: | 7 # met: |
| 8 # | 8 # |
| 9 # * Redistributions of source code must retain the above copyright | 9 # * Redistributions of source code must retain the above copyright |
| 10 # notice, this list of conditions and the following disclaimer. | 10 # notice, this list of conditions and the following disclaimer. |
| (...skipping 30 matching lines...) Expand all Loading... |
| 41 """ | 41 """ |
| 42 | 42 |
| 43 __author__ = 'kenton@google.com (Kenton Varda)' | 43 __author__ = 'kenton@google.com (Kenton Varda)' |
| 44 | 44 |
| 45 import io | 45 import io |
| 46 import re | 46 import re |
| 47 | 47 |
| 48 import six | 48 import six |
| 49 | 49 |
| 50 if six.PY3: | 50 if six.PY3: |
| 51 long = int # pylint: disable=redefined-builtin,invalid-name | 51 long = int |
| 52 | 52 |
| 53 # pylint: disable=g-import-not-at-top | |
| 54 from google.protobuf.internal import type_checkers | 53 from google.protobuf.internal import type_checkers |
| 55 from google.protobuf import descriptor | 54 from google.protobuf import descriptor |
| 56 from google.protobuf import text_encoding | 55 from google.protobuf import text_encoding |
| 57 | 56 |
| 58 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', 'PrintFieldValue', | 57 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', |
| 59 'Merge'] | 58 'PrintFieldValue', 'Merge'] |
| 59 |
| 60 | 60 |
| 61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), | 61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), |
| 62 type_checkers.Int32ValueChecker(), | 62 type_checkers.Int32ValueChecker(), |
| 63 type_checkers.Uint64ValueChecker(), | 63 type_checkers.Uint64ValueChecker(), |
| 64 type_checkers.Int64ValueChecker()) | 64 type_checkers.Int64ValueChecker()) |
| 65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE) | 65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE) |
| 66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE) | 66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE) |
| 67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT, | 67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT, |
| 68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE]) | 68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE]) |
| 69 _QUOTES = frozenset(("'", '"')) | 69 _QUOTES = frozenset(("'", '"')) |
| 70 _ANY_FULL_TYPE_NAME = 'google.protobuf.Any' | |
| 71 | 70 |
| 72 | 71 |
| 73 class Error(Exception): | 72 class Error(Exception): |
| 74 """Top-level module error for text_format.""" | 73 """Top-level module error for text_format.""" |
| 75 | 74 |
| 76 | 75 |
| 77 class ParseError(Error): | 76 class ParseError(Error): |
| 78 """Thrown in case of text parsing or tokenizing error.""" | 77 """Thrown in case of text parsing error.""" |
| 79 | |
| 80 def __init__(self, message=None, line=None, column=None): | |
| 81 if message is not None and line is not None: | |
| 82 loc = str(line) | |
| 83 if column is not None: | |
| 84 loc += ':{0}'.format(column) | |
| 85 message = '{0} : {1}'.format(loc, message) | |
| 86 if message is not None: | |
| 87 super(ParseError, self).__init__(message) | |
| 88 else: | |
| 89 super(ParseError, self).__init__() | |
| 90 self._line = line | |
| 91 self._column = column | |
| 92 | |
| 93 def GetLine(self): | |
| 94 return self._line | |
| 95 | |
| 96 def GetColumn(self): | |
| 97 return self._column | |
| 98 | 78 |
| 99 | 79 |
| 100 class TextWriter(object): | 80 class TextWriter(object): |
| 101 | |
| 102 def __init__(self, as_utf8): | 81 def __init__(self, as_utf8): |
| 103 if six.PY2: | 82 if six.PY2: |
| 104 self._writer = io.BytesIO() | 83 self._writer = io.BytesIO() |
| 105 else: | 84 else: |
| 106 self._writer = io.StringIO() | 85 self._writer = io.StringIO() |
| 107 | 86 |
| 108 def write(self, val): | 87 def write(self, val): |
| 109 if six.PY2: | 88 if six.PY2: |
| 110 if isinstance(val, six.text_type): | 89 if isinstance(val, six.text_type): |
| 111 val = val.encode('utf-8') | 90 val = val.encode('utf-8') |
| 112 return self._writer.write(val) | 91 return self._writer.write(val) |
| 113 | 92 |
| 114 def close(self): | 93 def close(self): |
| 115 return self._writer.close() | 94 return self._writer.close() |
| 116 | 95 |
| 117 def getvalue(self): | 96 def getvalue(self): |
| 118 return self._writer.getvalue() | 97 return self._writer.getvalue() |
| 119 | 98 |
| 120 | 99 |
| 121 def MessageToString(message, | 100 def MessageToString(message, as_utf8=False, as_one_line=False, |
| 122 as_utf8=False, | 101 pointy_brackets=False, use_index_order=False, |
| 123 as_one_line=False, | 102 float_format=None, use_field_number=False): |
| 124 pointy_brackets=False, | |
| 125 use_index_order=False, | |
| 126 float_format=None, | |
| 127 use_field_number=False, | |
| 128 descriptor_pool=None, | |
| 129 indent=0): | |
| 130 """Convert protobuf message to text format. | 103 """Convert protobuf message to text format. |
| 131 | 104 |
| 132 Floating point values can be formatted compactly with 15 digits of | 105 Floating point values can be formatted compactly with 15 digits of |
| 133 precision (which is the most that IEEE 754 "double" can guarantee) | 106 precision (which is the most that IEEE 754 "double" can guarantee) |
| 134 using float_format='.15g'. To ensure that converting to text and back to a | 107 using float_format='.15g'. To ensure that converting to text and back to a |
| 135 proto will result in an identical value, float_format='.17g' should be used. | 108 proto will result in an identical value, float_format='.17g' should be used. |
| 136 | 109 |
| 137 Args: | 110 Args: |
| 138 message: The protocol buffers message. | 111 message: The protocol buffers message. |
| 139 as_utf8: Produce text output in UTF8 format. | 112 as_utf8: Produce text output in UTF8 format. |
| 140 as_one_line: Don't introduce newlines between fields. | 113 as_one_line: Don't introduce newlines between fields. |
| 141 pointy_brackets: If True, use angle brackets instead of curly braces for | 114 pointy_brackets: If True, use angle brackets instead of curly braces for |
| 142 nesting. | 115 nesting. |
| 143 use_index_order: If True, print fields of a proto message using the order | 116 use_index_order: If True, print fields of a proto message using the order |
| 144 defined in source code instead of the field number. By default, use the | 117 defined in source code instead of the field number. By default, use the |
| 145 field number order. | 118 field number order. |
| 146 float_format: If set, use this to specify floating point number formatting | 119 float_format: If set, use this to specify floating point number formatting |
| 147 (per the "Format Specification Mini-Language"); otherwise, str() is used. | 120 (per the "Format Specification Mini-Language"); otherwise, str() is used. |
| 148 use_field_number: If True, print field numbers instead of names. | 121 use_field_number: If True, print field numbers instead of names. |
| 149 descriptor_pool: A DescriptorPool used to resolve Any types. | |
| 150 indent: The indent level, in terms of spaces, for pretty print. | |
| 151 | 122 |
| 152 Returns: | 123 Returns: |
| 153 A string of the text formatted protocol buffer message. | 124 A string of the text formatted protocol buffer message. |
| 154 """ | 125 """ |
| 155 out = TextWriter(as_utf8) | 126 out = TextWriter(as_utf8) |
| 156 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, | 127 printer = _Printer(out, 0, as_utf8, as_one_line, |
| 157 use_index_order, float_format, use_field_number, | 128 pointy_brackets, use_index_order, float_format, |
| 158 descriptor_pool) | 129 use_field_number) |
| 159 printer.PrintMessage(message) | 130 printer.PrintMessage(message) |
| 160 result = out.getvalue() | 131 result = out.getvalue() |
| 161 out.close() | 132 out.close() |
| 162 if as_one_line: | 133 if as_one_line: |
| 163 return result.rstrip() | 134 return result.rstrip() |
| 164 return result | 135 return result |
| 165 | 136 |
| 166 | 137 |
| 167 def _IsMapEntry(field): | 138 def _IsMapEntry(field): |
| 168 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and | 139 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and |
| 169 field.message_type.has_options and | 140 field.message_type.has_options and |
| 170 field.message_type.GetOptions().map_entry) | 141 field.message_type.GetOptions().map_entry) |
| 171 | 142 |
| 172 | 143 |
| 173 def PrintMessage(message, | 144 def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False, |
| 174 out, | 145 pointy_brackets=False, use_index_order=False, |
| 175 indent=0, | 146 float_format=None, use_field_number=False): |
| 176 as_utf8=False, | 147 printer = _Printer(out, indent, as_utf8, as_one_line, |
| 177 as_one_line=False, | 148 pointy_brackets, use_index_order, float_format, |
| 178 pointy_brackets=False, | 149 use_field_number) |
| 179 use_index_order=False, | |
| 180 float_format=None, | |
| 181 use_field_number=False, | |
| 182 descriptor_pool=None): | |
| 183 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, | |
| 184 use_index_order, float_format, use_field_number, | |
| 185 descriptor_pool) | |
| 186 printer.PrintMessage(message) | 150 printer.PrintMessage(message) |
| 187 | 151 |
| 188 | 152 |
| 189 def PrintField(field, | 153 def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False, |
| 190 value, | 154 pointy_brackets=False, use_index_order=False, float_format=None): |
| 191 out, | |
| 192 indent=0, | |
| 193 as_utf8=False, | |
| 194 as_one_line=False, | |
| 195 pointy_brackets=False, | |
| 196 use_index_order=False, | |
| 197 float_format=None): | |
| 198 """Print a single field name/value pair.""" | 155 """Print a single field name/value pair.""" |
| 199 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, | 156 printer = _Printer(out, indent, as_utf8, as_one_line, |
| 200 use_index_order, float_format) | 157 pointy_brackets, use_index_order, float_format) |
| 201 printer.PrintField(field, value) | 158 printer.PrintField(field, value) |
| 202 | 159 |
| 203 | 160 |
| 204 def PrintFieldValue(field, | 161 def PrintFieldValue(field, value, out, indent=0, as_utf8=False, |
| 205 value, | 162 as_one_line=False, pointy_brackets=False, |
| 206 out, | |
| 207 indent=0, | |
| 208 as_utf8=False, | |
| 209 as_one_line=False, | |
| 210 pointy_brackets=False, | |
| 211 use_index_order=False, | 163 use_index_order=False, |
| 212 float_format=None): | 164 float_format=None): |
| 213 """Print a single field value (not including name).""" | 165 """Print a single field value (not including name).""" |
| 214 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, | 166 printer = _Printer(out, indent, as_utf8, as_one_line, |
| 215 use_index_order, float_format) | 167 pointy_brackets, use_index_order, float_format) |
| 216 printer.PrintFieldValue(field, value) | 168 printer.PrintFieldValue(field, value) |
| 217 | 169 |
| 218 | 170 |
| 219 def _BuildMessageFromTypeName(type_name, descriptor_pool): | |
| 220 """Returns a protobuf message instance. | |
| 221 | |
| 222 Args: | |
| 223 type_name: Fully-qualified protobuf message type name string. | |
| 224 descriptor_pool: DescriptorPool instance. | |
| 225 | |
| 226 Returns: | |
| 227 A Message instance of type matching type_name, or None if the a Descriptor | |
| 228 wasn't found matching type_name. | |
| 229 """ | |
| 230 # pylint: disable=g-import-not-at-top | |
| 231 from google.protobuf import symbol_database | |
| 232 database = symbol_database.Default() | |
| 233 try: | |
| 234 message_descriptor = descriptor_pool.FindMessageTypeByName(type_name) | |
| 235 except KeyError: | |
| 236 return None | |
| 237 message_type = database.GetPrototype(message_descriptor) | |
| 238 return message_type() | |
| 239 | |
| 240 | |
| 241 class _Printer(object): | 171 class _Printer(object): |
| 242 """Text format printer for protocol message.""" | 172 """Text format printer for protocol message.""" |
| 243 | 173 |
| 244 def __init__(self, | 174 def __init__(self, out, indent=0, as_utf8=False, as_one_line=False, |
| 245 out, | 175 pointy_brackets=False, use_index_order=False, float_format=None, |
| 246 indent=0, | 176 use_field_number=False): |
| 247 as_utf8=False, | |
| 248 as_one_line=False, | |
| 249 pointy_brackets=False, | |
| 250 use_index_order=False, | |
| 251 float_format=None, | |
| 252 use_field_number=False, | |
| 253 descriptor_pool=None): | |
| 254 """Initialize the Printer. | 177 """Initialize the Printer. |
| 255 | 178 |
| 256 Floating point values can be formatted compactly with 15 digits of | 179 Floating point values can be formatted compactly with 15 digits of |
| 257 precision (which is the most that IEEE 754 "double" can guarantee) | 180 precision (which is the most that IEEE 754 "double" can guarantee) |
| 258 using float_format='.15g'. To ensure that converting to text and back to a | 181 using float_format='.15g'. To ensure that converting to text and back to a |
| 259 proto will result in an identical value, float_format='.17g' should be used. | 182 proto will result in an identical value, float_format='.17g' should be used. |
| 260 | 183 |
| 261 Args: | 184 Args: |
| 262 out: To record the text format result. | 185 out: To record the text format result. |
| 263 indent: The indent level for pretty print. | 186 indent: The indent level for pretty print. |
| 264 as_utf8: Produce text output in UTF8 format. | 187 as_utf8: Produce text output in UTF8 format. |
| 265 as_one_line: Don't introduce newlines between fields. | 188 as_one_line: Don't introduce newlines between fields. |
| 266 pointy_brackets: If True, use angle brackets instead of curly braces for | 189 pointy_brackets: If True, use angle brackets instead of curly braces for |
| 267 nesting. | 190 nesting. |
| 268 use_index_order: If True, print fields of a proto message using the order | 191 use_index_order: If True, print fields of a proto message using the order |
| 269 defined in source code instead of the field number. By default, use the | 192 defined in source code instead of the field number. By default, use the |
| 270 field number order. | 193 field number order. |
| 271 float_format: If set, use this to specify floating point number formatting | 194 float_format: If set, use this to specify floating point number formatting |
| 272 (per the "Format Specification Mini-Language"); otherwise, str() is | 195 (per the "Format Specification Mini-Language"); otherwise, str() is |
| 273 used. | 196 used. |
| 274 use_field_number: If True, print field numbers instead of names. | 197 use_field_number: If True, print field numbers instead of names. |
| 275 descriptor_pool: A DescriptorPool used to resolve Any types. | |
| 276 """ | 198 """ |
| 277 self.out = out | 199 self.out = out |
| 278 self.indent = indent | 200 self.indent = indent |
| 279 self.as_utf8 = as_utf8 | 201 self.as_utf8 = as_utf8 |
| 280 self.as_one_line = as_one_line | 202 self.as_one_line = as_one_line |
| 281 self.pointy_brackets = pointy_brackets | 203 self.pointy_brackets = pointy_brackets |
| 282 self.use_index_order = use_index_order | 204 self.use_index_order = use_index_order |
| 283 self.float_format = float_format | 205 self.float_format = float_format |
| 284 self.use_field_number = use_field_number | 206 self.use_field_number = use_field_number |
| 285 self.descriptor_pool = descriptor_pool | |
| 286 | |
| 287 def _TryPrintAsAnyMessage(self, message): | |
| 288 """Serializes if message is a google.protobuf.Any field.""" | |
| 289 packed_message = _BuildMessageFromTypeName(message.TypeName(), | |
| 290 self.descriptor_pool) | |
| 291 if packed_message: | |
| 292 packed_message.MergeFromString(message.value) | |
| 293 self.out.write('%s[%s]' % (self.indent * ' ', message.type_url)) | |
| 294 self._PrintMessageFieldValue(packed_message) | |
| 295 self.out.write(' ' if self.as_one_line else '\n') | |
| 296 return True | |
| 297 else: | |
| 298 return False | |
| 299 | 207 |
| 300 def PrintMessage(self, message): | 208 def PrintMessage(self, message): |
| 301 """Convert protobuf message to text format. | 209 """Convert protobuf message to text format. |
| 302 | 210 |
| 303 Args: | 211 Args: |
| 304 message: The protocol buffers message. | 212 message: The protocol buffers message. |
| 305 """ | 213 """ |
| 306 if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and | |
| 307 self.descriptor_pool and self._TryPrintAsAnyMessage(message)): | |
| 308 return | |
| 309 fields = message.ListFields() | 214 fields = message.ListFields() |
| 310 if self.use_index_order: | 215 if self.use_index_order: |
| 311 fields.sort(key=lambda x: x[0].index) | 216 fields.sort(key=lambda x: x[0].index) |
| 312 for field, value in fields: | 217 for field, value in fields: |
| 313 if _IsMapEntry(field): | 218 if _IsMapEntry(field): |
| 314 for key in sorted(value): | 219 for key in sorted(value): |
| 315 # This is slow for maps with submessage entries because it copies the | 220 # This is slow for maps with submessage entries because it copies the |
| 316 # entire tree. Unfortunately this would take significant refactoring | 221 # entire tree. Unfortunately this would take significant refactoring |
| 317 # of this file to work around. | 222 # of this file to work around. |
| 318 # | 223 # |
| 319 # TODO(haberman): refactor and optimize if this becomes an issue. | 224 # TODO(haberman): refactor and optimize if this becomes an issue. |
| 320 entry_submsg = value.GetEntryClass()(key=key, value=value[key]) | 225 entry_submsg = field.message_type._concrete_class( |
| 226 key=key, value=value[key]) |
| 321 self.PrintField(field, entry_submsg) | 227 self.PrintField(field, entry_submsg) |
| 322 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | 228 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
| 323 for element in value: | 229 for element in value: |
| 324 self.PrintField(field, element) | 230 self.PrintField(field, element) |
| 325 else: | 231 else: |
| 326 self.PrintField(field, value) | 232 self.PrintField(field, value) |
| 327 | 233 |
| 328 def PrintField(self, field, value): | 234 def PrintField(self, field, value): |
| 329 """Print a single field name/value pair.""" | 235 """Print a single field name/value pair.""" |
| 330 out = self.out | 236 out = self.out |
| (...skipping 20 matching lines...) Expand all Loading... |
| 351 # The colon is optional in this case, but our cross-language golden files | 257 # The colon is optional in this case, but our cross-language golden files |
| 352 # don't include it. | 258 # don't include it. |
| 353 out.write(': ') | 259 out.write(': ') |
| 354 | 260 |
| 355 self.PrintFieldValue(field, value) | 261 self.PrintFieldValue(field, value) |
| 356 if self.as_one_line: | 262 if self.as_one_line: |
| 357 out.write(' ') | 263 out.write(' ') |
| 358 else: | 264 else: |
| 359 out.write('\n') | 265 out.write('\n') |
| 360 | 266 |
| 361 def _PrintMessageFieldValue(self, value): | |
| 362 if self.pointy_brackets: | |
| 363 openb = '<' | |
| 364 closeb = '>' | |
| 365 else: | |
| 366 openb = '{' | |
| 367 closeb = '}' | |
| 368 | |
| 369 if self.as_one_line: | |
| 370 self.out.write(' %s ' % openb) | |
| 371 self.PrintMessage(value) | |
| 372 self.out.write(closeb) | |
| 373 else: | |
| 374 self.out.write(' %s\n' % openb) | |
| 375 self.indent += 2 | |
| 376 self.PrintMessage(value) | |
| 377 self.indent -= 2 | |
| 378 self.out.write(' ' * self.indent + closeb) | |
| 379 | |
| 380 def PrintFieldValue(self, field, value): | 267 def PrintFieldValue(self, field, value): |
| 381 """Print a single field value (not including name). | 268 """Print a single field value (not including name). |
| 382 | 269 |
| 383 For repeated fields, the value should be a single element. | 270 For repeated fields, the value should be a single element. |
| 384 | 271 |
| 385 Args: | 272 Args: |
| 386 field: The descriptor of the field to be printed. | 273 field: The descriptor of the field to be printed. |
| 387 value: The value of the field. | 274 value: The value of the field. |
| 388 """ | 275 """ |
| 389 out = self.out | 276 out = self.out |
| 277 if self.pointy_brackets: |
| 278 openb = '<' |
| 279 closeb = '>' |
| 280 else: |
| 281 openb = '{' |
| 282 closeb = '}' |
| 283 |
| 390 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: | 284 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: |
| 391 self._PrintMessageFieldValue(value) | 285 if self.as_one_line: |
| 286 out.write(' %s ' % openb) |
| 287 self.PrintMessage(value) |
| 288 out.write(closeb) |
| 289 else: |
| 290 out.write(' %s\n' % openb) |
| 291 self.indent += 2 |
| 292 self.PrintMessage(value) |
| 293 self.indent -= 2 |
| 294 out.write(' ' * self.indent + closeb) |
| 392 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: | 295 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: |
| 393 enum_value = field.enum_type.values_by_number.get(value, None) | 296 enum_value = field.enum_type.values_by_number.get(value, None) |
| 394 if enum_value is not None: | 297 if enum_value is not None: |
| 395 out.write(enum_value.name) | 298 out.write(enum_value.name) |
| 396 else: | 299 else: |
| 397 out.write(str(value)) | 300 out.write(str(value)) |
| 398 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: | 301 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: |
| 399 out.write('\"') | 302 out.write('\"') |
| 400 if isinstance(value, six.text_type): | 303 if isinstance(value, six.text_type): |
| 401 out_value = value.encode('utf-8') | 304 out_value = value.encode('utf-8') |
| (...skipping 10 matching lines...) Expand all Loading... |
| 412 if value: | 315 if value: |
| 413 out.write('true') | 316 out.write('true') |
| 414 else: | 317 else: |
| 415 out.write('false') | 318 out.write('false') |
| 416 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: | 319 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: |
| 417 out.write('{1:{0}}'.format(self.float_format, value)) | 320 out.write('{1:{0}}'.format(self.float_format, value)) |
| 418 else: | 321 else: |
| 419 out.write(str(value)) | 322 out.write(str(value)) |
| 420 | 323 |
| 421 | 324 |
| 422 def Parse(text, | 325 def Parse(text, message, |
| 423 message, | 326 allow_unknown_extension=False, allow_field_number=False): |
| 424 allow_unknown_extension=False, | 327 """Parses an text representation of a protocol message into a message. |
| 425 allow_field_number=False): | |
| 426 """Parses a text representation of a protocol message into a message. | 327 """Parses a text representation of a protocol message into a message. |
| 427 | 328 |
| 428 Args: | 329 Args: |
| 429 text: Message text representation. | 330 text: Message text representation. |
| 430 message: A protocol buffer message to merge into. | 331 message: A protocol buffer message to merge into. |
| 431 allow_unknown_extension: if True, skip over missing extensions and keep | 332 allow_unknown_extension: if True, skip over missing extensions and keep |
| 432 parsing | 333 parsing |
| 433 allow_field_number: if True, both field number and field name are allowed. | 334 allow_field_number: if True, both field number and field name are allowed. |
| 434 | 335 |
| 435 Returns: | 336 Returns: |
| 436 The same message passed as argument. | 337 The same message passed as argument. |
| 437 | 338 |
| 438 Raises: | 339 Raises: |
| 439 ParseError: On text parsing problems. | 340 ParseError: On text parsing problems. |
| 440 """ | 341 """ |
| 441 if not isinstance(text, str): | 342 if not isinstance(text, str): |
| 442 text = text.decode('utf-8') | 343 text = text.decode('utf-8') |
| 443 return ParseLines( | 344 return ParseLines(text.split('\n'), message, allow_unknown_extension, |
| 444 text.split('\n'), message, allow_unknown_extension, allow_field_number) | 345 allow_field_number) |
| 445 | 346 |
| 446 | 347 |
| 447 def Merge(text, | 348 def Merge(text, message, allow_unknown_extension=False, |
| 448 message, | 349 allow_field_number=False): |
| 449 allow_unknown_extension=False, | 350 """Parses a text representation of a protocol message into a message. |
| 450 allow_field_number=False, | |
| 451 descriptor_pool=None): | |
| 452 """Parses a text representation of a protocol message into a message. | |
| 453 | 351 |
| 454 Like Parse(), but allows repeated values for a non-repeated field, and uses | 352 Like Parse(), but allows repeated values for a non-repeated field, and uses |
| 455 the last one. | 353 the last one. |
| 456 | 354 |
| 457 Args: | 355 Args: |
| 458 text: Message text representation. | 356 text: Message text representation. |
| 459 message: A protocol buffer message to merge into. | 357 message: A protocol buffer message to merge into. |
| 460 allow_unknown_extension: if True, skip over missing extensions and keep | 358 allow_unknown_extension: if True, skip over missing extensions and keep |
| 461 parsing | 359 parsing |
| 462 allow_field_number: if True, both field number and field name are allowed. | 360 allow_field_number: if True, both field number and field name are allowed. |
| 463 descriptor_pool: A DescriptorPool used to resolve Any types. | |
| 464 | 361 |
| 465 Returns: | 362 Returns: |
| 466 The same message passed as argument. | 363 The same message passed as argument. |
| 467 | 364 |
| 468 Raises: | 365 Raises: |
| 469 ParseError: On text parsing problems. | 366 ParseError: On text parsing problems. |
| 470 """ | 367 """ |
| 471 return MergeLines( | 368 return MergeLines(text.split('\n'), message, allow_unknown_extension, |
| 472 text.split('\n'), | 369 allow_field_number) |
| 473 message, | |
| 474 allow_unknown_extension, | |
| 475 allow_field_number, | |
| 476 descriptor_pool=descriptor_pool) | |
| 477 | 370 |
| 478 | 371 |
| 479 def ParseLines(lines, | 372 def ParseLines(lines, message, allow_unknown_extension=False, |
| 480 message, | |
| 481 allow_unknown_extension=False, | |
| 482 allow_field_number=False): | 373 allow_field_number=False): |
| 483 """Parses a text representation of a protocol message into a message. | 374 """Parses a text representation of a protocol message into a message. |
| 484 | 375 |
| 485 Args: | 376 Args: |
| 486 lines: An iterable of lines of a message's text representation. | 377 lines: An iterable of lines of a message's text representation. |
| 487 message: A protocol buffer message to merge into. | 378 message: A protocol buffer message to merge into. |
| 488 allow_unknown_extension: if True, skip over missing extensions and keep | 379 allow_unknown_extension: if True, skip over missing extensions and keep |
| 489 parsing | 380 parsing |
| 490 allow_field_number: if True, both field number and field name are allowed. | 381 allow_field_number: if True, both field number and field name are allowed. |
| 491 descriptor_pool: A DescriptorPool used to resolve Any types. | |
| 492 | 382 |
| 493 Returns: | 383 Returns: |
| 494 The same message passed as argument. | 384 The same message passed as argument. |
| 495 | 385 |
| 496 Raises: | 386 Raises: |
| 497 ParseError: On text parsing problems. | 387 ParseError: On text parsing problems. |
| 498 """ | 388 """ |
| 499 parser = _Parser(allow_unknown_extension, allow_field_number) | 389 parser = _Parser(allow_unknown_extension, allow_field_number) |
| 500 return parser.ParseLines(lines, message) | 390 return parser.ParseLines(lines, message) |
| 501 | 391 |
| 502 | 392 |
| 503 def MergeLines(lines, | 393 def MergeLines(lines, message, allow_unknown_extension=False, |
| 504 message, | 394 allow_field_number=False): |
| 505 allow_unknown_extension=False, | 395 """Parses a text representation of a protocol message into a message. |
| 506 allow_field_number=False, | |
| 507 descriptor_pool=None): | |
| 508 """Parses a text representation of a protocol message into a message. | |
| 509 | 396 |
| 510 Args: | 397 Args: |
| 511 lines: An iterable of lines of a message's text representation. | 398 lines: An iterable of lines of a message's text representation. |
| 512 message: A protocol buffer message to merge into. | 399 message: A protocol buffer message to merge into. |
| 513 allow_unknown_extension: if True, skip over missing extensions and keep | 400 allow_unknown_extension: if True, skip over missing extensions and keep |
| 514 parsing | 401 parsing |
| 515 allow_field_number: if True, both field number and field name are allowed. | 402 allow_field_number: if True, both field number and field name are allowed. |
| 516 | 403 |
| 517 Returns: | 404 Returns: |
| 518 The same message passed as argument. | 405 The same message passed as argument. |
| 519 | 406 |
| 520 Raises: | 407 Raises: |
| 521 ParseError: On text parsing problems. | 408 ParseError: On text parsing problems. |
| 522 """ | 409 """ |
| 523 parser = _Parser(allow_unknown_extension, | 410 parser = _Parser(allow_unknown_extension, allow_field_number) |
| 524 allow_field_number, | |
| 525 descriptor_pool=descriptor_pool) | |
| 526 return parser.MergeLines(lines, message) | 411 return parser.MergeLines(lines, message) |
| 527 | 412 |
| 528 | 413 |
| 529 class _Parser(object): | 414 class _Parser(object): |
| 530 """Text format parser for protocol message.""" | 415 """Text format parser for protocol message.""" |
| 531 | 416 |
| 532 def __init__(self, | 417 def __init__(self, allow_unknown_extension=False, allow_field_number=False): |
| 533 allow_unknown_extension=False, | |
| 534 allow_field_number=False, | |
| 535 descriptor_pool=None): | |
| 536 self.allow_unknown_extension = allow_unknown_extension | 418 self.allow_unknown_extension = allow_unknown_extension |
| 537 self.allow_field_number = allow_field_number | 419 self.allow_field_number = allow_field_number |
| 538 self.descriptor_pool = descriptor_pool | |
| 539 | 420 |
| 540 def ParseFromString(self, text, message): | 421 def ParseFromString(self, text, message): |
| 541 """Parses a text representation of a protocol message into a message.""" | 422 """Parses a text representation of a protocol message into a message.""" |
| 542 if not isinstance(text, str): | 423 if not isinstance(text, str): |
| 543 text = text.decode('utf-8') | 424 text = text.decode('utf-8') |
| 544 return self.ParseLines(text.split('\n'), message) | 425 return self.ParseLines(text.split('\n'), message) |
| 545 | 426 |
| 546 def ParseLines(self, lines, message): | 427 def ParseLines(self, lines, message): |
| 547 """Parses a text representation of a protocol message into a message.""" | 428 """Parses a text representation of a protocol message into a message.""" |
| 548 self._allow_multiple_scalars = False | 429 self._allow_multiple_scalars = False |
| 549 self._ParseOrMerge(lines, message) | 430 self._ParseOrMerge(lines, message) |
| 550 return message | 431 return message |
| 551 | 432 |
| 552 def MergeFromString(self, text, message): | 433 def MergeFromString(self, text, message): |
| 553 """Merges a text representation of a protocol message into a message.""" | 434 """Merges a text representation of a protocol message into a message.""" |
| 554 return self._MergeLines(text.split('\n'), message) | 435 return self._MergeLines(text.split('\n'), message) |
| 555 | 436 |
| 556 def MergeLines(self, lines, message): | 437 def MergeLines(self, lines, message): |
| 557 """Merges a text representation of a protocol message into a message.""" | 438 """Merges a text representation of a protocol message into a message.""" |
| 558 self._allow_multiple_scalars = True | 439 self._allow_multiple_scalars = True |
| 559 self._ParseOrMerge(lines, message) | 440 self._ParseOrMerge(lines, message) |
| 560 return message | 441 return message |
| 561 | 442 |
| 562 def _ParseOrMerge(self, lines, message): | 443 def _ParseOrMerge(self, lines, message): |
| 563 """Converts a text representation of a protocol message into a message. | 444 """Converts an text representation of a protocol message into a message. |
| 564 | 445 |
| 565 Args: | 446 Args: |
| 566 lines: Lines of a message's text representation. | 447 lines: Lines of a message's text representation. |
| 567 message: A protocol buffer message to merge into. | 448 message: A protocol buffer message to merge into. |
| 568 | 449 |
| 569 Raises: | 450 Raises: |
| 570 ParseError: On text parsing problems. | 451 ParseError: On text parsing problems. |
| 571 """ | 452 """ |
| 572 tokenizer = Tokenizer(lines) | 453 tokenizer = _Tokenizer(lines) |
| 573 while not tokenizer.AtEnd(): | 454 while not tokenizer.AtEnd(): |
| 574 self._MergeField(tokenizer, message) | 455 self._MergeField(tokenizer, message) |
| 575 | 456 |
| 576 def _MergeField(self, tokenizer, message): | 457 def _MergeField(self, tokenizer, message): |
| 577 """Merges a single protocol message field into a message. | 458 """Merges a single protocol message field into a message. |
| 578 | 459 |
| 579 Args: | 460 Args: |
| 580 tokenizer: A tokenizer to parse the field name and values. | 461 tokenizer: A tokenizer to parse the field name and values. |
| 581 message: A protocol message to record the data. | 462 message: A protocol message to record the data. |
| 582 | 463 |
| (...skipping 20 matching lines...) Expand all Loading... |
| 603 field = message.Extensions._FindExtensionByName(name) | 484 field = message.Extensions._FindExtensionByName(name) |
| 604 # pylint: enable=protected-access | 485 # pylint: enable=protected-access |
| 605 if not field: | 486 if not field: |
| 606 if self.allow_unknown_extension: | 487 if self.allow_unknown_extension: |
| 607 field = None | 488 field = None |
| 608 else: | 489 else: |
| 609 raise tokenizer.ParseErrorPreviousToken( | 490 raise tokenizer.ParseErrorPreviousToken( |
| 610 'Extension "%s" not registered.' % name) | 491 'Extension "%s" not registered.' % name) |
| 611 elif message_descriptor != field.containing_type: | 492 elif message_descriptor != field.containing_type: |
| 612 raise tokenizer.ParseErrorPreviousToken( | 493 raise tokenizer.ParseErrorPreviousToken( |
| 613 'Extension "%s" does not extend message type "%s".' % | 494 'Extension "%s" does not extend message type "%s".' % ( |
| 614 (name, message_descriptor.full_name)) | 495 name, message_descriptor.full_name)) |
| 615 | 496 |
| 616 tokenizer.Consume(']') | 497 tokenizer.Consume(']') |
| 617 | 498 |
| 618 else: | 499 else: |
| 619 name = tokenizer.ConsumeIdentifierOrNumber() | 500 name = tokenizer.ConsumeIdentifier() |
| 620 if self.allow_field_number and name.isdigit(): | 501 if self.allow_field_number and name.isdigit(): |
| 621 number = ParseInteger(name, True, True) | 502 number = ParseInteger(name, True, True) |
| 622 field = message_descriptor.fields_by_number.get(number, None) | 503 field = message_descriptor.fields_by_number.get(number, None) |
| 623 if not field and message_descriptor.is_extendable: | 504 if not field and message_descriptor.is_extendable: |
| 624 field = message.Extensions._FindExtensionByNumber(number) | 505 field = message.Extensions._FindExtensionByNumber(number) |
| 625 else: | 506 else: |
| 626 field = message_descriptor.fields_by_name.get(name, None) | 507 field = message_descriptor.fields_by_name.get(name, None) |
| 627 | 508 |
| 628 # Group names are expected to be capitalized as they appear in the | 509 # Group names are expected to be capitalized as they appear in the |
| 629 # .proto file, which actually matches their type names, not their field | 510 # .proto file, which actually matches their type names, not their field |
| 630 # names. | 511 # names. |
| 631 if not field: | 512 if not field: |
| 632 field = message_descriptor.fields_by_name.get(name.lower(), None) | 513 field = message_descriptor.fields_by_name.get(name.lower(), None) |
| 633 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: | 514 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: |
| 634 field = None | 515 field = None |
| 635 | 516 |
| 636 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and | 517 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and |
| 637 field.message_type.name != name): | 518 field.message_type.name != name): |
| 638 field = None | 519 field = None |
| 639 | 520 |
| 640 if not field: | 521 if not field: |
| 641 raise tokenizer.ParseErrorPreviousToken( | 522 raise tokenizer.ParseErrorPreviousToken( |
| 642 'Message type "%s" has no field named "%s".' % | 523 'Message type "%s" has no field named "%s".' % ( |
| 643 (message_descriptor.full_name, name)) | 524 message_descriptor.full_name, name)) |
| 644 | 525 |
| 645 if field: | 526 if field: |
| 646 if not self._allow_multiple_scalars and field.containing_oneof: | 527 if not self._allow_multiple_scalars and field.containing_oneof: |
| 647 # Check if there's a different field set in this oneof. | 528 # Check if there's a different field set in this oneof. |
| 648 # Note that we ignore the case if the same field was set before, and we | 529 # Note that we ignore the case if the same field was set before, and we |
| 649 # apply _allow_multiple_scalars to non-scalar fields as well. | 530 # apply _allow_multiple_scalars to non-scalar fields as well. |
| 650 which_oneof = message.WhichOneof(field.containing_oneof.name) | 531 which_oneof = message.WhichOneof(field.containing_oneof.name) |
| 651 if which_oneof is not None and which_oneof != field.name: | 532 if which_oneof is not None and which_oneof != field.name: |
| 652 raise tokenizer.ParseErrorPreviousToken( | 533 raise tokenizer.ParseErrorPreviousToken( |
| 653 'Field "%s" is specified along with field "%s", another member ' | 534 'Field "%s" is specified along with field "%s", another member ' |
| 654 'of oneof "%s" for message type "%s".' % | 535 'of oneof "%s" for message type "%s".' % ( |
| 655 (field.name, which_oneof, field.containing_oneof.name, | 536 field.name, which_oneof, field.containing_oneof.name, |
| 656 message_descriptor.full_name)) | 537 message_descriptor.full_name)) |
| 657 | 538 |
| 658 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: | 539 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: |
| 659 tokenizer.TryConsume(':') | 540 tokenizer.TryConsume(':') |
| 660 merger = self._MergeMessageField | 541 merger = self._MergeMessageField |
| 661 else: | 542 else: |
| 662 tokenizer.Consume(':') | 543 tokenizer.Consume(':') |
| 663 merger = self._MergeScalarField | 544 merger = self._MergeScalarField |
| 664 | 545 |
| 665 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and | 546 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED |
| 666 tokenizer.TryConsume('[')): | 547 and tokenizer.TryConsume('[')): |
| 667 # Short repeated format, e.g. "foo: [1, 2, 3]" | 548 # Short repeated format, e.g. "foo: [1, 2, 3]" |
| 668 while True: | 549 while True: |
| 669 merger(tokenizer, message, field) | 550 merger(tokenizer, message, field) |
| 670 if tokenizer.TryConsume(']'): | 551 if tokenizer.TryConsume(']'): break |
| 671 break | |
| 672 tokenizer.Consume(',') | 552 tokenizer.Consume(',') |
| 673 | 553 |
| 674 else: | 554 else: |
| 675 merger(tokenizer, message, field) | 555 merger(tokenizer, message, field) |
| 676 | 556 |
| 677 else: # Proto field is unknown. | 557 else: # Proto field is unknown. |
| 678 assert self.allow_unknown_extension | 558 assert self.allow_unknown_extension |
| 679 _SkipFieldContents(tokenizer) | 559 _SkipFieldContents(tokenizer) |
| 680 | 560 |
| 681 # For historical reasons, fields may optionally be separated by commas or | 561 # For historical reasons, fields may optionally be separated by commas or |
| 682 # semicolons. | 562 # semicolons. |
| 683 if not tokenizer.TryConsume(','): | 563 if not tokenizer.TryConsume(','): |
| 684 tokenizer.TryConsume(';') | 564 tokenizer.TryConsume(';') |
| 685 | 565 |
| 686 def _ConsumeAnyTypeUrl(self, tokenizer): | |
| 687 """Consumes a google.protobuf.Any type URL and returns the type name.""" | |
| 688 # Consume "type.googleapis.com/". | |
| 689 tokenizer.ConsumeIdentifier() | |
| 690 tokenizer.Consume('.') | |
| 691 tokenizer.ConsumeIdentifier() | |
| 692 tokenizer.Consume('.') | |
| 693 tokenizer.ConsumeIdentifier() | |
| 694 tokenizer.Consume('/') | |
| 695 # Consume the fully-qualified type name. | |
| 696 name = [tokenizer.ConsumeIdentifier()] | |
| 697 while tokenizer.TryConsume('.'): | |
| 698 name.append(tokenizer.ConsumeIdentifier()) | |
| 699 return '.'.join(name) | |
| 700 | |
| 701 def _MergeMessageField(self, tokenizer, message, field): | 566 def _MergeMessageField(self, tokenizer, message, field): |
| 702 """Merges a single scalar field into a message. | 567 """Merges a single scalar field into a message. |
| 703 | 568 |
| 704 Args: | 569 Args: |
| 705 tokenizer: A tokenizer to parse the field value. | 570 tokenizer: A tokenizer to parse the field value. |
| 706 message: The message of which field is a member. | 571 message: The message of which field is a member. |
| 707 field: The descriptor of the field to be merged. | 572 field: The descriptor of the field to be merged. |
| 708 | 573 |
| 709 Raises: | 574 Raises: |
| 710 ParseError: In case of text parsing problems. | 575 ParseError: In case of text parsing problems. |
| 711 """ | 576 """ |
| 712 is_map_entry = _IsMapEntry(field) | 577 is_map_entry = _IsMapEntry(field) |
| 713 | 578 |
| 714 if tokenizer.TryConsume('<'): | 579 if tokenizer.TryConsume('<'): |
| 715 end_token = '>' | 580 end_token = '>' |
| 716 else: | 581 else: |
| 717 tokenizer.Consume('{') | 582 tokenizer.Consume('{') |
| 718 end_token = '}' | 583 end_token = '}' |
| 719 | 584 |
| 720 if (field.message_type.full_name == _ANY_FULL_TYPE_NAME and | 585 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
| 721 tokenizer.TryConsume('[')): | |
| 722 packed_type_name = self._ConsumeAnyTypeUrl(tokenizer) | |
| 723 tokenizer.Consume(']') | |
| 724 tokenizer.TryConsume(':') | |
| 725 if tokenizer.TryConsume('<'): | |
| 726 expanded_any_end_token = '>' | |
| 727 else: | |
| 728 tokenizer.Consume('{') | |
| 729 expanded_any_end_token = '}' | |
| 730 if not self.descriptor_pool: | |
| 731 raise ParseError('Descriptor pool required to parse expanded Any field') | |
| 732 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name, | |
| 733 self.descriptor_pool) | |
| 734 if not expanded_any_sub_message: | |
| 735 raise ParseError('Type %s not found in descriptor pool' % | |
| 736 packed_type_name) | |
| 737 while not tokenizer.TryConsume(expanded_any_end_token): | |
| 738 if tokenizer.AtEnd(): | |
| 739 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % | |
| 740 (expanded_any_end_token,)) | |
| 741 self._MergeField(tokenizer, expanded_any_sub_message) | |
| 742 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | |
| 743 any_message = getattr(message, field.name).add() | |
| 744 else: | |
| 745 any_message = getattr(message, field.name) | |
| 746 any_message.Pack(expanded_any_sub_message) | |
| 747 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | |
| 748 if field.is_extension: | 586 if field.is_extension: |
| 749 sub_message = message.Extensions[field].add() | 587 sub_message = message.Extensions[field].add() |
| 750 elif is_map_entry: | 588 elif is_map_entry: |
| 751 sub_message = getattr(message, field.name).GetEntryClass()() | 589 # pylint: disable=protected-access |
| 590 sub_message = field.message_type._concrete_class() |
| 752 else: | 591 else: |
| 753 sub_message = getattr(message, field.name).add() | 592 sub_message = getattr(message, field.name).add() |
| 754 else: | 593 else: |
| 755 if field.is_extension: | 594 if field.is_extension: |
| 756 sub_message = message.Extensions[field] | 595 sub_message = message.Extensions[field] |
| 757 else: | 596 else: |
| 758 sub_message = getattr(message, field.name) | 597 sub_message = getattr(message, field.name) |
| 759 sub_message.SetInParent() | 598 sub_message.SetInParent() |
| 760 | 599 |
| 761 while not tokenizer.TryConsume(end_token): | 600 while not tokenizer.TryConsume(end_token): |
| (...skipping 20 matching lines...) Expand all Loading... |
| 782 Raises: | 621 Raises: |
| 783 ParseError: In case of text parsing problems. | 622 ParseError: In case of text parsing problems. |
| 784 RuntimeError: On runtime errors. | 623 RuntimeError: On runtime errors. |
| 785 """ | 624 """ |
| 786 _ = self.allow_unknown_extension | 625 _ = self.allow_unknown_extension |
| 787 value = None | 626 value = None |
| 788 | 627 |
| 789 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, | 628 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, |
| 790 descriptor.FieldDescriptor.TYPE_SINT32, | 629 descriptor.FieldDescriptor.TYPE_SINT32, |
| 791 descriptor.FieldDescriptor.TYPE_SFIXED32): | 630 descriptor.FieldDescriptor.TYPE_SFIXED32): |
| 792 value = _ConsumeInt32(tokenizer) | 631 value = tokenizer.ConsumeInt32() |
| 793 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, | 632 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, |
| 794 descriptor.FieldDescriptor.TYPE_SINT64, | 633 descriptor.FieldDescriptor.TYPE_SINT64, |
| 795 descriptor.FieldDescriptor.TYPE_SFIXED64): | 634 descriptor.FieldDescriptor.TYPE_SFIXED64): |
| 796 value = _ConsumeInt64(tokenizer) | 635 value = tokenizer.ConsumeInt64() |
| 797 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, | 636 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, |
| 798 descriptor.FieldDescriptor.TYPE_FIXED32): | 637 descriptor.FieldDescriptor.TYPE_FIXED32): |
| 799 value = _ConsumeUint32(tokenizer) | 638 value = tokenizer.ConsumeUint32() |
| 800 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, | 639 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, |
| 801 descriptor.FieldDescriptor.TYPE_FIXED64): | 640 descriptor.FieldDescriptor.TYPE_FIXED64): |
| 802 value = _ConsumeUint64(tokenizer) | 641 value = tokenizer.ConsumeUint64() |
| 803 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, | 642 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, |
| 804 descriptor.FieldDescriptor.TYPE_DOUBLE): | 643 descriptor.FieldDescriptor.TYPE_DOUBLE): |
| 805 value = tokenizer.ConsumeFloat() | 644 value = tokenizer.ConsumeFloat() |
| 806 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: | 645 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: |
| 807 value = tokenizer.ConsumeBool() | 646 value = tokenizer.ConsumeBool() |
| 808 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: | 647 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: |
| 809 value = tokenizer.ConsumeString() | 648 value = tokenizer.ConsumeString() |
| 810 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: | 649 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: |
| 811 value = tokenizer.ConsumeByteString() | 650 value = tokenizer.ConsumeByteString() |
| 812 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: | 651 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 907 ParseError: In case an invalid field value is found. | 746 ParseError: In case an invalid field value is found. |
| 908 """ | 747 """ |
| 909 # String/bytes tokens can come in multiple adjacent string literals. | 748 # String/bytes tokens can come in multiple adjacent string literals. |
| 910 # If we can consume one, consume as many as we can. | 749 # If we can consume one, consume as many as we can. |
| 911 if tokenizer.TryConsumeByteString(): | 750 if tokenizer.TryConsumeByteString(): |
| 912 while tokenizer.TryConsumeByteString(): | 751 while tokenizer.TryConsumeByteString(): |
| 913 pass | 752 pass |
| 914 return | 753 return |
| 915 | 754 |
| 916 if (not tokenizer.TryConsumeIdentifier() and | 755 if (not tokenizer.TryConsumeIdentifier() and |
| 917 not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and | 756 not tokenizer.TryConsumeInt64() and |
| 757 not tokenizer.TryConsumeUint64() and |
| 918 not tokenizer.TryConsumeFloat()): | 758 not tokenizer.TryConsumeFloat()): |
| 919 raise ParseError('Invalid field value: ' + tokenizer.token) | 759 raise ParseError('Invalid field value: ' + tokenizer.token) |
| 920 | 760 |
| 921 | 761 |
class Tokenizer(object):
  """Protocol buffer text representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  # Whitespace only; used when the caller wants comments surfaced as tokens.
  _WHITESPACE = re.compile(r'\s+')
  # A '#' comment running to end of line, with any leading whitespace.
  _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
  # Whitespace and/or comments; the default skip pattern between tokens.
  _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
  # A single token: identifier, number, or one quoted-string alternative per
  # quote mark (a string may be unterminated at end of line).
  _TOKEN = re.compile('|'.join([
      r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
      r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
  ] + [  # quoted str for each quote mark
      r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES
  ]))

  # Identifier: leading letter/underscore, then word characters.
  _IDENTIFIER = re.compile(r'[^\d\W]\w*')
  # Identifier or bare number (e.g. numeric field names).
  _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')
| 943 | 780 |
| 944 def __init__(self, lines, skip_comments=True): | 781 def __init__(self, lines): |
| 945 self._position = 0 | 782 self._position = 0 |
| 946 self._line = -1 | 783 self._line = -1 |
| 947 self._column = 0 | 784 self._column = 0 |
| 948 self._token_start = None | 785 self._token_start = None |
| 949 self.token = '' | 786 self.token = '' |
| 950 self._lines = iter(lines) | 787 self._lines = iter(lines) |
| 951 self._current_line = '' | 788 self._current_line = '' |
| 952 self._previous_line = 0 | 789 self._previous_line = 0 |
| 953 self._previous_column = 0 | 790 self._previous_column = 0 |
| 954 self._more_lines = True | 791 self._more_lines = True |
| 955 self._skip_comments = skip_comments | |
| 956 self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT | |
| 957 or self._WHITESPACE) | |
| 958 self._SkipWhitespace() | 792 self._SkipWhitespace() |
| 959 self.NextToken() | 793 self.NextToken() |
| 960 | 794 |
| 961 def LookingAt(self, token): | 795 def LookingAt(self, token): |
| 962 return self.token == token | 796 return self.token == token |
| 963 | 797 |
| 964 def AtEnd(self): | 798 def AtEnd(self): |
| 965 """Checks the end of the text was reached. | 799 """Checks the end of the text was reached. |
| 966 | 800 |
| 967 Returns: | 801 Returns: |
| 968 True iff the end was reached. | 802 True iff the end was reached. |
| 969 """ | 803 """ |
| 970 return not self.token | 804 return not self.token |
| 971 | 805 |
  def _PopLine(self):
    """Advances to the next input line once the current one is exhausted.

    Loops because the iterator may yield lines shorter than the current
    column (e.g. empty lines). Sets _more_lines to False at end of input.
    """
    while len(self._current_line) <= self._column:
      try:
        self._current_line = next(self._lines)
      except StopIteration:
        # No more input: leave an empty current line and flag exhaustion.
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0
| 983 | 817 |
  def _SkipWhitespace(self):
    """Skips whitespace (and comments, when skip_comments was requested).

    Repeatedly pops lines and consumes runs matched by the pattern chosen
    in __init__ until the cursor rests on a non-skippable character.
    """
    while True:
      self._PopLine()
      match = self._whitespace_pattern.match(self._current_line, self._column)
      if not match:
        break
      length = len(match.group(0))
      self._column += length
| 992 | 826 |
| 993 def TryConsume(self, token): | 827 def TryConsume(self, token): |
| 994 """Tries to consume a given piece of text. | 828 """Tries to consume a given piece of text. |
| 995 | 829 |
| 996 Args: | 830 Args: |
| 997 token: Text to consume. | 831 token: Text to consume. |
| 998 | 832 |
| 999 Returns: | 833 Returns: |
| 1000 True iff the text was consumed. | 834 True iff the text was consumed. |
| 1001 """ | 835 """ |
| 1002 if self.token == token: | 836 if self.token == token: |
| 1003 self.NextToken() | 837 self.NextToken() |
| 1004 return True | 838 return True |
| 1005 return False | 839 return False |
| 1006 | 840 |
| 1007 def Consume(self, token): | 841 def Consume(self, token): |
| 1008 """Consumes a piece of text. | 842 """Consumes a piece of text. |
| 1009 | 843 |
| 1010 Args: | 844 Args: |
| 1011 token: Text to consume. | 845 token: Text to consume. |
| 1012 | 846 |
| 1013 Raises: | 847 Raises: |
| 1014 ParseError: If the text couldn't be consumed. | 848 ParseError: If the text couldn't be consumed. |
| 1015 """ | 849 """ |
| 1016 if not self.TryConsume(token): | 850 if not self.TryConsume(token): |
| 1017 raise self.ParseError('Expected "%s".' % token) | 851 raise self._ParseError('Expected "%s".' % token) |
| 1018 | |
| 1019 def ConsumeComment(self): | |
| 1020 result = self.token | |
| 1021 if not self._COMMENT.match(result): | |
| 1022 raise self.ParseError('Expected comment.') | |
| 1023 self.NextToken() | |
| 1024 return result | |
| 1025 | 852 |
| 1026 def TryConsumeIdentifier(self): | 853 def TryConsumeIdentifier(self): |
| 1027 try: | 854 try: |
| 1028 self.ConsumeIdentifier() | 855 self.ConsumeIdentifier() |
| 1029 return True | 856 return True |
| 1030 except ParseError: | 857 except ParseError: |
| 1031 return False | 858 return False |
| 1032 | 859 |
| 1033 def ConsumeIdentifier(self): | 860 def ConsumeIdentifier(self): |
| 1034 """Consumes protocol message field identifier. | 861 """Consumes protocol message field identifier. |
| 1035 | 862 |
| 1036 Returns: | 863 Returns: |
| 1037 Identifier string. | 864 Identifier string. |
| 1038 | 865 |
| 1039 Raises: | 866 Raises: |
| 1040 ParseError: If an identifier couldn't be consumed. | 867 ParseError: If an identifier couldn't be consumed. |
| 1041 """ | 868 """ |
| 1042 result = self.token | 869 result = self.token |
| 1043 if not self._IDENTIFIER.match(result): | 870 if not self._IDENTIFIER.match(result): |
| 1044 raise self.ParseError('Expected identifier.') | 871 raise self._ParseError('Expected identifier.') |
| 1045 self.NextToken() | 872 self.NextToken() |
| 1046 return result | 873 return result |
| 1047 | 874 |
| 1048 def TryConsumeIdentifierOrNumber(self): | 875 def ConsumeInt32(self): |
| 876 """Consumes a signed 32bit integer number. |
| 877 |
| 878 Returns: |
| 879 The integer parsed. |
| 880 |
| 881 Raises: |
| 882 ParseError: If a signed 32bit integer couldn't be consumed. |
| 883 """ |
| 1049 try: | 884 try: |
| 1050 self.ConsumeIdentifierOrNumber() | 885 result = ParseInteger(self.token, is_signed=True, is_long=False) |
| 886 except ValueError as e: |
| 887 raise self._ParseError(str(e)) |
| 888 self.NextToken() |
| 889 return result |
| 890 |
| 891 def ConsumeUint32(self): |
| 892 """Consumes an unsigned 32bit integer number. |
| 893 |
| 894 Returns: |
| 895 The integer parsed. |
| 896 |
| 897 Raises: |
| 898 ParseError: If an unsigned 32bit integer couldn't be consumed. |
| 899 """ |
| 900 try: |
| 901 result = ParseInteger(self.token, is_signed=False, is_long=False) |
| 902 except ValueError as e: |
| 903 raise self._ParseError(str(e)) |
| 904 self.NextToken() |
| 905 return result |
| 906 |
| 907 def TryConsumeInt64(self): |
| 908 try: |
| 909 self.ConsumeInt64() |
| 1051 return True | 910 return True |
| 1052 except ParseError: | 911 except ParseError: |
| 1053 return False | 912 return False |
| 1054 | 913 |
| 1055 def ConsumeIdentifierOrNumber(self): | 914 def ConsumeInt64(self): |
| 1056 """Consumes protocol message field identifier. | 915 """Consumes a signed 64bit integer number. |
| 1057 | 916 |
| 1058 Returns: | 917 Returns: |
| 1059 Identifier string. | 918 The integer parsed. |
| 1060 | 919 |
| 1061 Raises: | 920 Raises: |
| 1062 ParseError: If an identifier couldn't be consumed. | 921 ParseError: If a signed 64bit integer couldn't be consumed. |
| 1063 """ | 922 """ |
| 1064 result = self.token | 923 try: |
| 1065 if not self._IDENTIFIER_OR_NUMBER.match(result): | 924 result = ParseInteger(self.token, is_signed=True, is_long=True) |
| 1066 raise self.ParseError('Expected identifier or number.') | 925 except ValueError as e: |
| 926 raise self._ParseError(str(e)) |
| 1067 self.NextToken() | 927 self.NextToken() |
| 1068 return result | 928 return result |
| 1069 | 929 |
| 1070 def TryConsumeInteger(self): | 930 def TryConsumeUint64(self): |
| 1071 try: | 931 try: |
| 1072 # Note: is_long only affects value type, not whether an error is raised. | 932 self.ConsumeUint64() |
| 1073 self.ConsumeInteger() | |
| 1074 return True | 933 return True |
| 1075 except ParseError: | 934 except ParseError: |
| 1076 return False | 935 return False |
| 1077 | 936 |
| 1078 def ConsumeInteger(self, is_long=False): | 937 def ConsumeUint64(self): |
| 1079 """Consumes an integer number. | 938 """Consumes an unsigned 64bit integer number. |
| 1080 | 939 |
| 1081 Args: | |
| 1082 is_long: True if the value should be returned as a long integer. | |
| 1083 Returns: | 940 Returns: |
| 1084 The integer parsed. | 941 The integer parsed. |
| 1085 | 942 |
| 1086 Raises: | 943 Raises: |
| 1087 ParseError: If an integer couldn't be consumed. | 944 ParseError: If an unsigned 64bit integer couldn't be consumed. |
| 1088 """ | 945 """ |
| 1089 try: | 946 try: |
| 1090 result = _ParseAbstractInteger(self.token, is_long=is_long) | 947 result = ParseInteger(self.token, is_signed=False, is_long=True) |
| 1091 except ValueError as e: | 948 except ValueError as e: |
| 1092 raise self.ParseError(str(e)) | 949 raise self._ParseError(str(e)) |
| 1093 self.NextToken() | 950 self.NextToken() |
| 1094 return result | 951 return result |
| 1095 | 952 |
| 1096 def TryConsumeFloat(self): | 953 def TryConsumeFloat(self): |
| 1097 try: | 954 try: |
| 1098 self.ConsumeFloat() | 955 self.ConsumeFloat() |
| 1099 return True | 956 return True |
| 1100 except ParseError: | 957 except ParseError: |
| 1101 return False | 958 return False |
| 1102 | 959 |
| 1103 def ConsumeFloat(self): | 960 def ConsumeFloat(self): |
| 1104 """Consumes an floating point number. | 961 """Consumes an floating point number. |
| 1105 | 962 |
| 1106 Returns: | 963 Returns: |
| 1107 The number parsed. | 964 The number parsed. |
| 1108 | 965 |
| 1109 Raises: | 966 Raises: |
| 1110 ParseError: If a floating point number couldn't be consumed. | 967 ParseError: If a floating point number couldn't be consumed. |
| 1111 """ | 968 """ |
| 1112 try: | 969 try: |
| 1113 result = ParseFloat(self.token) | 970 result = ParseFloat(self.token) |
| 1114 except ValueError as e: | 971 except ValueError as e: |
| 1115 raise self.ParseError(str(e)) | 972 raise self._ParseError(str(e)) |
| 1116 self.NextToken() | 973 self.NextToken() |
| 1117 return result | 974 return result |
| 1118 | 975 |
| 1119 def ConsumeBool(self): | 976 def ConsumeBool(self): |
| 1120 """Consumes a boolean value. | 977 """Consumes a boolean value. |
| 1121 | 978 |
| 1122 Returns: | 979 Returns: |
| 1123 The bool parsed. | 980 The bool parsed. |
| 1124 | 981 |
| 1125 Raises: | 982 Raises: |
| 1126 ParseError: If a boolean value couldn't be consumed. | 983 ParseError: If a boolean value couldn't be consumed. |
| 1127 """ | 984 """ |
| 1128 try: | 985 try: |
| 1129 result = ParseBool(self.token) | 986 result = ParseBool(self.token) |
| 1130 except ValueError as e: | 987 except ValueError as e: |
| 1131 raise self.ParseError(str(e)) | 988 raise self._ParseError(str(e)) |
| 1132 self.NextToken() | 989 self.NextToken() |
| 1133 return result | 990 return result |
| 1134 | 991 |
| 1135 def TryConsumeByteString(self): | 992 def TryConsumeByteString(self): |
| 1136 try: | 993 try: |
| 1137 self.ConsumeByteString() | 994 self.ConsumeByteString() |
| 1138 return True | 995 return True |
| 1139 except ParseError: | 996 except ParseError: |
| 1140 return False | 997 return False |
| 1141 | 998 |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1175 tokens which are automatically concatenated, like in C or Python. This | 1032 tokens which are automatically concatenated, like in C or Python. This |
| 1176 method only consumes one token. | 1033 method only consumes one token. |
| 1177 | 1034 |
| 1178 Returns: | 1035 Returns: |
| 1179 The token parsed. | 1036 The token parsed. |
| 1180 Raises: | 1037 Raises: |
| 1181 ParseError: When the wrong format data is found. | 1038 ParseError: When the wrong format data is found. |
| 1182 """ | 1039 """ |
| 1183 text = self.token | 1040 text = self.token |
| 1184 if len(text) < 1 or text[0] not in _QUOTES: | 1041 if len(text) < 1 or text[0] not in _QUOTES: |
| 1185 raise self.ParseError('Expected string but found: %r' % (text,)) | 1042 raise self._ParseError('Expected string but found: %r' % (text,)) |
| 1186 | 1043 |
| 1187 if len(text) < 2 or text[-1] != text[0]: | 1044 if len(text) < 2 or text[-1] != text[0]: |
| 1188 raise self.ParseError('String missing ending quote: %r' % (text,)) | 1045 raise self._ParseError('String missing ending quote: %r' % (text,)) |
| 1189 | 1046 |
| 1190 try: | 1047 try: |
| 1191 result = text_encoding.CUnescape(text[1:-1]) | 1048 result = text_encoding.CUnescape(text[1:-1]) |
| 1192 except ValueError as e: | 1049 except ValueError as e: |
| 1193 raise self.ParseError(str(e)) | 1050 raise self._ParseError(str(e)) |
| 1194 self.NextToken() | 1051 self.NextToken() |
| 1195 return result | 1052 return result |
| 1196 | 1053 |
| 1197 def ConsumeEnum(self, field): | 1054 def ConsumeEnum(self, field): |
| 1198 try: | 1055 try: |
| 1199 result = ParseEnum(field, self.token) | 1056 result = ParseEnum(field, self.token) |
| 1200 except ValueError as e: | 1057 except ValueError as e: |
| 1201 raise self.ParseError(str(e)) | 1058 raise self._ParseError(str(e)) |
| 1202 self.NextToken() | 1059 self.NextToken() |
| 1203 return result | 1060 return result |
| 1204 | 1061 |
| 1205 def ParseErrorPreviousToken(self, message): | 1062 def ParseErrorPreviousToken(self, message): |
| 1206 """Creates and *returns* a ParseError for the previously read token. | 1063 """Creates and *returns* a ParseError for the previously read token. |
| 1207 | 1064 |
| 1208 Args: | 1065 Args: |
| 1209 message: A message to set for the exception. | 1066 message: A message to set for the exception. |
| 1210 | 1067 |
| 1211 Returns: | 1068 Returns: |
| 1212 A ParseError instance. | 1069 A ParseError instance. |
| 1213 """ | 1070 """ |
| 1214 return ParseError(message, self._previous_line + 1, | 1071 return ParseError('%d:%d : %s' % ( |
| 1215 self._previous_column + 1) | 1072 self._previous_line + 1, self._previous_column + 1, message)) |
| 1216 | 1073 |
| 1217 def ParseError(self, message): | 1074 def _ParseError(self, message): |
| 1218 """Creates and *returns* a ParseError for the current token.""" | 1075 """Creates and *returns* a ParseError for the current token.""" |
| 1219 return ParseError(message, self._line + 1, self._column + 1) | 1076 return ParseError('%d:%d : %s' % ( |
| 1077 self._line + 1, self._column + 1, message)) |
| 1220 | 1078 |
| 1221 def _StringParseError(self, e): | 1079 def _StringParseError(self, e): |
| 1222 return self.ParseError('Couldn\'t parse string: ' + str(e)) | 1080 return self._ParseError('Couldn\'t parse string: ' + str(e)) |
| 1223 | 1081 |
  def NextToken(self):
    """Reads the next meaningful token."""
    # Remember where the token we are leaving began, so that
    # ParseErrorPreviousToken can point at it.
    self._previous_line = self._line
    self._previous_column = self._column

    # Advance past the token just consumed, then skip whitespace (and,
    # depending on tokenizer settings, comments) before the next token.
    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      # End of input is signalled by an empty token.
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if not match and not self._skip_comments:
      # When comments are not skipped, they are surfaced as tokens: try to
      # match one where no ordinary token matched.
      match = self._COMMENT.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # No pattern matched: fall back to a single-character token so the
      # parser can report it as unexpected.
      self.token = self._current_line[self._column]
| 1244 | 1100 |
# Backwards-compatibility alias: external code that reaches into this module's
# private names still refers to the tokenizer as _Tokenizer.
# TODO(dbarnett): Migrate violators to textformat_tokenizer.
_Tokenizer = Tokenizer  # pylint: disable=invalid-name
| 1248 | |
| 1249 | |
def _ConsumeInt32(tokenizer):
  """Reads a signed 32bit integer token from the tokenizer.

  Args:
    tokenizer: Tokenizer positioned at the number to read.

  Returns:
    The parsed integer value.

  Raises:
    ParseError: If the current token is not a valid signed 32bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=False)
| 1263 | |
| 1264 | |
def _ConsumeUint32(tokenizer):
  """Reads an unsigned 32bit integer token from the tokenizer.

  Args:
    tokenizer: Tokenizer positioned at the number to read.

  Returns:
    The parsed integer value.

  Raises:
    ParseError: If the current token is not a valid unsigned 32bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=False)
| 1278 | |
| 1279 | |
def _TryConsumeInt64(tokenizer):
  """Attempts to read a signed 64bit integer; returns True on success."""
  try:
    _ConsumeInt64(tokenizer)
  except ParseError:
    return False
  return True
| 1286 | |
| 1287 | |
def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)
| 1301 | |
| 1302 | |
def _TryConsumeUint64(tokenizer):
  """Attempts to read an unsigned 64bit integer; returns True on success."""
  try:
    _ConsumeUint64(tokenizer)
  except ParseError:
    return False
  return True
| 1309 | |
| 1310 | |
def _ConsumeUint64(tokenizer):
  """Reads an unsigned 64bit integer token from the tokenizer.

  Args:
    tokenizer: Tokenizer positioned at the number to read.

  Returns:
    The parsed integer value.

  Raises:
    ParseError: If the current token is not a valid unsigned 64bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=True)
| 1324 | |
| 1325 | |
def _TryConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Attempts to read an integer of the given kind; returns True on success."""
  try:
    _ConsumeInteger(tokenizer, is_signed=is_signed, is_long=is_long)
  except ParseError:
    return False
  return True
| 1332 | |
| 1333 | |
def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  try:
    value = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long)
  except ValueError as e:
    # Surface the parse failure at the current token's position.
    raise tokenizer.ParseError(str(e))
  tokenizer.NextToken()
  return value
| 1354 | |
| 1355 | 1101 |
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Parse first; a ValueError from the raw parse propagates to the caller.
  value = _ParseAbstractInteger(text, is_long=is_long)

  # Range-check the result; the index picks the checker matching the
  # requested signedness and width. Exceptions handled by callers.
  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
  checker.CheckValue(value)
  return value
| 1377 | |
| 1378 | |
| 1379 def _ParseAbstractInteger(text, is_long=False): | |
| 1380 """Parses an integer without checking size/signedness. | |
| 1381 | |
| 1382 Args: | |
| 1383 text: The text to parse. | |
| 1384 is_long: True if the value should be returned as a long integer. | |
| 1385 | |
| 1386 Returns: | |
| 1387 The integer value. | |
| 1388 | |
| 1389 Raises: | |
| 1390 ValueError: Thrown Iff the text is not a valid integer. | |
| 1391 """ | |
| 1392 # Do the actual parsing. Exception handling is propagated to caller. | |
| 1393 try: | 1117 try: |
| 1394 # We force 32-bit values to int and 64-bit values to long to make | 1118 # We force 32-bit values to int and 64-bit values to long to make |
| 1395 # alternate implementations where the distinction is more significant | 1119 # alternate implementations where the distinction is more significant |
| 1396 # (e.g. the C++ implementation) simpler. | 1120 # (e.g. the C++ implementation) simpler. |
| 1397 if is_long: | 1121 if is_long: |
| 1398 return long(text, 0) | 1122 result = long(text, 0) |
| 1399 else: | 1123 else: |
| 1400 return int(text, 0) | 1124 result = int(text, 0) |
| 1401 except ValueError: | 1125 except ValueError: |
| 1402 raise ValueError('Couldn\'t parse integer: %s' % text) | 1126 raise ValueError('Couldn\'t parse integer: %s' % text) |
| 1403 | 1127 |
| 1128 # Check if the integer is sane. Exceptions handled by callers. |
| 1129 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)] |
| 1130 checker.CheckValue(result) |
| 1131 return result |
| 1132 |
| 1404 | 1133 |
| 1405 def ParseFloat(text): | 1134 def ParseFloat(text): |
| 1406 """Parse a floating point number. | 1135 """Parse a floating point number. |
| 1407 | 1136 |
| 1408 Args: | 1137 Args: |
| 1409 text: Text to parse. | 1138 text: Text to parse. |
| 1410 | 1139 |
| 1411 Returns: | 1140 Returns: |
| 1412 The number parsed. | 1141 The number parsed. |
| 1413 | 1142 |
| (...skipping 25 matching lines...) Expand all Loading... |
| 1439 | 1168 |
| 1440 Args: | 1169 Args: |
| 1441 text: Text to parse. | 1170 text: Text to parse. |
| 1442 | 1171 |
| 1443 Returns: | 1172 Returns: |
| 1444 Boolean values parsed | 1173 Boolean values parsed |
| 1445 | 1174 |
| 1446 Raises: | 1175 Raises: |
| 1447 ValueError: If text is not a valid boolean. | 1176 ValueError: If text is not a valid boolean. |
| 1448 """ | 1177 """ |
| 1449 if text in ('true', 't', '1', 'True'): | 1178 if text in ('true', 't', '1'): |
| 1450 return True | 1179 return True |
| 1451 elif text in ('false', 'f', '0', 'False'): | 1180 elif text in ('false', 'f', '0'): |
| 1452 return False | 1181 return False |
| 1453 else: | 1182 else: |
| 1454 raise ValueError('Expected "true" or "false".') | 1183 raise ValueError('Expected "true" or "false".') |
| 1455 | 1184 |
| 1456 | 1185 |
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_type = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not numeric: treat the value as an enum identifier.
    enum_value = enum_type.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (enum_type.full_name, value))
    return enum_value.number
  # Numeric value: look it up by number.
  enum_value = enum_type.values_by_number.get(number, None)
  if enum_value is None:
    raise ValueError('Enum type "%s" has no value with number %d.' %
                     (enum_type.full_name, number))
  return enum_value.number
| OLD | NEW |