Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(283)

Side by Side Diff: third_party/protobuf/python/google/protobuf/text_format.py

Issue 2495533002: third_party/protobuf: Update to HEAD (83d681ee2c) (Closed)
Patch Set: Make chrome settings proto generated file a component Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Protocol Buffers - Google's data interchange format 1 # Protocol Buffers - Google's data interchange format
2 # Copyright 2008 Google Inc. All rights reserved. 2 # Copyright 2008 Google Inc. All rights reserved.
3 # https://developers.google.com/protocol-buffers/ 3 # https://developers.google.com/protocol-buffers/
4 # 4 #
5 # Redistribution and use in source and binary forms, with or without 5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are 6 # modification, are permitted provided that the following conditions are
7 # met: 7 # met:
8 # 8 #
9 # * Redistributions of source code must retain the above copyright 9 # * Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer. 10 # notice, this list of conditions and the following disclaimer.
(...skipping 30 matching lines...) Expand all
41 """ 41 """
42 42
43 __author__ = 'kenton@google.com (Kenton Varda)' 43 __author__ = 'kenton@google.com (Kenton Varda)'
44 44
45 import io 45 import io
46 import re 46 import re
47 47
48 import six 48 import six
49 49
50 if six.PY3: 50 if six.PY3:
51 long = int 51 long = int # pylint: disable=redefined-builtin,invalid-name
52 52
53 # pylint: disable=g-import-not-at-top
53 from google.protobuf.internal import type_checkers 54 from google.protobuf.internal import type_checkers
54 from google.protobuf import descriptor 55 from google.protobuf import descriptor
55 from google.protobuf import text_encoding 56 from google.protobuf import text_encoding
56 57
57 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', 58 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', 'PrintFieldValue',
58 'PrintFieldValue', 'Merge'] 59 'Merge']
59
60 60
61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), 61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
62 type_checkers.Int32ValueChecker(), 62 type_checkers.Int32ValueChecker(),
63 type_checkers.Uint64ValueChecker(), 63 type_checkers.Uint64ValueChecker(),
64 type_checkers.Int64ValueChecker()) 64 type_checkers.Int64ValueChecker())
65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE) 65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE) 66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT, 67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE]) 68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
69 _QUOTES = frozenset(("'", '"')) 69 _QUOTES = frozenset(("'", '"'))
70 _ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
70 71
71 72
72 class Error(Exception): 73 class Error(Exception):
73 """Top-level module error for text_format.""" 74 """Top-level module error for text_format."""
74 75
75 76
76 class ParseError(Error): 77 class ParseError(Error):
77 """Thrown in case of text parsing error.""" 78 """Thrown in case of text parsing or tokenizing error."""
79
80 def __init__(self, message=None, line=None, column=None):
81 if message is not None and line is not None:
82 loc = str(line)
83 if column is not None:
84 loc += ':{0}'.format(column)
85 message = '{0} : {1}'.format(loc, message)
86 if message is not None:
87 super(ParseError, self).__init__(message)
88 else:
89 super(ParseError, self).__init__()
90 self._line = line
91 self._column = column
92
93 def GetLine(self):
94 return self._line
95
96 def GetColumn(self):
97 return self._column
78 98
79 99
80 class TextWriter(object): 100 class TextWriter(object):
101
81 def __init__(self, as_utf8): 102 def __init__(self, as_utf8):
82 if six.PY2: 103 if six.PY2:
83 self._writer = io.BytesIO() 104 self._writer = io.BytesIO()
84 else: 105 else:
85 self._writer = io.StringIO() 106 self._writer = io.StringIO()
86 107
87 def write(self, val): 108 def write(self, val):
88 if six.PY2: 109 if six.PY2:
89 if isinstance(val, six.text_type): 110 if isinstance(val, six.text_type):
90 val = val.encode('utf-8') 111 val = val.encode('utf-8')
91 return self._writer.write(val) 112 return self._writer.write(val)
92 113
93 def close(self): 114 def close(self):
94 return self._writer.close() 115 return self._writer.close()
95 116
96 def getvalue(self): 117 def getvalue(self):
97 return self._writer.getvalue() 118 return self._writer.getvalue()
98 119
99 120
100 def MessageToString(message, as_utf8=False, as_one_line=False, 121 def MessageToString(message,
101 pointy_brackets=False, use_index_order=False, 122 as_utf8=False,
102 float_format=None, use_field_number=False): 123 as_one_line=False,
124 pointy_brackets=False,
125 use_index_order=False,
126 float_format=None,
127 use_field_number=False,
128 descriptor_pool=None,
129 indent=0):
103 """Convert protobuf message to text format. 130 """Convert protobuf message to text format.
104 131
105 Floating point values can be formatted compactly with 15 digits of 132 Floating point values can be formatted compactly with 15 digits of
106 precision (which is the most that IEEE 754 "double" can guarantee) 133 precision (which is the most that IEEE 754 "double" can guarantee)
107 using float_format='.15g'. To ensure that converting to text and back to a 134 using float_format='.15g'. To ensure that converting to text and back to a
108 proto will result in an identical value, float_format='.17g' should be used. 135 proto will result in an identical value, float_format='.17g' should be used.
109 136
110 Args: 137 Args:
111 message: The protocol buffers message. 138 message: The protocol buffers message.
112 as_utf8: Produce text output in UTF8 format. 139 as_utf8: Produce text output in UTF8 format.
113 as_one_line: Don't introduce newlines between fields. 140 as_one_line: Don't introduce newlines between fields.
114 pointy_brackets: If True, use angle brackets instead of curly braces for 141 pointy_brackets: If True, use angle brackets instead of curly braces for
115 nesting. 142 nesting.
116 use_index_order: If True, print fields of a proto message using the order 143 use_index_order: If True, print fields of a proto message using the order
117 defined in source code instead of the field number. By default, use the 144 defined in source code instead of the field number. By default, use the
118 field number order. 145 field number order.
119 float_format: If set, use this to specify floating point number formatting 146 float_format: If set, use this to specify floating point number formatting
120 (per the "Format Specification Mini-Language"); otherwise, str() is used. 147 (per the "Format Specification Mini-Language"); otherwise, str() is used.
121 use_field_number: If True, print field numbers instead of names. 148 use_field_number: If True, print field numbers instead of names.
149 descriptor_pool: A DescriptorPool used to resolve Any types.
150 indent: The indent level, in terms of spaces, for pretty print.
122 151
123 Returns: 152 Returns:
124 A string of the text formatted protocol buffer message. 153 A string of the text formatted protocol buffer message.
125 """ 154 """
126 out = TextWriter(as_utf8) 155 out = TextWriter(as_utf8)
127 printer = _Printer(out, 0, as_utf8, as_one_line, 156 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets,
128 pointy_brackets, use_index_order, float_format, 157 use_index_order, float_format, use_field_number,
129 use_field_number) 158 descriptor_pool)
130 printer.PrintMessage(message) 159 printer.PrintMessage(message)
131 result = out.getvalue() 160 result = out.getvalue()
132 out.close() 161 out.close()
133 if as_one_line: 162 if as_one_line:
134 return result.rstrip() 163 return result.rstrip()
135 return result 164 return result
136 165
137 166
138 def _IsMapEntry(field): 167 def _IsMapEntry(field):
139 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and 168 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
140 field.message_type.has_options and 169 field.message_type.has_options and
141 field.message_type.GetOptions().map_entry) 170 field.message_type.GetOptions().map_entry)
142 171
143 172
144 def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False, 173 def PrintMessage(message,
145 pointy_brackets=False, use_index_order=False, 174 out,
146 float_format=None, use_field_number=False): 175 indent=0,
147 printer = _Printer(out, indent, as_utf8, as_one_line, 176 as_utf8=False,
148 pointy_brackets, use_index_order, float_format, 177 as_one_line=False,
149 use_field_number) 178 pointy_brackets=False,
179 use_index_order=False,
180 float_format=None,
181 use_field_number=False,
182 descriptor_pool=None):
183 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets,
184 use_index_order, float_format, use_field_number,
185 descriptor_pool)
150 printer.PrintMessage(message) 186 printer.PrintMessage(message)
151 187
152 188
153 def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False, 189 def PrintField(field,
154 pointy_brackets=False, use_index_order=False, float_format=None): 190 value,
191 out,
192 indent=0,
193 as_utf8=False,
194 as_one_line=False,
195 pointy_brackets=False,
196 use_index_order=False,
197 float_format=None):
155 """Print a single field name/value pair.""" 198 """Print a single field name/value pair."""
156 printer = _Printer(out, indent, as_utf8, as_one_line, 199 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets,
157 pointy_brackets, use_index_order, float_format) 200 use_index_order, float_format)
158 printer.PrintField(field, value) 201 printer.PrintField(field, value)
159 202
160 203
161 def PrintFieldValue(field, value, out, indent=0, as_utf8=False, 204 def PrintFieldValue(field,
162 as_one_line=False, pointy_brackets=False, 205 value,
206 out,
207 indent=0,
208 as_utf8=False,
209 as_one_line=False,
210 pointy_brackets=False,
163 use_index_order=False, 211 use_index_order=False,
164 float_format=None): 212 float_format=None):
165 """Print a single field value (not including name).""" 213 """Print a single field value (not including name)."""
166 printer = _Printer(out, indent, as_utf8, as_one_line, 214 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets,
167 pointy_brackets, use_index_order, float_format) 215 use_index_order, float_format)
168 printer.PrintFieldValue(field, value) 216 printer.PrintFieldValue(field, value)
169 217
170 218
219 def _BuildMessageFromTypeName(type_name, descriptor_pool):
220 """Returns a protobuf message instance.
221
222 Args:
223 type_name: Fully-qualified protobuf message type name string.
224 descriptor_pool: DescriptorPool instance.
225
226 Returns:
227 A Message instance of type matching type_name, or None if the a Descriptor
228 wasn't found matching type_name.
229 """
230 # pylint: disable=g-import-not-at-top
231 from google.protobuf import symbol_database
232 database = symbol_database.Default()
233 try:
234 message_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
235 except KeyError:
236 return None
237 message_type = database.GetPrototype(message_descriptor)
238 return message_type()
239
240
171 class _Printer(object): 241 class _Printer(object):
172 """Text format printer for protocol message.""" 242 """Text format printer for protocol message."""
173 243
174 def __init__(self, out, indent=0, as_utf8=False, as_one_line=False, 244 def __init__(self,
175 pointy_brackets=False, use_index_order=False, float_format=None, 245 out,
176 use_field_number=False): 246 indent=0,
247 as_utf8=False,
248 as_one_line=False,
249 pointy_brackets=False,
250 use_index_order=False,
251 float_format=None,
252 use_field_number=False,
253 descriptor_pool=None):
177 """Initialize the Printer. 254 """Initialize the Printer.
178 255
179 Floating point values can be formatted compactly with 15 digits of 256 Floating point values can be formatted compactly with 15 digits of
180 precision (which is the most that IEEE 754 "double" can guarantee) 257 precision (which is the most that IEEE 754 "double" can guarantee)
181 using float_format='.15g'. To ensure that converting to text and back to a 258 using float_format='.15g'. To ensure that converting to text and back to a
182 proto will result in an identical value, float_format='.17g' should be used. 259 proto will result in an identical value, float_format='.17g' should be used.
183 260
184 Args: 261 Args:
185 out: To record the text format result. 262 out: To record the text format result.
186 indent: The indent level for pretty print. 263 indent: The indent level for pretty print.
187 as_utf8: Produce text output in UTF8 format. 264 as_utf8: Produce text output in UTF8 format.
188 as_one_line: Don't introduce newlines between fields. 265 as_one_line: Don't introduce newlines between fields.
189 pointy_brackets: If True, use angle brackets instead of curly braces for 266 pointy_brackets: If True, use angle brackets instead of curly braces for
190 nesting. 267 nesting.
191 use_index_order: If True, print fields of a proto message using the order 268 use_index_order: If True, print fields of a proto message using the order
192 defined in source code instead of the field number. By default, use the 269 defined in source code instead of the field number. By default, use the
193 field number order. 270 field number order.
194 float_format: If set, use this to specify floating point number formatting 271 float_format: If set, use this to specify floating point number formatting
195 (per the "Format Specification Mini-Language"); otherwise, str() is 272 (per the "Format Specification Mini-Language"); otherwise, str() is
196 used. 273 used.
197 use_field_number: If True, print field numbers instead of names. 274 use_field_number: If True, print field numbers instead of names.
275 descriptor_pool: A DescriptorPool used to resolve Any types.
198 """ 276 """
199 self.out = out 277 self.out = out
200 self.indent = indent 278 self.indent = indent
201 self.as_utf8 = as_utf8 279 self.as_utf8 = as_utf8
202 self.as_one_line = as_one_line 280 self.as_one_line = as_one_line
203 self.pointy_brackets = pointy_brackets 281 self.pointy_brackets = pointy_brackets
204 self.use_index_order = use_index_order 282 self.use_index_order = use_index_order
205 self.float_format = float_format 283 self.float_format = float_format
206 self.use_field_number = use_field_number 284 self.use_field_number = use_field_number
285 self.descriptor_pool = descriptor_pool
286
287 def _TryPrintAsAnyMessage(self, message):
288 """Serializes if message is a google.protobuf.Any field."""
289 packed_message = _BuildMessageFromTypeName(message.TypeName(),
290 self.descriptor_pool)
291 if packed_message:
292 packed_message.MergeFromString(message.value)
293 self.out.write('%s[%s]' % (self.indent * ' ', message.type_url))
294 self._PrintMessageFieldValue(packed_message)
295 self.out.write(' ' if self.as_one_line else '\n')
296 return True
297 else:
298 return False
207 299
208 def PrintMessage(self, message): 300 def PrintMessage(self, message):
209 """Convert protobuf message to text format. 301 """Convert protobuf message to text format.
210 302
211 Args: 303 Args:
212 message: The protocol buffers message. 304 message: The protocol buffers message.
213 """ 305 """
306 if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
307 self.descriptor_pool and self._TryPrintAsAnyMessage(message)):
308 return
214 fields = message.ListFields() 309 fields = message.ListFields()
215 if self.use_index_order: 310 if self.use_index_order:
216 fields.sort(key=lambda x: x[0].index) 311 fields.sort(key=lambda x: x[0].index)
217 for field, value in fields: 312 for field, value in fields:
218 if _IsMapEntry(field): 313 if _IsMapEntry(field):
219 for key in sorted(value): 314 for key in sorted(value):
220 # This is slow for maps with submessage entires because it copies the 315 # This is slow for maps with submessage entires because it copies the
221 # entire tree. Unfortunately this would take significant refactoring 316 # entire tree. Unfortunately this would take significant refactoring
222 # of this file to work around. 317 # of this file to work around.
223 # 318 #
224 # TODO(haberman): refactor and optimize if this becomes an issue. 319 # TODO(haberman): refactor and optimize if this becomes an issue.
225 entry_submsg = field.message_type._concrete_class( 320 entry_submsg = value.GetEntryClass()(key=key, value=value[key])
226 key=key, value=value[key])
227 self.PrintField(field, entry_submsg) 321 self.PrintField(field, entry_submsg)
228 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: 322 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
229 for element in value: 323 for element in value:
230 self.PrintField(field, element) 324 self.PrintField(field, element)
231 else: 325 else:
232 self.PrintField(field, value) 326 self.PrintField(field, value)
233 327
234 def PrintField(self, field, value): 328 def PrintField(self, field, value):
235 """Print a single field name/value pair.""" 329 """Print a single field name/value pair."""
236 out = self.out 330 out = self.out
(...skipping 20 matching lines...) Expand all
257 # The colon is optional in this case, but our cross-language golden files 351 # The colon is optional in this case, but our cross-language golden files
258 # don't include it. 352 # don't include it.
259 out.write(': ') 353 out.write(': ')
260 354
261 self.PrintFieldValue(field, value) 355 self.PrintFieldValue(field, value)
262 if self.as_one_line: 356 if self.as_one_line:
263 out.write(' ') 357 out.write(' ')
264 else: 358 else:
265 out.write('\n') 359 out.write('\n')
266 360
361 def _PrintMessageFieldValue(self, value):
362 if self.pointy_brackets:
363 openb = '<'
364 closeb = '>'
365 else:
366 openb = '{'
367 closeb = '}'
368
369 if self.as_one_line:
370 self.out.write(' %s ' % openb)
371 self.PrintMessage(value)
372 self.out.write(closeb)
373 else:
374 self.out.write(' %s\n' % openb)
375 self.indent += 2
376 self.PrintMessage(value)
377 self.indent -= 2
378 self.out.write(' ' * self.indent + closeb)
379
267 def PrintFieldValue(self, field, value): 380 def PrintFieldValue(self, field, value):
268 """Print a single field value (not including name). 381 """Print a single field value (not including name).
269 382
270 For repeated fields, the value should be a single element. 383 For repeated fields, the value should be a single element.
271 384
272 Args: 385 Args:
273 field: The descriptor of the field to be printed. 386 field: The descriptor of the field to be printed.
274 value: The value of the field. 387 value: The value of the field.
275 """ 388 """
276 out = self.out 389 out = self.out
277 if self.pointy_brackets:
278 openb = '<'
279 closeb = '>'
280 else:
281 openb = '{'
282 closeb = '}'
283
284 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 390 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
285 if self.as_one_line: 391 self._PrintMessageFieldValue(value)
286 out.write(' %s ' % openb)
287 self.PrintMessage(value)
288 out.write(closeb)
289 else:
290 out.write(' %s\n' % openb)
291 self.indent += 2
292 self.PrintMessage(value)
293 self.indent -= 2
294 out.write(' ' * self.indent + closeb)
295 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: 392 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
296 enum_value = field.enum_type.values_by_number.get(value, None) 393 enum_value = field.enum_type.values_by_number.get(value, None)
297 if enum_value is not None: 394 if enum_value is not None:
298 out.write(enum_value.name) 395 out.write(enum_value.name)
299 else: 396 else:
300 out.write(str(value)) 397 out.write(str(value))
301 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: 398 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
302 out.write('\"') 399 out.write('\"')
303 if isinstance(value, six.text_type): 400 if isinstance(value, six.text_type):
304 out_value = value.encode('utf-8') 401 out_value = value.encode('utf-8')
(...skipping 10 matching lines...) Expand all
315 if value: 412 if value:
316 out.write('true') 413 out.write('true')
317 else: 414 else:
318 out.write('false') 415 out.write('false')
319 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: 416 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None:
320 out.write('{1:{0}}'.format(self.float_format, value)) 417 out.write('{1:{0}}'.format(self.float_format, value))
321 else: 418 else:
322 out.write(str(value)) 419 out.write(str(value))
323 420
324 421
325 def Parse(text, message, 422 def Parse(text,
326 allow_unknown_extension=False, allow_field_number=False): 423 message,
327 """Parses an text representation of a protocol message into a message. 424 allow_unknown_extension=False,
425 allow_field_number=False):
426 """Parses a text representation of a protocol message into a message.
328 427
329 Args: 428 Args:
330 text: Message text representation. 429 text: Message text representation.
331 message: A protocol buffer message to merge into. 430 message: A protocol buffer message to merge into.
332 allow_unknown_extension: if True, skip over missing extensions and keep 431 allow_unknown_extension: if True, skip over missing extensions and keep
333 parsing 432 parsing
334 allow_field_number: if True, both field number and field name are allowed. 433 allow_field_number: if True, both field number and field name are allowed.
335 434
336 Returns: 435 Returns:
337 The same message passed as argument. 436 The same message passed as argument.
338 437
339 Raises: 438 Raises:
340 ParseError: On text parsing problems. 439 ParseError: On text parsing problems.
341 """ 440 """
342 if not isinstance(text, str): 441 if not isinstance(text, str):
343 text = text.decode('utf-8') 442 text = text.decode('utf-8')
344 return ParseLines(text.split('\n'), message, allow_unknown_extension, 443 return ParseLines(
345 allow_field_number) 444 text.split('\n'), message, allow_unknown_extension, allow_field_number)
346 445
347 446
348 def Merge(text, message, allow_unknown_extension=False, 447 def Merge(text,
349 allow_field_number=False): 448 message,
350 """Parses an text representation of a protocol message into a message. 449 allow_unknown_extension=False,
450 allow_field_number=False,
451 descriptor_pool=None):
452 """Parses a text representation of a protocol message into a message.
351 453
352 Like Parse(), but allows repeated values for a non-repeated field, and uses 454 Like Parse(), but allows repeated values for a non-repeated field, and uses
353 the last one. 455 the last one.
354 456
355 Args: 457 Args:
356 text: Message text representation. 458 text: Message text representation.
357 message: A protocol buffer message to merge into. 459 message: A protocol buffer message to merge into.
358 allow_unknown_extension: if True, skip over missing extensions and keep 460 allow_unknown_extension: if True, skip over missing extensions and keep
359 parsing 461 parsing
360 allow_field_number: if True, both field number and field name are allowed. 462 allow_field_number: if True, both field number and field name are allowed.
463 descriptor_pool: A DescriptorPool used to resolve Any types.
361 464
362 Returns: 465 Returns:
363 The same message passed as argument. 466 The same message passed as argument.
364 467
365 Raises: 468 Raises:
366 ParseError: On text parsing problems. 469 ParseError: On text parsing problems.
367 """ 470 """
368 return MergeLines(text.split('\n'), message, allow_unknown_extension, 471 return MergeLines(
369 allow_field_number) 472 text.split('\n'),
473 message,
474 allow_unknown_extension,
475 allow_field_number,
476 descriptor_pool=descriptor_pool)
370 477
371 478
372 def ParseLines(lines, message, allow_unknown_extension=False, 479 def ParseLines(lines,
480 message,
481 allow_unknown_extension=False,
373 allow_field_number=False): 482 allow_field_number=False):
374 """Parses an text representation of a protocol message into a message. 483 """Parses a text representation of a protocol message into a message.
375 484
376 Args: 485 Args:
377 lines: An iterable of lines of a message's text representation. 486 lines: An iterable of lines of a message's text representation.
378 message: A protocol buffer message to merge into. 487 message: A protocol buffer message to merge into.
379 allow_unknown_extension: if True, skip over missing extensions and keep 488 allow_unknown_extension: if True, skip over missing extensions and keep
380 parsing 489 parsing
381 allow_field_number: if True, both field number and field name are allowed. 490 allow_field_number: if True, both field number and field name are allowed.
491 descriptor_pool: A DescriptorPool used to resolve Any types.
382 492
383 Returns: 493 Returns:
384 The same message passed as argument. 494 The same message passed as argument.
385 495
386 Raises: 496 Raises:
387 ParseError: On text parsing problems. 497 ParseError: On text parsing problems.
388 """ 498 """
389 parser = _Parser(allow_unknown_extension, allow_field_number) 499 parser = _Parser(allow_unknown_extension, allow_field_number)
390 return parser.ParseLines(lines, message) 500 return parser.ParseLines(lines, message)
391 501
392 502
393 def MergeLines(lines, message, allow_unknown_extension=False, 503 def MergeLines(lines,
394 allow_field_number=False): 504 message,
395 """Parses an text representation of a protocol message into a message. 505 allow_unknown_extension=False,
506 allow_field_number=False,
507 descriptor_pool=None):
508 """Parses a text representation of a protocol message into a message.
396 509
397 Args: 510 Args:
398 lines: An iterable of lines of a message's text representation. 511 lines: An iterable of lines of a message's text representation.
399 message: A protocol buffer message to merge into. 512 message: A protocol buffer message to merge into.
400 allow_unknown_extension: if True, skip over missing extensions and keep 513 allow_unknown_extension: if True, skip over missing extensions and keep
401 parsing 514 parsing
402 allow_field_number: if True, both field number and field name are allowed. 515 allow_field_number: if True, both field number and field name are allowed.
403 516
404 Returns: 517 Returns:
405 The same message passed as argument. 518 The same message passed as argument.
406 519
407 Raises: 520 Raises:
408 ParseError: On text parsing problems. 521 ParseError: On text parsing problems.
409 """ 522 """
410 parser = _Parser(allow_unknown_extension, allow_field_number) 523 parser = _Parser(allow_unknown_extension,
524 allow_field_number,
525 descriptor_pool=descriptor_pool)
411 return parser.MergeLines(lines, message) 526 return parser.MergeLines(lines, message)
412 527
413 528
414 class _Parser(object): 529 class _Parser(object):
415 """Text format parser for protocol message.""" 530 """Text format parser for protocol message."""
416 531
417 def __init__(self, allow_unknown_extension=False, allow_field_number=False): 532 def __init__(self,
533 allow_unknown_extension=False,
534 allow_field_number=False,
535 descriptor_pool=None):
418 self.allow_unknown_extension = allow_unknown_extension 536 self.allow_unknown_extension = allow_unknown_extension
419 self.allow_field_number = allow_field_number 537 self.allow_field_number = allow_field_number
538 self.descriptor_pool = descriptor_pool
420 539
421 def ParseFromString(self, text, message): 540 def ParseFromString(self, text, message):
422 """Parses an text representation of a protocol message into a message.""" 541 """Parses a text representation of a protocol message into a message."""
423 if not isinstance(text, str): 542 if not isinstance(text, str):
424 text = text.decode('utf-8') 543 text = text.decode('utf-8')
425 return self.ParseLines(text.split('\n'), message) 544 return self.ParseLines(text.split('\n'), message)
426 545
427 def ParseLines(self, lines, message): 546 def ParseLines(self, lines, message):
428 """Parses an text representation of a protocol message into a message.""" 547 """Parses a text representation of a protocol message into a message."""
429 self._allow_multiple_scalars = False 548 self._allow_multiple_scalars = False
430 self._ParseOrMerge(lines, message) 549 self._ParseOrMerge(lines, message)
431 return message 550 return message
432 551
433 def MergeFromString(self, text, message): 552 def MergeFromString(self, text, message):
434 """Merges an text representation of a protocol message into a message.""" 553 """Merges a text representation of a protocol message into a message."""
435 return self._MergeLines(text.split('\n'), message) 554 return self._MergeLines(text.split('\n'), message)
436 555
437 def MergeLines(self, lines, message): 556 def MergeLines(self, lines, message):
438 """Merges an text representation of a protocol message into a message.""" 557 """Merges a text representation of a protocol message into a message."""
439 self._allow_multiple_scalars = True 558 self._allow_multiple_scalars = True
440 self._ParseOrMerge(lines, message) 559 self._ParseOrMerge(lines, message)
441 return message 560 return message
442 561
443 def _ParseOrMerge(self, lines, message): 562 def _ParseOrMerge(self, lines, message):
444 """Converts an text representation of a protocol message into a message. 563 """Converts a text representation of a protocol message into a message.
445 564
446 Args: 565 Args:
447 lines: Lines of a message's text representation. 566 lines: Lines of a message's text representation.
448 message: A protocol buffer message to merge into. 567 message: A protocol buffer message to merge into.
449 568
450 Raises: 569 Raises:
451 ParseError: On text parsing problems. 570 ParseError: On text parsing problems.
452 """ 571 """
453 tokenizer = _Tokenizer(lines) 572 tokenizer = Tokenizer(lines)
454 while not tokenizer.AtEnd(): 573 while not tokenizer.AtEnd():
455 self._MergeField(tokenizer, message) 574 self._MergeField(tokenizer, message)
456 575
457 def _MergeField(self, tokenizer, message): 576 def _MergeField(self, tokenizer, message):
458 """Merges a single protocol message field into a message. 577 """Merges a single protocol message field into a message.
459 578
460 Args: 579 Args:
461 tokenizer: A tokenizer to parse the field name and values. 580 tokenizer: A tokenizer to parse the field name and values.
462 message: A protocol message to record the data. 581 message: A protocol message to record the data.
463 582
(...skipping 20 matching lines...) Expand all
484 field = message.Extensions._FindExtensionByName(name) 603 field = message.Extensions._FindExtensionByName(name)
485 # pylint: enable=protected-access 604 # pylint: enable=protected-access
486 if not field: 605 if not field:
487 if self.allow_unknown_extension: 606 if self.allow_unknown_extension:
488 field = None 607 field = None
489 else: 608 else:
490 raise tokenizer.ParseErrorPreviousToken( 609 raise tokenizer.ParseErrorPreviousToken(
491 'Extension "%s" not registered.' % name) 610 'Extension "%s" not registered.' % name)
492 elif message_descriptor != field.containing_type: 611 elif message_descriptor != field.containing_type:
493 raise tokenizer.ParseErrorPreviousToken( 612 raise tokenizer.ParseErrorPreviousToken(
494 'Extension "%s" does not extend message type "%s".' % ( 613 'Extension "%s" does not extend message type "%s".' %
495 name, message_descriptor.full_name)) 614 (name, message_descriptor.full_name))
496 615
497 tokenizer.Consume(']') 616 tokenizer.Consume(']')
498 617
499 else: 618 else:
500 name = tokenizer.ConsumeIdentifier() 619 name = tokenizer.ConsumeIdentifierOrNumber()
501 if self.allow_field_number and name.isdigit(): 620 if self.allow_field_number and name.isdigit():
502 number = ParseInteger(name, True, True) 621 number = ParseInteger(name, True, True)
503 field = message_descriptor.fields_by_number.get(number, None) 622 field = message_descriptor.fields_by_number.get(number, None)
504 if not field and message_descriptor.is_extendable: 623 if not field and message_descriptor.is_extendable:
505 field = message.Extensions._FindExtensionByNumber(number) 624 field = message.Extensions._FindExtensionByNumber(number)
506 else: 625 else:
507 field = message_descriptor.fields_by_name.get(name, None) 626 field = message_descriptor.fields_by_name.get(name, None)
508 627
509 # Group names are expected to be capitalized as they appear in the 628 # Group names are expected to be capitalized as they appear in the
510 # .proto file, which actually matches their type names, not their field 629 # .proto file, which actually matches their type names, not their field
511 # names. 630 # names.
512 if not field: 631 if not field:
513 field = message_descriptor.fields_by_name.get(name.lower(), None) 632 field = message_descriptor.fields_by_name.get(name.lower(), None)
514 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: 633 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
515 field = None 634 field = None
516 635
517 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and 636 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
518 field.message_type.name != name): 637 field.message_type.name != name):
519 field = None 638 field = None
520 639
521 if not field: 640 if not field:
522 raise tokenizer.ParseErrorPreviousToken( 641 raise tokenizer.ParseErrorPreviousToken(
523 'Message type "%s" has no field named "%s".' % ( 642 'Message type "%s" has no field named "%s".' %
524 message_descriptor.full_name, name)) 643 (message_descriptor.full_name, name))
525 644
526 if field: 645 if field:
527 if not self._allow_multiple_scalars and field.containing_oneof: 646 if not self._allow_multiple_scalars and field.containing_oneof:
528 # Check if there's a different field set in this oneof. 647 # Check if there's a different field set in this oneof.
529 # Note that we ignore the case if the same field was set before, and we 648 # Note that we ignore the case if the same field was set before, and we
530 # apply _allow_multiple_scalars to non-scalar fields as well. 649 # apply _allow_multiple_scalars to non-scalar fields as well.
531 which_oneof = message.WhichOneof(field.containing_oneof.name) 650 which_oneof = message.WhichOneof(field.containing_oneof.name)
532 if which_oneof is not None and which_oneof != field.name: 651 if which_oneof is not None and which_oneof != field.name:
533 raise tokenizer.ParseErrorPreviousToken( 652 raise tokenizer.ParseErrorPreviousToken(
534 'Field "%s" is specified along with field "%s", another member ' 653 'Field "%s" is specified along with field "%s", another member '
535 'of oneof "%s" for message type "%s".' % ( 654 'of oneof "%s" for message type "%s".' %
536 field.name, which_oneof, field.containing_oneof.name, 655 (field.name, which_oneof, field.containing_oneof.name,
537 message_descriptor.full_name)) 656 message_descriptor.full_name))
538 657
539 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 658 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
540 tokenizer.TryConsume(':') 659 tokenizer.TryConsume(':')
541 merger = self._MergeMessageField 660 merger = self._MergeMessageField
542 else: 661 else:
543 tokenizer.Consume(':') 662 tokenizer.Consume(':')
544 merger = self._MergeScalarField 663 merger = self._MergeScalarField
545 664
546 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED 665 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and
547 and tokenizer.TryConsume('[')): 666 tokenizer.TryConsume('[')):
548 # Short repeated format, e.g. "foo: [1, 2, 3]" 667 # Short repeated format, e.g. "foo: [1, 2, 3]"
549 while True: 668 while True:
550 merger(tokenizer, message, field) 669 merger(tokenizer, message, field)
551 if tokenizer.TryConsume(']'): break 670 if tokenizer.TryConsume(']'):
671 break
552 tokenizer.Consume(',') 672 tokenizer.Consume(',')
553 673
554 else: 674 else:
555 merger(tokenizer, message, field) 675 merger(tokenizer, message, field)
556 676
557 else: # Proto field is unknown. 677 else: # Proto field is unknown.
558 assert self.allow_unknown_extension 678 assert self.allow_unknown_extension
559 _SkipFieldContents(tokenizer) 679 _SkipFieldContents(tokenizer)
560 680
561 # For historical reasons, fields may optionally be separated by commas or 681 # For historical reasons, fields may optionally be separated by commas or
562 # semicolons. 682 # semicolons.
563 if not tokenizer.TryConsume(','): 683 if not tokenizer.TryConsume(','):
564 tokenizer.TryConsume(';') 684 tokenizer.TryConsume(';')
565 685
686 def _ConsumeAnyTypeUrl(self, tokenizer):
687 """Consumes a google.protobuf.Any type URL and returns the type name."""
688 # Consume "type.googleapis.com/".
689 tokenizer.ConsumeIdentifier()
690 tokenizer.Consume('.')
691 tokenizer.ConsumeIdentifier()
692 tokenizer.Consume('.')
693 tokenizer.ConsumeIdentifier()
694 tokenizer.Consume('/')
695 # Consume the fully-qualified type name.
696 name = [tokenizer.ConsumeIdentifier()]
697 while tokenizer.TryConsume('.'):
698 name.append(tokenizer.ConsumeIdentifier())
699 return '.'.join(name)
700
566 def _MergeMessageField(self, tokenizer, message, field): 701 def _MergeMessageField(self, tokenizer, message, field):
567 """Merges a single scalar field into a message. 702 """Merges a single scalar field into a message.
568 703
569 Args: 704 Args:
570 tokenizer: A tokenizer to parse the field value. 705 tokenizer: A tokenizer to parse the field value.
571 message: The message of which field is a member. 706 message: The message of which field is a member.
572 field: The descriptor of the field to be merged. 707 field: The descriptor of the field to be merged.
573 708
574 Raises: 709 Raises:
575 ParseError: In case of text parsing problems. 710 ParseError: In case of text parsing problems.
576 """ 711 """
577 is_map_entry = _IsMapEntry(field) 712 is_map_entry = _IsMapEntry(field)
578 713
579 if tokenizer.TryConsume('<'): 714 if tokenizer.TryConsume('<'):
580 end_token = '>' 715 end_token = '>'
581 else: 716 else:
582 tokenizer.Consume('{') 717 tokenizer.Consume('{')
583 end_token = '}' 718 end_token = '}'
584 719
585 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: 720 if (field.message_type.full_name == _ANY_FULL_TYPE_NAME and
721 tokenizer.TryConsume('[')):
722 packed_type_name = self._ConsumeAnyTypeUrl(tokenizer)
723 tokenizer.Consume(']')
724 tokenizer.TryConsume(':')
725 if tokenizer.TryConsume('<'):
726 expanded_any_end_token = '>'
727 else:
728 tokenizer.Consume('{')
729 expanded_any_end_token = '}'
730 if not self.descriptor_pool:
731 raise ParseError('Descriptor pool required to parse expanded Any field')
732 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name,
733 self.descriptor_pool)
734 if not expanded_any_sub_message:
735 raise ParseError('Type %s not found in descriptor pool' %
736 packed_type_name)
737 while not tokenizer.TryConsume(expanded_any_end_token):
738 if tokenizer.AtEnd():
739 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' %
740 (expanded_any_end_token,))
741 self._MergeField(tokenizer, expanded_any_sub_message)
742 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
743 any_message = getattr(message, field.name).add()
744 else:
745 any_message = getattr(message, field.name)
746 any_message.Pack(expanded_any_sub_message)
747 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
586 if field.is_extension: 748 if field.is_extension:
587 sub_message = message.Extensions[field].add() 749 sub_message = message.Extensions[field].add()
588 elif is_map_entry: 750 elif is_map_entry:
589 # pylint: disable=protected-access 751 sub_message = getattr(message, field.name).GetEntryClass()()
590 sub_message = field.message_type._concrete_class()
591 else: 752 else:
592 sub_message = getattr(message, field.name).add() 753 sub_message = getattr(message, field.name).add()
593 else: 754 else:
594 if field.is_extension: 755 if field.is_extension:
595 sub_message = message.Extensions[field] 756 sub_message = message.Extensions[field]
596 else: 757 else:
597 sub_message = getattr(message, field.name) 758 sub_message = getattr(message, field.name)
598 sub_message.SetInParent() 759 sub_message.SetInParent()
599 760
600 while not tokenizer.TryConsume(end_token): 761 while not tokenizer.TryConsume(end_token):
(...skipping 20 matching lines...) Expand all
621 Raises: 782 Raises:
622 ParseError: In case of text parsing problems. 783 ParseError: In case of text parsing problems.
623 RuntimeError: On runtime errors. 784 RuntimeError: On runtime errors.
624 """ 785 """
625 _ = self.allow_unknown_extension 786 _ = self.allow_unknown_extension
626 value = None 787 value = None
627 788
628 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, 789 if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
629 descriptor.FieldDescriptor.TYPE_SINT32, 790 descriptor.FieldDescriptor.TYPE_SINT32,
630 descriptor.FieldDescriptor.TYPE_SFIXED32): 791 descriptor.FieldDescriptor.TYPE_SFIXED32):
631 value = tokenizer.ConsumeInt32() 792 value = _ConsumeInt32(tokenizer)
632 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, 793 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
633 descriptor.FieldDescriptor.TYPE_SINT64, 794 descriptor.FieldDescriptor.TYPE_SINT64,
634 descriptor.FieldDescriptor.TYPE_SFIXED64): 795 descriptor.FieldDescriptor.TYPE_SFIXED64):
635 value = tokenizer.ConsumeInt64() 796 value = _ConsumeInt64(tokenizer)
636 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, 797 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
637 descriptor.FieldDescriptor.TYPE_FIXED32): 798 descriptor.FieldDescriptor.TYPE_FIXED32):
638 value = tokenizer.ConsumeUint32() 799 value = _ConsumeUint32(tokenizer)
639 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, 800 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
640 descriptor.FieldDescriptor.TYPE_FIXED64): 801 descriptor.FieldDescriptor.TYPE_FIXED64):
641 value = tokenizer.ConsumeUint64() 802 value = _ConsumeUint64(tokenizer)
642 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, 803 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
643 descriptor.FieldDescriptor.TYPE_DOUBLE): 804 descriptor.FieldDescriptor.TYPE_DOUBLE):
644 value = tokenizer.ConsumeFloat() 805 value = tokenizer.ConsumeFloat()
645 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: 806 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
646 value = tokenizer.ConsumeBool() 807 value = tokenizer.ConsumeBool()
647 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: 808 elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
648 value = tokenizer.ConsumeString() 809 value = tokenizer.ConsumeString()
649 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: 810 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
650 value = tokenizer.ConsumeByteString() 811 value = tokenizer.ConsumeByteString()
651 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: 812 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
746 ParseError: In case an invalid field value is found. 907 ParseError: In case an invalid field value is found.
747 """ 908 """
748 # String/bytes tokens can come in multiple adjacent string literals. 909 # String/bytes tokens can come in multiple adjacent string literals.
749 # If we can consume one, consume as many as we can. 910 # If we can consume one, consume as many as we can.
750 if tokenizer.TryConsumeByteString(): 911 if tokenizer.TryConsumeByteString():
751 while tokenizer.TryConsumeByteString(): 912 while tokenizer.TryConsumeByteString():
752 pass 913 pass
753 return 914 return
754 915
755 if (not tokenizer.TryConsumeIdentifier() and 916 if (not tokenizer.TryConsumeIdentifier() and
756 not tokenizer.TryConsumeInt64() and 917 not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and
757 not tokenizer.TryConsumeUint64() and
758 not tokenizer.TryConsumeFloat()): 918 not tokenizer.TryConsumeFloat()):
759 raise ParseError('Invalid field value: ' + tokenizer.token) 919 raise ParseError('Invalid field value: ' + tokenizer.token)
760 920
761 921
762 class _Tokenizer(object): 922 class Tokenizer(object):
763 """Protocol buffer text representation tokenizer. 923 """Protocol buffer text representation tokenizer.
764 924
765 This class handles the lower level string parsing by splitting it into 925 This class handles the lower level string parsing by splitting it into
766 meaningful tokens. 926 meaningful tokens.
767 927
768 It was directly ported from the Java protocol buffer API. 928 It was directly ported from the Java protocol buffer API.
769 """ 929 """
770 930
771 _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE) 931 _WHITESPACE = re.compile(r'\s+')
932 _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
933 _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
772 _TOKEN = re.compile('|'.join([ 934 _TOKEN = re.compile('|'.join([
773 r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier 935 r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier
774 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number 936 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number
775 ] + [ # quoted str for each quote mark 937 ] + [ # quoted str for each quote mark
776 r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES 938 r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES
777 ])) 939 ]))
778 940
779 _IDENTIFIER = re.compile(r'\w+') 941 _IDENTIFIER = re.compile(r'[^\d\W]\w*')
942 _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')
780 943
781 def __init__(self, lines): 944 def __init__(self, lines, skip_comments=True):
782 self._position = 0 945 self._position = 0
783 self._line = -1 946 self._line = -1
784 self._column = 0 947 self._column = 0
785 self._token_start = None 948 self._token_start = None
786 self.token = '' 949 self.token = ''
787 self._lines = iter(lines) 950 self._lines = iter(lines)
788 self._current_line = '' 951 self._current_line = ''
789 self._previous_line = 0 952 self._previous_line = 0
790 self._previous_column = 0 953 self._previous_column = 0
791 self._more_lines = True 954 self._more_lines = True
955 self._skip_comments = skip_comments
956 self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
957 or self._WHITESPACE)
792 self._SkipWhitespace() 958 self._SkipWhitespace()
793 self.NextToken() 959 self.NextToken()
794 960
795 def LookingAt(self, token): 961 def LookingAt(self, token):
796 return self.token == token 962 return self.token == token
797 963
798 def AtEnd(self): 964 def AtEnd(self):
799 """Checks the end of the text was reached. 965 """Checks the end of the text was reached.
800 966
801 Returns: 967 Returns:
802 True iff the end was reached. 968 True iff the end was reached.
803 """ 969 """
804 return not self.token 970 return not self.token
805 971
806 def _PopLine(self): 972 def _PopLine(self):
807 while len(self._current_line) <= self._column: 973 while len(self._current_line) <= self._column:
808 try: 974 try:
809 self._current_line = next(self._lines) 975 self._current_line = next(self._lines)
810 except StopIteration: 976 except StopIteration:
811 self._current_line = '' 977 self._current_line = ''
812 self._more_lines = False 978 self._more_lines = False
813 return 979 return
814 else: 980 else:
815 self._line += 1 981 self._line += 1
816 self._column = 0 982 self._column = 0
817 983
818 def _SkipWhitespace(self): 984 def _SkipWhitespace(self):
819 while True: 985 while True:
820 self._PopLine() 986 self._PopLine()
821 match = self._WHITESPACE.match(self._current_line, self._column) 987 match = self._whitespace_pattern.match(self._current_line, self._column)
822 if not match: 988 if not match:
823 break 989 break
824 length = len(match.group(0)) 990 length = len(match.group(0))
825 self._column += length 991 self._column += length
826 992
827 def TryConsume(self, token): 993 def TryConsume(self, token):
828 """Tries to consume a given piece of text. 994 """Tries to consume a given piece of text.
829 995
830 Args: 996 Args:
831 token: Text to consume. 997 token: Text to consume.
832 998
833 Returns: 999 Returns:
834 True iff the text was consumed. 1000 True iff the text was consumed.
835 """ 1001 """
836 if self.token == token: 1002 if self.token == token:
837 self.NextToken() 1003 self.NextToken()
838 return True 1004 return True
839 return False 1005 return False
840 1006
841 def Consume(self, token): 1007 def Consume(self, token):
842 """Consumes a piece of text. 1008 """Consumes a piece of text.
843 1009
844 Args: 1010 Args:
845 token: Text to consume. 1011 token: Text to consume.
846 1012
847 Raises: 1013 Raises:
848 ParseError: If the text couldn't be consumed. 1014 ParseError: If the text couldn't be consumed.
849 """ 1015 """
850 if not self.TryConsume(token): 1016 if not self.TryConsume(token):
851 raise self._ParseError('Expected "%s".' % token) 1017 raise self.ParseError('Expected "%s".' % token)
1018
1019 def ConsumeComment(self):
1020 result = self.token
1021 if not self._COMMENT.match(result):
1022 raise self.ParseError('Expected comment.')
1023 self.NextToken()
1024 return result
852 1025
853 def TryConsumeIdentifier(self): 1026 def TryConsumeIdentifier(self):
854 try: 1027 try:
855 self.ConsumeIdentifier() 1028 self.ConsumeIdentifier()
856 return True 1029 return True
857 except ParseError: 1030 except ParseError:
858 return False 1031 return False
859 1032
860 def ConsumeIdentifier(self): 1033 def ConsumeIdentifier(self):
861 """Consumes protocol message field identifier. 1034 """Consumes protocol message field identifier.
862 1035
863 Returns: 1036 Returns:
864 Identifier string. 1037 Identifier string.
865 1038
866 Raises: 1039 Raises:
867 ParseError: If an identifier couldn't be consumed. 1040 ParseError: If an identifier couldn't be consumed.
868 """ 1041 """
869 result = self.token 1042 result = self.token
870 if not self._IDENTIFIER.match(result): 1043 if not self._IDENTIFIER.match(result):
871 raise self._ParseError('Expected identifier.') 1044 raise self.ParseError('Expected identifier.')
872 self.NextToken() 1045 self.NextToken()
873 return result 1046 return result
874 1047
875 def ConsumeInt32(self): 1048 def TryConsumeIdentifierOrNumber(self):
876 """Consumes a signed 32bit integer number. 1049 try:
1050 self.ConsumeIdentifierOrNumber()
1051 return True
1052 except ParseError:
1053 return False
877 1054
1055 def ConsumeIdentifierOrNumber(self):
1056 """Consumes protocol message field identifier.
1057
1058 Returns:
1059 Identifier string.
1060
1061 Raises:
1062 ParseError: If an identifier couldn't be consumed.
1063 """
1064 result = self.token
1065 if not self._IDENTIFIER_OR_NUMBER.match(result):
1066 raise self.ParseError('Expected identifier or number.')
1067 self.NextToken()
1068 return result
1069
1070 def TryConsumeInteger(self):
1071 try:
1072 # Note: is_long only affects value type, not whether an error is raised.
1073 self.ConsumeInteger()
1074 return True
1075 except ParseError:
1076 return False
1077
1078 def ConsumeInteger(self, is_long=False):
1079 """Consumes an integer number.
1080
1081 Args:
1082 is_long: True if the value should be returned as a long integer.
878 Returns: 1083 Returns:
879 The integer parsed. 1084 The integer parsed.
880 1085
881 Raises: 1086 Raises:
882 ParseError: If a signed 32bit integer couldn't be consumed. 1087 ParseError: If an integer couldn't be consumed.
883 """ 1088 """
884 try: 1089 try:
885 result = ParseInteger(self.token, is_signed=True, is_long=False) 1090 result = _ParseAbstractInteger(self.token, is_long=is_long)
886 except ValueError as e: 1091 except ValueError as e:
887 raise self._ParseError(str(e)) 1092 raise self.ParseError(str(e))
888 self.NextToken() 1093 self.NextToken()
889 return result 1094 return result
890 1095
891 def ConsumeUint32(self):
892 """Consumes an unsigned 32bit integer number.
893
894 Returns:
895 The integer parsed.
896
897 Raises:
898 ParseError: If an unsigned 32bit integer couldn't be consumed.
899 """
900 try:
901 result = ParseInteger(self.token, is_signed=False, is_long=False)
902 except ValueError as e:
903 raise self._ParseError(str(e))
904 self.NextToken()
905 return result
906
907 def TryConsumeInt64(self):
908 try:
909 self.ConsumeInt64()
910 return True
911 except ParseError:
912 return False
913
914 def ConsumeInt64(self):
915 """Consumes a signed 64bit integer number.
916
917 Returns:
918 The integer parsed.
919
920 Raises:
921 ParseError: If a signed 64bit integer couldn't be consumed.
922 """
923 try:
924 result = ParseInteger(self.token, is_signed=True, is_long=True)
925 except ValueError as e:
926 raise self._ParseError(str(e))
927 self.NextToken()
928 return result
929
930 def TryConsumeUint64(self):
931 try:
932 self.ConsumeUint64()
933 return True
934 except ParseError:
935 return False
936
937 def ConsumeUint64(self):
938 """Consumes an unsigned 64bit integer number.
939
940 Returns:
941 The integer parsed.
942
943 Raises:
944 ParseError: If an unsigned 64bit integer couldn't be consumed.
945 """
946 try:
947 result = ParseInteger(self.token, is_signed=False, is_long=True)
948 except ValueError as e:
949 raise self._ParseError(str(e))
950 self.NextToken()
951 return result
952
953 def TryConsumeFloat(self): 1096 def TryConsumeFloat(self):
954 try: 1097 try:
955 self.ConsumeFloat() 1098 self.ConsumeFloat()
956 return True 1099 return True
957 except ParseError: 1100 except ParseError:
958 return False 1101 return False
959 1102
960 def ConsumeFloat(self): 1103 def ConsumeFloat(self):
961 """Consumes an floating point number. 1104 """Consumes an floating point number.
962 1105
963 Returns: 1106 Returns:
964 The number parsed. 1107 The number parsed.
965 1108
966 Raises: 1109 Raises:
967 ParseError: If a floating point number couldn't be consumed. 1110 ParseError: If a floating point number couldn't be consumed.
968 """ 1111 """
969 try: 1112 try:
970 result = ParseFloat(self.token) 1113 result = ParseFloat(self.token)
971 except ValueError as e: 1114 except ValueError as e:
972 raise self._ParseError(str(e)) 1115 raise self.ParseError(str(e))
973 self.NextToken() 1116 self.NextToken()
974 return result 1117 return result
975 1118
976 def ConsumeBool(self): 1119 def ConsumeBool(self):
977 """Consumes a boolean value. 1120 """Consumes a boolean value.
978 1121
979 Returns: 1122 Returns:
980 The bool parsed. 1123 The bool parsed.
981 1124
982 Raises: 1125 Raises:
983 ParseError: If a boolean value couldn't be consumed. 1126 ParseError: If a boolean value couldn't be consumed.
984 """ 1127 """
985 try: 1128 try:
986 result = ParseBool(self.token) 1129 result = ParseBool(self.token)
987 except ValueError as e: 1130 except ValueError as e:
988 raise self._ParseError(str(e)) 1131 raise self.ParseError(str(e))
989 self.NextToken() 1132 self.NextToken()
990 return result 1133 return result
991 1134
992 def TryConsumeByteString(self): 1135 def TryConsumeByteString(self):
993 try: 1136 try:
994 self.ConsumeByteString() 1137 self.ConsumeByteString()
995 return True 1138 return True
996 except ParseError: 1139 except ParseError:
997 return False 1140 return False
998 1141
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
1032 tokens which are automatically concatenated, like in C or Python. This 1175 tokens which are automatically concatenated, like in C or Python. This
1033 method only consumes one token. 1176 method only consumes one token.
1034 1177
1035 Returns: 1178 Returns:
1036 The token parsed. 1179 The token parsed.
1037 Raises: 1180 Raises:
1038 ParseError: When the wrong format data is found. 1181 ParseError: When the wrong format data is found.
1039 """ 1182 """
1040 text = self.token 1183 text = self.token
1041 if len(text) < 1 or text[0] not in _QUOTES: 1184 if len(text) < 1 or text[0] not in _QUOTES:
1042 raise self._ParseError('Expected string but found: %r' % (text,)) 1185 raise self.ParseError('Expected string but found: %r' % (text,))
1043 1186
1044 if len(text) < 2 or text[-1] != text[0]: 1187 if len(text) < 2 or text[-1] != text[0]:
1045 raise self._ParseError('String missing ending quote: %r' % (text,)) 1188 raise self.ParseError('String missing ending quote: %r' % (text,))
1046 1189
1047 try: 1190 try:
1048 result = text_encoding.CUnescape(text[1:-1]) 1191 result = text_encoding.CUnescape(text[1:-1])
1049 except ValueError as e: 1192 except ValueError as e:
1050 raise self._ParseError(str(e)) 1193 raise self.ParseError(str(e))
1051 self.NextToken() 1194 self.NextToken()
1052 return result 1195 return result
1053 1196
1054 def ConsumeEnum(self, field): 1197 def ConsumeEnum(self, field):
1055 try: 1198 try:
1056 result = ParseEnum(field, self.token) 1199 result = ParseEnum(field, self.token)
1057 except ValueError as e: 1200 except ValueError as e:
1058 raise self._ParseError(str(e)) 1201 raise self.ParseError(str(e))
1059 self.NextToken() 1202 self.NextToken()
1060 return result 1203 return result
1061 1204
1062 def ParseErrorPreviousToken(self, message): 1205 def ParseErrorPreviousToken(self, message):
1063 """Creates and *returns* a ParseError for the previously read token. 1206 """Creates and *returns* a ParseError for the previously read token.
1064 1207
1065 Args: 1208 Args:
1066 message: A message to set for the exception. 1209 message: A message to set for the exception.
1067 1210
1068 Returns: 1211 Returns:
1069 A ParseError instance. 1212 A ParseError instance.
1070 """ 1213 """
1071 return ParseError('%d:%d : %s' % ( 1214 return ParseError(message, self._previous_line + 1,
1072 self._previous_line + 1, self._previous_column + 1, message)) 1215 self._previous_column + 1)
1073 1216
1074 def _ParseError(self, message): 1217 def ParseError(self, message):
1075 """Creates and *returns* a ParseError for the current token.""" 1218 """Creates and *returns* a ParseError for the current token."""
1076 return ParseError('%d:%d : %s' % ( 1219 return ParseError(message, self._line + 1, self._column + 1)
1077 self._line + 1, self._column + 1, message))
1078 1220
1079 def _StringParseError(self, e): 1221 def _StringParseError(self, e):
1080 return self._ParseError('Couldn\'t parse string: ' + str(e)) 1222 return self.ParseError('Couldn\'t parse string: ' + str(e))
1081 1223
1082 def NextToken(self): 1224 def NextToken(self):
1083 """Reads the next meaningful token.""" 1225 """Reads the next meaningful token."""
1084 self._previous_line = self._line 1226 self._previous_line = self._line
1085 self._previous_column = self._column 1227 self._previous_column = self._column
1086 1228
1087 self._column += len(self.token) 1229 self._column += len(self.token)
1088 self._SkipWhitespace() 1230 self._SkipWhitespace()
1089 1231
1090 if not self._more_lines: 1232 if not self._more_lines:
1091 self.token = '' 1233 self.token = ''
1092 return 1234 return
1093 1235
1094 match = self._TOKEN.match(self._current_line, self._column) 1236 match = self._TOKEN.match(self._current_line, self._column)
1237 if not match and not self._skip_comments:
1238 match = self._COMMENT.match(self._current_line, self._column)
1095 if match: 1239 if match:
1096 token = match.group(0) 1240 token = match.group(0)
1097 self.token = token 1241 self.token = token
1098 else: 1242 else:
1099 self.token = self._current_line[self._column] 1243 self.token = self._current_line[self._column]
1100 1244
1245 # Aliased so it can still be accessed by current visibility violators.
1246 # TODO(dbarnett): Migrate violators to textformat_tokenizer.
1247 _Tokenizer = Tokenizer # pylint: disable=invalid-name
1248
1249
1250 def _ConsumeInt32(tokenizer):
1251 """Consumes a signed 32bit integer number from tokenizer.
1252
1253 Args:
1254 tokenizer: A tokenizer used to parse the number.
1255
1256 Returns:
1257 The integer parsed.
1258
1259 Raises:
1260 ParseError: If a signed 32bit integer couldn't be consumed.
1261 """
1262 return _ConsumeInteger(tokenizer, is_signed=True, is_long=False)
1263
1264
1265 def _ConsumeUint32(tokenizer):
1266 """Consumes an unsigned 32bit integer number from tokenizer.
1267
1268 Args:
1269 tokenizer: A tokenizer used to parse the number.
1270
1271 Returns:
1272 The integer parsed.
1273
1274 Raises:
1275 ParseError: If an unsigned 32bit integer couldn't be consumed.
1276 """
1277 return _ConsumeInteger(tokenizer, is_signed=False, is_long=False)
1278
1279
1280 def _TryConsumeInt64(tokenizer):
1281 try:
1282 _ConsumeInt64(tokenizer)
1283 return True
1284 except ParseError:
1285 return False
1286
1287
1288 def _ConsumeInt64(tokenizer):
1289 """Consumes a signed 32bit integer number from tokenizer.
1290
1291 Args:
1292 tokenizer: A tokenizer used to parse the number.
1293
1294 Returns:
1295 The integer parsed.
1296
1297 Raises:
1298 ParseError: If a signed 32bit integer couldn't be consumed.
1299 """
1300 return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)
1301
1302
1303 def _TryConsumeUint64(tokenizer):
1304 try:
1305 _ConsumeUint64(tokenizer)
1306 return True
1307 except ParseError:
1308 return False
1309
1310
1311 def _ConsumeUint64(tokenizer):
1312 """Consumes an unsigned 64bit integer number from tokenizer.
1313
1314 Args:
1315 tokenizer: A tokenizer used to parse the number.
1316
1317 Returns:
1318 The integer parsed.
1319
1320 Raises:
1321 ParseError: If an unsigned 64bit integer couldn't be consumed.
1322 """
1323 return _ConsumeInteger(tokenizer, is_signed=False, is_long=True)
1324
1325
1326 def _TryConsumeInteger(tokenizer, is_signed=False, is_long=False):
1327 try:
1328 _ConsumeInteger(tokenizer, is_signed=is_signed, is_long=is_long)
1329 return True
1330 except ParseError:
1331 return False
1332
1333
1334 def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
1335 """Consumes an integer number from tokenizer.
1336
1337 Args:
1338 tokenizer: A tokenizer used to parse the number.
1339 is_signed: True if a signed integer must be parsed.
1340 is_long: True if a long integer must be parsed.
1341
1342 Returns:
1343 The integer parsed.
1344
1345 Raises:
1346 ParseError: If an integer with given characteristics couldn't be consumed.
1347 """
1348 try:
1349 result = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long)
1350 except ValueError as e:
1351 raise tokenizer.ParseError(str(e))
1352 tokenizer.NextToken()
1353 return result
1354
1101 1355
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Parse the raw number first; a ValueError here propagates to the caller.
  value = _ParseAbstractInteger(text, is_long=is_long)

  # Pick the range checker matching (width, signedness) and validate.
  # Checkers are laid out as [u32, i32, u64, i64]; a range violation also
  # propagates to the caller as ValueError.
  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
  checker.CheckValue(value)
  return value
1378
1379 def _ParseAbstractInteger(text, is_long=False):
1380 """Parses an integer without checking size/signedness.
1381
1382 Args:
1383 text: The text to parse.
1384 is_long: True if the value should be returned as a long integer.
1385
1386 Returns:
1387 The integer value.
1388
1389 Raises:
1390 ValueError: Thrown Iff the text is not a valid integer.
1391 """
1392 # Do the actual parsing. Exception handling is propagated to caller.
1117 try: 1393 try:
1118 # We force 32-bit values to int and 64-bit values to long to make 1394 # We force 32-bit values to int and 64-bit values to long to make
1119 # alternate implementations where the distinction is more significant 1395 # alternate implementations where the distinction is more significant
1120 # (e.g. the C++ implementation) simpler. 1396 # (e.g. the C++ implementation) simpler.
1121 if is_long: 1397 if is_long:
1122 result = long(text, 0) 1398 return long(text, 0)
1123 else: 1399 else:
1124 result = int(text, 0) 1400 return int(text, 0)
1125 except ValueError: 1401 except ValueError:
1126 raise ValueError('Couldn\'t parse integer: %s' % text) 1402 raise ValueError('Couldn\'t parse integer: %s' % text)
1127 1403
1128 # Check if the integer is sane. Exceptions handled by callers.
1129 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
1130 checker.CheckValue(result)
1131 return result
1132
1133 1404
1134 def ParseFloat(text): 1405 def ParseFloat(text):
1135 """Parse a floating point number. 1406 """Parse a floating point number.
1136 1407
1137 Args: 1408 Args:
1138 text: Text to parse. 1409 text: Text to parse.
1139 1410
1140 Returns: 1411 Returns:
1141 The number parsed. 1412 The number parsed.
1142 1413
(...skipping 25 matching lines...) Expand all
1168 1439
1169 Args: 1440 Args:
1170 text: Text to parse. 1441 text: Text to parse.
1171 1442
1172 Returns: 1443 Returns:
1173 Boolean values parsed 1444 Boolean values parsed
1174 1445
1175 Raises: 1446 Raises:
1176 ValueError: If text is not a valid boolean. 1447 ValueError: If text is not a valid boolean.
1177 """ 1448 """
1178 if text in ('true', 't', '1'): 1449 if text in ('true', 't', '1', 'True'):
1179 return True 1450 return True
1180 elif text in ('false', 'f', '0'): 1451 elif text in ('false', 'f', '0', 'False'):
1181 return False 1452 return False
1182 else: 1453 else:
1183 raise ValueError('Expected "true" or "false".') 1454 raise ValueError('Expected "true" or "false".')
1184 1455
1185 1456
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  descriptor = field.enum_type
  # Base 0 accepts decimal, hex and octal spellings of the number.
  try:
    number = int(value, 0)
  except ValueError:
    number = None
  if number is None:
    # Not numeric: look the value up by its identifier.
    enum_value = descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (descriptor.full_name, value))
    return enum_value.number
  # Numeric value: look it up by number.
  enum_value = descriptor.values_by_number.get(number, None)
  if enum_value is None:
    raise ValueError('Enum type "%s" has no value with number %d.' %
                     (descriptor.full_name, number))
  return enum_value.number
OLDNEW
« no previous file with comments | « third_party/protobuf/python/google/protobuf/symbol_database.py ('k') | third_party/protobuf/python/setup.cfg » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698