Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(121)

Side by Side Diff: recipe_engine/third_party/google/protobuf/text_format.py

Issue 2236673002: Bump vendoring, move to proto3 release. (Closed) Base URL: https://github.com/luci/recipes-py@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Protocol Buffers - Google's data interchange format 1 # Protocol Buffers - Google's data interchange format
2 # Copyright 2008 Google Inc. All rights reserved. 2 # Copyright 2008 Google Inc. All rights reserved.
3 # https://developers.google.com/protocol-buffers/ 3 # https://developers.google.com/protocol-buffers/
4 # 4 #
5 # Redistribution and use in source and binary forms, with or without 5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are 6 # modification, are permitted provided that the following conditions are
7 # met: 7 # met:
8 # 8 #
9 # * Redistributions of source code must retain the above copyright 9 # * Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer. 10 # notice, this list of conditions and the following disclaimer.
(...skipping 30 matching lines...) Expand all
41 """ 41 """
42 42
43 __author__ = 'kenton@google.com (Kenton Varda)' 43 __author__ = 'kenton@google.com (Kenton Varda)'
44 44
45 import io 45 import io
46 import re 46 import re
47 47
48 import six 48 import six
49 49
50 if six.PY3: 50 if six.PY3:
51 long = int 51 long = int # pylint: disable=redefined-builtin,invalid-name
52 52
53 # pylint: disable=g-import-not-at-top
53 from google.protobuf.internal import type_checkers 54 from google.protobuf.internal import type_checkers
54 from google.protobuf import descriptor 55 from google.protobuf import descriptor
55 from google.protobuf import text_encoding 56 from google.protobuf import text_encoding
56 57
57 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', 58 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', 'PrintFieldValue',
58 'PrintFieldValue', 'Merge'] 59 'Merge']
59
60 60
61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), 61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
62 type_checkers.Int32ValueChecker(), 62 type_checkers.Int32ValueChecker(),
63 type_checkers.Uint64ValueChecker(), 63 type_checkers.Uint64ValueChecker(),
64 type_checkers.Int64ValueChecker()) 64 type_checkers.Int64ValueChecker())
65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE) 65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE) 66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT, 67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE]) 68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
69 _QUOTES = frozenset(("'", '"')) 69 _QUOTES = frozenset(("'", '"'))
70 _ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
70 71
71 72
class Error(Exception):
  """Top-level module error for text_format."""


class ParseError(Error):
  """Thrown in case of text parsing or tokenizing error."""

  def __init__(self, message=None, line=None, column=None):
    # Remember the location regardless of whether a message was supplied,
    # so GetLine()/GetColumn() always reflect the constructor arguments.
    self._line = line
    self._column = column
    if message is None:
      super(ParseError, self).__init__()
      return
    # Prefix the message with "line[:column] : " when a location is known.
    if line is not None:
      loc = str(line)
      if column is not None:
        loc += ':{0}'.format(column)
      message = '{0} : {1}'.format(loc, message)
    super(ParseError, self).__init__(message)

  def GetLine(self):
    """Returns the 1-based line number passed at construction, or None."""
    return self._line

  def GetColumn(self):
    """Returns the column number passed at construction, or None."""
    return self._column
78 98
79 99
class TextWriter(object):
  """In-memory writer hiding the bytes/text difference between PY2 and PY3."""

  def __init__(self, as_utf8):
    # `as_utf8` is accepted for interface compatibility; the actual encoding
    # decision is made per-write on Python 2.
    self._writer = io.BytesIO() if six.PY2 else io.StringIO()

  def write(self, val):
    # The Python 2 backing buffer stores bytes, so encode unicode input.
    if six.PY2 and isinstance(val, six.text_type):
      val = val.encode('utf-8')
    return self._writer.write(val)

  def close(self):
    return self._writer.close()

  def getvalue(self):
    return self._writer.getvalue()
98 119
99 120
def MessageToString(message,
                    as_utf8=False,
                    as_one_line=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None,
                    use_field_number=False,
                    descriptor_pool=None,
                    indent=0):
  """Converts a protobuf message to the text format and returns it as a string.

  Floating point values can be formatted compactly with 15 digits of
  precision (which is the most that IEEE 754 "double" can guarantee)
  using float_format='.15g'. To ensure that converting to text and back to a
  proto will result in an identical value, float_format='.17g' should be used.

  Args:
    message: The protocol buffers message.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    indent: The indent level, in terms of spaces, for pretty print.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  out = TextWriter(as_utf8)
  printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets,
                     use_index_order, float_format, use_field_number,
                     descriptor_pool)
  printer.PrintMessage(message)
  result = out.getvalue()
  out.close()
  # The one-line form leaves a trailing separator space; strip it off.
  return result.rstrip() if as_one_line else result
136 165
137 166
def _IsMapEntry(field):
  """Returns True if `field` is the auto-generated entry message of a map."""
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  entry_type = field.message_type
  return entry_type.has_options and entry_type.GetOptions().map_entry
142 171
143 172
def PrintMessage(message,
                 out,
                 indent=0,
                 as_utf8=False,
                 as_one_line=False,
                 pointy_brackets=False,
                 use_index_order=False,
                 float_format=None,
                 use_field_number=False,
                 descriptor_pool=None):
  """Writes the text-format rendering of `message` to the writer `out`."""
  printer = _Printer(out, indent, as_utf8, as_one_line,
                     pointy_brackets=pointy_brackets,
                     use_index_order=use_index_order,
                     float_format=float_format,
                     use_field_number=use_field_number,
                     descriptor_pool=descriptor_pool)
  printer.PrintMessage(message)
151 187
152 188
def PrintField(field,
               value,
               out,
               indent=0,
               as_utf8=False,
               as_one_line=False,
               pointy_brackets=False,
               use_index_order=False,
               float_format=None):
  """Prints a single field name/value pair to the writer `out`."""
  printer = _Printer(out, indent, as_utf8, as_one_line,
                     pointy_brackets=pointy_brackets,
                     use_index_order=use_index_order,
                     float_format=float_format)
  printer.PrintField(field, value)
159 202
160 203
def PrintFieldValue(field,
                    value,
                    out,
                    indent=0,
                    as_utf8=False,
                    as_one_line=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None):
  """Prints a single field value (not including the field name) to `out`."""
  printer = _Printer(out, indent, as_utf8, as_one_line,
                     pointy_brackets=pointy_brackets,
                     use_index_order=use_index_order,
                     float_format=float_format)
  printer.PrintFieldValue(field, value)
169 217
170 218
def _BuildMessageFromTypeName(type_name, descriptor_pool):
  """Returns a fresh protobuf message instance for a type name.

  Args:
    type_name: Fully-qualified protobuf message type name string.
    descriptor_pool: DescriptorPool instance.

  Returns:
    A Message instance of the type matching type_name, or None if no
    Descriptor matching type_name was found in descriptor_pool.
  """
  # Imported locally to avoid a cycle at module-import time.
  # pylint: disable=g-import-not-at-top
  from google.protobuf import message_factory
  factory = message_factory.MessageFactory(descriptor_pool)
  try:
    message_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
  except KeyError:
    # Unknown type: the caller treats None as "could not resolve".
    return None
  return factory.GetPrototype(message_descriptor)()
239
240
171 class _Printer(object): 241 class _Printer(object):
172 """Text format printer for protocol message.""" 242 """Text format printer for protocol message."""
173 243
  def __init__(self,
               out,
               indent=0,
               as_utf8=False,
               as_one_line=False,
               pointy_brackets=False,
               use_index_order=False,
               float_format=None,
               use_field_number=False,
               descriptor_pool=None):
    """Initialize the Printer.

    Floating point values can be formatted compactly with 15 digits of
    precision (which is the most that IEEE 754 "double" can guarantee)
    using float_format='.15g'. To ensure that converting to text and back to a
    proto will result in an identical value, float_format='.17g' should be used.

    Args:
      out: To record the text format result.
      indent: The indent level for pretty print.
      as_utf8: Produce text output in UTF8 format.
      as_one_line: Don't introduce newlines between fields.
      pointy_brackets: If True, use angle brackets instead of curly braces for
        nesting.
      use_index_order: If True, print fields of a proto message using the order
        defined in source code instead of the field number. By default, use the
        field number order.
      float_format: If set, use this to specify floating point number formatting
        (per the "Format Specification Mini-Language"); otherwise, str() is
        used.
      use_field_number: If True, print field numbers instead of names.
      descriptor_pool: A DescriptorPool used to resolve Any types.
    """
    self.out = out
    self.indent = indent
    self.as_utf8 = as_utf8
    self.as_one_line = as_one_line
    self.pointy_brackets = pointy_brackets
    self.use_index_order = use_index_order
    self.float_format = float_format
    self.use_field_number = use_field_number
    self.descriptor_pool = descriptor_pool
286
287 def _TryPrintAsAnyMessage(self, message):
288 """Serializes if message is a google.protobuf.Any field."""
289 packed_message = _BuildMessageFromTypeName(message.TypeName(),
290 self.descriptor_pool)
291 if packed_message:
292 packed_message.MergeFromString(message.value)
293 self.out.write('%s[%s]' % (self.indent * ' ', message.type_url))
294 self._PrintMessageFieldValue(packed_message)
295 self.out.write(' ' if self.as_one_line else '\n')
296 return True
297 else:
298 return False
207 299
  def PrintMessage(self, message):
    """Convert protobuf message to text format.

    Args:
      message: The protocol buffers message.
    """
    # When the message is a google.protobuf.Any and a descriptor pool is
    # available, try to print it with its packed payload expanded; fall
    # through to regular field printing if the packed type is unknown.
    if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
        self.descriptor_pool and self._TryPrintAsAnyMessage(message)):
      return
    fields = message.ListFields()
    if self.use_index_order:
      fields.sort(key=lambda x: x[0].index)
    for field, value in fields:
      if _IsMapEntry(field):
        for key in sorted(value):
          # This is slow for maps with submessage entries because it copies the
          # entire tree. Unfortunately this would take significant refactoring
          # of this file to work around.
          #
          # TODO(haberman): refactor and optimize if this becomes an issue.
          entry_submsg = field.message_type._concrete_class(key=key,
                                                            value=value[key])
          self.PrintField(field, entry_submsg)
      elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
        for element in value:
          self.PrintField(field, element)
      else:
        self.PrintField(field, value)
233 328
234 def PrintField(self, field, value): 329 def PrintField(self, field, value):
235 """Print a single field name/value pair.""" 330 """Print a single field name/value pair."""
236 out = self.out 331 out = self.out
(...skipping 20 matching lines...) Expand all
257 # The colon is optional in this case, but our cross-language golden files 352 # The colon is optional in this case, but our cross-language golden files
258 # don't include it. 353 # don't include it.
259 out.write(': ') 354 out.write(': ')
260 355
261 self.PrintFieldValue(field, value) 356 self.PrintFieldValue(field, value)
262 if self.as_one_line: 357 if self.as_one_line:
263 out.write(' ') 358 out.write(' ')
264 else: 359 else:
265 out.write('\n') 360 out.write('\n')
266 361
362 def _PrintMessageFieldValue(self, value):
363 if self.pointy_brackets:
364 openb = '<'
365 closeb = '>'
366 else:
367 openb = '{'
368 closeb = '}'
369
370 if self.as_one_line:
371 self.out.write(' %s ' % openb)
372 self.PrintMessage(value)
373 self.out.write(closeb)
374 else:
375 self.out.write(' %s\n' % openb)
376 self.indent += 2
377 self.PrintMessage(value)
378 self.indent -= 2
379 self.out.write(' ' * self.indent + closeb)
380
267 def PrintFieldValue(self, field, value): 381 def PrintFieldValue(self, field, value):
268 """Print a single field value (not including name). 382 """Print a single field value (not including name).
269 383
270 For repeated fields, the value should be a single element. 384 For repeated fields, the value should be a single element.
271 385
272 Args: 386 Args:
273 field: The descriptor of the field to be printed. 387 field: The descriptor of the field to be printed.
274 value: The value of the field. 388 value: The value of the field.
275 """ 389 """
276 out = self.out 390 out = self.out
277 if self.pointy_brackets:
278 openb = '<'
279 closeb = '>'
280 else:
281 openb = '{'
282 closeb = '}'
283
284 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 391 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
285 if self.as_one_line: 392 self._PrintMessageFieldValue(value)
286 out.write(' %s ' % openb)
287 self.PrintMessage(value)
288 out.write(closeb)
289 else:
290 out.write(' %s\n' % openb)
291 self.indent += 2
292 self.PrintMessage(value)
293 self.indent -= 2
294 out.write(' ' * self.indent + closeb)
295 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: 393 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
296 enum_value = field.enum_type.values_by_number.get(value, None) 394 enum_value = field.enum_type.values_by_number.get(value, None)
297 if enum_value is not None: 395 if enum_value is not None:
298 out.write(enum_value.name) 396 out.write(enum_value.name)
299 else: 397 else:
300 out.write(str(value)) 398 out.write(str(value))
301 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: 399 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
302 out.write('\"') 400 out.write('\"')
303 if isinstance(value, six.text_type): 401 if isinstance(value, six.text_type):
304 out_value = value.encode('utf-8') 402 out_value = value.encode('utf-8')
(...skipping 10 matching lines...) Expand all
315 if value: 413 if value:
316 out.write('true') 414 out.write('true')
317 else: 415 else:
318 out.write('false') 416 out.write('false')
319 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: 417 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None:
320 out.write('{1:{0}}'.format(self.float_format, value)) 418 out.write('{1:{0}}'.format(self.float_format, value))
321 else: 419 else:
322 out.write(str(value)) 420 out.write(str(value))
323 421
324 422
def Parse(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False):
  """Parses a text representation of a protocol message into a message.

  Args:
    text: Message text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Accept bytes input by decoding to text first.
  if not isinstance(text, str):
    text = text.decode('utf-8')
  return ParseLines(text.split('\n'), message, allow_unknown_extension,
                    allow_field_number)
346 446
347 447
def Merge(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None):
  """Parses a text representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.

  Args:
    text: Message text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  lines = text.split('\n')
  return MergeLines(lines,
                    message,
                    allow_unknown_extension,
                    allow_field_number,
                    descriptor_pool=descriptor_pool)
370 478
371 479
def ParseLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False):
  """Parses a text representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # NOTE(review): unlike MergeLines, this function takes no descriptor_pool
  # parameter; the previous docstring documented one that does not exist, and
  # that stray Args entry has been removed.
  parser = _Parser(allow_unknown_extension, allow_field_number)
  return parser.ParseLines(lines, message)
391 502
392 503
def MergeLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None):
  """Parses a text representation of a protocol message into a message.

  Like ParseLines(), but allows repeated values for a non-repeated field, and
  uses the last one.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Docstring fix: descriptor_pool is part of the signature but was missing
  # from the Args section.
  parser = _Parser(allow_unknown_extension,
                   allow_field_number,
                   descriptor_pool=descriptor_pool)
  return parser.MergeLines(lines, message)
412 528
413 529
414 class _Parser(object): 530 class _Parser(object):
415 """Text format parser for protocol message.""" 531 """Text format parser for protocol message."""
416 532
  def __init__(self,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None):
    # If True, unregistered extensions found in the text are skipped instead
    # of raising a ParseError.
    self.allow_unknown_extension = allow_unknown_extension
    # If True, fields may be identified by number as well as by name.
    self.allow_field_number = allow_field_number
    # Optional DescriptorPool used to resolve Any types.
    self.descriptor_pool = descriptor_pool
420 540
421 def ParseFromString(self, text, message): 541 def ParseFromString(self, text, message):
422 """Parses an text representation of a protocol message into a message.""" 542 """Parses a text representation of a protocol message into a message."""
423 if not isinstance(text, str): 543 if not isinstance(text, str):
424 text = text.decode('utf-8') 544 text = text.decode('utf-8')
425 return self.ParseLines(text.split('\n'), message) 545 return self.ParseLines(text.split('\n'), message)
426 546
  def ParseLines(self, lines, message):
    """Parses a text representation of a protocol message into a message."""
    # Parse semantics: a repeated value for a non-repeated scalar is an error
    # (contrast with MergeLines, which allows it).
    self._allow_multiple_scalars = False
    self._ParseOrMerge(lines, message)
    return message
432 552
433 def MergeFromString(self, text, message): 553 def MergeFromString(self, text, message):
434 """Merges an text representation of a protocol message into a message.""" 554 """Merges a text representation of a protocol message into a message."""
435 return self._MergeLines(text.split('\n'), message) 555 return self._MergeLines(text.split('\n'), message)
436 556
  def MergeLines(self, lines, message):
    """Merges a text representation of a protocol message into a message."""
    # Merge semantics: a later value for a non-repeated scalar overwrites the
    # earlier one instead of raising a ParseError.
    self._allow_multiple_scalars = True
    self._ParseOrMerge(lines, message)
    return message
442 562
  def _ParseOrMerge(self, lines, message):
    """Converts a text representation of a protocol message into a message.

    Args:
      lines: Lines of a message's text representation.
      message: A protocol buffer message to merge into.

    Raises:
      ParseError: On text parsing problems.
    """
    # Repeatedly merge top-level fields until the tokenizer is exhausted;
    # _allow_multiple_scalars (set by the caller) controls parse vs. merge
    # semantics inside _MergeField.
    tokenizer = Tokenizer(lines)
    while not tokenizer.AtEnd():
      self._MergeField(tokenizer, message)
456 576
457 def _MergeField(self, tokenizer, message): 577 def _MergeField(self, tokenizer, message):
458 """Merges a single protocol message field into a message. 578 """Merges a single protocol message field into a message.
459 579
460 Args: 580 Args:
461 tokenizer: A tokenizer to parse the field name and values. 581 tokenizer: A tokenizer to parse the field name and values.
462 message: A protocol message to record the data. 582 message: A protocol message to record the data.
463 583
(...skipping 20 matching lines...) Expand all
484 field = message.Extensions._FindExtensionByName(name) 604 field = message.Extensions._FindExtensionByName(name)
485 # pylint: enable=protected-access 605 # pylint: enable=protected-access
486 if not field: 606 if not field:
487 if self.allow_unknown_extension: 607 if self.allow_unknown_extension:
488 field = None 608 field = None
489 else: 609 else:
490 raise tokenizer.ParseErrorPreviousToken( 610 raise tokenizer.ParseErrorPreviousToken(
491 'Extension "%s" not registered.' % name) 611 'Extension "%s" not registered.' % name)
492 elif message_descriptor != field.containing_type: 612 elif message_descriptor != field.containing_type:
493 raise tokenizer.ParseErrorPreviousToken( 613 raise tokenizer.ParseErrorPreviousToken(
494 'Extension "%s" does not extend message type "%s".' % ( 614 'Extension "%s" does not extend message type "%s".' %
495 name, message_descriptor.full_name)) 615 (name, message_descriptor.full_name))
496 616
497 tokenizer.Consume(']') 617 tokenizer.Consume(']')
498 618
499 else: 619 else:
500 name = tokenizer.ConsumeIdentifier() 620 name = tokenizer.ConsumeIdentifierOrNumber()
501 if self.allow_field_number and name.isdigit(): 621 if self.allow_field_number and name.isdigit():
502 number = ParseInteger(name, True, True) 622 number = ParseInteger(name, True, True)
503 field = message_descriptor.fields_by_number.get(number, None) 623 field = message_descriptor.fields_by_number.get(number, None)
504 if not field and message_descriptor.is_extendable: 624 if not field and message_descriptor.is_extendable:
505 field = message.Extensions._FindExtensionByNumber(number) 625 field = message.Extensions._FindExtensionByNumber(number)
506 else: 626 else:
507 field = message_descriptor.fields_by_name.get(name, None) 627 field = message_descriptor.fields_by_name.get(name, None)
508 628
509 # Group names are expected to be capitalized as they appear in the 629 # Group names are expected to be capitalized as they appear in the
510 # .proto file, which actually matches their type names, not their field 630 # .proto file, which actually matches their type names, not their field
511 # names. 631 # names.
512 if not field: 632 if not field:
513 field = message_descriptor.fields_by_name.get(name.lower(), None) 633 field = message_descriptor.fields_by_name.get(name.lower(), None)
514 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: 634 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
515 field = None 635 field = None
516 636
517 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and 637 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
518 field.message_type.name != name): 638 field.message_type.name != name):
519 field = None 639 field = None
520 640
521 if not field: 641 if not field:
522 raise tokenizer.ParseErrorPreviousToken( 642 raise tokenizer.ParseErrorPreviousToken(
523 'Message type "%s" has no field named "%s".' % ( 643 'Message type "%s" has no field named "%s".' %
524 message_descriptor.full_name, name)) 644 (message_descriptor.full_name, name))
525 645
526 if field: 646 if field:
527 if not self._allow_multiple_scalars and field.containing_oneof: 647 if not self._allow_multiple_scalars and field.containing_oneof:
528 # Check if there's a different field set in this oneof. 648 # Check if there's a different field set in this oneof.
529 # Note that we ignore the case if the same field was set before, and we 649 # Note that we ignore the case if the same field was set before, and we
530 # apply _allow_multiple_scalars to non-scalar fields as well. 650 # apply _allow_multiple_scalars to non-scalar fields as well.
531 which_oneof = message.WhichOneof(field.containing_oneof.name) 651 which_oneof = message.WhichOneof(field.containing_oneof.name)
532 if which_oneof is not None and which_oneof != field.name: 652 if which_oneof is not None and which_oneof != field.name:
533 raise tokenizer.ParseErrorPreviousToken( 653 raise tokenizer.ParseErrorPreviousToken(
534 'Field "%s" is specified along with field "%s", another member ' 654 'Field "%s" is specified along with field "%s", another member '
535 'of oneof "%s" for message type "%s".' % ( 655 'of oneof "%s" for message type "%s".' %
536 field.name, which_oneof, field.containing_oneof.name, 656 (field.name, which_oneof, field.containing_oneof.name,
537 message_descriptor.full_name)) 657 message_descriptor.full_name))
538 658
539 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 659 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
540 tokenizer.TryConsume(':') 660 tokenizer.TryConsume(':')
541 merger = self._MergeMessageField 661 merger = self._MergeMessageField
542 else: 662 else:
543 tokenizer.Consume(':') 663 tokenizer.Consume(':')
544 merger = self._MergeScalarField 664 merger = self._MergeScalarField
545 665
546 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED 666 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and
547 and tokenizer.TryConsume('[')): 667 tokenizer.TryConsume('[')):
548 # Short repeated format, e.g. "foo: [1, 2, 3]" 668 # Short repeated format, e.g. "foo: [1, 2, 3]"
549 while True: 669 while True:
550 merger(tokenizer, message, field) 670 merger(tokenizer, message, field)
551 if tokenizer.TryConsume(']'): break 671 if tokenizer.TryConsume(']'):
672 break
552 tokenizer.Consume(',') 673 tokenizer.Consume(',')
553 674
554 else: 675 else:
555 merger(tokenizer, message, field) 676 merger(tokenizer, message, field)
556 677
557 else: # Proto field is unknown. 678 else: # Proto field is unknown.
558 assert self.allow_unknown_extension 679 assert self.allow_unknown_extension
559 _SkipFieldContents(tokenizer) 680 _SkipFieldContents(tokenizer)
560 681
561 # For historical reasons, fields may optionally be separated by commas or 682 # For historical reasons, fields may optionally be separated by commas or
562 # semicolons. 683 # semicolons.
563 if not tokenizer.TryConsume(','): 684 if not tokenizer.TryConsume(','):
564 tokenizer.TryConsume(';') 685 tokenizer.TryConsume(';')
565 686
687 def _ConsumeAnyTypeUrl(self, tokenizer):
688 """Consumes a google.protobuf.Any type URL and returns the type name."""
689 # Consume "type.googleapis.com/".
690 tokenizer.ConsumeIdentifier()
691 tokenizer.Consume('.')
692 tokenizer.ConsumeIdentifier()
693 tokenizer.Consume('.')
694 tokenizer.ConsumeIdentifier()
695 tokenizer.Consume('/')
696 # Consume the fully-qualified type name.
697 name = [tokenizer.ConsumeIdentifier()]
698 while tokenizer.TryConsume('.'):
699 name.append(tokenizer.ConsumeIdentifier())
700 return '.'.join(name)
701
566 def _MergeMessageField(self, tokenizer, message, field): 702 def _MergeMessageField(self, tokenizer, message, field):
567 """Merges a single scalar field into a message. 703 """Merges a single scalar field into a message.
568 704
569 Args: 705 Args:
570 tokenizer: A tokenizer to parse the field value. 706 tokenizer: A tokenizer to parse the field value.
571 message: The message of which field is a member. 707 message: The message of which field is a member.
572 field: The descriptor of the field to be merged. 708 field: The descriptor of the field to be merged.
573 709
574 Raises: 710 Raises:
575 ParseError: In case of text parsing problems. 711 ParseError: In case of text parsing problems.
576 """ 712 """
577 is_map_entry = _IsMapEntry(field) 713 is_map_entry = _IsMapEntry(field)
578 714
579 if tokenizer.TryConsume('<'): 715 if tokenizer.TryConsume('<'):
580 end_token = '>' 716 end_token = '>'
581 else: 717 else:
582 tokenizer.Consume('{') 718 tokenizer.Consume('{')
583 end_token = '}' 719 end_token = '}'
584 720
585 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: 721 if (field.message_type.full_name == _ANY_FULL_TYPE_NAME and
722 tokenizer.TryConsume('[')):
723 packed_type_name = self._ConsumeAnyTypeUrl(tokenizer)
724 tokenizer.Consume(']')
725 tokenizer.TryConsume(':')
726 if tokenizer.TryConsume('<'):
727 expanded_any_end_token = '>'
728 else:
729 tokenizer.Consume('{')
730 expanded_any_end_token = '}'
731 if not self.descriptor_pool:
732 raise ParseError('Descriptor pool required to parse expanded Any field')
733 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name,
734 self.descriptor_pool)
735 if not expanded_any_sub_message:
736 raise ParseError('Type %s not found in descriptor pool' %
737 packed_type_name)
738 while not tokenizer.TryConsume(expanded_any_end_token):
739 if tokenizer.AtEnd():
740 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' %
741 (expanded_any_end_token,))
742 self._MergeField(tokenizer, expanded_any_sub_message)
743 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
744 any_message = getattr(message, field.name).add()
745 else:
746 any_message = getattr(message, field.name)
747 any_message.Pack(expanded_any_sub_message)
748 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
586 if field.is_extension: 749 if field.is_extension:
587 sub_message = message.Extensions[field].add() 750 sub_message = message.Extensions[field].add()
588 elif is_map_entry: 751 elif is_map_entry:
589 # pylint: disable=protected-access 752 # pylint: disable=protected-access
590 sub_message = field.message_type._concrete_class() 753 sub_message = field.message_type._concrete_class()
591 else: 754 else:
592 sub_message = getattr(message, field.name).add() 755 sub_message = getattr(message, field.name).add()
593 else: 756 else:
594 if field.is_extension: 757 if field.is_extension:
595 sub_message = message.Extensions[field] 758 sub_message = message.Extensions[field]
(...skipping 25 matching lines...) Expand all
621 Raises: 784 Raises:
622 ParseError: In case of text parsing problems. 785 ParseError: In case of text parsing problems.
623 RuntimeError: On runtime errors. 786 RuntimeError: On runtime errors.
624 """ 787 """
625 _ = self.allow_unknown_extension 788 _ = self.allow_unknown_extension
626 value = None 789 value = None
627 790
628 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, 791 if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
629 descriptor.FieldDescriptor.TYPE_SINT32, 792 descriptor.FieldDescriptor.TYPE_SINT32,
630 descriptor.FieldDescriptor.TYPE_SFIXED32): 793 descriptor.FieldDescriptor.TYPE_SFIXED32):
631 value = tokenizer.ConsumeInt32() 794 value = _ConsumeInt32(tokenizer)
632 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, 795 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
633 descriptor.FieldDescriptor.TYPE_SINT64, 796 descriptor.FieldDescriptor.TYPE_SINT64,
634 descriptor.FieldDescriptor.TYPE_SFIXED64): 797 descriptor.FieldDescriptor.TYPE_SFIXED64):
635 value = tokenizer.ConsumeInt64() 798 value = _ConsumeInt64(tokenizer)
636 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, 799 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
637 descriptor.FieldDescriptor.TYPE_FIXED32): 800 descriptor.FieldDescriptor.TYPE_FIXED32):
638 value = tokenizer.ConsumeUint32() 801 value = _ConsumeUint32(tokenizer)
639 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, 802 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
640 descriptor.FieldDescriptor.TYPE_FIXED64): 803 descriptor.FieldDescriptor.TYPE_FIXED64):
641 value = tokenizer.ConsumeUint64() 804 value = _ConsumeUint64(tokenizer)
642 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, 805 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
643 descriptor.FieldDescriptor.TYPE_DOUBLE): 806 descriptor.FieldDescriptor.TYPE_DOUBLE):
644 value = tokenizer.ConsumeFloat() 807 value = tokenizer.ConsumeFloat()
645 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: 808 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
646 value = tokenizer.ConsumeBool() 809 value = tokenizer.ConsumeBool()
647 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: 810 elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
648 value = tokenizer.ConsumeString() 811 value = tokenizer.ConsumeString()
649 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: 812 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
650 value = tokenizer.ConsumeByteString() 813 value = tokenizer.ConsumeByteString()
651 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: 814 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
746 ParseError: In case an invalid field value is found. 909 ParseError: In case an invalid field value is found.
747 """ 910 """
748 # String/bytes tokens can come in multiple adjacent string literals. 911 # String/bytes tokens can come in multiple adjacent string literals.
749 # If we can consume one, consume as many as we can. 912 # If we can consume one, consume as many as we can.
750 if tokenizer.TryConsumeByteString(): 913 if tokenizer.TryConsumeByteString():
751 while tokenizer.TryConsumeByteString(): 914 while tokenizer.TryConsumeByteString():
752 pass 915 pass
753 return 916 return
754 917
755 if (not tokenizer.TryConsumeIdentifier() and 918 if (not tokenizer.TryConsumeIdentifier() and
756 not tokenizer.TryConsumeInt64() and 919 not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and
757 not tokenizer.TryConsumeUint64() and
758 not tokenizer.TryConsumeFloat()): 920 not tokenizer.TryConsumeFloat()):
759 raise ParseError('Invalid field value: ' + tokenizer.token) 921 raise ParseError('Invalid field value: ' + tokenizer.token)
760 922
761 923
762 class _Tokenizer(object): 924 class Tokenizer(object):
763 """Protocol buffer text representation tokenizer. 925 """Protocol buffer text representation tokenizer.
764 926
765 This class handles the lower level string parsing by splitting it into 927 This class handles the lower level string parsing by splitting it into
766 meaningful tokens. 928 meaningful tokens.
767 929
768 It was directly ported from the Java protocol buffer API. 930 It was directly ported from the Java protocol buffer API.
769 """ 931 """
770 932
771 _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE) 933 _WHITESPACE = re.compile(r'\s+')
934 _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
935 _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
772 _TOKEN = re.compile('|'.join([ 936 _TOKEN = re.compile('|'.join([
773 r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier 937 r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier
774 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number 938 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number
775 ] + [ # quoted str for each quote mark 939 ] + [ # quoted str for each quote mark
776 r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES 940 r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES
777 ])) 941 ]))
778 942
779 _IDENTIFIER = re.compile(r'\w+') 943 _IDENTIFIER = re.compile(r'[^\d\W]\w*')
944 _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')
780 945
781 def __init__(self, lines): 946 def __init__(self, lines, skip_comments=True):
782 self._position = 0 947 self._position = 0
783 self._line = -1 948 self._line = -1
784 self._column = 0 949 self._column = 0
785 self._token_start = None 950 self._token_start = None
786 self.token = '' 951 self.token = ''
787 self._lines = iter(lines) 952 self._lines = iter(lines)
788 self._current_line = '' 953 self._current_line = ''
789 self._previous_line = 0 954 self._previous_line = 0
790 self._previous_column = 0 955 self._previous_column = 0
791 self._more_lines = True 956 self._more_lines = True
957 self._skip_comments = skip_comments
958 self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
959 or self._WHITESPACE)
792 self._SkipWhitespace() 960 self._SkipWhitespace()
793 self.NextToken() 961 self.NextToken()
794 962
795 def LookingAt(self, token): 963 def LookingAt(self, token):
796 return self.token == token 964 return self.token == token
797 965
798 def AtEnd(self): 966 def AtEnd(self):
799 """Checks the end of the text was reached. 967 """Checks the end of the text was reached.
800 968
801 Returns: 969 Returns:
802 True iff the end was reached. 970 True iff the end was reached.
803 """ 971 """
804 return not self.token 972 return not self.token
805 973
806 def _PopLine(self): 974 def _PopLine(self):
807 while len(self._current_line) <= self._column: 975 while len(self._current_line) <= self._column:
808 try: 976 try:
809 self._current_line = next(self._lines) 977 self._current_line = next(self._lines)
810 except StopIteration: 978 except StopIteration:
811 self._current_line = '' 979 self._current_line = ''
812 self._more_lines = False 980 self._more_lines = False
813 return 981 return
814 else: 982 else:
815 self._line += 1 983 self._line += 1
816 self._column = 0 984 self._column = 0
817 985
818 def _SkipWhitespace(self): 986 def _SkipWhitespace(self):
819 while True: 987 while True:
820 self._PopLine() 988 self._PopLine()
821 match = self._WHITESPACE.match(self._current_line, self._column) 989 match = self._whitespace_pattern.match(self._current_line, self._column)
822 if not match: 990 if not match:
823 break 991 break
824 length = len(match.group(0)) 992 length = len(match.group(0))
825 self._column += length 993 self._column += length
826 994
827 def TryConsume(self, token): 995 def TryConsume(self, token):
828 """Tries to consume a given piece of text. 996 """Tries to consume a given piece of text.
829 997
830 Args: 998 Args:
831 token: Text to consume. 999 token: Text to consume.
832 1000
833 Returns: 1001 Returns:
834 True iff the text was consumed. 1002 True iff the text was consumed.
835 """ 1003 """
836 if self.token == token: 1004 if self.token == token:
837 self.NextToken() 1005 self.NextToken()
838 return True 1006 return True
839 return False 1007 return False
840 1008
841 def Consume(self, token): 1009 def Consume(self, token):
842 """Consumes a piece of text. 1010 """Consumes a piece of text.
843 1011
844 Args: 1012 Args:
845 token: Text to consume. 1013 token: Text to consume.
846 1014
847 Raises: 1015 Raises:
848 ParseError: If the text couldn't be consumed. 1016 ParseError: If the text couldn't be consumed.
849 """ 1017 """
850 if not self.TryConsume(token): 1018 if not self.TryConsume(token):
851 raise self._ParseError('Expected "%s".' % token) 1019 raise self.ParseError('Expected "%s".' % token)
1020
1021 def ConsumeComment(self):
1022 result = self.token
1023 if not self._COMMENT.match(result):
1024 raise self.ParseError('Expected comment.')
1025 self.NextToken()
1026 return result
852 1027
853 def TryConsumeIdentifier(self): 1028 def TryConsumeIdentifier(self):
854 try: 1029 try:
855 self.ConsumeIdentifier() 1030 self.ConsumeIdentifier()
856 return True 1031 return True
857 except ParseError: 1032 except ParseError:
858 return False 1033 return False
859 1034
860 def ConsumeIdentifier(self): 1035 def ConsumeIdentifier(self):
861 """Consumes protocol message field identifier. 1036 """Consumes protocol message field identifier.
862 1037
863 Returns: 1038 Returns:
864 Identifier string. 1039 Identifier string.
865 1040
866 Raises: 1041 Raises:
867 ParseError: If an identifier couldn't be consumed. 1042 ParseError: If an identifier couldn't be consumed.
868 """ 1043 """
869 result = self.token 1044 result = self.token
870 if not self._IDENTIFIER.match(result): 1045 if not self._IDENTIFIER.match(result):
871 raise self._ParseError('Expected identifier.') 1046 raise self.ParseError('Expected identifier.')
872 self.NextToken() 1047 self.NextToken()
873 return result 1048 return result
874 1049
875 def ConsumeInt32(self): 1050 def TryConsumeIdentifierOrNumber(self):
876 """Consumes a signed 32bit integer number. 1051 try:
1052 self.ConsumeIdentifierOrNumber()
1053 return True
1054 except ParseError:
1055 return False
877 1056
1057 def ConsumeIdentifierOrNumber(self):
1058 """Consumes protocol message field identifier.
1059
1060 Returns:
1061 Identifier string.
1062
1063 Raises:
1064 ParseError: If an identifier couldn't be consumed.
1065 """
1066 result = self.token
1067 if not self._IDENTIFIER_OR_NUMBER.match(result):
1068 raise self.ParseError('Expected identifier or number.')
1069 self.NextToken()
1070 return result
1071
1072 def TryConsumeInteger(self):
1073 try:
1074 # Note: is_long only affects value type, not whether an error is raised.
1075 self.ConsumeInteger()
1076 return True
1077 except ParseError:
1078 return False
1079
1080 def ConsumeInteger(self, is_long=False):
1081 """Consumes an integer number.
1082
1083 Args:
1084 is_long: True if the value should be returned as a long integer.
878 Returns: 1085 Returns:
879 The integer parsed. 1086 The integer parsed.
880 1087
881 Raises: 1088 Raises:
882 ParseError: If a signed 32bit integer couldn't be consumed. 1089 ParseError: If an integer couldn't be consumed.
883 """ 1090 """
884 try: 1091 try:
885 result = ParseInteger(self.token, is_signed=True, is_long=False) 1092 result = _ParseAbstractInteger(self.token, is_long=is_long)
886 except ValueError as e: 1093 except ValueError as e:
887 raise self._ParseError(str(e)) 1094 raise self.ParseError(str(e))
888 self.NextToken() 1095 self.NextToken()
889 return result 1096 return result
890 1097
891 def ConsumeUint32(self):
892 """Consumes an unsigned 32bit integer number.
893
894 Returns:
895 The integer parsed.
896
897 Raises:
898 ParseError: If an unsigned 32bit integer couldn't be consumed.
899 """
900 try:
901 result = ParseInteger(self.token, is_signed=False, is_long=False)
902 except ValueError as e:
903 raise self._ParseError(str(e))
904 self.NextToken()
905 return result
906
907 def TryConsumeInt64(self):
908 try:
909 self.ConsumeInt64()
910 return True
911 except ParseError:
912 return False
913
914 def ConsumeInt64(self):
915 """Consumes a signed 64bit integer number.
916
917 Returns:
918 The integer parsed.
919
920 Raises:
921 ParseError: If a signed 64bit integer couldn't be consumed.
922 """
923 try:
924 result = ParseInteger(self.token, is_signed=True, is_long=True)
925 except ValueError as e:
926 raise self._ParseError(str(e))
927 self.NextToken()
928 return result
929
930 def TryConsumeUint64(self):
931 try:
932 self.ConsumeUint64()
933 return True
934 except ParseError:
935 return False
936
937 def ConsumeUint64(self):
938 """Consumes an unsigned 64bit integer number.
939
940 Returns:
941 The integer parsed.
942
943 Raises:
944 ParseError: If an unsigned 64bit integer couldn't be consumed.
945 """
946 try:
947 result = ParseInteger(self.token, is_signed=False, is_long=True)
948 except ValueError as e:
949 raise self._ParseError(str(e))
950 self.NextToken()
951 return result
952
953 def TryConsumeFloat(self): 1098 def TryConsumeFloat(self):
954 try: 1099 try:
955 self.ConsumeFloat() 1100 self.ConsumeFloat()
956 return True 1101 return True
957 except ParseError: 1102 except ParseError:
958 return False 1103 return False
959 1104
960 def ConsumeFloat(self): 1105 def ConsumeFloat(self):
961 """Consumes an floating point number. 1106 """Consumes an floating point number.
962 1107
963 Returns: 1108 Returns:
964 The number parsed. 1109 The number parsed.
965 1110
966 Raises: 1111 Raises:
967 ParseError: If a floating point number couldn't be consumed. 1112 ParseError: If a floating point number couldn't be consumed.
968 """ 1113 """
969 try: 1114 try:
970 result = ParseFloat(self.token) 1115 result = ParseFloat(self.token)
971 except ValueError as e: 1116 except ValueError as e:
972 raise self._ParseError(str(e)) 1117 raise self.ParseError(str(e))
973 self.NextToken() 1118 self.NextToken()
974 return result 1119 return result
975 1120
976 def ConsumeBool(self): 1121 def ConsumeBool(self):
977 """Consumes a boolean value. 1122 """Consumes a boolean value.
978 1123
979 Returns: 1124 Returns:
980 The bool parsed. 1125 The bool parsed.
981 1126
982 Raises: 1127 Raises:
983 ParseError: If a boolean value couldn't be consumed. 1128 ParseError: If a boolean value couldn't be consumed.
984 """ 1129 """
985 try: 1130 try:
986 result = ParseBool(self.token) 1131 result = ParseBool(self.token)
987 except ValueError as e: 1132 except ValueError as e:
988 raise self._ParseError(str(e)) 1133 raise self.ParseError(str(e))
989 self.NextToken() 1134 self.NextToken()
990 return result 1135 return result
991 1136
992 def TryConsumeByteString(self): 1137 def TryConsumeByteString(self):
993 try: 1138 try:
994 self.ConsumeByteString() 1139 self.ConsumeByteString()
995 return True 1140 return True
996 except ParseError: 1141 except ParseError:
997 return False 1142 return False
998 1143
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
1032 tokens which are automatically concatenated, like in C or Python. This 1177 tokens which are automatically concatenated, like in C or Python. This
1033 method only consumes one token. 1178 method only consumes one token.
1034 1179
1035 Returns: 1180 Returns:
1036 The token parsed. 1181 The token parsed.
1037 Raises: 1182 Raises:
1038 ParseError: When the wrong format data is found. 1183 ParseError: When the wrong format data is found.
1039 """ 1184 """
1040 text = self.token 1185 text = self.token
1041 if len(text) < 1 or text[0] not in _QUOTES: 1186 if len(text) < 1 or text[0] not in _QUOTES:
1042 raise self._ParseError('Expected string but found: %r' % (text,)) 1187 raise self.ParseError('Expected string but found: %r' % (text,))
1043 1188
1044 if len(text) < 2 or text[-1] != text[0]: 1189 if len(text) < 2 or text[-1] != text[0]:
1045 raise self._ParseError('String missing ending quote: %r' % (text,)) 1190 raise self.ParseError('String missing ending quote: %r' % (text,))
1046 1191
1047 try: 1192 try:
1048 result = text_encoding.CUnescape(text[1:-1]) 1193 result = text_encoding.CUnescape(text[1:-1])
1049 except ValueError as e: 1194 except ValueError as e:
1050 raise self._ParseError(str(e)) 1195 raise self.ParseError(str(e))
1051 self.NextToken() 1196 self.NextToken()
1052 return result 1197 return result
1053 1198
1054 def ConsumeEnum(self, field): 1199 def ConsumeEnum(self, field):
1055 try: 1200 try:
1056 result = ParseEnum(field, self.token) 1201 result = ParseEnum(field, self.token)
1057 except ValueError as e: 1202 except ValueError as e:
1058 raise self._ParseError(str(e)) 1203 raise self.ParseError(str(e))
1059 self.NextToken() 1204 self.NextToken()
1060 return result 1205 return result
1061 1206
1062 def ParseErrorPreviousToken(self, message): 1207 def ParseErrorPreviousToken(self, message):
1063 """Creates and *returns* a ParseError for the previously read token. 1208 """Creates and *returns* a ParseError for the previously read token.
1064 1209
1065 Args: 1210 Args:
1066 message: A message to set for the exception. 1211 message: A message to set for the exception.
1067 1212
1068 Returns: 1213 Returns:
1069 A ParseError instance. 1214 A ParseError instance.
1070 """ 1215 """
1071 return ParseError('%d:%d : %s' % ( 1216 return ParseError(message, self._previous_line + 1,
1072 self._previous_line + 1, self._previous_column + 1, message)) 1217 self._previous_column + 1)
1073 1218
1074 def _ParseError(self, message): 1219 def ParseError(self, message):
1075 """Creates and *returns* a ParseError for the current token.""" 1220 """Creates and *returns* a ParseError for the current token."""
1076 return ParseError('%d:%d : %s' % ( 1221 return ParseError(message, self._line + 1, self._column + 1)
1077 self._line + 1, self._column + 1, message))
1078 1222
1079 def _StringParseError(self, e): 1223 def _StringParseError(self, e):
1080 return self._ParseError('Couldn\'t parse string: ' + str(e)) 1224 return self.ParseError('Couldn\'t parse string: ' + str(e))
1081 1225
1082 def NextToken(self): 1226 def NextToken(self):
1083 """Reads the next meaningful token.""" 1227 """Reads the next meaningful token."""
1084 self._previous_line = self._line 1228 self._previous_line = self._line
1085 self._previous_column = self._column 1229 self._previous_column = self._column
1086 1230
1087 self._column += len(self.token) 1231 self._column += len(self.token)
1088 self._SkipWhitespace() 1232 self._SkipWhitespace()
1089 1233
1090 if not self._more_lines: 1234 if not self._more_lines:
1091 self.token = '' 1235 self.token = ''
1092 return 1236 return
1093 1237
1094 match = self._TOKEN.match(self._current_line, self._column) 1238 match = self._TOKEN.match(self._current_line, self._column)
1239 if not match and not self._skip_comments:
1240 match = self._COMMENT.match(self._current_line, self._column)
1095 if match: 1241 if match:
1096 token = match.group(0) 1242 token = match.group(0)
1097 self.token = token 1243 self.token = token
1098 else: 1244 else:
1099 self.token = self._current_line[self._column] 1245 self.token = self._current_line[self._column]
1100 1246
1247 # Aliased so it can still be accessed by current visibility violators.
1248 # TODO(dbarnett): Migrate violators to textformat_tokenizer.
1249 _Tokenizer = Tokenizer # pylint: disable=invalid-name
1250
1251
1252 def _ConsumeInt32(tokenizer):
1253 """Consumes a signed 32bit integer number from tokenizer.
1254
1255 Args:
1256 tokenizer: A tokenizer used to parse the number.
1257
1258 Returns:
1259 The integer parsed.
1260
1261 Raises:
1262 ParseError: If a signed 32bit integer couldn't be consumed.
1263 """
1264 return _ConsumeInteger(tokenizer, is_signed=True, is_long=False)
1265
1266
1267 def _ConsumeUint32(tokenizer):
1268 """Consumes an unsigned 32bit integer number from tokenizer.
1269
1270 Args:
1271 tokenizer: A tokenizer used to parse the number.
1272
1273 Returns:
1274 The integer parsed.
1275
1276 Raises:
1277 ParseError: If an unsigned 32bit integer couldn't be consumed.
1278 """
1279 return _ConsumeInteger(tokenizer, is_signed=False, is_long=False)
1280
1281
1282 def _TryConsumeInt64(tokenizer):
1283 try:
1284 _ConsumeInt64(tokenizer)
1285 return True
1286 except ParseError:
1287 return False
1288
1289
1290 def _ConsumeInt64(tokenizer):
1291 """Consumes a signed 32bit integer number from tokenizer.
1292
1293 Args:
1294 tokenizer: A tokenizer used to parse the number.
1295
1296 Returns:
1297 The integer parsed.
1298
1299 Raises:
1300 ParseError: If a signed 32bit integer couldn't be consumed.
1301 """
1302 return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)
1303
1304
1305 def _TryConsumeUint64(tokenizer):
1306 try:
1307 _ConsumeUint64(tokenizer)
1308 return True
1309 except ParseError:
1310 return False
1311
1312
1313 def _ConsumeUint64(tokenizer):
1314 """Consumes an unsigned 64bit integer number from tokenizer.
1315
1316 Args:
1317 tokenizer: A tokenizer used to parse the number.
1318
1319 Returns:
1320 The integer parsed.
1321
1322 Raises:
1323 ParseError: If an unsigned 64bit integer couldn't be consumed.
1324 """
1325 return _ConsumeInteger(tokenizer, is_signed=False, is_long=True)
1326
1327
1328 def _TryConsumeInteger(tokenizer, is_signed=False, is_long=False):
1329 try:
1330 _ConsumeInteger(tokenizer, is_signed=is_signed, is_long=is_long)
1331 return True
1332 except ParseError:
1333 return False
1334
1335
1336 def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
1337 """Consumes an integer number from tokenizer.
1338
1339 Args:
1340 tokenizer: A tokenizer used to parse the number.
1341 is_signed: True if a signed integer must be parsed.
1342 is_long: True if a long integer must be parsed.
1343
1344 Returns:
1345 The integer parsed.
1346
1347 Raises:
1348 ParseError: If an integer with given characteristics couldn't be consumed.
1349 """
1350 try:
1351 result = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long)
1352 except ValueError as e:
1353 raise tokenizer.ParseError(str(e))
1354 tokenizer.NextToken()
1355 return result
1356
1101 1357
1102 def ParseInteger(text, is_signed=False, is_long=False): 1358 def ParseInteger(text, is_signed=False, is_long=False):
1103 """Parses an integer. 1359 """Parses an integer.
1104 1360
1105 Args: 1361 Args:
1106 text: The text to parse. 1362 text: The text to parse.
1107 is_signed: True if a signed integer must be parsed. 1363 is_signed: True if a signed integer must be parsed.
1108 is_long: True if a long integer must be parsed. 1364 is_long: True if a long integer must be parsed.
1109 1365
1110 Returns: 1366 Returns:
1111 The integer value. 1367 The integer value.
1112 1368
1113 Raises: 1369 Raises:
1114 ValueError: Thrown Iff the text is not a valid integer. 1370 ValueError: Thrown Iff the text is not a valid integer.
1115 """ 1371 """
1116 # Do the actual parsing. Exception handling is propagated to caller. 1372 # Do the actual parsing. Exception handling is propagated to caller.
1373 result = _ParseAbstractInteger(text, is_long=is_long)
1374
1375 # Check if the integer is sane. Exceptions handled by callers.
1376 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
1377 checker.CheckValue(result)
1378 return result
1379
1380
1381 def _ParseAbstractInteger(text, is_long=False):
1382 """Parses an integer without checking size/signedness.
1383
1384 Args:
1385 text: The text to parse.
1386 is_long: True if the value should be returned as a long integer.
1387
1388 Returns:
1389 The integer value.
1390
1391 Raises:
1392 ValueError: Thrown Iff the text is not a valid integer.
1393 """
1394 # Do the actual parsing. Exception handling is propagated to caller.
1117 try: 1395 try:
1118 # We force 32-bit values to int and 64-bit values to long to make 1396 # We force 32-bit values to int and 64-bit values to long to make
1119 # alternate implementations where the distinction is more significant 1397 # alternate implementations where the distinction is more significant
1120 # (e.g. the C++ implementation) simpler. 1398 # (e.g. the C++ implementation) simpler.
1121 if is_long: 1399 if is_long:
1122 result = long(text, 0) 1400 return long(text, 0)
1123 else: 1401 else:
1124 result = int(text, 0) 1402 return int(text, 0)
1125 except ValueError: 1403 except ValueError:
1126 raise ValueError('Couldn\'t parse integer: %s' % text) 1404 raise ValueError('Couldn\'t parse integer: %s' % text)
1127 1405
1128 # Check if the integer is sane. Exceptions handled by callers.
1129 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
1130 checker.CheckValue(result)
1131 return result
1132
1133 1406
1134 def ParseFloat(text): 1407 def ParseFloat(text):
1135 """Parse a floating point number. 1408 """Parse a floating point number.
1136 1409
1137 Args: 1410 Args:
1138 text: Text to parse. 1411 text: Text to parse.
1139 1412
1140 Returns: 1413 Returns:
1141 The number parsed. 1414 The number parsed.
1142 1415
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
1199 Raises: 1472 Raises:
1200 ValueError: If the enum value could not be parsed. 1473 ValueError: If the enum value could not be parsed.
1201 """ 1474 """
1202 enum_descriptor = field.enum_type 1475 enum_descriptor = field.enum_type
1203 try: 1476 try:
1204 number = int(value, 0) 1477 number = int(value, 0)
1205 except ValueError: 1478 except ValueError:
1206 # Identifier. 1479 # Identifier.
1207 enum_value = enum_descriptor.values_by_name.get(value, None) 1480 enum_value = enum_descriptor.values_by_name.get(value, None)
1208 if enum_value is None: 1481 if enum_value is None:
1209 raise ValueError( 1482 raise ValueError('Enum type "%s" has no value named %s.' %
1210 'Enum type "%s" has no value named %s.' % ( 1483 (enum_descriptor.full_name, value))
1211 enum_descriptor.full_name, value))
1212 else: 1484 else:
1213 # Numeric value. 1485 # Numeric value.
1214 enum_value = enum_descriptor.values_by_number.get(number, None) 1486 enum_value = enum_descriptor.values_by_number.get(number, None)
1215 if enum_value is None: 1487 if enum_value is None:
1216 raise ValueError( 1488 raise ValueError('Enum type "%s" has no value with number %d.' %
1217 'Enum type "%s" has no value with number %d.' % ( 1489 (enum_descriptor.full_name, number))
1218 enum_descriptor.full_name, number))
1219 return enum_value.number 1490 return enum_value.number
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698