Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(186)

Side by Side Diff: third_party/protobuf/python/google/protobuf/text_format.py

Issue 2600753002: Reverts third_party/protobuf: Update to HEAD (f52e188fe4) (Closed)
Patch Set: Created 3 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Protocol Buffers - Google's data interchange format 1 # Protocol Buffers - Google's data interchange format
2 # Copyright 2008 Google Inc. All rights reserved. 2 # Copyright 2008 Google Inc. All rights reserved.
3 # https://developers.google.com/protocol-buffers/ 3 # https://developers.google.com/protocol-buffers/
4 # 4 #
5 # Redistribution and use in source and binary forms, with or without 5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are 6 # modification, are permitted provided that the following conditions are
7 # met: 7 # met:
8 # 8 #
9 # * Redistributions of source code must retain the above copyright 9 # * Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer. 10 # notice, this list of conditions and the following disclaimer.
(...skipping 30 matching lines...) Expand all
41 """ 41 """
42 42
43 __author__ = 'kenton@google.com (Kenton Varda)' 43 __author__ = 'kenton@google.com (Kenton Varda)'
44 44
45 import io 45 import io
46 import re 46 import re
47 47
48 import six 48 import six
49 49
50 if six.PY3: 50 if six.PY3:
51 long = int # pylint: disable=redefined-builtin,invalid-name 51 long = int
52 52
53 # pylint: disable=g-import-not-at-top
54 from google.protobuf.internal import type_checkers 53 from google.protobuf.internal import type_checkers
55 from google.protobuf import descriptor 54 from google.protobuf import descriptor
56 from google.protobuf import text_encoding 55 from google.protobuf import text_encoding
57 56
58 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', 'PrintFieldValue', 57 __all__ = ['MessageToString', 'PrintMessage', 'PrintField',
59 'Merge'] 58 'PrintFieldValue', 'Merge']
59
60 60
# Value checkers used to validate parsed integers, one per proto integer
# width/signedness combination.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Case-insensitive spellings of float infinity and NaN (e.g. "inf",
# "-Infinity", "nanf").
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# C++ field types that hold floating point values.
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
# Characters accepted as string-literal quotes.
_QUOTES = frozenset(("'", '"'))
# Full name of the well-known Any type, which gets special printing support.
_ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
71 70
72 71
class Error(Exception):
  """Top-level module error for text_format."""


class ParseError(Error):
  """Thrown in case of text parsing or tokenizing error."""

  def __init__(self, message=None, line=None, column=None):
    """Creates a ParseError.

    Args:
      message: Human-readable description of the error, or None.
      line: Line number where the error occurred, or None if unknown.
      column: Column where the error occurred, or None if unknown.
    """
    if message is not None and line is not None:
      # Prefix the message with its "line[:column] : " location.
      location = str(line)
      if column is not None:
        location = '{0}:{1}'.format(location, column)
      message = '{0} : {1}'.format(location, message)
    if message is None:
      super(ParseError, self).__init__()
    else:
      super(ParseError, self).__init__(message)
    self._line = line
    self._column = column

  def GetLine(self):
    """Returns the line number where the error occurred, or None."""
    return self._line

  def GetColumn(self):
    """Returns the column where the error occurred, or None."""
    return self._column
98 78
99 79
class TextWriter(object):
  """In-memory text sink that hides the Python 2/3 str-vs-bytes split."""

  def __init__(self, as_utf8):
    # On Python 2 accumulate UTF-8 encoded bytes; on Python 3 accumulate text.
    # NOTE(review): as_utf8 is accepted but never read here — TODO confirm
    # it is intentionally unused.
    self._writer = io.BytesIO() if six.PY2 else io.StringIO()

  def write(self, val):
    if six.PY2 and isinstance(val, six.text_type):
      val = val.encode('utf-8')
    return self._writer.write(val)

  def close(self):
    return self._writer.close()

  def getvalue(self):
    return self._writer.getvalue()
119 98
120 99
def MessageToString(message,
                    as_utf8=False,
                    as_one_line=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None,
                    use_field_number=False,
                    descriptor_pool=None,
                    indent=0):
  """Converts a protobuf message to its text format representation.

  Floating point values can be formatted compactly with 15 digits of
  precision (which is the most that IEEE 754 "double" can guarantee)
  using float_format='.15g'. To ensure that converting to text and back to a
  proto will result in an identical value, float_format='.17g' should be used.

  Args:
    message: The protocol buffers message.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    indent: The indent level, in terms of spaces, for pretty print.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  writer = TextWriter(as_utf8)
  printer = _Printer(writer, indent, as_utf8, as_one_line, pointy_brackets,
                     use_index_order, float_format, use_field_number,
                     descriptor_pool)
  printer.PrintMessage(message)
  result = writer.getvalue()
  writer.close()
  # One-line output leaves a trailing separator after the last field.
  return result.rstrip() if as_one_line else result
165 136
166 137
def _IsMapEntry(field):
  """Returns True if the field is a map's auto-generated entry message."""
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  message_type = field.message_type
  return message_type.has_options and message_type.GetOptions().map_entry
171 142
172 143
def PrintMessage(message,
                 out,
                 indent=0,
                 as_utf8=False,
                 as_one_line=False,
                 pointy_brackets=False,
                 use_index_order=False,
                 float_format=None,
                 use_field_number=False,
                 descriptor_pool=None):
  """Prints the text format representation of a message to a writer.

  Args:
    message: The protocol buffers message to print.
    out: A writer-like object that the output is written to.
    indent: The initial indent level, in spaces.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces.
    use_index_order: If True, order fields by source-code index.
    float_format: Optional format spec for floating point numbers.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
  """
  _Printer(out, indent, as_utf8, as_one_line, pointy_brackets,
           use_index_order, float_format, use_field_number,
           descriptor_pool).PrintMessage(message)
187 151
188 152
def PrintField(field,
               value,
               out,
               indent=0,
               as_utf8=False,
               as_one_line=False,
               pointy_brackets=False,
               use_index_order=False,
               float_format=None):
  """Prints a single field name/value pair to a writer."""
  _Printer(out, indent, as_utf8, as_one_line, pointy_brackets,
           use_index_order, float_format).PrintField(field, value)
202 159
203 160
def PrintFieldValue(field,
                    value,
                    out,
                    indent=0,
                    as_utf8=False,
                    as_one_line=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None):
  """Prints a single field value (not including name) to a writer."""
  _Printer(out, indent, as_utf8, as_one_line, pointy_brackets,
           use_index_order, float_format).PrintFieldValue(field, value)
217 169
218 170
def _BuildMessageFromTypeName(type_name, descriptor_pool):
  """Returns a new protobuf message instance for the given type name.

  Args:
    type_name: Fully-qualified protobuf message type name string.
    descriptor_pool: DescriptorPool instance.

  Returns:
    A Message instance of type matching type_name, or None if no Descriptor
    matching type_name was found in the pool.
  """
  # pylint: disable=g-import-not-at-top
  from google.protobuf import symbol_database
  database = symbol_database.Default()
  try:
    message_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
  except KeyError:
    # Unknown type: the caller decides how to handle an unresolvable Any.
    return None
  message_type = database.GetPrototype(message_descriptor)
  return message_type()
239
240
class _Printer(object):
  """Text format printer for protocol message."""

  def __init__(self, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False, use_index_order=False,
               float_format=None, use_field_number=False,
               descriptor_pool=None):
    """Initializes the Printer.

    Floating point values can be formatted compactly with 15 digits of
    precision (which is the most that IEEE 754 "double" can guarantee)
    using float_format='.15g'. To ensure that converting to text and back
    to a proto will result in an identical value, float_format='.17g'
    should be used.

    Args:
      out: To record the text format result.
      indent: The indent level for pretty print.
      as_utf8: Produce text output in UTF8 format.
      as_one_line: Don't introduce newlines between fields.
      pointy_brackets: If True, use angle brackets instead of curly braces
        for nesting.
      use_index_order: If True, print fields of a proto message using the
        order defined in source code instead of the field number. By
        default, use the field number order.
      float_format: If set, use this to specify floating point number
        formatting (per the "Format Specification Mini-Language");
        otherwise, str() is used.
      use_field_number: If True, print field numbers instead of names.
      descriptor_pool: A DescriptorPool used to resolve Any types.
    """
    self.out = out
    self.indent = indent
    self.as_utf8 = as_utf8
    self.as_one_line = as_one_line
    self.pointy_brackets = pointy_brackets
    self.use_index_order = use_index_order
    self.float_format = float_format
    self.use_field_number = use_field_number
    self.descriptor_pool = descriptor_pool
286
287 def _TryPrintAsAnyMessage(self, message):
288 """Serializes if message is a google.protobuf.Any field."""
289 packed_message = _BuildMessageFromTypeName(message.TypeName(),
290 self.descriptor_pool)
291 if packed_message:
292 packed_message.MergeFromString(message.value)
293 self.out.write('%s[%s]' % (self.indent * ' ', message.type_url))
294 self._PrintMessageFieldValue(packed_message)
295 self.out.write(' ' if self.as_one_line else '\n')
296 return True
297 else:
298 return False
299 207
300 def PrintMessage(self, message): 208 def PrintMessage(self, message):
301 """Convert protobuf message to text format. 209 """Convert protobuf message to text format.
302 210
303 Args: 211 Args:
304 message: The protocol buffers message. 212 message: The protocol buffers message.
305 """ 213 """
306 if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
307 self.descriptor_pool and self._TryPrintAsAnyMessage(message)):
308 return
309 fields = message.ListFields() 214 fields = message.ListFields()
310 if self.use_index_order: 215 if self.use_index_order:
311 fields.sort(key=lambda x: x[0].index) 216 fields.sort(key=lambda x: x[0].index)
312 for field, value in fields: 217 for field, value in fields:
313 if _IsMapEntry(field): 218 if _IsMapEntry(field):
314 for key in sorted(value): 219 for key in sorted(value):
315 # This is slow for maps with submessage entires because it copies the 220 # This is slow for maps with submessage entires because it copies the
316 # entire tree. Unfortunately this would take significant refactoring 221 # entire tree. Unfortunately this would take significant refactoring
317 # of this file to work around. 222 # of this file to work around.
318 # 223 #
319 # TODO(haberman): refactor and optimize if this becomes an issue. 224 # TODO(haberman): refactor and optimize if this becomes an issue.
320 entry_submsg = value.GetEntryClass()(key=key, value=value[key]) 225 entry_submsg = field.message_type._concrete_class(
226 key=key, value=value[key])
321 self.PrintField(field, entry_submsg) 227 self.PrintField(field, entry_submsg)
322 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: 228 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
323 for element in value: 229 for element in value:
324 self.PrintField(field, element) 230 self.PrintField(field, element)
325 else: 231 else:
326 self.PrintField(field, value) 232 self.PrintField(field, value)
327 233
328 def PrintField(self, field, value): 234 def PrintField(self, field, value):
329 """Print a single field name/value pair.""" 235 """Print a single field name/value pair."""
330 out = self.out 236 out = self.out
(...skipping 20 matching lines...) Expand all
351 # The colon is optional in this case, but our cross-language golden files 257 # The colon is optional in this case, but our cross-language golden files
352 # don't include it. 258 # don't include it.
353 out.write(': ') 259 out.write(': ')
354 260
355 self.PrintFieldValue(field, value) 261 self.PrintFieldValue(field, value)
356 if self.as_one_line: 262 if self.as_one_line:
357 out.write(' ') 263 out.write(' ')
358 else: 264 else:
359 out.write('\n') 265 out.write('\n')
360 266
361 def _PrintMessageFieldValue(self, value):
362 if self.pointy_brackets:
363 openb = '<'
364 closeb = '>'
365 else:
366 openb = '{'
367 closeb = '}'
368
369 if self.as_one_line:
370 self.out.write(' %s ' % openb)
371 self.PrintMessage(value)
372 self.out.write(closeb)
373 else:
374 self.out.write(' %s\n' % openb)
375 self.indent += 2
376 self.PrintMessage(value)
377 self.indent -= 2
378 self.out.write(' ' * self.indent + closeb)
379
380 def PrintFieldValue(self, field, value): 267 def PrintFieldValue(self, field, value):
381 """Print a single field value (not including name). 268 """Print a single field value (not including name).
382 269
383 For repeated fields, the value should be a single element. 270 For repeated fields, the value should be a single element.
384 271
385 Args: 272 Args:
386 field: The descriptor of the field to be printed. 273 field: The descriptor of the field to be printed.
387 value: The value of the field. 274 value: The value of the field.
388 """ 275 """
389 out = self.out 276 out = self.out
277 if self.pointy_brackets:
278 openb = '<'
279 closeb = '>'
280 else:
281 openb = '{'
282 closeb = '}'
283
390 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 284 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
391 self._PrintMessageFieldValue(value) 285 if self.as_one_line:
286 out.write(' %s ' % openb)
287 self.PrintMessage(value)
288 out.write(closeb)
289 else:
290 out.write(' %s\n' % openb)
291 self.indent += 2
292 self.PrintMessage(value)
293 self.indent -= 2
294 out.write(' ' * self.indent + closeb)
392 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: 295 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
393 enum_value = field.enum_type.values_by_number.get(value, None) 296 enum_value = field.enum_type.values_by_number.get(value, None)
394 if enum_value is not None: 297 if enum_value is not None:
395 out.write(enum_value.name) 298 out.write(enum_value.name)
396 else: 299 else:
397 out.write(str(value)) 300 out.write(str(value))
398 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: 301 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
399 out.write('\"') 302 out.write('\"')
400 if isinstance(value, six.text_type): 303 if isinstance(value, six.text_type):
401 out_value = value.encode('utf-8') 304 out_value = value.encode('utf-8')
(...skipping 10 matching lines...) Expand all
412 if value: 315 if value:
413 out.write('true') 316 out.write('true')
414 else: 317 else:
415 out.write('false') 318 out.write('false')
416 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: 319 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None:
417 out.write('{1:{0}}'.format(self.float_format, value)) 320 out.write('{1:{0}}'.format(self.float_format, value))
418 else: 321 else:
419 out.write(str(value)) 322 out.write(str(value))
420 323
421 324
def Parse(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False):
  """Parses a text representation of a protocol message into a message.

  Args:
    text: Message text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  if not isinstance(text, str):
    # Accept UTF-8 encoded bytes in addition to native strings.
    text = text.decode('utf-8')
  return ParseLines(text.split('\n'),
                    message,
                    allow_unknown_extension,
                    allow_field_number)
445 346
446 347
def Merge(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None):
  """Parses a text representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.

  Args:
    text: Message text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Accept UTF-8 encoded bytes, mirroring Parse(); without this, a bytes
  # argument fails when split against a text separator.
  if not isinstance(text, str):
    text = text.decode('utf-8')
  return MergeLines(
      text.split('\n'),
      message,
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool)
477 370
478 371
def ParseLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None):
  """Parses a text representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # descriptor_pool was documented but not accepted before; it is now
  # forwarded to the parser, matching MergeLines().
  parser = _Parser(allow_unknown_extension,
                   allow_field_number,
                   descriptor_pool=descriptor_pool)
  return parser.ParseLines(lines, message)
501 391
502 392
def MergeLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None):
  """Parses a text representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  return _Parser(allow_unknown_extension,
                 allow_field_number,
                 descriptor_pool=descriptor_pool).MergeLines(lines, message)
527 412
528 413
class _Parser(object):
  """Text format parser for protocol message."""

  def __init__(self,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None):
    """Initializes the parser.

    Args:
      allow_unknown_extension: if True, skip over missing extensions and
        keep parsing.
      allow_field_number: if True, both field number and field name are
        allowed when identifying a field.
      descriptor_pool: A DescriptorPool used to resolve Any types.
    """
    self.allow_unknown_extension = allow_unknown_extension
    self.allow_field_number = allow_field_number
    self.descriptor_pool = descriptor_pool
539 420
540 def ParseFromString(self, text, message): 421 def ParseFromString(self, text, message):
541 """Parses a text representation of a protocol message into a message.""" 422 """Parses an text representation of a protocol message into a message."""
542 if not isinstance(text, str): 423 if not isinstance(text, str):
543 text = text.decode('utf-8') 424 text = text.decode('utf-8')
544 return self.ParseLines(text.split('\n'), message) 425 return self.ParseLines(text.split('\n'), message)
545 426
546 def ParseLines(self, lines, message): 427 def ParseLines(self, lines, message):
547 """Parses a text representation of a protocol message into a message.""" 428 """Parses an text representation of a protocol message into a message."""
548 self._allow_multiple_scalars = False 429 self._allow_multiple_scalars = False
549 self._ParseOrMerge(lines, message) 430 self._ParseOrMerge(lines, message)
550 return message 431 return message
551 432
552 def MergeFromString(self, text, message): 433 def MergeFromString(self, text, message):
553 """Merges a text representation of a protocol message into a message.""" 434 """Merges an text representation of a protocol message into a message."""
554 return self._MergeLines(text.split('\n'), message) 435 return self._MergeLines(text.split('\n'), message)
555 436
556 def MergeLines(self, lines, message): 437 def MergeLines(self, lines, message):
557 """Merges a text representation of a protocol message into a message.""" 438 """Merges an text representation of a protocol message into a message."""
558 self._allow_multiple_scalars = True 439 self._allow_multiple_scalars = True
559 self._ParseOrMerge(lines, message) 440 self._ParseOrMerge(lines, message)
560 return message 441 return message
561 442
562 def _ParseOrMerge(self, lines, message): 443 def _ParseOrMerge(self, lines, message):
563 """Converts a text representation of a protocol message into a message. 444 """Converts an text representation of a protocol message into a message.
564 445
565 Args: 446 Args:
566 lines: Lines of a message's text representation. 447 lines: Lines of a message's text representation.
567 message: A protocol buffer message to merge into. 448 message: A protocol buffer message to merge into.
568 449
569 Raises: 450 Raises:
570 ParseError: On text parsing problems. 451 ParseError: On text parsing problems.
571 """ 452 """
572 tokenizer = Tokenizer(lines) 453 tokenizer = _Tokenizer(lines)
573 while not tokenizer.AtEnd(): 454 while not tokenizer.AtEnd():
574 self._MergeField(tokenizer, message) 455 self._MergeField(tokenizer, message)
575 456
576 def _MergeField(self, tokenizer, message): 457 def _MergeField(self, tokenizer, message):
577 """Merges a single protocol message field into a message. 458 """Merges a single protocol message field into a message.
578 459
579 Args: 460 Args:
580 tokenizer: A tokenizer to parse the field name and values. 461 tokenizer: A tokenizer to parse the field name and values.
581 message: A protocol message to record the data. 462 message: A protocol message to record the data.
582 463
(...skipping 20 matching lines...) Expand all
603 field = message.Extensions._FindExtensionByName(name) 484 field = message.Extensions._FindExtensionByName(name)
604 # pylint: enable=protected-access 485 # pylint: enable=protected-access
605 if not field: 486 if not field:
606 if self.allow_unknown_extension: 487 if self.allow_unknown_extension:
607 field = None 488 field = None
608 else: 489 else:
609 raise tokenizer.ParseErrorPreviousToken( 490 raise tokenizer.ParseErrorPreviousToken(
610 'Extension "%s" not registered.' % name) 491 'Extension "%s" not registered.' % name)
611 elif message_descriptor != field.containing_type: 492 elif message_descriptor != field.containing_type:
612 raise tokenizer.ParseErrorPreviousToken( 493 raise tokenizer.ParseErrorPreviousToken(
613 'Extension "%s" does not extend message type "%s".' % 494 'Extension "%s" does not extend message type "%s".' % (
614 (name, message_descriptor.full_name)) 495 name, message_descriptor.full_name))
615 496
616 tokenizer.Consume(']') 497 tokenizer.Consume(']')
617 498
618 else: 499 else:
619 name = tokenizer.ConsumeIdentifierOrNumber() 500 name = tokenizer.ConsumeIdentifier()
620 if self.allow_field_number and name.isdigit(): 501 if self.allow_field_number and name.isdigit():
621 number = ParseInteger(name, True, True) 502 number = ParseInteger(name, True, True)
622 field = message_descriptor.fields_by_number.get(number, None) 503 field = message_descriptor.fields_by_number.get(number, None)
623 if not field and message_descriptor.is_extendable: 504 if not field and message_descriptor.is_extendable:
624 field = message.Extensions._FindExtensionByNumber(number) 505 field = message.Extensions._FindExtensionByNumber(number)
625 else: 506 else:
626 field = message_descriptor.fields_by_name.get(name, None) 507 field = message_descriptor.fields_by_name.get(name, None)
627 508
628 # Group names are expected to be capitalized as they appear in the 509 # Group names are expected to be capitalized as they appear in the
629 # .proto file, which actually matches their type names, not their field 510 # .proto file, which actually matches their type names, not their field
630 # names. 511 # names.
631 if not field: 512 if not field:
632 field = message_descriptor.fields_by_name.get(name.lower(), None) 513 field = message_descriptor.fields_by_name.get(name.lower(), None)
633 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: 514 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
634 field = None 515 field = None
635 516
636 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and 517 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
637 field.message_type.name != name): 518 field.message_type.name != name):
638 field = None 519 field = None
639 520
640 if not field: 521 if not field:
641 raise tokenizer.ParseErrorPreviousToken( 522 raise tokenizer.ParseErrorPreviousToken(
642 'Message type "%s" has no field named "%s".' % 523 'Message type "%s" has no field named "%s".' % (
643 (message_descriptor.full_name, name)) 524 message_descriptor.full_name, name))
644 525
645 if field: 526 if field:
646 if not self._allow_multiple_scalars and field.containing_oneof: 527 if not self._allow_multiple_scalars and field.containing_oneof:
647 # Check if there's a different field set in this oneof. 528 # Check if there's a different field set in this oneof.
648 # Note that we ignore the case if the same field was set before, and we 529 # Note that we ignore the case if the same field was set before, and we
649 # apply _allow_multiple_scalars to non-scalar fields as well. 530 # apply _allow_multiple_scalars to non-scalar fields as well.
650 which_oneof = message.WhichOneof(field.containing_oneof.name) 531 which_oneof = message.WhichOneof(field.containing_oneof.name)
651 if which_oneof is not None and which_oneof != field.name: 532 if which_oneof is not None and which_oneof != field.name:
652 raise tokenizer.ParseErrorPreviousToken( 533 raise tokenizer.ParseErrorPreviousToken(
653 'Field "%s" is specified along with field "%s", another member ' 534 'Field "%s" is specified along with field "%s", another member '
654 'of oneof "%s" for message type "%s".' % 535 'of oneof "%s" for message type "%s".' % (
655 (field.name, which_oneof, field.containing_oneof.name, 536 field.name, which_oneof, field.containing_oneof.name,
656 message_descriptor.full_name)) 537 message_descriptor.full_name))
657 538
658 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 539 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
659 tokenizer.TryConsume(':') 540 tokenizer.TryConsume(':')
660 merger = self._MergeMessageField 541 merger = self._MergeMessageField
661 else: 542 else:
662 tokenizer.Consume(':') 543 tokenizer.Consume(':')
663 merger = self._MergeScalarField 544 merger = self._MergeScalarField
664 545
665 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and 546 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED
666 tokenizer.TryConsume('[')): 547 and tokenizer.TryConsume('[')):
667 # Short repeated format, e.g. "foo: [1, 2, 3]" 548 # Short repeated format, e.g. "foo: [1, 2, 3]"
668 while True: 549 while True:
669 merger(tokenizer, message, field) 550 merger(tokenizer, message, field)
670 if tokenizer.TryConsume(']'): 551 if tokenizer.TryConsume(']'): break
671 break
672 tokenizer.Consume(',') 552 tokenizer.Consume(',')
673 553
674 else: 554 else:
675 merger(tokenizer, message, field) 555 merger(tokenizer, message, field)
676 556
677 else: # Proto field is unknown. 557 else: # Proto field is unknown.
678 assert self.allow_unknown_extension 558 assert self.allow_unknown_extension
679 _SkipFieldContents(tokenizer) 559 _SkipFieldContents(tokenizer)
680 560
681 # For historical reasons, fields may optionally be separated by commas or 561 # For historical reasons, fields may optionally be separated by commas or
682 # semicolons. 562 # semicolons.
683 if not tokenizer.TryConsume(','): 563 if not tokenizer.TryConsume(','):
684 tokenizer.TryConsume(';') 564 tokenizer.TryConsume(';')
685 565
686 def _ConsumeAnyTypeUrl(self, tokenizer):
687 """Consumes a google.protobuf.Any type URL and returns the type name."""
688 # Consume "type.googleapis.com/".
689 tokenizer.ConsumeIdentifier()
690 tokenizer.Consume('.')
691 tokenizer.ConsumeIdentifier()
692 tokenizer.Consume('.')
693 tokenizer.ConsumeIdentifier()
694 tokenizer.Consume('/')
695 # Consume the fully-qualified type name.
696 name = [tokenizer.ConsumeIdentifier()]
697 while tokenizer.TryConsume('.'):
698 name.append(tokenizer.ConsumeIdentifier())
699 return '.'.join(name)
700
701 def _MergeMessageField(self, tokenizer, message, field): 566 def _MergeMessageField(self, tokenizer, message, field):
702 """Merges a single scalar field into a message. 567 """Merges a single scalar field into a message.
703 568
704 Args: 569 Args:
705 tokenizer: A tokenizer to parse the field value. 570 tokenizer: A tokenizer to parse the field value.
706 message: The message of which field is a member. 571 message: The message of which field is a member.
707 field: The descriptor of the field to be merged. 572 field: The descriptor of the field to be merged.
708 573
709 Raises: 574 Raises:
710 ParseError: In case of text parsing problems. 575 ParseError: In case of text parsing problems.
711 """ 576 """
712 is_map_entry = _IsMapEntry(field) 577 is_map_entry = _IsMapEntry(field)
713 578
714 if tokenizer.TryConsume('<'): 579 if tokenizer.TryConsume('<'):
715 end_token = '>' 580 end_token = '>'
716 else: 581 else:
717 tokenizer.Consume('{') 582 tokenizer.Consume('{')
718 end_token = '}' 583 end_token = '}'
719 584
720 if (field.message_type.full_name == _ANY_FULL_TYPE_NAME and 585 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
721 tokenizer.TryConsume('[')):
722 packed_type_name = self._ConsumeAnyTypeUrl(tokenizer)
723 tokenizer.Consume(']')
724 tokenizer.TryConsume(':')
725 if tokenizer.TryConsume('<'):
726 expanded_any_end_token = '>'
727 else:
728 tokenizer.Consume('{')
729 expanded_any_end_token = '}'
730 if not self.descriptor_pool:
731 raise ParseError('Descriptor pool required to parse expanded Any field')
732 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name,
733 self.descriptor_pool)
734 if not expanded_any_sub_message:
735 raise ParseError('Type %s not found in descriptor pool' %
736 packed_type_name)
737 while not tokenizer.TryConsume(expanded_any_end_token):
738 if tokenizer.AtEnd():
739 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' %
740 (expanded_any_end_token,))
741 self._MergeField(tokenizer, expanded_any_sub_message)
742 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
743 any_message = getattr(message, field.name).add()
744 else:
745 any_message = getattr(message, field.name)
746 any_message.Pack(expanded_any_sub_message)
747 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
748 if field.is_extension: 586 if field.is_extension:
749 sub_message = message.Extensions[field].add() 587 sub_message = message.Extensions[field].add()
750 elif is_map_entry: 588 elif is_map_entry:
751 sub_message = getattr(message, field.name).GetEntryClass()() 589 # pylint: disable=protected-access
590 sub_message = field.message_type._concrete_class()
752 else: 591 else:
753 sub_message = getattr(message, field.name).add() 592 sub_message = getattr(message, field.name).add()
754 else: 593 else:
755 if field.is_extension: 594 if field.is_extension:
756 sub_message = message.Extensions[field] 595 sub_message = message.Extensions[field]
757 else: 596 else:
758 sub_message = getattr(message, field.name) 597 sub_message = getattr(message, field.name)
759 sub_message.SetInParent() 598 sub_message.SetInParent()
760 599
761 while not tokenizer.TryConsume(end_token): 600 while not tokenizer.TryConsume(end_token):
(...skipping 20 matching lines...) Expand all
782 Raises: 621 Raises:
783 ParseError: In case of text parsing problems. 622 ParseError: In case of text parsing problems.
784 RuntimeError: On runtime errors. 623 RuntimeError: On runtime errors.
785 """ 624 """
786 _ = self.allow_unknown_extension 625 _ = self.allow_unknown_extension
787 value = None 626 value = None
788 627
789 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, 628 if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
790 descriptor.FieldDescriptor.TYPE_SINT32, 629 descriptor.FieldDescriptor.TYPE_SINT32,
791 descriptor.FieldDescriptor.TYPE_SFIXED32): 630 descriptor.FieldDescriptor.TYPE_SFIXED32):
792 value = _ConsumeInt32(tokenizer) 631 value = tokenizer.ConsumeInt32()
793 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, 632 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
794 descriptor.FieldDescriptor.TYPE_SINT64, 633 descriptor.FieldDescriptor.TYPE_SINT64,
795 descriptor.FieldDescriptor.TYPE_SFIXED64): 634 descriptor.FieldDescriptor.TYPE_SFIXED64):
796 value = _ConsumeInt64(tokenizer) 635 value = tokenizer.ConsumeInt64()
797 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, 636 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
798 descriptor.FieldDescriptor.TYPE_FIXED32): 637 descriptor.FieldDescriptor.TYPE_FIXED32):
799 value = _ConsumeUint32(tokenizer) 638 value = tokenizer.ConsumeUint32()
800 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, 639 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
801 descriptor.FieldDescriptor.TYPE_FIXED64): 640 descriptor.FieldDescriptor.TYPE_FIXED64):
802 value = _ConsumeUint64(tokenizer) 641 value = tokenizer.ConsumeUint64()
803 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, 642 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
804 descriptor.FieldDescriptor.TYPE_DOUBLE): 643 descriptor.FieldDescriptor.TYPE_DOUBLE):
805 value = tokenizer.ConsumeFloat() 644 value = tokenizer.ConsumeFloat()
806 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: 645 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
807 value = tokenizer.ConsumeBool() 646 value = tokenizer.ConsumeBool()
808 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: 647 elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
809 value = tokenizer.ConsumeString() 648 value = tokenizer.ConsumeString()
810 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: 649 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
811 value = tokenizer.ConsumeByteString() 650 value = tokenizer.ConsumeByteString()
812 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: 651 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
907 ParseError: In case an invalid field value is found. 746 ParseError: In case an invalid field value is found.
908 """ 747 """
909 # String/bytes tokens can come in multiple adjacent string literals. 748 # String/bytes tokens can come in multiple adjacent string literals.
910 # If we can consume one, consume as many as we can. 749 # If we can consume one, consume as many as we can.
911 if tokenizer.TryConsumeByteString(): 750 if tokenizer.TryConsumeByteString():
912 while tokenizer.TryConsumeByteString(): 751 while tokenizer.TryConsumeByteString():
913 pass 752 pass
914 return 753 return
915 754
916 if (not tokenizer.TryConsumeIdentifier() and 755 if (not tokenizer.TryConsumeIdentifier() and
917 not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and 756 not tokenizer.TryConsumeInt64() and
757 not tokenizer.TryConsumeUint64() and
918 not tokenizer.TryConsumeFloat()): 758 not tokenizer.TryConsumeFloat()):
919 raise ParseError('Invalid field value: ' + tokenizer.token) 759 raise ParseError('Invalid field value: ' + tokenizer.token)
920 760
921 761
class Tokenizer(object):
  """Protocol buffer text representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  # Runs of whitespace (and, when comment-skipping is enabled, '#' comments)
  # that separate tokens.
  _WHITESPACE = re.compile(r'\s+')
  _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
  _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
  # A single token: an identifier, a number, or a quoted string literal.
  _TOKEN = re.compile('|'.join([
      r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
      r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
  ] + [  # quoted str for each quote mark
      r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES
  ]))

  # A proto identifier: a word that does not start with a digit.
  _IDENTIFIER = re.compile(r'[^\d\W]\w*')
  # A field name on the wire may also be a raw field number.
  _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')
943 780
  def __init__(self, lines, skip_comments=True):
    """Initializes the tokenizer over an iterable of text lines.

    Args:
      lines: An iterable of strings making up the text representation.
      skip_comments: If True (the default), '#' comments are treated as
        whitespace and skipped; otherwise they are surfaced as tokens.
    """
    self._position = 0
    self._line = -1  # Incremented to 0 by the first _PopLine() call.
    self._column = 0
    self._token_start = None
    self.token = ''
    self._lines = iter(lines)
    self._current_line = ''
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._skip_comments = skip_comments
    # Choose the whitespace pattern once so _SkipWhitespace need not branch.
    self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
                                or self._WHITESPACE)
    self._SkipWhitespace()
    self.NextToken()
960 794
961 def LookingAt(self, token): 795 def LookingAt(self, token):
962 return self.token == token 796 return self.token == token
963 797
964 def AtEnd(self): 798 def AtEnd(self):
965 """Checks the end of the text was reached. 799 """Checks the end of the text was reached.
966 800
967 Returns: 801 Returns:
968 True iff the end was reached. 802 True iff the end was reached.
969 """ 803 """
970 return not self.token 804 return not self.token
971 805
  def _PopLine(self):
    """Advances to the next input line once the current one is exhausted.

    Sets self._more_lines to False when the underlying iterator runs out.
    """
    # A while loop because input lines may be empty strings.
    while len(self._current_line) <= self._column:
      try:
        self._current_line = next(self._lines)
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0
983 817
  def _SkipWhitespace(self):
    """Skips whitespace (and comments, if configured) before the next token."""
    while True:
      self._PopLine()
      match = self._whitespace_pattern.match(self._current_line, self._column)
      if not match:
        break
      length = len(match.group(0))
      self._column += length
992 826
993 def TryConsume(self, token): 827 def TryConsume(self, token):
994 """Tries to consume a given piece of text. 828 """Tries to consume a given piece of text.
995 829
996 Args: 830 Args:
997 token: Text to consume. 831 token: Text to consume.
998 832
999 Returns: 833 Returns:
1000 True iff the text was consumed. 834 True iff the text was consumed.
1001 """ 835 """
1002 if self.token == token: 836 if self.token == token:
1003 self.NextToken() 837 self.NextToken()
1004 return True 838 return True
1005 return False 839 return False
1006 840
1007 def Consume(self, token): 841 def Consume(self, token):
1008 """Consumes a piece of text. 842 """Consumes a piece of text.
1009 843
1010 Args: 844 Args:
1011 token: Text to consume. 845 token: Text to consume.
1012 846
1013 Raises: 847 Raises:
1014 ParseError: If the text couldn't be consumed. 848 ParseError: If the text couldn't be consumed.
1015 """ 849 """
1016 if not self.TryConsume(token): 850 if not self.TryConsume(token):
1017 raise self.ParseError('Expected "%s".' % token) 851 raise self._ParseError('Expected "%s".' % token)
1018
  def ConsumeComment(self):
    """Consumes the current token as a '#' comment and returns its text.

    Raises:
      ParseError: If the current token is not a comment.
    """
    result = self.token
    if not self._COMMENT.match(result):
      raise self.ParseError('Expected comment.')
    self.NextToken()
    return result
1025 852
1026 def TryConsumeIdentifier(self): 853 def TryConsumeIdentifier(self):
1027 try: 854 try:
1028 self.ConsumeIdentifier() 855 self.ConsumeIdentifier()
1029 return True 856 return True
1030 except ParseError: 857 except ParseError:
1031 return False 858 return False
1032 859
1033 def ConsumeIdentifier(self): 860 def ConsumeIdentifier(self):
1034 """Consumes protocol message field identifier. 861 """Consumes protocol message field identifier.
1035 862
1036 Returns: 863 Returns:
1037 Identifier string. 864 Identifier string.
1038 865
1039 Raises: 866 Raises:
1040 ParseError: If an identifier couldn't be consumed. 867 ParseError: If an identifier couldn't be consumed.
1041 """ 868 """
1042 result = self.token 869 result = self.token
1043 if not self._IDENTIFIER.match(result): 870 if not self._IDENTIFIER.match(result):
1044 raise self.ParseError('Expected identifier.') 871 raise self._ParseError('Expected identifier.')
1045 self.NextToken() 872 self.NextToken()
1046 return result 873 return result
1047 874
1048 def TryConsumeIdentifierOrNumber(self): 875 def ConsumeInt32(self):
876 """Consumes a signed 32bit integer number.
877
878 Returns:
879 The integer parsed.
880
881 Raises:
882 ParseError: If a signed 32bit integer couldn't be consumed.
883 """
1049 try: 884 try:
1050 self.ConsumeIdentifierOrNumber() 885 result = ParseInteger(self.token, is_signed=True, is_long=False)
886 except ValueError as e:
887 raise self._ParseError(str(e))
888 self.NextToken()
889 return result
890
891 def ConsumeUint32(self):
892 """Consumes an unsigned 32bit integer number.
893
894 Returns:
895 The integer parsed.
896
897 Raises:
898 ParseError: If an unsigned 32bit integer couldn't be consumed.
899 """
900 try:
901 result = ParseInteger(self.token, is_signed=False, is_long=False)
902 except ValueError as e:
903 raise self._ParseError(str(e))
904 self.NextToken()
905 return result
906
907 def TryConsumeInt64(self):
908 try:
909 self.ConsumeInt64()
1051 return True 910 return True
1052 except ParseError: 911 except ParseError:
1053 return False 912 return False
1054 913
1055 def ConsumeIdentifierOrNumber(self): 914 def ConsumeInt64(self):
1056 """Consumes protocol message field identifier. 915 """Consumes a signed 64bit integer number.
1057 916
1058 Returns: 917 Returns:
1059 Identifier string. 918 The integer parsed.
1060 919
1061 Raises: 920 Raises:
1062 ParseError: If an identifier couldn't be consumed. 921 ParseError: If a signed 64bit integer couldn't be consumed.
1063 """ 922 """
1064 result = self.token 923 try:
1065 if not self._IDENTIFIER_OR_NUMBER.match(result): 924 result = ParseInteger(self.token, is_signed=True, is_long=True)
1066 raise self.ParseError('Expected identifier or number.') 925 except ValueError as e:
926 raise self._ParseError(str(e))
1067 self.NextToken() 927 self.NextToken()
1068 return result 928 return result
1069 929
1070 def TryConsumeInteger(self): 930 def TryConsumeUint64(self):
1071 try: 931 try:
1072 # Note: is_long only affects value type, not whether an error is raised. 932 self.ConsumeUint64()
1073 self.ConsumeInteger()
1074 return True 933 return True
1075 except ParseError: 934 except ParseError:
1076 return False 935 return False
1077 936
1078 def ConsumeInteger(self, is_long=False): 937 def ConsumeUint64(self):
1079 """Consumes an integer number. 938 """Consumes an unsigned 64bit integer number.
1080 939
1081 Args:
1082 is_long: True if the value should be returned as a long integer.
1083 Returns: 940 Returns:
1084 The integer parsed. 941 The integer parsed.
1085 942
1086 Raises: 943 Raises:
1087 ParseError: If an integer couldn't be consumed. 944 ParseError: If an unsigned 64bit integer couldn't be consumed.
1088 """ 945 """
1089 try: 946 try:
1090 result = _ParseAbstractInteger(self.token, is_long=is_long) 947 result = ParseInteger(self.token, is_signed=False, is_long=True)
1091 except ValueError as e: 948 except ValueError as e:
1092 raise self.ParseError(str(e)) 949 raise self._ParseError(str(e))
1093 self.NextToken() 950 self.NextToken()
1094 return result 951 return result
1095 952
1096 def TryConsumeFloat(self): 953 def TryConsumeFloat(self):
1097 try: 954 try:
1098 self.ConsumeFloat() 955 self.ConsumeFloat()
1099 return True 956 return True
1100 except ParseError: 957 except ParseError:
1101 return False 958 return False
1102 959
1103 def ConsumeFloat(self): 960 def ConsumeFloat(self):
1104 """Consumes an floating point number. 961 """Consumes an floating point number.
1105 962
1106 Returns: 963 Returns:
1107 The number parsed. 964 The number parsed.
1108 965
1109 Raises: 966 Raises:
1110 ParseError: If a floating point number couldn't be consumed. 967 ParseError: If a floating point number couldn't be consumed.
1111 """ 968 """
1112 try: 969 try:
1113 result = ParseFloat(self.token) 970 result = ParseFloat(self.token)
1114 except ValueError as e: 971 except ValueError as e:
1115 raise self.ParseError(str(e)) 972 raise self._ParseError(str(e))
1116 self.NextToken() 973 self.NextToken()
1117 return result 974 return result
1118 975
1119 def ConsumeBool(self): 976 def ConsumeBool(self):
1120 """Consumes a boolean value. 977 """Consumes a boolean value.
1121 978
1122 Returns: 979 Returns:
1123 The bool parsed. 980 The bool parsed.
1124 981
1125 Raises: 982 Raises:
1126 ParseError: If a boolean value couldn't be consumed. 983 ParseError: If a boolean value couldn't be consumed.
1127 """ 984 """
1128 try: 985 try:
1129 result = ParseBool(self.token) 986 result = ParseBool(self.token)
1130 except ValueError as e: 987 except ValueError as e:
1131 raise self.ParseError(str(e)) 988 raise self._ParseError(str(e))
1132 self.NextToken() 989 self.NextToken()
1133 return result 990 return result
1134 991
1135 def TryConsumeByteString(self): 992 def TryConsumeByteString(self):
1136 try: 993 try:
1137 self.ConsumeByteString() 994 self.ConsumeByteString()
1138 return True 995 return True
1139 except ParseError: 996 except ParseError:
1140 return False 997 return False
1141 998
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
1175 tokens which are automatically concatenated, like in C or Python. This 1032 tokens which are automatically concatenated, like in C or Python. This
1176 method only consumes one token. 1033 method only consumes one token.
1177 1034
1178 Returns: 1035 Returns:
1179 The token parsed. 1036 The token parsed.
1180 Raises: 1037 Raises:
1181 ParseError: When the wrong format data is found. 1038 ParseError: When the wrong format data is found.
1182 """ 1039 """
1183 text = self.token 1040 text = self.token
1184 if len(text) < 1 or text[0] not in _QUOTES: 1041 if len(text) < 1 or text[0] not in _QUOTES:
1185 raise self.ParseError('Expected string but found: %r' % (text,)) 1042 raise self._ParseError('Expected string but found: %r' % (text,))
1186 1043
1187 if len(text) < 2 or text[-1] != text[0]: 1044 if len(text) < 2 or text[-1] != text[0]:
1188 raise self.ParseError('String missing ending quote: %r' % (text,)) 1045 raise self._ParseError('String missing ending quote: %r' % (text,))
1189 1046
1190 try: 1047 try:
1191 result = text_encoding.CUnescape(text[1:-1]) 1048 result = text_encoding.CUnescape(text[1:-1])
1192 except ValueError as e: 1049 except ValueError as e:
1193 raise self.ParseError(str(e)) 1050 raise self._ParseError(str(e))
1194 self.NextToken() 1051 self.NextToken()
1195 return result 1052 return result
1196 1053
1197 def ConsumeEnum(self, field): 1054 def ConsumeEnum(self, field):
1198 try: 1055 try:
1199 result = ParseEnum(field, self.token) 1056 result = ParseEnum(field, self.token)
1200 except ValueError as e: 1057 except ValueError as e:
1201 raise self.ParseError(str(e)) 1058 raise self._ParseError(str(e))
1202 self.NextToken() 1059 self.NextToken()
1203 return result 1060 return result
1204 1061
1205 def ParseErrorPreviousToken(self, message): 1062 def ParseErrorPreviousToken(self, message):
1206 """Creates and *returns* a ParseError for the previously read token. 1063 """Creates and *returns* a ParseError for the previously read token.
1207 1064
1208 Args: 1065 Args:
1209 message: A message to set for the exception. 1066 message: A message to set for the exception.
1210 1067
1211 Returns: 1068 Returns:
1212 A ParseError instance. 1069 A ParseError instance.
1213 """ 1070 """
1214 return ParseError(message, self._previous_line + 1, 1071 return ParseError('%d:%d : %s' % (
1215 self._previous_column + 1) 1072 self._previous_line + 1, self._previous_column + 1, message))
1216 1073
1217 def ParseError(self, message): 1074 def _ParseError(self, message):
1218 """Creates and *returns* a ParseError for the current token.""" 1075 """Creates and *returns* a ParseError for the current token."""
1219 return ParseError(message, self._line + 1, self._column + 1) 1076 return ParseError('%d:%d : %s' % (
1077 self._line + 1, self._column + 1, message))
1220 1078
1221 def _StringParseError(self, e): 1079 def _StringParseError(self, e):
1222 return self.ParseError('Couldn\'t parse string: ' + str(e)) 1080 return self._ParseError('Couldn\'t parse string: ' + str(e))
1223 1081
  def NextToken(self):
    """Reads the next meaningful token."""
    # Remember where the current token started so error messages can point
    # at the token that was just consumed.
    self._previous_line = self._line
    self._previous_column = self._column

    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    # When comments are not skipped as whitespace, they are themselves tokens.
    if not match and not self._skip_comments:
      match = self._COMMENT.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # Fall back to a single character (e.g. punctuation like '{' or ':').
      self.token = self._current_line[self._column]
1244 1100
# Aliased so it can still be accessed by current visibility violators.
# TODO(dbarnett): Migrate violators to textformat_tokenizer.
_Tokenizer = Tokenizer  # pylint: disable=invalid-name
1248
1249
def _ConsumeInt32(tokenizer):
  """Reads a signed 32bit integer token from the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 32bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_long=False, is_signed=True)
1263
1264
def _ConsumeUint32(tokenizer):
  """Reads an unsigned 32bit integer token from the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 32bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_long=False, is_signed=False)
1278
1279
def _TryConsumeInt64(tokenizer):
  """Returns True and advances iff a signed 64bit integer was consumed."""
  try:
    _ConsumeInt64(tokenizer)
  except ParseError:
    return False
  return True
1286
1287
def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)
1301
1302
def _TryConsumeUint64(tokenizer):
  """Returns True and advances iff an unsigned 64bit integer was consumed."""
  try:
    _ConsumeUint64(tokenizer)
  except ParseError:
    return False
  return True
1309
1310
def _ConsumeUint64(tokenizer):
  """Reads an unsigned 64bit integer token from the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_long=True, is_signed=False)
1324
1325
def _TryConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Returns True and advances iff an integer of the given kind was consumed."""
  try:
    _ConsumeInteger(tokenizer, is_signed=is_signed, is_long=is_long)
  except ParseError:
    return False
  return True
1332
1333
def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  try:
    value = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long)
  except ValueError as e:
    raise tokenizer.ParseError(str(e))
  tokenizer.NextToken()
  return value
1354
1355 1101
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Parse the raw value first; any ValueError propagates to the caller.
  value = _ParseAbstractInteger(text, is_long=is_long)

  # Range-check the value; _INTEGER_CHECKERS is indexed by the two flags
  # packed into two bits. Exceptions are handled by callers.
  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
  checker.CheckValue(value)
  return value
1377
1378
1379 def _ParseAbstractInteger(text, is_long=False):
1380 """Parses an integer without checking size/signedness.
1381
1382 Args:
1383 text: The text to parse.
1384 is_long: True if the value should be returned as a long integer.
1385
1386 Returns:
1387 The integer value.
1388
1389 Raises:
1390 ValueError: Thrown Iff the text is not a valid integer.
1391 """
1392 # Do the actual parsing. Exception handling is propagated to caller.
1393 try: 1117 try:
1394 # We force 32-bit values to int and 64-bit values to long to make 1118 # We force 32-bit values to int and 64-bit values to long to make
1395 # alternate implementations where the distinction is more significant 1119 # alternate implementations where the distinction is more significant
1396 # (e.g. the C++ implementation) simpler. 1120 # (e.g. the C++ implementation) simpler.
1397 if is_long: 1121 if is_long:
1398 return long(text, 0) 1122 result = long(text, 0)
1399 else: 1123 else:
1400 return int(text, 0) 1124 result = int(text, 0)
1401 except ValueError: 1125 except ValueError:
1402 raise ValueError('Couldn\'t parse integer: %s' % text) 1126 raise ValueError('Couldn\'t parse integer: %s' % text)
1403 1127
1128 # Check if the integer is sane. Exceptions handled by callers.
1129 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
1130 checker.CheckValue(result)
1131 return result
1132
1404 1133
1405 def ParseFloat(text): 1134 def ParseFloat(text):
1406 """Parse a floating point number. 1135 """Parse a floating point number.
1407 1136
1408 Args: 1137 Args:
1409 text: Text to parse. 1138 text: Text to parse.
1410 1139
1411 Returns: 1140 Returns:
1412 The number parsed. 1141 The number parsed.
1413 1142
(...skipping 25 matching lines...) Expand all
1439 1168
1440 Args: 1169 Args:
1441 text: Text to parse. 1170 text: Text to parse.
1442 1171
1443 Returns: 1172 Returns:
1444 Boolean values parsed 1173 Boolean values parsed
1445 1174
1446 Raises: 1175 Raises:
1447 ValueError: If text is not a valid boolean. 1176 ValueError: If text is not a valid boolean.
1448 """ 1177 """
1449 if text in ('true', 't', '1', 'True'): 1178 if text in ('true', 't', '1'):
1450 return True 1179 return True
1451 elif text in ('false', 'f', '0', 'False'): 1180 elif text in ('false', 'f', '0'):
1452 return False 1181 return False
1453 else: 1182 else:
1454 raise ValueError('Expected "true" or "false".') 1183 raise ValueError('Expected "true" or "false".')
1455 1184
1456 1185
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not numeric, so treat the text as an enum identifier.
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (enum_descriptor.full_name, value))
    return enum_value.number
  # Numeric value: resolve it through the number index.
  enum_value = enum_descriptor.values_by_number.get(number, None)
  if enum_value is None:
    raise ValueError('Enum type "%s" has no value with number %d.' %
                     (enum_descriptor.full_name, number))
  return enum_value.number
OLDNEW
« no previous file with comments | « third_party/protobuf/python/google/protobuf/symbol_database.py ('k') | third_party/protobuf/python/setup.cfg » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698