OLD | NEW |
| (Empty) |
1 # Protocol Buffers - Google's data interchange format | |
2 # Copyright 2008 Google Inc. All rights reserved. | |
3 # http://code.google.com/p/protobuf/ | |
4 # | |
5 # Redistribution and use in source and binary forms, with or without | |
6 # modification, are permitted provided that the following conditions are | |
7 # met: | |
8 # | |
9 # * Redistributions of source code must retain the above copyright | |
10 # notice, this list of conditions and the following disclaimer. | |
11 # * Redistributions in binary form must reproduce the above | |
12 # copyright notice, this list of conditions and the following disclaimer | |
13 # in the documentation and/or other materials provided with the | |
14 # distribution. | |
15 # * Neither the name of Google Inc. nor the names of its | |
16 # contributors may be used to endorse or promote products derived from | |
17 # this software without specific prior written permission. | |
18 # | |
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
30 | |
# PY25 compatible for Google App Engine (GAE): avoids syntax newer than Python 2.5.
32 # | |
33 # Copyright 2007 Google Inc. All Rights Reserved. | |
34 | |
35 """Contains routines for printing protocol messages in text format.""" | |
36 | |
37 __author__ = 'kenton@google.com (Kenton Varda)' | |
38 | |
39 import cStringIO | |
40 import re | |
41 | |
42 from google.protobuf.internal import type_checkers | |
43 from google.protobuf import descriptor | |
44 from google.protobuf import text_encoding | |
45 | |
__all__ = ['MessageToString', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge']


# Value-range checkers indexed by 2 * int(is_long) + int(is_signed);
# see ParseInteger for the lookup.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Alternative infinity/NaN spellings emitted by other protobuf
# implementations (e.g. "inf", "-Infinity", "nanf").
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# cpp_types whose printed values honor the float_format option.
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
58 | |
59 | |
class Error(Exception):
  """Top-level module error for text_format.

  Base class of this module's exception hierarchy.
  """
62 | |
63 | |
class ParseError(Error):
  """Thrown in case of ASCII parsing error (by Parse/Merge and helpers)."""
66 | |
67 | |
def MessageToString(message, as_utf8=False, as_one_line=False,
                    pointy_brackets=False, use_index_order=False,
                    float_format=None):
  """Convert protobuf message to text format.

  Floating point values can be formatted compactly with 15 digits of
  precision (which is the most that IEEE 754 "double" can guarantee)
  using float_format='.15g'.

  Args:
    message: The protocol buffers message.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  buf = cStringIO.StringIO()
  PrintMessage(message, buf, as_utf8=as_utf8, as_one_line=as_one_line,
               pointy_brackets=pointy_brackets,
               use_index_order=use_index_order,
               float_format=float_format)
  text = buf.getvalue()
  buf.close()
  # Single-line output ends with a trailing separator; drop it.
  return text.rstrip() if as_one_line else text
102 | |
103 | |
def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
                 pointy_brackets=False, use_index_order=False,
                 float_format=None):
  """Prints every set field of a message to a file-like object.

  Args:
    message: The protocol buffers message to print.
    out: A file-like object the text output is written to.
    indent: Number of spaces each emitted line is indented by.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields using the order defined in source
      code instead of the field number order.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.
  """
  fields = message.ListFields()
  if use_index_order:
    # FieldDescriptor.index is the declaration order in the .proto file.
    fields.sort(key=lambda x: x[0].index)
  for field, value in fields:
    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      # Repeated fields print one name/value pair per element.
      for element in value:
        PrintField(field, element, out, indent, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   float_format=float_format)
    else:
      PrintField(field, value, out, indent, as_utf8, as_one_line,
                 pointy_brackets=pointy_brackets,
                 float_format=float_format)
120 | |
121 | |
def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False, float_format=None):
  """Print a single field name/value pair.  For repeated fields, the value
  should be a single element.

  Args:
    field: The FieldDescriptor of the field to print.
    value: The field value (a single element for repeated fields).
    out: A file-like object the text output is written to.
    indent: Number of spaces of leading indentation.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.
  """

  out.write(' ' * indent)
  if field.is_extension:
    # Extensions are printed with their full name in square brackets.
    out.write('[')
    if (field.containing_type.GetOptions().message_set_wire_format and
        field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
        field.message_type == field.extension_scope and
        field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
      # MessageSet-style extensions print the message type's full name.
      out.write(field.message_type.full_name)
    else:
      out.write(field.full_name)
    out.write(']')
  elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
    # For groups, use the capitalized name.
    out.write(field.message_type.name)
  else:
    out.write(field.name)

  if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # The colon is optional in this case, but our cross-language golden files
    # don't include it.
    out.write(': ')

  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
                  pointy_brackets=pointy_brackets,
                  float_format=float_format)
  if as_one_line:
    out.write(' ')
  else:
    out.write('\n')
156 | |
157 | |
def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
                    as_one_line=False, pointy_brackets=False,
                    float_format=None):
  """Print a single field value (not including name).  For repeated fields,
  the value should be a single element.

  Args:
    field: The FieldDescriptor of the field whose value is printed.
    value: The value to print (a single element for repeated fields).
    out: A file-like object the text output is written to.
    indent: Indentation level used when printing nested messages.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.
  """

  if pointy_brackets:
    openb = '<'
    closeb = '>'
  else:
    openb = '{'
    closeb = '}'

  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    if as_one_line:
      out.write(' %s ' % openb)
      PrintMessage(value, out, indent, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   float_format=float_format)
      out.write(closeb)
    else:
      # Nested messages indent their contents by two extra spaces.
      out.write(' %s\n' % openb)
      PrintMessage(value, out, indent + 2, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   float_format=float_format)
      out.write(' ' * indent + closeb)
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
    enum_value = field.enum_type.values_by_number.get(value, None)
    if enum_value is not None:
      out.write(enum_value.name)
    else:
      # Unknown enum numbers are printed as their raw value.
      out.write(str(value))
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
    out.write('\"')
    if isinstance(value, unicode):
      out_value = value.encode('utf-8')
    else:
      out_value = value
    if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
      # We need to escape non-UTF8 chars in TYPE_BYTES field.
      out_as_utf8 = False
    else:
      out_as_utf8 = as_utf8
    out.write(text_encoding.CEscape(out_value, out_as_utf8))
    out.write('\"')
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
    if value:
      out.write('true')
    else:
      out.write('false')
  elif field.cpp_type in _FLOAT_TYPES and float_format is not None:
    # Apply the caller-supplied format spec to the float value.
    out.write('{1:{0}}'.format(float_format, value))
  else:
    out.write(str(value))
212 | |
213 | |
def _ParseOrMerge(lines, message, allow_multiple_scalars):
  """Converts an ASCII representation of a protocol message into a message.

  Args:
    lines: Lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: On ASCII parsing problems.
  """
  tok = _Tokenizer(lines)
  # Consume one top-level field per iteration until the input is exhausted.
  while not tok.AtEnd():
    _MergeField(tok, message, allow_multiple_scalars)
230 | |
231 | |
def Parse(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Repeated values for a non-repeated field are rejected; use Merge() to
  accept the last value instead.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  if not isinstance(text, str):
    # Byte input: decode to text before splitting into lines.
    text = text.decode('utf-8')
  return ParseLines(text.split('\n'), message)
247 | |
248 | |
def Merge(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  lines = text.split('\n')
  return MergeLines(lines, message)
266 | |
267 | |
def ParseLines(lines, message):
  """Parses an ASCII representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  # Strict mode: duplicate scalar values for a singular field are an error.
  _ParseOrMerge(lines, message, False)
  return message
283 | |
284 | |
def MergeLines(lines, message):
  """Parses an ASCII representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  # Lenient mode: duplicate scalar values overwrite, keeping the last one.
  _ParseOrMerge(lines, message, True)
  return message
300 | |
301 | |
def _MergeField(tokenizer, message, allow_multiple_scalars):
  """Merges a single protocol message field into a message.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
  """
  message_descriptor = message.DESCRIPTOR
  if tokenizer.TryConsume('['):
    # Extension field, written as "[package.name]".
    name = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      name.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(name)

    if not message_descriptor.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          message_descriptor.full_name)
    # pylint: disable=protected-access
    field = message.Extensions._FindExtensionByName(name)
    # pylint: enable=protected-access
    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" not registered.' % name)
    elif message_descriptor != field.containing_type:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" does not extend message type "%s".' % (
              name, message_descriptor.full_name))
    tokenizer.Consume(']')
  else:
    name = tokenizer.ConsumeIdentifier()
    field = message_descriptor.fields_by_name.get(name, None)

    # Group names are expected to be capitalized as they appear in the
    # .proto file, which actually matches their type names, not their field
    # names.
    if not field:
      field = message_descriptor.fields_by_name.get(name.lower(), None)
      if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
        field = None

    if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
        field.message_type.name != name):
      field = None

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' % (
              message_descriptor.full_name, name))

  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # Sub-message: the colon is optional; body is delimited by {} or <>.
    tokenizer.TryConsume(':')

    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        sub_message = message.Extensions[field].add()
      else:
        sub_message = getattr(message, field.name).add()
    else:
      if field.is_extension:
        sub_message = message.Extensions[field]
      else:
        sub_message = getattr(message, field.name)
      # Mark the singular sub-message present even if its body is empty.
      sub_message.SetInParent()

    # Recursively merge fields until the matching close delimiter.
    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
      _MergeField(tokenizer, sub_message, allow_multiple_scalars)
  else:
    _MergeScalarField(tokenizer, message, field, allow_multiple_scalars)

  # For historical reasons, fields may optionally be separated by commas or
  # semicolons.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')
390 | |
391 | |
def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):
  """Merges a single protocol message scalar field into a message.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: A protocol message to record the data.
    field: The descriptor of the field to be merged.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
    RuntimeError: On runtime errors.
  """
  tokenizer.Consume(':')
  fd = descriptor.FieldDescriptor
  ftype = field.type

  # Consume the value token(s) according to the field's wire type.
  if ftype in (fd.TYPE_INT32, fd.TYPE_SINT32, fd.TYPE_SFIXED32):
    value = tokenizer.ConsumeInt32()
  elif ftype in (fd.TYPE_INT64, fd.TYPE_SINT64, fd.TYPE_SFIXED64):
    value = tokenizer.ConsumeInt64()
  elif ftype in (fd.TYPE_UINT32, fd.TYPE_FIXED32):
    value = tokenizer.ConsumeUint32()
  elif ftype in (fd.TYPE_UINT64, fd.TYPE_FIXED64):
    value = tokenizer.ConsumeUint64()
  elif ftype in (fd.TYPE_FLOAT, fd.TYPE_DOUBLE):
    value = tokenizer.ConsumeFloat()
  elif ftype == fd.TYPE_BOOL:
    value = tokenizer.ConsumeBool()
  elif ftype == fd.TYPE_STRING:
    value = tokenizer.ConsumeString()
  elif ftype == fd.TYPE_BYTES:
    value = tokenizer.ConsumeByteString()
  elif ftype == fd.TYPE_ENUM:
    value = tokenizer.ConsumeEnum(field)
  else:
    raise RuntimeError('Unknown field type %d' % ftype)

  # Store the value on the message.
  if field.label == fd.LABEL_REPEATED:
    # Repeated fields simply accumulate values.
    if field.is_extension:
      message.Extensions[field].append(value)
    else:
      getattr(message, field.name).append(value)
  elif field.is_extension:
    if not allow_multiple_scalars and message.HasExtension(field):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" extensions.' %
          (message.DESCRIPTOR.full_name, field.full_name))
    message.Extensions[field] = value
  else:
    if not allow_multiple_scalars and message.HasField(field.name):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" fields.' %
          (message.DESCRIPTOR.full_name, field.name))
    setattr(message, field.name, value)
458 | |
459 | |
class _Tokenizer(object):
  """Protocol buffer ASCII representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  # Runs of whitespace and '#'-comments (which extend to end of line).
  _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
  _TOKEN = re.compile(
      '[a-zA-Z_][0-9a-zA-Z_+-]*|'  # an identifier
      '[0-9+-][0-9a-zA-Z_.+-]*|'  # a number
      '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
      '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
  _IDENTIFIER = re.compile(r'\w+')

  def __init__(self, lines):
    self._position = 0
    self._line = -1
    self._column = 0
    self._token_start = None
    # The current token; the empty string signals end of input.
    self.token = ''
    self._lines = iter(lines)
    self._current_line = ''
    # Line/column of the previously read token, for error reporting.
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._SkipWhitespace()
    self.NextToken()

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    # Advances to the next input line whenever the current one is exhausted.
    while len(self._current_line) <= self._column:
      try:
        self._current_line = self._lines.next()
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0

  def _SkipWhitespace(self):
    # Skips whitespace and comments, pulling in new lines as needed.
    while True:
      self._PopLine()
      match = self._WHITESPACE.match(self._current_line, self._column)
      if not match:
        break
      length = len(match.group(0))
      self._column += length

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self._ParseError('Expected "%s".' % token)

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self._ParseError('Expected identifier.')
    self.NextToken()
    return result

  def ConsumeInt32(self):
    """Consumes a signed 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 32bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=True, is_long=False)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUint32(self):
    """Consumes an unsigned 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 32bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=False, is_long=False)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeInt64(self):
    """Consumes a signed 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 64bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=True, is_long=True)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUint64(self):
    """Consumes an unsigned 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 64bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=False, is_long=True)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeFloat(self):
    """Consumes an floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      return unicode(the_bytes, 'utf-8')
    except UnicodeDecodeError, e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    # Adjacent string literals are concatenated, like in C or Python source.
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in ('\'', '"'):
      the_list.append(self._ConsumeSingleByteString())
    return ''.encode('latin1').join(the_list)  ##PY25
##!PY25    return b''.join(the_list)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python.  This
    method only consumes one token.
    """
    text = self.token
    if len(text) < 1 or text[0] not in ('\'', '"'):
      raise self._ParseError('Expected string.')

    if len(text) < 2 or text[-1] != text[0]:
      raise self._ParseError('String missing ending quote.')

    try:
      result = text_encoding.CUnescape(text[1:-1])
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    """Consumes an enum value (given by name or number) for field.

    Args:
      field: Enum field descriptor whose values the token is checked against.

    Returns:
      Enum value number.

    Raises:
      ParseError: If the current token is not a valid value of the enum.
    """
    try:
      result = ParseEnum(field, self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    return ParseError('%d:%d : %s' % (
        self._previous_line + 1, self._previous_column + 1, message))

  def _ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    return ParseError('%d:%d : %s' % (
        self._line + 1, self._column + 1, message))

  def _StringParseError(self, e):
    # Wraps a UnicodeDecodeError into a positioned ParseError.
    return self._ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    self._previous_line = self._line
    self._previous_column = self._column

    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # No regex match: fall back to a single-character token (punctuation).
      self.token = self._current_line[self._column]
754 | |
755 | |
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # We force 32-bit values to int and 64-bit values to long to make
  # alternate implementations where the distinction is more significant
  # (e.g. the C++ implementation) simpler.
  parse = long if is_long else int
  try:
    result = parse(text, 0)
  except ValueError:
    raise ValueError('Couldn\'t parse integer: %s' % text)

  # Range-check the value against the matching signed/unsigned 32/64-bit
  # checker.  Exceptions handled by callers.
  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
  checker.CheckValue(result)
  return result
786 | |
787 | |
def ParseFloat(text):
  """Parse a floating point number.

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  try:
    # Assume Python compatible syntax first.
    return float(text)
  except ValueError:
    pass

  # Check alternative spellings used by other protobuf implementations.
  if _FLOAT_INFINITY.match(text):
    return float('-inf') if text[0] == '-' else float('inf')
  if _FLOAT_NAN.match(text):
    return float('nan')
  # Assume C-style '1.0f' format.
  try:
    return float(text.rstrip('f'))
  except ValueError:
    raise ValueError('Couldn\'t parse float: %s' % text)
818 | |
819 | |
def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    Boolean values parsed

  Raises:
    ValueError: If text is not a valid boolean.
  """
  if text in ('true', 't', '1'):
    return True
  if text in ('false', 'f', '0'):
    return False
  raise ValueError('Expected "true" or "false".')
838 | |
839 | |
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not numeric: treat the token as an enum value name.
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError(
          'Enum type "%s" has no value named %s.' % (
              enum_descriptor.full_name, value))
    return enum_value.number

  # Numeric value: it must correspond to a declared enum value.
  enum_value = enum_descriptor.values_by_number.get(number, None)
  if enum_value is None:
    raise ValueError(
        'Enum type "%s" has no value with number %d.' % (
            enum_descriptor.full_name, number))
  return enum_value.number
OLD | NEW |