Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(976)

Side by Side Diff: third_party/google/protobuf/text_format.py

Issue 1153333003: Added tools to retrieve CQ builders from a CQ config (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master
Patch Set: Addressed comments Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/google/protobuf/text_encoding.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Protocol Buffers - Google's data interchange format
2 # Copyright 2008 Google Inc. All rights reserved.
3 # http://code.google.com/p/protobuf/
4 #
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are
7 # met:
8 #
9 # * Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer.
11 # * Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following disclaimer
13 # in the documentation and/or other materials provided with the
14 # distribution.
15 # * Neither the name of Google Inc. nor the names of its
16 # contributors may be used to endorse or promote products derived from
17 # this software without specific prior written permission.
18 #
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 #PY25 compatible for GAE.
32 #
33 # Copyright 2007 Google Inc. All Rights Reserved.
34
35 """Contains routines for printing protocol messages in text format."""
36
37 __author__ = 'kenton@google.com (Kenton Varda)'
38
39 import cStringIO
40 import re
41
42 from google.protobuf.internal import type_checkers
43 from google.protobuf import descriptor
44 from google.protobuf import text_encoding
45
# Public API; the underscore-prefixed parse/merge helpers are internal.
__all__ = ['MessageToString', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge']


# Range checkers indexed as 2 * int(is_long) + int(is_signed) by
# ParseInteger(), hence the (u32, i32, u64, i64) order.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Alternate spellings of non-finite floats accepted by ParseFloat(),
# e.g. "inf", "-Infinity", "nanf".
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# C++ field types whose printed form honors the float_format argument.
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
58
59
class Error(Exception):
  """Top-level module error for text_format.

  Base class so callers can catch every error raised by this module.
  """
62
63
class ParseError(Error):
  """Thrown in case of ASCII parsing error.

  The message is prefixed with a 1-based "line:column : " location.
  """
66
67
def MessageToString(message, as_utf8=False, as_one_line=False,
                    pointy_brackets=False, use_index_order=False,
                    float_format=None):
  """Render a protobuf message as a text-format string.

  Pass float_format='.15g' to print floating point values compactly with
  15 digits of precision (the most that IEEE 754 "double" can guarantee).

  Args:
    message: The protocol buffers message.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields using their declaration order in
      the source .proto instead of the default field number order.
    float_format: If set, a "Format Specification Mini-Language" spec used
      for floating point numbers; otherwise str() is used.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  buf = cStringIO.StringIO()
  PrintMessage(message, buf, as_utf8=as_utf8, as_one_line=as_one_line,
               pointy_brackets=pointy_brackets,
               use_index_order=use_index_order,
               float_format=float_format)
  text = buf.getvalue()
  buf.close()
  # One-line output ends with a trailing field separator; drop it.
  return text.rstrip() if as_one_line else text
102
103
def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
                 pointy_brackets=False, use_index_order=False,
                 float_format=None):
  """Write the text-format rendering of every set field of message to out."""
  fields = message.ListFields()
  if use_index_order:
    # Sort by declaration order in the .proto file rather than field number.
    fields.sort(key=lambda entry: entry[0].index)
  for field, value in fields:
    # A repeated field prints one line/entry per element; treat a singular
    # field as a one-element sequence so both cases share the same loop.
    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      elements = value
    else:
      elements = [value]
    for element in elements:
      PrintField(field, element, out, indent, as_utf8, as_one_line,
                 pointy_brackets=pointy_brackets,
                 float_format=float_format)
120
121
def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False, float_format=None):
  """Print one field name/value pair to out.

  For repeated fields, value must be a single element, not the whole list.
  """
  out.write(' ' * indent)
  if field.is_extension:
    # Extensions print as "[qualified.name]".  MessageSet items use the
    # contained message's type name instead of the extension field's name.
    is_message_set_item = (
        field.containing_type.GetOptions().message_set_wire_format and
        field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
        field.message_type == field.extension_scope and
        field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL)
    if is_message_set_item:
      name = field.message_type.full_name
    else:
      name = field.full_name
    out.write('[%s]' % name)
  elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
    # For groups, use the capitalized type name rather than the field name.
    out.write(field.message_type.name)
  else:
    out.write(field.name)

  if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # The colon is optional before a message value, and our cross-language
    # golden files omit it there, so only scalar fields get one.
    out.write(': ')

  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
                  pointy_brackets=pointy_brackets,
                  float_format=float_format)
  out.write(' ' if as_one_line else '\n')
156
157
def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
                    as_one_line=False, pointy_brackets=False,
                    float_format=None):
  """Print one field value (no name) to out.

  For repeated fields, value must be a single element, not the whole list.
  """
  openb, closeb = ('<', '>') if pointy_brackets else ('{', '}')
  cpp_type = field.cpp_type

  if cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    if as_one_line:
      out.write(' %s ' % openb)
      PrintMessage(value, out, indent, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   float_format=float_format)
      out.write(closeb)
    else:
      out.write(' %s\n' % openb)
      PrintMessage(value, out, indent + 2, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   float_format=float_format)
      out.write(' ' * indent + closeb)
  elif cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
    # Prefer the enum value's name; fall back to the raw number for values
    # that are not declared in the enum.
    enum_value = field.enum_type.values_by_number.get(value, None)
    out.write(enum_value.name if enum_value is not None else str(value))
  elif cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
    if isinstance(value, unicode):
      out_value = value.encode('utf-8')
    else:
      out_value = value
    # Non-UTF8 chars in a TYPE_BYTES field must always be escaped, so the
    # as_utf8 flag only applies to the other string types.
    out_as_utf8 = (as_utf8 and
                   field.type != descriptor.FieldDescriptor.TYPE_BYTES)
    out.write('"')
    out.write(text_encoding.CEscape(out_value, out_as_utf8))
    out.write('"')
  elif cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
    out.write('true' if value else 'false')
  elif cpp_type in _FLOAT_TYPES and float_format is not None:
    out.write('{1:{0}}'.format(float_format, value))
  else:
    out.write(str(value))
212
213
def _ParseOrMerge(lines, message, allow_multiple_scalars):
  """Merge an ASCII protocol message representation into message.

  Args:
    lines: Lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.
    allow_multiple_scalars: If True, repeated occurrences of a non-repeated
      field (e.g. "foo: 1 foo: 2" for an optional field "foo") are allowed
      and the last value wins; if False they raise.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  tok = _Tokenizer(lines)
  while not tok.AtEnd():
    _MergeField(tok, message, allow_multiple_scalars)
230
231
def Parse(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  # Non-str input is decoded as UTF-8 before splitting into lines.
  if not isinstance(text, str):
    text = text.decode('utf-8')
  return ParseLines(text.split('\n'), message)
247
248
def Merge(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Like Parse(), but repeated values for a non-repeated field are accepted:
  the last value seen wins.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  lines = text.split('\n')
  return MergeLines(lines, message)
266
267
def ParseLines(lines, message):
  """Parses an ASCII representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  # Strict mode: duplicate values for a non-repeated field raise ParseError.
  _ParseOrMerge(lines, message, False)
  return message
283
284
def MergeLines(lines, message):
  """Parses an ASCII representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  # Lenient mode: the last duplicate value for a non-repeated field wins.
  _ParseOrMerge(lines, message, True)
  return message
300
301
def _MergeField(tokenizer, message, allow_multiple_scalars):
  """Merges a single protocol message field into a message.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
  """
  message_descriptor = message.DESCRIPTOR
  if tokenizer.TryConsume('['):
    # Extension field, written as "[qualified.name]": collect the
    # dotted identifier between the brackets.
    name = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      name.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(name)

    if not message_descriptor.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          message_descriptor.full_name)
    # pylint: disable=protected-access
    field = message.Extensions._FindExtensionByName(name)
    # pylint: enable=protected-access
    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" not registered.' % name)
    elif message_descriptor != field.containing_type:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" does not extend message type "%s".' % (
              name, message_descriptor.full_name))
    tokenizer.Consume(']')
  else:
    name = tokenizer.ConsumeIdentifier()
    field = message_descriptor.fields_by_name.get(name, None)

    # Group names are expected to be capitalized as they appear in the
    # .proto file, which actually matches their type names, not their field
    # names.
    if not field:
      field = message_descriptor.fields_by_name.get(name.lower(), None)
      if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
        field = None

    if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
        field.message_type.name != name):
      field = None

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' % (
              message_descriptor.full_name, name))

  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # The colon is optional before an embedded message value.
    tokenizer.TryConsume(':')

    # Message values are delimited either by <...> or by {...}.
    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        sub_message = message.Extensions[field].add()
      else:
        sub_message = getattr(message, field.name).add()
    else:
      if field.is_extension:
        sub_message = message.Extensions[field]
      else:
        sub_message = getattr(message, field.name)
      # Mark the singular sub-message present even if it stays empty.
      sub_message.SetInParent()

    # Recursively merge fields until the matching close delimiter.
    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
      _MergeField(tokenizer, sub_message, allow_multiple_scalars)
  else:
    _MergeScalarField(tokenizer, message, field, allow_multiple_scalars)

  # For historical reasons, fields may optionally be separated by commas or
  # semicolons.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')
390
391
def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):
  """Merges a single protocol message scalar field into a message.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: A protocol message to record the data.
    field: The descriptor of the field to be merged.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
    RuntimeError: On runtime errors.
  """
  tokenizer.Consume(':')
  fd = descriptor.FieldDescriptor

  # Dispatch on the wire type to the matching tokenizer consumer.
  if field.type in (fd.TYPE_INT32, fd.TYPE_SINT32, fd.TYPE_SFIXED32):
    value = tokenizer.ConsumeInt32()
  elif field.type in (fd.TYPE_INT64, fd.TYPE_SINT64, fd.TYPE_SFIXED64):
    value = tokenizer.ConsumeInt64()
  elif field.type in (fd.TYPE_UINT32, fd.TYPE_FIXED32):
    value = tokenizer.ConsumeUint32()
  elif field.type in (fd.TYPE_UINT64, fd.TYPE_FIXED64):
    value = tokenizer.ConsumeUint64()
  elif field.type in (fd.TYPE_FLOAT, fd.TYPE_DOUBLE):
    value = tokenizer.ConsumeFloat()
  elif field.type == fd.TYPE_BOOL:
    value = tokenizer.ConsumeBool()
  elif field.type == fd.TYPE_STRING:
    value = tokenizer.ConsumeString()
  elif field.type == fd.TYPE_BYTES:
    value = tokenizer.ConsumeByteString()
  elif field.type == fd.TYPE_ENUM:
    value = tokenizer.ConsumeEnum(field)
  else:
    raise RuntimeError('Unknown field type %d' % field.type)

  if field.label == fd.LABEL_REPEATED:
    # Repeated fields accumulate every occurrence.
    if field.is_extension:
      message.Extensions[field].append(value)
    else:
      getattr(message, field.name).append(value)
  elif field.is_extension:
    if not allow_multiple_scalars and message.HasExtension(field):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" extensions.' %
          (message.DESCRIPTOR.full_name, field.full_name))
    message.Extensions[field] = value
  else:
    if not allow_multiple_scalars and message.HasField(field.name):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" fields.' %
          (message.DESCRIPTOR.full_name, field.name))
    setattr(message, field.name, value)
458
459
class _Tokenizer(object):
  """Protocol buffer ASCII representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  # Inter-token filler: whitespace plus '#' comments through end of line.
  _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
  _TOKEN = re.compile(
      '[a-zA-Z_][0-9a-zA-Z_+-]*|'  # an identifier
      '[0-9+-][0-9a-zA-Z_.+-]*|'  # a number
      '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
      '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
  _IDENTIFIER = re.compile(r'\w+')

  def __init__(self, lines):
    self._position = 0
    # 0-based line/column of the current token; _line starts at -1 so the
    # first _PopLine() call below advances it to 0.
    self._line = -1
    self._column = 0
    self._token_start = None
    # Current token text; '' once the end of input is reached (see AtEnd).
    self.token = ''
    self._lines = iter(lines)
    self._current_line = ''
    # Location of the previously returned token, for error reporting.
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._SkipWhitespace()
    self.NextToken()

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    """Advances to the next input line once the current one is consumed."""
    while len(self._current_line) <= self._column:
      try:
        # Python 2 iterator protocol (this module targets Python 2).
        self._current_line = self._lines.next()
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0

  def _SkipWhitespace(self):
    """Skips whitespace and '#' comments up to the next token."""
    while True:
      self._PopLine()
      match = self._WHITESPACE.match(self._current_line, self._column)
      if not match:
        break
      length = len(match.group(0))
      self._column += length

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self._ParseError('Expected "%s".' % token)

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self._ParseError('Expected identifier.')
    self.NextToken()
    return result

  def ConsumeInt32(self):
    """Consumes a signed 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 32bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=True, is_long=False)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUint32(self):
    """Consumes an unsigned 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 32bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=False, is_long=False)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeInt64(self):
    """Consumes a signed 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 64bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=True, is_long=True)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUint64(self):
    """Consumes an unsigned 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 64bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=False, is_long=True)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeFloat(self):
    """Consumes a floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed, as a unicode object decoded from UTF-8.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      return unicode(the_bytes, 'utf-8')
    except UnicodeDecodeError, e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Adjacent string literal tokens are concatenated, as in C or Python.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in ('\'', '"'):
      the_list.append(self._ConsumeSingleByteString())
    return ''.encode('latin1').join(the_list)  ##PY25
##!PY25    return b''.join(the_list)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python.  This
    method only consumes one token.

    Raises:
      ParseError: If the token is not a complete quoted string.
    """
    text = self.token
    if len(text) < 1 or text[0] not in ('\'', '"'):
      raise self._ParseError('Expected string.')

    if len(text) < 2 or text[-1] != text[0]:
      raise self._ParseError('String missing ending quote.')

    try:
      # Strip the quotes and process C-style escapes.
      result = text_encoding.CUnescape(text[1:-1])
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    """Consumes an enum value (name or number) for the given enum field.

    Args:
      field: Enum field descriptor.

    Returns:
      The enum value's number.

    Raises:
      ParseError: If an enum value couldn't be consumed.
    """
    try:
      result = ParseEnum(field, self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    return ParseError('%d:%d : %s' % (
        self._previous_line + 1, self._previous_column + 1, message))

  def _ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    return ParseError('%d:%d : %s' % (
        self._line + 1, self._column + 1, message))

  def _StringParseError(self, e):
    """Creates and *returns* a ParseError for a bad string literal."""
    return self._ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    # Remember where the current token was, for ParseErrorPreviousToken().
    self._previous_line = self._line
    self._previous_column = self._column

    # Step past the current token, then past whitespace/comments.
    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # No token pattern matched; fall back to a single-character token.
      self.token = self._current_line[self._column]
754
755
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  try:
    # 32-bit values are forced to int and 64-bit values to long so that
    # alternate implementations where the distinction is more significant
    # (e.g. the C++ implementation) stay simple.
    result = long(text, 0) if is_long else int(text, 0)
  except ValueError:
    raise ValueError('Couldn\'t parse integer: %s' % text)

  # Range-check against the matching checker; it raises for out-of-range
  # values, and the exception propagates to the caller.
  checker_index = 2 * int(is_long) + int(is_signed)
  _INTEGER_CHECKERS[checker_index].CheckValue(result)
  return result
786
787
def ParseFloat(text):
  """Parse a floating point number.

  Accepts Python float syntax plus alternate spellings such as "inf",
  "-Infinity", "nan", and trailing-'f' literals like "1.0f".

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  try:
    # Fast path: Python-compatible syntax.
    return float(text)
  except ValueError:
    pass
  # Alternative spellings.
  if _FLOAT_INFINITY.match(text):
    return float('-inf') if text[0] == '-' else float('inf')
  if _FLOAT_NAN.match(text):
    return float('nan')
  # Assume '1.0f' format: drop the trailing 'f' and retry.
  try:
    return float(text.rstrip('f'))
  except ValueError:
    raise ValueError('Couldn\'t parse float: %s' % text)
818
819
def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    Boolean values parsed

  Raises:
    ValueError: If text is not a valid boolean.
  """
  if text in ('true', 't', '1'):
    return True
  if text in ('false', 'f', '0'):
    return False
  raise ValueError('Expected "true" or "false".')
838
839
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not numeric: treat the value as an identifier and look it up by name.
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError(
          'Enum type "%s" has no value named %s.' % (
              enum_descriptor.full_name, value))
    return enum_value.number
  # Numeric: the number must map to a declared enum value.
  enum_value = enum_descriptor.values_by_number.get(number, None)
  if enum_value is None:
    raise ValueError(
        'Enum type "%s" has no value with number %d.' % (
            enum_descriptor.full_name, number))
  return enum_value.number
OLDNEW
« no previous file with comments | « third_party/google/protobuf/text_encoding.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698