third_party/google/protobuf/text_format.py - Issue 1153333003: Added tools to retrieve CQ builders from a CQ config

Side by Side Diff: third_party/google/protobuf/text_format.py

Issue 1153333003: Added tools to retrieve CQ builders from a CQ config (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/depot_tools.git@master

Patch Set: Addressed comments Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 # Protocol Buffers - Google's data interchange format

	2 # Copyright 2008 Google Inc. All rights reserved.

	3 # http://code.google.com/p/protobuf/

	4 #

	5 # Redistribution and use in source and binary forms, with or without

	6 # modification, are permitted provided that the following conditions are

	7 # met:

	8 #

	9 # * Redistributions of source code must retain the above copyright

	10 # notice, this list of conditions and the following disclaimer.

	11 # * Redistributions in binary form must reproduce the above

	12 # copyright notice, this list of conditions and the following disclaimer

	13 # in the documentation and/or other materials provided with the

	14 # distribution.

	15 # * Neither the name of Google Inc. nor the names of its

	16 # contributors may be used to endorse or promote products derived from

	17 # this software without specific prior written permission.

	18 #

	19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

	20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

	21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

	22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

	23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

	24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

	25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

	26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

	27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

	28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

	29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	30

	31 #PY25 compatible for GAE.

	32 #

	33 # Copyright 2007 Google Inc. All Rights Reserved.

	34

	35 """Contains routines for printing protocol messages in text format."""

	36

	37 __author__ = 'kenton@google.com (Kenton Varda)'

	38

	39 import cStringIO

	40 import re

	41

	42 from google.protobuf.internal import type_checkers

	43 from google.protobuf import descriptor

	44 from google.protobuf import text_encoding

	45

# Names exported by `from google.protobuf.text_format import *`.
__all__ = ['MessageToString', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge']


# Range checkers applied to parsed integers. ParseInteger() selects one with
# the index 2 * int(is_long) + int(is_signed), so the order of this tuple
# (uint32, int32, uint64, int64) must be kept as is.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Case-insensitive alternative spellings of infinity and NaN that ParseFloat()
# tries after the built-in float() has rejected the text.
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# C++ field types that PrintFieldValue() formats with `float_format` when one
# is supplied.
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])

	58

	59

	60 class Error(Exception):

	61 """Top-level module error for text_format."""

	62

	63

	64 class ParseError(Error):

	65 """Thrown in case of ASCII parsing error."""

	66

	67

	68 def MessageToString(message, as_utf8=False, as_one_line=False,

	69 pointy_brackets=False, use_index_order=False,

	70 float_format=None):

	71 """Convert protobuf message to text format.

	72

	73 Floating point values can be formatted compactly with 15 digits of

	74 precision (which is the most that IEEE 754 "double" can guarantee)

	75 using float_format='.15g'.

	76

	77 Args:

	78 message: The protocol buffers message.

	79 as_utf8: Produce text output in UTF8 format.

	80 as_one_line: Don't introduce newlines between fields.

	81 pointy_brackets: If True, use angle brackets instead of curly braces for

	82 nesting.

	83 use_index_order: If True, print fields of a proto message using the order

	84 defined in source code instead of the field number. By default, use the

	85 field number order.

	86 float_format: If set, use this to specify floating point number formatting

	87 (per the "Format Specification Mini-Language"); otherwise, str() is used.

	88

	89 Returns:

	90 A string of the text formatted protocol buffer message.

	91 """

	92 out = cStringIO.StringIO()

	93 PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line,

	94 pointy_brackets=pointy_brackets,

	95 use_index_order=use_index_order,

	96 float_format=float_format)

	97 result = out.getvalue()

	98 out.close()

	99 if as_one_line:

	100 return result.rstrip()

	101 return result

	102

	103

	104 def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,

	105 pointy_brackets=False, use_index_order=False,

	106 float_format=None):

	107 fields = message.ListFields()

	108 if use_index_order:

	109 fields.sort(key=lambda x: x[0].index)

	110 for field, value in fields:

	111 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:

	112 for element in value:

	113 PrintField(field, element, out, indent, as_utf8, as_one_line,

	114 pointy_brackets=pointy_brackets,

	115 float_format=float_format)

	116 else:

	117 PrintField(field, value, out, indent, as_utf8, as_one_line,

	118 pointy_brackets=pointy_brackets,

	119 float_format=float_format)

	120

	121

	122 def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,

	123 pointy_brackets=False, float_format=None):

	124 """Print a single field name/value pair. For repeated fields, the value

	125 should be a single element."""

	126

	127 out.write(' ' * indent)

	128 if field.is_extension:

	129 out.write('[')

	130 if (field.containing_type.GetOptions().message_set_wire_format and

	131 field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and

	132 field.message_type == field.extension_scope and

	133 field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):

	134 out.write(field.message_type.full_name)

	135 else:

	136 out.write(field.full_name)

	137 out.write(']')

	138 elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:

	139 # For groups, use the capitalized name.

	140 out.write(field.message_type.name)

	141 else:

	142 out.write(field.name)

	143

	144 if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:

	145 # The colon is optional in this case, but our cross-language golden files

	146 # don't include it.

	147 out.write(': ')

	148

	149 PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,

	150 pointy_brackets=pointy_brackets,

	151 float_format=float_format)

	152 if as_one_line:

	153 out.write(' ')

	154 else:

	155 out.write('\n')

	156

	157

	158 def PrintFieldValue(field, value, out, indent=0, as_utf8=False,

	159 as_one_line=False, pointy_brackets=False,

	160 float_format=None):

	161 """Print a single field value (not including name). For repeated fields,

	162 the value should be a single element."""

	163

	164 if pointy_brackets:

	165 openb = '<'

	166 closeb = '>'

	167 else:

	168 openb = '{'

	169 closeb = '}'

	170

	171 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:

	172 if as_one_line:

	173 out.write(' %s ' % openb)

	174 PrintMessage(value, out, indent, as_utf8, as_one_line,

	175 pointy_brackets=pointy_brackets,

	176 float_format=float_format)

	177 out.write(closeb)

	178 else:

	179 out.write(' %s\n' % openb)

	180 PrintMessage(value, out, indent + 2, as_utf8, as_one_line,

	181 pointy_brackets=pointy_brackets,

	182 float_format=float_format)

	183 out.write(' ' * indent + closeb)

	184 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:

	185 enum_value = field.enum_type.values_by_number.get(value, None)

	186 if enum_value is not None:

	187 out.write(enum_value.name)

	188 else:

	189 out.write(str(value))

	190 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:

	191 out.write('\"')

	192 if isinstance(value, unicode):

	193 out_value = value.encode('utf-8')

	194 else:

	195 out_value = value

	196 if field.type == descriptor.FieldDescriptor.TYPE_BYTES:

	197 # We need to escape non-UTF8 chars in TYPE_BYTES field.

	198 out_as_utf8 = False

	199 else:

	200 out_as_utf8 = as_utf8

	201 out.write(text_encoding.CEscape(out_value, out_as_utf8))

	202 out.write('\"')

	203 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:

	204 if value:

	205 out.write('true')

	206 else:

	207 out.write('false')

	208 elif field.cpp_type in _FLOAT_TYPES and float_format is not None:

	209 out.write('{1:{0}}'.format(float_format, value))

	210 else:

	211 out.write(str(value))

	212

	213

	214 def _ParseOrMerge(lines, message, allow_multiple_scalars):

	215 """Converts an ASCII representation of a protocol message into a message.

	216

	217 Args:

	218 lines: Lines of a message's ASCII representation.

	219 message: A protocol buffer message to merge into.

	220 allow_multiple_scalars: Determines if repeated values for a non-repeated

	221 field are permitted, e.g., the string "foo: 1 foo: 2" for a

	222 required/optional field named "foo".

	223

	224 Raises:

	225 ParseError: On ASCII parsing problems.

	226 """

	227 tokenizer = _Tokenizer(lines)

	228 while not tokenizer.AtEnd():

	229 _MergeField(tokenizer, message, allow_multiple_scalars)

	230

	231

	232 def Parse(text, message):

	233 """Parses an ASCII representation of a protocol message into a message.

	234

	235 Args:

	236 text: Message ASCII representation.

	237 message: A protocol buffer message to merge into.

	238

	239 Returns:

	240 The same message passed as argument.

	241

	242 Raises:

	243 ParseError: On ASCII parsing problems.

	244 """

	245 if not isinstance(text, str): text = text.decode('utf-8')

	246 return ParseLines(text.split('\n'), message)

	247

	248

	249 def Merge(text, message):

	250 """Parses an ASCII representation of a protocol message into a message.

	251

	252 Like Parse(), but allows repeated values for a non-repeated field, and uses

	253 the last one.

	254

	255 Args:

	256 text: Message ASCII representation.

	257 message: A protocol buffer message to merge into.

	258

	259 Returns:

	260 The same message passed as argument.

	261

	262 Raises:

	263 ParseError: On ASCII parsing problems.

	264 """

	265 return MergeLines(text.split('\n'), message)

	266

	267

	268 def ParseLines(lines, message):

	269 """Parses an ASCII representation of a protocol message into a message.

	270

	271 Args:

	272 lines: An iterable of lines of a message's ASCII representation.

	273 message: A protocol buffer message to merge into.

	274

	275 Returns:

	276 The same message passed as argument.

	277

	278 Raises:

	279 ParseError: On ASCII parsing problems.

	280 """

	281 _ParseOrMerge(lines, message, False)

	282 return message

	283

	284

	285 def MergeLines(lines, message):

	286 """Parses an ASCII representation of a protocol message into a message.

	287

	288 Args:

	289 lines: An iterable of lines of a message's ASCII representation.

	290 message: A protocol buffer message to merge into.

	291

	292 Returns:

	293 The same message passed as argument.

	294

	295 Raises:

	296 ParseError: On ASCII parsing problems.

	297 """

	298 _ParseOrMerge(lines, message, True)

	299 return message

	300

	301

	302 def _MergeField(tokenizer, message, allow_multiple_scalars):

	303 """Merges a single protocol message field into a message.

	304

	305 Args:

	306 tokenizer: A tokenizer to parse the field name and values.

	307 message: A protocol message to record the data.

	308 allow_multiple_scalars: Determines if repeated values for a non-repeated

	309 field are permitted, e.g., the string "foo: 1 foo: 2" for a

	310 required/optional field named "foo".

	311

	312 Raises:

	313 ParseError: In case of ASCII parsing problems.

	314 """

	315 message_descriptor = message.DESCRIPTOR

	316 if tokenizer.TryConsume('['):

	317 name = [tokenizer.ConsumeIdentifier()]

	318 while tokenizer.TryConsume('.'):

	319 name.append(tokenizer.ConsumeIdentifier())

	320 name = '.'.join(name)

	321

	322 if not message_descriptor.is_extendable:

	323 raise tokenizer.ParseErrorPreviousToken(

	324 'Message type "%s" does not have extensions.' %

	325 message_descriptor.full_name)

	326 # pylint: disable=protected-access

	327 field = message.Extensions._FindExtensionByName(name)

	328 # pylint: enable=protected-access

	329 if not field:

	330 raise tokenizer.ParseErrorPreviousToken(

	331 'Extension "%s" not registered.' % name)

	332 elif message_descriptor != field.containing_type:

	333 raise tokenizer.ParseErrorPreviousToken(

	334 'Extension "%s" does not extend message type "%s".' % (

	335 name, message_descriptor.full_name))

	336 tokenizer.Consume(']')

	337 else:

	338 name = tokenizer.ConsumeIdentifier()

	339 field = message_descriptor.fields_by_name.get(name, None)

	340

	341 # Group names are expected to be capitalized as they appear in the

	342 # .proto file, which actually matches their type names, not their field

	343 # names.

	344 if not field:

	345 field = message_descriptor.fields_by_name.get(name.lower(), None)

	346 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:

	347 field = None

	348

	349 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and

	350 field.message_type.name != name):

	351 field = None

	352

	353 if not field:

	354 raise tokenizer.ParseErrorPreviousToken(

	355 'Message type "%s" has no field named "%s".' % (

	356 message_descriptor.full_name, name))

	357

	358 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:

	359 tokenizer.TryConsume(':')

	360

	361 if tokenizer.TryConsume('<'):

	362 end_token = '>'

	363 else:

	364 tokenizer.Consume('{')

	365 end_token = '}'

	366

	367 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:

	368 if field.is_extension:

	369 sub_message = message.Extensions[field].add()

	370 else:

	371 sub_message = getattr(message, field.name).add()

	372 else:

	373 if field.is_extension:

	374 sub_message = message.Extensions[field]

	375 else:

	376 sub_message = getattr(message, field.name)

	377 sub_message.SetInParent()

	378

	379 while not tokenizer.TryConsume(end_token):

	380 if tokenizer.AtEnd():

	381 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))

	382 _MergeField(tokenizer, sub_message, allow_multiple_scalars)

	383 else:

	384 _MergeScalarField(tokenizer, message, field, allow_multiple_scalars)

	385

	386 # For historical reasons, fields may optionally be separated by commas or

	387 # semicolons.

	388 if not tokenizer.TryConsume(','):

	389 tokenizer.TryConsume(';')

	390

	391

	392 def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):

	393 """Merges a single protocol message scalar field into a message.

	394

	395 Args:

	396 tokenizer: A tokenizer to parse the field value.

	397 message: A protocol message to record the data.

	398 field: The descriptor of the field to be merged.

	399 allow_multiple_scalars: Determines if repeated values for a non-repeated

	400 field are permitted, e.g., the string "foo: 1 foo: 2" for a

	401 required/optional field named "foo".

	402

	403 Raises:

	404 ParseError: In case of ASCII parsing problems.

	405 RuntimeError: On runtime errors.

	406 """

	407 tokenizer.Consume(':')

	408 value = None

	409

	410 if field.type in (descriptor.FieldDescriptor.TYPE_INT32,

	411 descriptor.FieldDescriptor.TYPE_SINT32,

	412 descriptor.FieldDescriptor.TYPE_SFIXED32):

	413 value = tokenizer.ConsumeInt32()

	414 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,

	415 descriptor.FieldDescriptor.TYPE_SINT64,

	416 descriptor.FieldDescriptor.TYPE_SFIXED64):

	417 value = tokenizer.ConsumeInt64()

	418 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,

	419 descriptor.FieldDescriptor.TYPE_FIXED32):

	420 value = tokenizer.ConsumeUint32()

	421 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,

	422 descriptor.FieldDescriptor.TYPE_FIXED64):

	423 value = tokenizer.ConsumeUint64()

	424 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,

	425 descriptor.FieldDescriptor.TYPE_DOUBLE):

	426 value = tokenizer.ConsumeFloat()

	427 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:

	428 value = tokenizer.ConsumeBool()

	429 elif field.type == descriptor.FieldDescriptor.TYPE_STRING:

	430 value = tokenizer.ConsumeString()

	431 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:

	432 value = tokenizer.ConsumeByteString()

	433 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:

	434 value = tokenizer.ConsumeEnum(field)

	435 else:

	436 raise RuntimeError('Unknown field type %d' % field.type)

	437

	438 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:

	439 if field.is_extension:

	440 message.Extensions[field].append(value)

	441 else:

	442 getattr(message, field.name).append(value)

	443 else:

	444 if field.is_extension:

	445 if not allow_multiple_scalars and message.HasExtension(field):

	446 raise tokenizer.ParseErrorPreviousToken(

	447 'Message type "%s" should not have multiple "%s" extensions.' %

	448 (message.DESCRIPTOR.full_name, field.full_name))

	449 else:

	450 message.Extensions[field] = value

	451 else:

	452 if not allow_multiple_scalars and message.HasField(field.name):

	453 raise tokenizer.ParseErrorPreviousToken(

	454 'Message type "%s" should not have multiple "%s" fields.' %

	455 (message.DESCRIPTOR.full_name, field.name))

	456 else:

	457 setattr(message, field.name, value)

	458

	459

	460 class _Tokenizer(object):

	461 """Protocol buffer ASCII representation tokenizer.

	462

	463 This class handles the lower level string parsing by splitting it into

	464 meaningful tokens.

	465

	466 It was directly ported from the Java protocol buffer API.

	467 """

	468

	469 _WHITESPACE = re.compile('(\\s\|(#.*$))+', re.MULTILINE)

	470 _TOKEN = re.compile(

	471 '[a-zA-Z_][0-9a-zA-Z_+-]*\|' # an identifier

	472 '[0-9+-][0-9a-zA-Z_.+-]*\|' # a number

	473 '\"([^\"\n\\\\]\|\\\\.)*(\"\|\\\\?$)\|' # a double-quoted string

	474 '\'([^\'\n\\\\]\|\\\\.)*(\'\|\\\\?$)') # a single-quoted string

	475 _IDENTIFIER = re.compile(r'\w+')

	476

	477 def __init__(self, lines):

	478 self._position = 0

	479 self._line = -1

	480 self._column = 0

	481 self._token_start = None

	482 self.token = ''

	483 self._lines = iter(lines)

	484 self._current_line = ''

	485 self._previous_line = 0

	486 self._previous_column = 0

	487 self._more_lines = True

	488 self._SkipWhitespace()

	489 self.NextToken()

	490

	491 def AtEnd(self):

	492 """Checks the end of the text was reached.

	493

	494 Returns:

	495 True iff the end was reached.

	496 """

	497 return not self.token

	498

	499 def _PopLine(self):

	500 while len(self._current_line) <= self._column:

	501 try:

	502 self._current_line = self._lines.next()

	503 except StopIteration:

	504 self._current_line = ''

	505 self._more_lines = False

	506 return

	507 else:

	508 self._line += 1

	509 self._column = 0

	510

	511 def _SkipWhitespace(self):

	512 while True:

	513 self._PopLine()

	514 match = self._WHITESPACE.match(self._current_line, self._column)

	515 if not match:

	516 break

	517 length = len(match.group(0))

	518 self._column += length

	519

	520 def TryConsume(self, token):

	521 """Tries to consume a given piece of text.

	522

	523 Args:

	524 token: Text to consume.

	525

	526 Returns:

	527 True iff the text was consumed.

	528 """

	529 if self.token == token:

	530 self.NextToken()

	531 return True

	532 return False

	533

	534 def Consume(self, token):

	535 """Consumes a piece of text.

	536

	537 Args:

	538 token: Text to consume.

	539

	540 Raises:

	541 ParseError: If the text couldn't be consumed.

	542 """

	543 if not self.TryConsume(token):

	544 raise self._ParseError('Expected "%s".' % token)

	545

	546 def ConsumeIdentifier(self):

	547 """Consumes protocol message field identifier.

	548

	549 Returns:

	550 Identifier string.

	551

	552 Raises:

	553 ParseError: If an identifier couldn't be consumed.

	554 """

	555 result = self.token

	556 if not self._IDENTIFIER.match(result):

	557 raise self._ParseError('Expected identifier.')

	558 self.NextToken()

	559 return result

	560

	561 def ConsumeInt32(self):

	562 """Consumes a signed 32bit integer number.

	563

	564 Returns:

	565 The integer parsed.

	566

	567 Raises:

	568 ParseError: If a signed 32bit integer couldn't be consumed.

	569 """

	570 try:

	571 result = ParseInteger(self.token, is_signed=True, is_long=False)

	572 except ValueError, e:

	573 raise self._ParseError(str(e))

	574 self.NextToken()

	575 return result

	576

	577 def ConsumeUint32(self):

	578 """Consumes an unsigned 32bit integer number.

	579

	580 Returns:

	581 The integer parsed.

	582

	583 Raises:

	584 ParseError: If an unsigned 32bit integer couldn't be consumed.

	585 """

	586 try:

	587 result = ParseInteger(self.token, is_signed=False, is_long=False)

	588 except ValueError, e:

	589 raise self._ParseError(str(e))

	590 self.NextToken()

	591 return result

	592

	593 def ConsumeInt64(self):

	594 """Consumes a signed 64bit integer number.

	595

	596 Returns:

	597 The integer parsed.

	598

	599 Raises:

	600 ParseError: If a signed 64bit integer couldn't be consumed.

	601 """

	602 try:

	603 result = ParseInteger(self.token, is_signed=True, is_long=True)

	604 except ValueError, e:

	605 raise self._ParseError(str(e))

	606 self.NextToken()

	607 return result

	608

	609 def ConsumeUint64(self):

	610 """Consumes an unsigned 64bit integer number.

	611

	612 Returns:

	613 The integer parsed.

	614

	615 Raises:

	616 ParseError: If an unsigned 64bit integer couldn't be consumed.

	617 """

	618 try:

	619 result = ParseInteger(self.token, is_signed=False, is_long=True)

	620 except ValueError, e:

	621 raise self._ParseError(str(e))

	622 self.NextToken()

	623 return result

	624

	625 def ConsumeFloat(self):

	626 """Consumes an floating point number.

	627

	628 Returns:

	629 The number parsed.

	630

	631 Raises:

	632 ParseError: If a floating point number couldn't be consumed.

	633 """

	634 try:

	635 result = ParseFloat(self.token)

	636 except ValueError, e:

	637 raise self._ParseError(str(e))

	638 self.NextToken()

	639 return result

	640

	641 def ConsumeBool(self):

	642 """Consumes a boolean value.

	643

	644 Returns:

	645 The bool parsed.

	646

	647 Raises:

	648 ParseError: If a boolean value couldn't be consumed.

	649 """

	650 try:

	651 result = ParseBool(self.token)

	652 except ValueError, e:

	653 raise self._ParseError(str(e))

	654 self.NextToken()

	655 return result

	656

	657 def ConsumeString(self):

	658 """Consumes a string value.

	659

	660 Returns:

	661 The string parsed.

	662

	663 Raises:

	664 ParseError: If a string value couldn't be consumed.

	665 """

	666 the_bytes = self.ConsumeByteString()

	667 try:

	668 return unicode(the_bytes, 'utf-8')

	669 except UnicodeDecodeError, e:

	670 raise self._StringParseError(e)

	671

	672 def ConsumeByteString(self):

	673 """Consumes a byte array value.

	674

	675 Returns:

	676 The array parsed (as a string).

	677

	678 Raises:

	679 ParseError: If a byte array value couldn't be consumed.

	680 """

	681 the_list = [self._ConsumeSingleByteString()]

	682 while self.token and self.token[0] in ('\'', '"'):

	683 the_list.append(self._ConsumeSingleByteString())

	684 return ''.encode('latin1').join(the_list) ##PY25

	685 ##!PY25 return b''.join(the_list)

	686

	687 def _ConsumeSingleByteString(self):

	688 """Consume one token of a string literal.

	689

	690 String literals (whether bytes or text) can come in multiple adjacent

	691 tokens which are automatically concatenated, like in C or Python. This

	692 method only consumes one token.

	693 """

	694 text = self.token

	695 if len(text) < 1 or text[0] not in ('\'', '"'):

	696 raise self._ParseError('Expected string.')

	697

	698 if len(text) < 2 or text[-1] != text[0]:

	699 raise self._ParseError('String missing ending quote.')

	700

	701 try:

	702 result = text_encoding.CUnescape(text[1:-1])

	703 except ValueError, e:

	704 raise self._ParseError(str(e))

	705 self.NextToken()

	706 return result

	707

	708 def ConsumeEnum(self, field):

	709 try:

	710 result = ParseEnum(field, self.token)

	711 except ValueError, e:

	712 raise self._ParseError(str(e))

	713 self.NextToken()

	714 return result

	715

	716 def ParseErrorPreviousToken(self, message):

	717 """Creates and returns a ParseError for the previously read token.

	718

	719 Args:

	720 message: A message to set for the exception.

	721

	722 Returns:

	723 A ParseError instance.

	724 """

	725 return ParseError('%d:%d : %s' % (

	726 self._previous_line + 1, self._previous_column + 1, message))

	727

	728 def _ParseError(self, message):

	729 """Creates and returns a ParseError for the current token."""

	730 return ParseError('%d:%d : %s' % (

	731 self._line + 1, self._column + 1, message))

	732

	733 def _StringParseError(self, e):

	734 return self._ParseError('Couldn\'t parse string: ' + str(e))

	735

	736 def NextToken(self):

	737 """Reads the next meaningful token."""

	738 self._previous_line = self._line

	739 self._previous_column = self._column

	740

	741 self._column += len(self.token)

	742 self._SkipWhitespace()

	743

	744 if not self._more_lines:

	745 self.token = ''

	746 return

	747

	748 match = self._TOKEN.match(self._current_line, self._column)

	749 if match:

	750 token = match.group(0)

	751 self.token = token

	752 else:

	753 self.token = self._current_line[self._column]

	754

	755

	756 def ParseInteger(text, is_signed=False, is_long=False):

	757 """Parses an integer.

	758

	759 Args:

	760 text: The text to parse.

	761 is_signed: True if a signed integer must be parsed.

	762 is_long: True if a long integer must be parsed.

	763

	764 Returns:

	765 The integer value.

	766

	767 Raises:

	768 ValueError: Thrown Iff the text is not a valid integer.

	769 """

	770 # Do the actual parsing. Exception handling is propagated to caller.

	771 try:

	772 # We force 32-bit values to int and 64-bit values to long to make

	773 # alternate implementations where the distinction is more significant

	774 # (e.g. the C++ implementation) simpler.

	775 if is_long:

	776 result = long(text, 0)

	777 else:

	778 result = int(text, 0)

	779 except ValueError:

	780 raise ValueError('Couldn\'t parse integer: %s' % text)

	781

	782 # Check if the integer is sane. Exceptions handled by callers.

	783 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]

	784 checker.CheckValue(result)

	785 return result

	786

	787

	788 def ParseFloat(text):

	789 """Parse a floating point number.

	790

	791 Args:

	792 text: Text to parse.

	793

	794 Returns:

	795 The number parsed.

	796

	797 Raises:

	798 ValueError: If a floating point number couldn't be parsed.

	799 """

	800 try:

	801 # Assume Python compatible syntax.

	802 return float(text)

	803 except ValueError:

	804 # Check alternative spellings.

	805 if _FLOAT_INFINITY.match(text):

	806 if text[0] == '-':

	807 return float('-inf')

	808 else:

	809 return float('inf')

	810 elif _FLOAT_NAN.match(text):

	811 return float('nan')

	812 else:

	813 # assume '1.0f' format

	814 try:

	815 return float(text.rstrip('f'))

	816 except ValueError:

	817 raise ValueError('Couldn\'t parse float: %s' % text)

	818

	819

	820 def ParseBool(text):

	821 """Parse a boolean value.

	822

	823 Args:

	824 text: Text to parse.

	825

	826 Returns:

	827 Boolean values parsed

	828

	829 Raises:

	830 ValueError: If text is not a valid boolean.

	831 """

	832 if text in ('true', 't', '1'):

	833 return True

	834 elif text in ('false', 'f', '0'):

	835 return False

	836 else:

	837 raise ValueError('Expected "true" or "false".')

	838

	839

	840 def ParseEnum(field, value):

	841 """Parse an enum value.

	842

	843 The value can be specified by a number (the enum value), or by

	844 a string literal (the enum name).

	845

	846 Args:

	847 field: Enum field descriptor.

	848 value: String value.

	849

	850 Returns:

	851 Enum value number.

	852

	853 Raises:

	854 ValueError: If the enum value could not be parsed.

	855 """

	856 enum_descriptor = field.enum_type

	857 try:

	858 number = int(value, 0)

	859 except ValueError:

	860 # Identifier.

	861 enum_value = enum_descriptor.values_by_name.get(value, None)

	862 if enum_value is None:

	863 raise ValueError(

	864 'Enum type "%s" has no value named %s.' % (

	865 enum_descriptor.full_name, value))

	866 else:

	867 # Numeric value.

	868 enum_value = enum_descriptor.values_by_number.get(number, None)

	869 if enum_value is None:

	870 raise ValueError(

	871 'Enum type "%s" has no value with number %d.' % (

	872 enum_descriptor.full_name, number))

	873 return enum_value.number

OLD	NEW

« no previous file with comments | « third_party/google/protobuf/text_encoding.py ('k') | no next file » | no next file with comments »