| OLD | NEW |
| 1 # Protocol Buffers - Google's data interchange format | 1 # Protocol Buffers - Google's data interchange format |
| 2 # Copyright 2008 Google Inc. All rights reserved. | 2 # Copyright 2008 Google Inc. All rights reserved. |
| 3 # https://developers.google.com/protocol-buffers/ | 3 # https://developers.google.com/protocol-buffers/ |
| 4 # | 4 # |
| 5 # Redistribution and use in source and binary forms, with or without | 5 # Redistribution and use in source and binary forms, with or without |
| 6 # modification, are permitted provided that the following conditions are | 6 # modification, are permitted provided that the following conditions are |
| 7 # met: | 7 # met: |
| 8 # | 8 # |
| 9 # * Redistributions of source code must retain the above copyright | 9 # * Redistributions of source code must retain the above copyright |
| 10 # notice, this list of conditions and the following disclaimer. | 10 # notice, this list of conditions and the following disclaimer. |
| (...skipping 30 matching lines...) |
| 41 """ | 41 """ |
| 42 | 42 |
| 43 __author__ = 'kenton@google.com (Kenton Varda)' | 43 __author__ = 'kenton@google.com (Kenton Varda)' |
| 44 | 44 |
| 45 import io | 45 import io |
| 46 import re | 46 import re |
| 47 | 47 |
| 48 import six | 48 import six |
| 49 | 49 |
| 50 if six.PY3: | 50 if six.PY3: |
| 51 long = int | 51 long = int # pylint: disable=redefined-builtin,invalid-name |
| 52 | 52 |
| 53 # pylint: disable=g-import-not-at-top |
| 53 from google.protobuf.internal import type_checkers | 54 from google.protobuf.internal import type_checkers |
| 54 from google.protobuf import descriptor | 55 from google.protobuf import descriptor |
| 55 from google.protobuf import text_encoding | 56 from google.protobuf import text_encoding |
| 56 | 57 |
| 57 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', | 58 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', 'PrintFieldValue', |
| 58 'PrintFieldValue', 'Merge'] | 59 'Merge'] |
| 59 | |
| 60 | 60 |
| 61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), | 61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), |
| 62 type_checkers.Int32ValueChecker(), | 62 type_checkers.Int32ValueChecker(), |
| 63 type_checkers.Uint64ValueChecker(), | 63 type_checkers.Uint64ValueChecker(), |
| 64 type_checkers.Int64ValueChecker()) | 64 type_checkers.Int64ValueChecker()) |
| 65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE) | 65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE) |
| 66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE) | 66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE) |
| 67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT, | 67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT, |
| 68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE]) | 68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE]) |
| 69 _QUOTES = frozenset(("'", '"')) | 69 _QUOTES = frozenset(("'", '"')) |
| 70 _ANY_FULL_TYPE_NAME = 'google.protobuf.Any' |
| 70 | 71 |
| 71 | 72 |
| 72 class Error(Exception): | 73 class Error(Exception): |
| 73 """Top-level module error for text_format.""" | 74 """Top-level module error for text_format.""" |
| 74 | 75 |
| 75 | 76 |
| 76 class ParseError(Error): | 77 class ParseError(Error): |
| 77 """Thrown in case of text parsing error.""" | 78 """Thrown in case of text parsing or tokenizing error.""" |
| 79 |
| 80 def __init__(self, message=None, line=None, column=None): |
| 81 if message is not None and line is not None: |
| 82 loc = str(line) |
| 83 if column is not None: |
| 84 loc += ':{0}'.format(column) |
| 85 message = '{0} : {1}'.format(loc, message) |
| 86 if message is not None: |
| 87 super(ParseError, self).__init__(message) |
| 88 else: |
| 89 super(ParseError, self).__init__() |
| 90 self._line = line |
| 91 self._column = column |
| 92 |
| 93 def GetLine(self): |
| 94 return self._line |
| 95 |
| 96 def GetColumn(self): |
| 97 return self._column |
| 78 | 98 |
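A quick illustration of the location prefix added by the new ParseError constructor; the values are made up:

```python
from google.protobuf import text_format

err = text_format.ParseError('unexpected token', line=3, column=14)
print(str(err))         # 3:14 : unexpected token
print(err.GetLine())    # 3
print(err.GetColumn())  # 14
```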
| 79 | 99 |
| 80 class TextWriter(object): | 100 class TextWriter(object): |
| 101 |
| 81 def __init__(self, as_utf8): | 102 def __init__(self, as_utf8): |
| 82 if six.PY2: | 103 if six.PY2: |
| 83 self._writer = io.BytesIO() | 104 self._writer = io.BytesIO() |
| 84 else: | 105 else: |
| 85 self._writer = io.StringIO() | 106 self._writer = io.StringIO() |
| 86 | 107 |
| 87 def write(self, val): | 108 def write(self, val): |
| 88 if six.PY2: | 109 if six.PY2: |
| 89 if isinstance(val, six.text_type): | 110 if isinstance(val, six.text_type): |
| 90 val = val.encode('utf-8') | 111 val = val.encode('utf-8') |
| 91 return self._writer.write(val) | 112 return self._writer.write(val) |
| 92 | 113 |
| 93 def close(self): | 114 def close(self): |
| 94 return self._writer.close() | 115 return self._writer.close() |
| 95 | 116 |
| 96 def getvalue(self): | 117 def getvalue(self): |
| 97 return self._writer.getvalue() | 118 return self._writer.getvalue() |
| 98 | 119 |
| 99 | 120 |
| 100 def MessageToString(message, as_utf8=False, as_one_line=False, | 121 def MessageToString(message, |
| 101 pointy_brackets=False, use_index_order=False, | 122 as_utf8=False, |
| 102 float_format=None, use_field_number=False): | 123 as_one_line=False, |
| 124 pointy_brackets=False, |
| 125 use_index_order=False, |
| 126 float_format=None, |
| 127 use_field_number=False, |
| 128 descriptor_pool=None, |
| 129 indent=0): |
| 103 """Convert protobuf message to text format. | 130 """Convert protobuf message to text format. |
| 104 | 131 |
| 105 Floating point values can be formatted compactly with 15 digits of | 132 Floating point values can be formatted compactly with 15 digits of |
| 106 precision (which is the most that IEEE 754 "double" can guarantee) | 133 precision (which is the most that IEEE 754 "double" can guarantee) |
| 107 using float_format='.15g'. To ensure that converting to text and back to a | 134 using float_format='.15g'. To ensure that converting to text and back to a |
| 108 proto will result in an identical value, float_format='.17g' should be used. | 135 proto will result in an identical value, float_format='.17g' should be used. |
| 109 | 136 |
| 110 Args: | 137 Args: |
| 111 message: The protocol buffers message. | 138 message: The protocol buffers message. |
| 112 as_utf8: Produce text output in UTF8 format. | 139 as_utf8: Produce text output in UTF8 format. |
| 113 as_one_line: Don't introduce newlines between fields. | 140 as_one_line: Don't introduce newlines between fields. |
| 114 pointy_brackets: If True, use angle brackets instead of curly braces for | 141 pointy_brackets: If True, use angle brackets instead of curly braces for |
| 115 nesting. | 142 nesting. |
| 116 use_index_order: If True, print fields of a proto message using the order | 143 use_index_order: If True, print fields of a proto message using the order |
| 117 defined in source code instead of the field number. By default, use the | 144 defined in source code instead of the field number. By default, use the |
| 118 field number order. | 145 field number order. |
| 119 float_format: If set, use this to specify floating point number formatting | 146 float_format: If set, use this to specify floating point number formatting |
| 120 (per the "Format Specification Mini-Language"); otherwise, str() is used. | 147 (per the "Format Specification Mini-Language"); otherwise, str() is used. |
| 121 use_field_number: If True, print field numbers instead of names. | 148 use_field_number: If True, print field numbers instead of names. |
| 149 descriptor_pool: A DescriptorPool used to resolve Any types. |
| 150 indent: The indent level, in terms of spaces, for pretty print. |
| 122 | 151 |
| 123 Returns: | 152 Returns: |
| 124 A string of the text formatted protocol buffer message. | 153 A string of the text formatted protocol buffer message. |
| 125 """ | 154 """ |
| 126 out = TextWriter(as_utf8) | 155 out = TextWriter(as_utf8) |
| 127 printer = _Printer(out, 0, as_utf8, as_one_line, | 156 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
| 128 pointy_brackets, use_index_order, float_format, | 157 use_index_order, float_format, use_field_number, |
| 129 use_field_number) | 158 descriptor_pool) |
| 130 printer.PrintMessage(message) | 159 printer.PrintMessage(message) |
| 131 result = out.getvalue() | 160 result = out.getvalue() |
| 132 out.close() | 161 out.close() |
| 133 if as_one_line: | 162 if as_one_line: |
| 134 return result.rstrip() | 163 return result.rstrip() |
| 135 return result | 164 return result |
| 136 | 165 |
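A hedged usage sketch of the float_format guidance in the MessageToString docstring; `my_pb2` and its `Measurement` message (with a double field `ratio`) are hypothetical stand-ins for any generated type:

```python
from google.protobuf import text_format
from my_project import my_pb2  # hypothetical generated module

msg = my_pb2.Measurement(ratio=1.0 / 3.0)  # 'ratio' is a double field

# Compact output: 15 significant digits, easiest on human readers.
print(text_format.MessageToString(msg, float_format='.15g'))

# Round-trip safe: 17 digits survive text -> proto -> text unchanged.
one_line = text_format.MessageToString(msg, float_format='.17g', as_one_line=True)
```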
| 137 | 166 |
| 138 def _IsMapEntry(field): | 167 def _IsMapEntry(field): |
| 139 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and | 168 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and |
| 140 field.message_type.has_options and | 169 field.message_type.has_options and |
| 141 field.message_type.GetOptions().map_entry) | 170 field.message_type.GetOptions().map_entry) |
| 142 | 171 |
| 143 | 172 |
| 144 def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False, | 173 def PrintMessage(message, |
| 145 pointy_brackets=False, use_index_order=False, | 174 out, |
| 146 float_format=None, use_field_number=False): | 175 indent=0, |
| 147 printer = _Printer(out, indent, as_utf8, as_one_line, | 176 as_utf8=False, |
| 148 pointy_brackets, use_index_order, float_format, | 177 as_one_line=False, |
| 149 use_field_number) | 178 pointy_brackets=False, |
| 179 use_index_order=False, |
| 180 float_format=None, |
| 181 use_field_number=False, |
| 182 descriptor_pool=None): |
| 183 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
| 184 use_index_order, float_format, use_field_number, |
| 185 descriptor_pool) |
| 150 printer.PrintMessage(message) | 186 printer.PrintMessage(message) |
| 151 | 187 |
| 152 | 188 |
| 153 def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False, | 189 def PrintField(field, |
| 154 pointy_brackets=False, use_index_order=False, float_format=None): | 190 value, |
| 191 out, |
| 192 indent=0, |
| 193 as_utf8=False, |
| 194 as_one_line=False, |
| 195 pointy_brackets=False, |
| 196 use_index_order=False, |
| 197 float_format=None): |
| 155 """Print a single field name/value pair.""" | 198 """Print a single field name/value pair.""" |
| 156 printer = _Printer(out, indent, as_utf8, as_one_line, | 199 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
| 157 pointy_brackets, use_index_order, float_format) | 200 use_index_order, float_format) |
| 158 printer.PrintField(field, value) | 201 printer.PrintField(field, value) |
| 159 | 202 |
| 160 | 203 |
| 161 def PrintFieldValue(field, value, out, indent=0, as_utf8=False, | 204 def PrintFieldValue(field, |
| 162 as_one_line=False, pointy_brackets=False, | 205 value, |
| 206 out, |
| 207 indent=0, |
| 208 as_utf8=False, |
| 209 as_one_line=False, |
| 210 pointy_brackets=False, |
| 163 use_index_order=False, | 211 use_index_order=False, |
| 164 float_format=None): | 212 float_format=None): |
| 165 """Print a single field value (not including name).""" | 213 """Print a single field value (not including name).""" |
| 166 printer = _Printer(out, indent, as_utf8, as_one_line, | 214 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
| 167 pointy_brackets, use_index_order, float_format) | 215 use_index_order, float_format) |
| 168 printer.PrintFieldValue(field, value) | 216 printer.PrintFieldValue(field, value) |
| 169 | 217 |
| 170 | 218 |
| 219 def _BuildMessageFromTypeName(type_name, descriptor_pool): |
| 220 """Returns a protobuf message instance. |
| 221 |
| 222 Args: |
| 223 type_name: Fully-qualified protobuf message type name string. |
| 224 descriptor_pool: DescriptorPool instance. |
| 225 |
| 226 Returns: |
| 227 A Message instance of type matching type_name, or None if a Descriptor |
| 228 wasn't found matching type_name. |
| 229 """ |
| 230 # pylint: disable=g-import-not-at-top |
| 231 from google.protobuf import symbol_database |
| 232 database = symbol_database.Default() |
| 233 try: |
| 234 message_descriptor = descriptor_pool.FindMessageTypeByName(type_name) |
| 235 except KeyError: |
| 236 return None |
| 237 message_type = database.GetPrototype(message_descriptor) |
| 238 return message_type() |
| 239 |
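A sketch of how the new descriptor_pool plumbing is exercised when printing a packed google.protobuf.Any (the printer below expands it via _BuildMessageFromTypeName); it assumes the packed type, Duration here, is already registered in the default pool:

```python
from google.protobuf import any_pb2, descriptor_pool, duration_pb2, text_format

any_msg = any_pb2.Any()
any_msg.Pack(duration_pb2.Duration(seconds=3))

# Without descriptor_pool the Any prints as raw type_url/value fields; with a
# pool that knows the packed type it prints in the expanded form:
#   [type.googleapis.com/google.protobuf.Duration] {
#     seconds: 3
#   }
print(text_format.MessageToString(any_msg,
                                  descriptor_pool=descriptor_pool.Default()))
```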
| 240 |
| 171 class _Printer(object): | 241 class _Printer(object): |
| 172 """Text format printer for protocol message.""" | 242 """Text format printer for protocol message.""" |
| 173 | 243 |
| 174 def __init__(self, out, indent=0, as_utf8=False, as_one_line=False, | 244 def __init__(self, |
| 175 pointy_brackets=False, use_index_order=False, float_format=None, | 245 out, |
| 176 use_field_number=False): | 246 indent=0, |
| 247 as_utf8=False, |
| 248 as_one_line=False, |
| 249 pointy_brackets=False, |
| 250 use_index_order=False, |
| 251 float_format=None, |
| 252 use_field_number=False, |
| 253 descriptor_pool=None): |
| 177 """Initialize the Printer. | 254 """Initialize the Printer. |
| 178 | 255 |
| 179 Floating point values can be formatted compactly with 15 digits of | 256 Floating point values can be formatted compactly with 15 digits of |
| 180 precision (which is the most that IEEE 754 "double" can guarantee) | 257 precision (which is the most that IEEE 754 "double" can guarantee) |
| 181 using float_format='.15g'. To ensure that converting to text and back to a | 258 using float_format='.15g'. To ensure that converting to text and back to a |
| 182 proto will result in an identical value, float_format='.17g' should be used. | 259 proto will result in an identical value, float_format='.17g' should be used. |
| 183 | 260 |
| 184 Args: | 261 Args: |
| 185 out: To record the text format result. | 262 out: To record the text format result. |
| 186 indent: The indent level for pretty print. | 263 indent: The indent level for pretty print. |
| 187 as_utf8: Produce text output in UTF8 format. | 264 as_utf8: Produce text output in UTF8 format. |
| 188 as_one_line: Don't introduce newlines between fields. | 265 as_one_line: Don't introduce newlines between fields. |
| 189 pointy_brackets: If True, use angle brackets instead of curly braces for | 266 pointy_brackets: If True, use angle brackets instead of curly braces for |
| 190 nesting. | 267 nesting. |
| 191 use_index_order: If True, print fields of a proto message using the order | 268 use_index_order: If True, print fields of a proto message using the order |
| 192 defined in source code instead of the field number. By default, use the | 269 defined in source code instead of the field number. By default, use the |
| 193 field number order. | 270 field number order. |
| 194 float_format: If set, use this to specify floating point number formatting | 271 float_format: If set, use this to specify floating point number formatting |
| 195 (per the "Format Specification Mini-Language"); otherwise, str() is | 272 (per the "Format Specification Mini-Language"); otherwise, str() is |
| 196 used. | 273 used. |
| 197 use_field_number: If True, print field numbers instead of names. | 274 use_field_number: If True, print field numbers instead of names. |
| 275 descriptor_pool: A DescriptorPool used to resolve Any types. |
| 198 """ | 276 """ |
| 199 self.out = out | 277 self.out = out |
| 200 self.indent = indent | 278 self.indent = indent |
| 201 self.as_utf8 = as_utf8 | 279 self.as_utf8 = as_utf8 |
| 202 self.as_one_line = as_one_line | 280 self.as_one_line = as_one_line |
| 203 self.pointy_brackets = pointy_brackets | 281 self.pointy_brackets = pointy_brackets |
| 204 self.use_index_order = use_index_order | 282 self.use_index_order = use_index_order |
| 205 self.float_format = float_format | 283 self.float_format = float_format |
| 206 self.use_field_number = use_field_number | 284 self.use_field_number = use_field_number |
| 285 self.descriptor_pool = descriptor_pool |
| 286 |
| 287 def _TryPrintAsAnyMessage(self, message): |
| 288 """Serializes if message is a google.protobuf.Any field.""" |
| 289 packed_message = _BuildMessageFromTypeName(message.TypeName(), |
| 290 self.descriptor_pool) |
| 291 if packed_message: |
| 292 packed_message.MergeFromString(message.value) |
| 293 self.out.write('%s[%s]' % (self.indent * ' ', message.type_url)) |
| 294 self._PrintMessageFieldValue(packed_message) |
| 295 self.out.write(' ' if self.as_one_line else '\n') |
| 296 return True |
| 297 else: |
| 298 return False |
| 207 | 299 |
| 208 def PrintMessage(self, message): | 300 def PrintMessage(self, message): |
| 209 """Convert protobuf message to text format. | 301 """Convert protobuf message to text format. |
| 210 | 302 |
| 211 Args: | 303 Args: |
| 212 message: The protocol buffers message. | 304 message: The protocol buffers message. |
| 213 """ | 305 """ |
| 306 if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and |
| 307 self.descriptor_pool and self._TryPrintAsAnyMessage(message)): |
| 308 return |
| 214 fields = message.ListFields() | 309 fields = message.ListFields() |
| 215 if self.use_index_order: | 310 if self.use_index_order: |
| 216 fields.sort(key=lambda x: x[0].index) | 311 fields.sort(key=lambda x: x[0].index) |
| 217 for field, value in fields: | 312 for field, value in fields: |
| 218 if _IsMapEntry(field): | 313 if _IsMapEntry(field): |
| 219 for key in sorted(value): | 314 for key in sorted(value): |
| 220 # This is slow for maps with submessage entries because it copies the | 315 # This is slow for maps with submessage entries because it copies the |
| 221 # entire tree. Unfortunately this would take significant refactoring | 316 # entire tree. Unfortunately this would take significant refactoring |
| 222 # of this file to work around. | 317 # of this file to work around. |
| 223 # | 318 # |
| 224 # TODO(haberman): refactor and optimize if this becomes an issue. | 319 # TODO(haberman): refactor and optimize if this becomes an issue. |
| 225 entry_submsg = field.message_type._concrete_class( | 320 entry_submsg = value.GetEntryClass()(key=key, value=value[key]) |
| 226 key=key, value=value[key]) | |
| 227 self.PrintField(field, entry_submsg) | 321 self.PrintField(field, entry_submsg) |
| 228 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | 322 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
| 229 for element in value: | 323 for element in value: |
| 230 self.PrintField(field, element) | 324 self.PrintField(field, element) |
| 231 else: | 325 else: |
| 232 self.PrintField(field, value) | 326 self.PrintField(field, value) |
| 233 | 327 |
| 234 def PrintField(self, field, value): | 328 def PrintField(self, field, value): |
| 235 """Print a single field name/value pair.""" | 329 """Print a single field name/value pair.""" |
| 236 out = self.out | 330 out = self.out |
| (...skipping 20 matching lines...) |
| 257 # The colon is optional in this case, but our cross-language golden files | 351 # The colon is optional in this case, but our cross-language golden files |
| 258 # don't include it. | 352 # don't include it. |
| 259 out.write(': ') | 353 out.write(': ') |
| 260 | 354 |
| 261 self.PrintFieldValue(field, value) | 355 self.PrintFieldValue(field, value) |
| 262 if self.as_one_line: | 356 if self.as_one_line: |
| 263 out.write(' ') | 357 out.write(' ') |
| 264 else: | 358 else: |
| 265 out.write('\n') | 359 out.write('\n') |
| 266 | 360 |
| 361 def _PrintMessageFieldValue(self, value): |
| 362 if self.pointy_brackets: |
| 363 openb = '<' |
| 364 closeb = '>' |
| 365 else: |
| 366 openb = '{' |
| 367 closeb = '}' |
| 368 |
| 369 if self.as_one_line: |
| 370 self.out.write(' %s ' % openb) |
| 371 self.PrintMessage(value) |
| 372 self.out.write(closeb) |
| 373 else: |
| 374 self.out.write(' %s\n' % openb) |
| 375 self.indent += 2 |
| 376 self.PrintMessage(value) |
| 377 self.indent -= 2 |
| 378 self.out.write(' ' * self.indent + closeb) |
| 379 |
| 267 def PrintFieldValue(self, field, value): | 380 def PrintFieldValue(self, field, value): |
| 268 """Print a single field value (not including name). | 381 """Print a single field value (not including name). |
| 269 | 382 |
| 270 For repeated fields, the value should be a single element. | 383 For repeated fields, the value should be a single element. |
| 271 | 384 |
| 272 Args: | 385 Args: |
| 273 field: The descriptor of the field to be printed. | 386 field: The descriptor of the field to be printed. |
| 274 value: The value of the field. | 387 value: The value of the field. |
| 275 """ | 388 """ |
| 276 out = self.out | 389 out = self.out |
| 277 if self.pointy_brackets: | |
| 278 openb = '<' | |
| 279 closeb = '>' | |
| 280 else: | |
| 281 openb = '{' | |
| 282 closeb = '}' | |
| 283 | |
| 284 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: | 390 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: |
| 285 if self.as_one_line: | 391 self._PrintMessageFieldValue(value) |
| 286 out.write(' %s ' % openb) | |
| 287 self.PrintMessage(value) | |
| 288 out.write(closeb) | |
| 289 else: | |
| 290 out.write(' %s\n' % openb) | |
| 291 self.indent += 2 | |
| 292 self.PrintMessage(value) | |
| 293 self.indent -= 2 | |
| 294 out.write(' ' * self.indent + closeb) | |
| 295 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: | 392 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: |
| 296 enum_value = field.enum_type.values_by_number.get(value, None) | 393 enum_value = field.enum_type.values_by_number.get(value, None) |
| 297 if enum_value is not None: | 394 if enum_value is not None: |
| 298 out.write(enum_value.name) | 395 out.write(enum_value.name) |
| 299 else: | 396 else: |
| 300 out.write(str(value)) | 397 out.write(str(value)) |
| 301 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: | 398 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: |
| 302 out.write('\"') | 399 out.write('\"') |
| 303 if isinstance(value, six.text_type): | 400 if isinstance(value, six.text_type): |
| 304 out_value = value.encode('utf-8') | 401 out_value = value.encode('utf-8') |
| (...skipping 10 matching lines...) |
| 315 if value: | 412 if value: |
| 316 out.write('true') | 413 out.write('true') |
| 317 else: | 414 else: |
| 318 out.write('false') | 415 out.write('false') |
| 319 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: | 416 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: |
| 320 out.write('{1:{0}}'.format(self.float_format, value)) | 417 out.write('{1:{0}}'.format(self.float_format, value)) |
| 321 else: | 418 else: |
| 322 out.write(str(value)) | 419 out.write(str(value)) |
| 323 | 420 |
| 324 | 421 |
| 325 def Parse(text, message, | 422 def Parse(text, |
| 326 allow_unknown_extension=False, allow_field_number=False): | 423 message, |
| 327 """Parses an text representation of a protocol message into a message. | 424 allow_unknown_extension=False, |
| 425 allow_field_number=False): |
| 426 """Parses a text representation of a protocol message into a message. |
| 328 | 427 |
| 329 Args: | 428 Args: |
| 330 text: Message text representation. | 429 text: Message text representation. |
| 331 message: A protocol buffer message to merge into. | 430 message: A protocol buffer message to merge into. |
| 332 allow_unknown_extension: if True, skip over missing extensions and keep | 431 allow_unknown_extension: if True, skip over missing extensions and keep |
| 333 parsing | 432 parsing |
| 334 allow_field_number: if True, both field number and field name are allowed. | 433 allow_field_number: if True, both field number and field name are allowed. |
| 335 | 434 |
| 336 Returns: | 435 Returns: |
| 337 The same message passed as argument. | 436 The same message passed as argument. |
| 338 | 437 |
| 339 Raises: | 438 Raises: |
| 340 ParseError: On text parsing problems. | 439 ParseError: On text parsing problems. |
| 341 """ | 440 """ |
| 342 if not isinstance(text, str): | 441 if not isinstance(text, str): |
| 343 text = text.decode('utf-8') | 442 text = text.decode('utf-8') |
| 344 return ParseLines(text.split('\n'), message, allow_unknown_extension, | 443 return ParseLines( |
| 345 allow_field_number) | 444 text.split('\n'), message, allow_unknown_extension, allow_field_number) |
| 346 | 445 |
| 347 | 446 |
| 348 def Merge(text, message, allow_unknown_extension=False, | 447 def Merge(text, |
| 349 allow_field_number=False): | 448 message, |
| 350 """Parses an text representation of a protocol message into a message. | 449 allow_unknown_extension=False, |
| 450 allow_field_number=False, |
| 451 descriptor_pool=None): |
| 452 """Parses a text representation of a protocol message into a message. |
| 351 | 453 |
| 352 Like Parse(), but allows repeated values for a non-repeated field, and uses | 454 Like Parse(), but allows repeated values for a non-repeated field, and uses |
| 353 the last one. | 455 the last one. |
| 354 | 456 |
| 355 Args: | 457 Args: |
| 356 text: Message text representation. | 458 text: Message text representation. |
| 357 message: A protocol buffer message to merge into. | 459 message: A protocol buffer message to merge into. |
| 358 allow_unknown_extension: if True, skip over missing extensions and keep | 460 allow_unknown_extension: if True, skip over missing extensions and keep |
| 359 parsing | 461 parsing |
| 360 allow_field_number: if True, both field number and field name are allowed. | 462 allow_field_number: if True, both field number and field name are allowed. |
| 463 descriptor_pool: A DescriptorPool used to resolve Any types. |
| 361 | 464 |
| 362 Returns: | 465 Returns: |
| 363 The same message passed as argument. | 466 The same message passed as argument. |
| 364 | 467 |
| 365 Raises: | 468 Raises: |
| 366 ParseError: On text parsing problems. | 469 ParseError: On text parsing problems. |
| 367 """ | 470 """ |
| 368 return MergeLines(text.split('\n'), message, allow_unknown_extension, | 471 return MergeLines( |
| 369 allow_field_number) | 472 text.split('\n'), |
| 473 message, |
| 474 allow_unknown_extension, |
| 475 allow_field_number, |
| 476 descriptor_pool=descriptor_pool) |
| 370 | 477 |
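A sketch of parsing the expanded Any syntax through the descriptor_pool argument this change adds to Merge; `my_pb2.Container` is a hypothetical message with a google.protobuf.Any field named `payload`, and the packed type must already be registered in the pool:

```python
from google.protobuf import descriptor_pool, text_format
from google.protobuf import duration_pb2  # registers Duration in the default pool
from my_project import my_pb2  # hypothetical: Container has an Any field 'payload'

text = """
payload {
  [type.googleapis.com/google.protobuf.Duration] {
    seconds: 3
  }
}
"""
msg = my_pb2.Container()
# Unlike Parse(), Merge() accepts descriptor_pool here; without a pool the
# expanded-Any form raises ParseError.
text_format.Merge(text, msg, descriptor_pool=descriptor_pool.Default())
```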
| 371 | 478 |
| 372 def ParseLines(lines, message, allow_unknown_extension=False, | 479 def ParseLines(lines, |
| 480 message, |
| 481 allow_unknown_extension=False, |
| 373 allow_field_number=False): | 482 allow_field_number=False): |
| 374 """Parses an text representation of a protocol message into a message. | 483 """Parses a text representation of a protocol message into a message. |
| 375 | 484 |
| 376 Args: | 485 Args: |
| 377 lines: An iterable of lines of a message's text representation. | 486 lines: An iterable of lines of a message's text representation. |
| 378 message: A protocol buffer message to merge into. | 487 message: A protocol buffer message to merge into. |
| 379 allow_unknown_extension: if True, skip over missing extensions and keep | 488 allow_unknown_extension: if True, skip over missing extensions and keep |
| 380 parsing | 489 parsing |
| 381 allow_field_number: if True, both field number and field name are allowed. | 490 allow_field_number: if True, both field number and field name are allowed. |
| 491 descriptor_pool: A DescriptorPool used to resolve Any types. |
| 382 | 492 |
| 383 Returns: | 493 Returns: |
| 384 The same message passed as argument. | 494 The same message passed as argument. |
| 385 | 495 |
| 386 Raises: | 496 Raises: |
| 387 ParseError: On text parsing problems. | 497 ParseError: On text parsing problems. |
| 388 """ | 498 """ |
| 389 parser = _Parser(allow_unknown_extension, allow_field_number) | 499 parser = _Parser(allow_unknown_extension, allow_field_number) |
| 390 return parser.ParseLines(lines, message) | 500 return parser.ParseLines(lines, message) |
| 391 | 501 |
| 392 | 502 |
| 393 def MergeLines(lines, message, allow_unknown_extension=False, | 503 def MergeLines(lines, |
| 394 allow_field_number=False): | 504 message, |
| 395 """Parses an text representation of a protocol message into a message. | 505 allow_unknown_extension=False, |
| 506 allow_field_number=False, |
| 507 descriptor_pool=None): |
| 508 """Parses a text representation of a protocol message into a message. |
| 396 | 509 |
| 397 Args: | 510 Args: |
| 398 lines: An iterable of lines of a message's text representation. | 511 lines: An iterable of lines of a message's text representation. |
| 399 message: A protocol buffer message to merge into. | 512 message: A protocol buffer message to merge into. |
| 400 allow_unknown_extension: if True, skip over missing extensions and keep | 513 allow_unknown_extension: if True, skip over missing extensions and keep |
| 401 parsing | 514 parsing |
| 402 allow_field_number: if True, both field number and field name are allowed. | 515 allow_field_number: if True, both field number and field name are allowed. |
| 403 | 516 |
| 404 Returns: | 517 Returns: |
| 405 The same message passed as argument. | 518 The same message passed as argument. |
| 406 | 519 |
| 407 Raises: | 520 Raises: |
| 408 ParseError: On text parsing problems. | 521 ParseError: On text parsing problems. |
| 409 """ | 522 """ |
| 410 parser = _Parser(allow_unknown_extension, allow_field_number) | 523 parser = _Parser(allow_unknown_extension, |
| 524 allow_field_number, |
| 525 descriptor_pool=descriptor_pool) |
| 411 return parser.MergeLines(lines, message) | 526 return parser.MergeLines(lines, message) |
| 412 | 527 |
| 413 | 528 |
| 414 class _Parser(object): | 529 class _Parser(object): |
| 415 """Text format parser for protocol message.""" | 530 """Text format parser for protocol message.""" |
| 416 | 531 |
| 417 def __init__(self, allow_unknown_extension=False, allow_field_number=False): | 532 def __init__(self, |
| 533 allow_unknown_extension=False, |
| 534 allow_field_number=False, |
| 535 descriptor_pool=None): |
| 418 self.allow_unknown_extension = allow_unknown_extension | 536 self.allow_unknown_extension = allow_unknown_extension |
| 419 self.allow_field_number = allow_field_number | 537 self.allow_field_number = allow_field_number |
| 538 self.descriptor_pool = descriptor_pool |
| 420 | 539 |
| 421 def ParseFromString(self, text, message): | 540 def ParseFromString(self, text, message): |
| 422 """Parses an text representation of a protocol message into a message.""" | 541 """Parses a text representation of a protocol message into a message.""" |
| 423 if not isinstance(text, str): | 542 if not isinstance(text, str): |
| 424 text = text.decode('utf-8') | 543 text = text.decode('utf-8') |
| 425 return self.ParseLines(text.split('\n'), message) | 544 return self.ParseLines(text.split('\n'), message) |
| 426 | 545 |
| 427 def ParseLines(self, lines, message): | 546 def ParseLines(self, lines, message): |
| 428 """Parses an text representation of a protocol message into a message.""" | 547 """Parses a text representation of a protocol message into a message.""" |
| 429 self._allow_multiple_scalars = False | 548 self._allow_multiple_scalars = False |
| 430 self._ParseOrMerge(lines, message) | 549 self._ParseOrMerge(lines, message) |
| 431 return message | 550 return message |
| 432 | 551 |
| 433 def MergeFromString(self, text, message): | 552 def MergeFromString(self, text, message): |
| 434 """Merges an text representation of a protocol message into a message.""" | 553 """Merges a text representation of a protocol message into a message.""" |
| 435 return self._MergeLines(text.split('\n'), message) | 554 return self._MergeLines(text.split('\n'), message) |
| 436 | 555 |
| 437 def MergeLines(self, lines, message): | 556 def MergeLines(self, lines, message): |
| 438 """Merges an text representation of a protocol message into a message.""" | 557 """Merges a text representation of a protocol message into a message.""" |
| 439 self._allow_multiple_scalars = True | 558 self._allow_multiple_scalars = True |
| 440 self._ParseOrMerge(lines, message) | 559 self._ParseOrMerge(lines, message) |
| 441 return message | 560 return message |
| 442 | 561 |
| 443 def _ParseOrMerge(self, lines, message): | 562 def _ParseOrMerge(self, lines, message): |
| 444 """Converts an text representation of a protocol message into a message. | 563 """Converts a text representation of a protocol message into a message. |
| 445 | 564 |
| 446 Args: | 565 Args: |
| 447 lines: Lines of a message's text representation. | 566 lines: Lines of a message's text representation. |
| 448 message: A protocol buffer message to merge into. | 567 message: A protocol buffer message to merge into. |
| 449 | 568 |
| 450 Raises: | 569 Raises: |
| 451 ParseError: On text parsing problems. | 570 ParseError: On text parsing problems. |
| 452 """ | 571 """ |
| 453 tokenizer = _Tokenizer(lines) | 572 tokenizer = Tokenizer(lines) |
| 454 while not tokenizer.AtEnd(): | 573 while not tokenizer.AtEnd(): |
| 455 self._MergeField(tokenizer, message) | 574 self._MergeField(tokenizer, message) |
| 456 | 575 |
| 457 def _MergeField(self, tokenizer, message): | 576 def _MergeField(self, tokenizer, message): |
| 458 """Merges a single protocol message field into a message. | 577 """Merges a single protocol message field into a message. |
| 459 | 578 |
| 460 Args: | 579 Args: |
| 461 tokenizer: A tokenizer to parse the field name and values. | 580 tokenizer: A tokenizer to parse the field name and values. |
| 462 message: A protocol message to record the data. | 581 message: A protocol message to record the data. |
| 463 | 582 |
| (...skipping 20 matching lines...) |
| 484 field = message.Extensions._FindExtensionByName(name) | 603 field = message.Extensions._FindExtensionByName(name) |
| 485 # pylint: enable=protected-access | 604 # pylint: enable=protected-access |
| 486 if not field: | 605 if not field: |
| 487 if self.allow_unknown_extension: | 606 if self.allow_unknown_extension: |
| 488 field = None | 607 field = None |
| 489 else: | 608 else: |
| 490 raise tokenizer.ParseErrorPreviousToken( | 609 raise tokenizer.ParseErrorPreviousToken( |
| 491 'Extension "%s" not registered.' % name) | 610 'Extension "%s" not registered.' % name) |
| 492 elif message_descriptor != field.containing_type: | 611 elif message_descriptor != field.containing_type: |
| 493 raise tokenizer.ParseErrorPreviousToken( | 612 raise tokenizer.ParseErrorPreviousToken( |
| 494 'Extension "%s" does not extend message type "%s".' % ( | 613 'Extension "%s" does not extend message type "%s".' % |
| 495 name, message_descriptor.full_name)) | 614 (name, message_descriptor.full_name)) |
| 496 | 615 |
| 497 tokenizer.Consume(']') | 616 tokenizer.Consume(']') |
| 498 | 617 |
| 499 else: | 618 else: |
| 500 name = tokenizer.ConsumeIdentifier() | 619 name = tokenizer.ConsumeIdentifierOrNumber() |
| 501 if self.allow_field_number and name.isdigit(): | 620 if self.allow_field_number and name.isdigit(): |
| 502 number = ParseInteger(name, True, True) | 621 number = ParseInteger(name, True, True) |
| 503 field = message_descriptor.fields_by_number.get(number, None) | 622 field = message_descriptor.fields_by_number.get(number, None) |
| 504 if not field and message_descriptor.is_extendable: | 623 if not field and message_descriptor.is_extendable: |
| 505 field = message.Extensions._FindExtensionByNumber(number) | 624 field = message.Extensions._FindExtensionByNumber(number) |
| 506 else: | 625 else: |
| 507 field = message_descriptor.fields_by_name.get(name, None) | 626 field = message_descriptor.fields_by_name.get(name, None) |
| 508 | 627 |
| 509 # Group names are expected to be capitalized as they appear in the | 628 # Group names are expected to be capitalized as they appear in the |
| 510 # .proto file, which actually matches their type names, not their field | 629 # .proto file, which actually matches their type names, not their field |
| 511 # names. | 630 # names. |
| 512 if not field: | 631 if not field: |
| 513 field = message_descriptor.fields_by_name.get(name.lower(), None) | 632 field = message_descriptor.fields_by_name.get(name.lower(), None) |
| 514 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: | 633 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: |
| 515 field = None | 634 field = None |
| 516 | 635 |
| 517 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and | 636 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and |
| 518 field.message_type.name != name): | 637 field.message_type.name != name): |
| 519 field = None | 638 field = None |
| 520 | 639 |
| 521 if not field: | 640 if not field: |
| 522 raise tokenizer.ParseErrorPreviousToken( | 641 raise tokenizer.ParseErrorPreviousToken( |
| 523 'Message type "%s" has no field named "%s".' % ( | 642 'Message type "%s" has no field named "%s".' % |
| 524 message_descriptor.full_name, name)) | 643 (message_descriptor.full_name, name)) |
| 525 | 644 |
| 526 if field: | 645 if field: |
| 527 if not self._allow_multiple_scalars and field.containing_oneof: | 646 if not self._allow_multiple_scalars and field.containing_oneof: |
| 528 # Check if there's a different field set in this oneof. | 647 # Check if there's a different field set in this oneof. |
| 529 # Note that we ignore the case if the same field was set before, and we | 648 # Note that we ignore the case if the same field was set before, and we |
| 530 # apply _allow_multiple_scalars to non-scalar fields as well. | 649 # apply _allow_multiple_scalars to non-scalar fields as well. |
| 531 which_oneof = message.WhichOneof(field.containing_oneof.name) | 650 which_oneof = message.WhichOneof(field.containing_oneof.name) |
| 532 if which_oneof is not None and which_oneof != field.name: | 651 if which_oneof is not None and which_oneof != field.name: |
| 533 raise tokenizer.ParseErrorPreviousToken( | 652 raise tokenizer.ParseErrorPreviousToken( |
| 534 'Field "%s" is specified along with field "%s", another member ' | 653 'Field "%s" is specified along with field "%s", another member ' |
| 535 'of oneof "%s" for message type "%s".' % ( | 654 'of oneof "%s" for message type "%s".' % |
| 536 field.name, which_oneof, field.containing_oneof.name, | 655 (field.name, which_oneof, field.containing_oneof.name, |
| 537 message_descriptor.full_name)) | 656 message_descriptor.full_name)) |
| 538 | 657 |
| 539 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: | 658 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: |
| 540 tokenizer.TryConsume(':') | 659 tokenizer.TryConsume(':') |
| 541 merger = self._MergeMessageField | 660 merger = self._MergeMessageField |
| 542 else: | 661 else: |
| 543 tokenizer.Consume(':') | 662 tokenizer.Consume(':') |
| 544 merger = self._MergeScalarField | 663 merger = self._MergeScalarField |
| 545 | 664 |
| 546 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED | 665 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and |
| 547 and tokenizer.TryConsume('[')): | 666 tokenizer.TryConsume('[')): |
| 548 # Short repeated format, e.g. "foo: [1, 2, 3]" | 667 # Short repeated format, e.g. "foo: [1, 2, 3]" |
| 549 while True: | 668 while True: |
| 550 merger(tokenizer, message, field) | 669 merger(tokenizer, message, field) |
| 551 if tokenizer.TryConsume(']'): break | 670 if tokenizer.TryConsume(']'): |
| 671 break |
| 552 tokenizer.Consume(',') | 672 tokenizer.Consume(',') |
| 553 | 673 |
| 554 else: | 674 else: |
| 555 merger(tokenizer, message, field) | 675 merger(tokenizer, message, field) |
| 556 | 676 |
| 557 else: # Proto field is unknown. | 677 else: # Proto field is unknown. |
| 558 assert self.allow_unknown_extension | 678 assert self.allow_unknown_extension |
| 559 _SkipFieldContents(tokenizer) | 679 _SkipFieldContents(tokenizer) |
| 560 | 680 |
| 561 # For historical reasons, fields may optionally be separated by commas or | 681 # For historical reasons, fields may optionally be separated by commas or |
| 562 # semicolons. | 682 # semicolons. |
| 563 if not tokenizer.TryConsume(','): | 683 if not tokenizer.TryConsume(','): |
| 564 tokenizer.TryConsume(';') | 684 tokenizer.TryConsume(';') |
| 565 | 685 |
| 686 def _ConsumeAnyTypeUrl(self, tokenizer): |
| 687 """Consumes a google.protobuf.Any type URL and returns the type name.""" |
| 688 # Consume "type.googleapis.com/". |
| 689 tokenizer.ConsumeIdentifier() |
| 690 tokenizer.Consume('.') |
| 691 tokenizer.ConsumeIdentifier() |
| 692 tokenizer.Consume('.') |
| 693 tokenizer.ConsumeIdentifier() |
| 694 tokenizer.Consume('/') |
| 695 # Consume the fully-qualified type name. |
| 696 name = [tokenizer.ConsumeIdentifier()] |
| 697 while tokenizer.TryConsume('.'): |
| 698 name.append(tokenizer.ConsumeIdentifier()) |
| 699 return '.'.join(name) |
| 700 |
| 566 def _MergeMessageField(self, tokenizer, message, field): | 701 def _MergeMessageField(self, tokenizer, message, field): |
| 567 """Merges a single scalar field into a message. | 702 """Merges a single scalar field into a message. |
| 568 | 703 |
| 569 Args: | 704 Args: |
| 570 tokenizer: A tokenizer to parse the field value. | 705 tokenizer: A tokenizer to parse the field value. |
| 571 message: The message of which field is a member. | 706 message: The message of which field is a member. |
| 572 field: The descriptor of the field to be merged. | 707 field: The descriptor of the field to be merged. |
| 573 | 708 |
| 574 Raises: | 709 Raises: |
| 575 ParseError: In case of text parsing problems. | 710 ParseError: In case of text parsing problems. |
| 576 """ | 711 """ |
| 577 is_map_entry = _IsMapEntry(field) | 712 is_map_entry = _IsMapEntry(field) |
| 578 | 713 |
| 579 if tokenizer.TryConsume('<'): | 714 if tokenizer.TryConsume('<'): |
| 580 end_token = '>' | 715 end_token = '>' |
| 581 else: | 716 else: |
| 582 tokenizer.Consume('{') | 717 tokenizer.Consume('{') |
| 583 end_token = '}' | 718 end_token = '}' |
| 584 | 719 |
| 585 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | 720 if (field.message_type.full_name == _ANY_FULL_TYPE_NAME and |
| 721 tokenizer.TryConsume('[')): |
| 722 packed_type_name = self._ConsumeAnyTypeUrl(tokenizer) |
| 723 tokenizer.Consume(']') |
| 724 tokenizer.TryConsume(':') |
| 725 if tokenizer.TryConsume('<'): |
| 726 expanded_any_end_token = '>' |
| 727 else: |
| 728 tokenizer.Consume('{') |
| 729 expanded_any_end_token = '}' |
| 730 if not self.descriptor_pool: |
| 731 raise ParseError('Descriptor pool required to parse expanded Any field') |
| 732 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name, |
| 733 self.descriptor_pool) |
| 734 if not expanded_any_sub_message: |
| 735 raise ParseError('Type %s not found in descriptor pool' % |
| 736 packed_type_name) |
| 737 while not tokenizer.TryConsume(expanded_any_end_token): |
| 738 if tokenizer.AtEnd(): |
| 739 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % |
| 740 (expanded_any_end_token,)) |
| 741 self._MergeField(tokenizer, expanded_any_sub_message) |
| 742 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
| 743 any_message = getattr(message, field.name).add() |
| 744 else: |
| 745 any_message = getattr(message, field.name) |
| 746 any_message.Pack(expanded_any_sub_message) |
| 747 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
| 586 if field.is_extension: | 748 if field.is_extension: |
| 587 sub_message = message.Extensions[field].add() | 749 sub_message = message.Extensions[field].add() |
| 588 elif is_map_entry: | 750 elif is_map_entry: |
| 589 # pylint: disable=protected-access | 751 sub_message = getattr(message, field.name).GetEntryClass()() |
| 590 sub_message = field.message_type._concrete_class() | |
| 591 else: | 752 else: |
| 592 sub_message = getattr(message, field.name).add() | 753 sub_message = getattr(message, field.name).add() |
| 593 else: | 754 else: |
| 594 if field.is_extension: | 755 if field.is_extension: |
| 595 sub_message = message.Extensions[field] | 756 sub_message = message.Extensions[field] |
| 596 else: | 757 else: |
| 597 sub_message = getattr(message, field.name) | 758 sub_message = getattr(message, field.name) |
| 598 sub_message.SetInParent() | 759 sub_message.SetInParent() |
| 599 | 760 |
| 600 while not tokenizer.TryConsume(end_token): | 761 while not tokenizer.TryConsume(end_token): |
| (...skipping 20 matching lines...) |
| 621 Raises: | 782 Raises: |
| 622 ParseError: In case of text parsing problems. | 783 ParseError: In case of text parsing problems. |
| 623 RuntimeError: On runtime errors. | 784 RuntimeError: On runtime errors. |
| 624 """ | 785 """ |
| 625 _ = self.allow_unknown_extension | 786 _ = self.allow_unknown_extension |
| 626 value = None | 787 value = None |
| 627 | 788 |
| 628 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, | 789 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, |
| 629 descriptor.FieldDescriptor.TYPE_SINT32, | 790 descriptor.FieldDescriptor.TYPE_SINT32, |
| 630 descriptor.FieldDescriptor.TYPE_SFIXED32): | 791 descriptor.FieldDescriptor.TYPE_SFIXED32): |
| 631 value = tokenizer.ConsumeInt32() | 792 value = _ConsumeInt32(tokenizer) |
| 632 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, | 793 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, |
| 633 descriptor.FieldDescriptor.TYPE_SINT64, | 794 descriptor.FieldDescriptor.TYPE_SINT64, |
| 634 descriptor.FieldDescriptor.TYPE_SFIXED64): | 795 descriptor.FieldDescriptor.TYPE_SFIXED64): |
| 635 value = tokenizer.ConsumeInt64() | 796 value = _ConsumeInt64(tokenizer) |
| 636 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, | 797 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, |
| 637 descriptor.FieldDescriptor.TYPE_FIXED32): | 798 descriptor.FieldDescriptor.TYPE_FIXED32): |
| 638 value = tokenizer.ConsumeUint32() | 799 value = _ConsumeUint32(tokenizer) |
| 639 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, | 800 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, |
| 640 descriptor.FieldDescriptor.TYPE_FIXED64): | 801 descriptor.FieldDescriptor.TYPE_FIXED64): |
| 641 value = tokenizer.ConsumeUint64() | 802 value = _ConsumeUint64(tokenizer) |
| 642 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, | 803 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, |
| 643 descriptor.FieldDescriptor.TYPE_DOUBLE): | 804 descriptor.FieldDescriptor.TYPE_DOUBLE): |
| 644 value = tokenizer.ConsumeFloat() | 805 value = tokenizer.ConsumeFloat() |
| 645 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: | 806 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: |
| 646 value = tokenizer.ConsumeBool() | 807 value = tokenizer.ConsumeBool() |
| 647 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: | 808 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: |
| 648 value = tokenizer.ConsumeString() | 809 value = tokenizer.ConsumeString() |
| 649 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: | 810 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: |
| 650 value = tokenizer.ConsumeByteString() | 811 value = tokenizer.ConsumeByteString() |
| 651 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: | 812 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: |
| (...skipping 94 matching lines...) |
| 746 ParseError: In case an invalid field value is found. | 907 ParseError: In case an invalid field value is found. |
| 747 """ | 908 """ |
| 748 # String/bytes tokens can come in multiple adjacent string literals. | 909 # String/bytes tokens can come in multiple adjacent string literals. |
| 749 # If we can consume one, consume as many as we can. | 910 # If we can consume one, consume as many as we can. |
| 750 if tokenizer.TryConsumeByteString(): | 911 if tokenizer.TryConsumeByteString(): |
| 751 while tokenizer.TryConsumeByteString(): | 912 while tokenizer.TryConsumeByteString(): |
| 752 pass | 913 pass |
| 753 return | 914 return |
| 754 | 915 |
| 755 if (not tokenizer.TryConsumeIdentifier() and | 916 if (not tokenizer.TryConsumeIdentifier() and |
| 756 not tokenizer.TryConsumeInt64() and | 917 not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and |
| 757 not tokenizer.TryConsumeUint64() and | |
| 758 not tokenizer.TryConsumeFloat()): | 918 not tokenizer.TryConsumeFloat()): |
| 759 raise ParseError('Invalid field value: ' + tokenizer.token) | 919 raise ParseError('Invalid field value: ' + tokenizer.token) |
| 760 | 920 |
| 761 | 921 |
| 762 class _Tokenizer(object): | 922 class Tokenizer(object): |
| 763 """Protocol buffer text representation tokenizer. | 923 """Protocol buffer text representation tokenizer. |
| 764 | 924 |
| 765 This class handles the lower level string parsing by splitting it into | 925 This class handles the lower level string parsing by splitting it into |
| 766 meaningful tokens. | 926 meaningful tokens. |
| 767 | 927 |
| 768 It was directly ported from the Java protocol buffer API. | 928 It was directly ported from the Java protocol buffer API. |
| 769 """ | 929 """ |
| 770 | 930 |
| 771 _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE) | 931 _WHITESPACE = re.compile(r'\s+') |
| 932 _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE) |
| 933 _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE) |
| 772 _TOKEN = re.compile('|'.join([ | 934 _TOKEN = re.compile('|'.join([ |
| 773 r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier | 935 r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier |
| 774 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number | 936 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number |
| 775 ] + [ # quoted str for each quote mark | 937 ] + [ # quoted str for each quote mark |
| 776 r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES | 938 r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES |
| 777 ])) | 939 ])) |
| 778 | 940 |
| 779 _IDENTIFIER = re.compile(r'\w+') | 941 _IDENTIFIER = re.compile(r'[^\d\W]\w*') |
| 942 _IDENTIFIER_OR_NUMBER = re.compile(r'\w+') |
| 780 | 943 |
| 781 def __init__(self, lines): | 944 def __init__(self, lines, skip_comments=True): |
| 782 self._position = 0 | 945 self._position = 0 |
| 783 self._line = -1 | 946 self._line = -1 |
| 784 self._column = 0 | 947 self._column = 0 |
| 785 self._token_start = None | 948 self._token_start = None |
| 786 self.token = '' | 949 self.token = '' |
| 787 self._lines = iter(lines) | 950 self._lines = iter(lines) |
| 788 self._current_line = '' | 951 self._current_line = '' |
| 789 self._previous_line = 0 | 952 self._previous_line = 0 |
| 790 self._previous_column = 0 | 953 self._previous_column = 0 |
| 791 self._more_lines = True | 954 self._more_lines = True |
| 955 self._skip_comments = skip_comments |
| 956 self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT |
| 957 or self._WHITESPACE) |
| 792 self._SkipWhitespace() | 958 self._SkipWhitespace() |
| 793 self.NextToken() | 959 self.NextToken() |
| 794 | 960 |
| 795 def LookingAt(self, token): | 961 def LookingAt(self, token): |
| 796 return self.token == token | 962 return self.token == token |
| 797 | 963 |
| 798 def AtEnd(self): | 964 def AtEnd(self): |
| 799 """Checks the end of the text was reached. | 965 """Checks the end of the text was reached. |
| 800 | 966 |
| 801 Returns: | 967 Returns: |
| 802 True iff the end was reached. | 968 True iff the end was reached. |
| 803 """ | 969 """ |
| 804 return not self.token | 970 return not self.token |
| 805 | 971 |
| 806 def _PopLine(self): | 972 def _PopLine(self): |
| 807 while len(self._current_line) <= self._column: | 973 while len(self._current_line) <= self._column: |
| 808 try: | 974 try: |
| 809 self._current_line = next(self._lines) | 975 self._current_line = next(self._lines) |
| 810 except StopIteration: | 976 except StopIteration: |
| 811 self._current_line = '' | 977 self._current_line = '' |
| 812 self._more_lines = False | 978 self._more_lines = False |
| 813 return | 979 return |
| 814 else: | 980 else: |
| 815 self._line += 1 | 981 self._line += 1 |
| 816 self._column = 0 | 982 self._column = 0 |
| 817 | 983 |
| 818 def _SkipWhitespace(self): | 984 def _SkipWhitespace(self): |
| 819 while True: | 985 while True: |
| 820 self._PopLine() | 986 self._PopLine() |
| 821 match = self._WHITESPACE.match(self._current_line, self._column) | 987 match = self._whitespace_pattern.match(self._current_line, self._column) |
| 822 if not match: | 988 if not match: |
| 823 break | 989 break |
| 824 length = len(match.group(0)) | 990 length = len(match.group(0)) |
| 825 self._column += length | 991 self._column += length |
| 826 | 992 |
| 827 def TryConsume(self, token): | 993 def TryConsume(self, token): |
| 828 """Tries to consume a given piece of text. | 994 """Tries to consume a given piece of text. |
| 829 | 995 |
| 830 Args: | 996 Args: |
| 831 token: Text to consume. | 997 token: Text to consume. |
| 832 | 998 |
| 833 Returns: | 999 Returns: |
| 834 True iff the text was consumed. | 1000 True iff the text was consumed. |
| 835 """ | 1001 """ |
| 836 if self.token == token: | 1002 if self.token == token: |
| 837 self.NextToken() | 1003 self.NextToken() |
| 838 return True | 1004 return True |
| 839 return False | 1005 return False |
| 840 | 1006 |
| 841 def Consume(self, token): | 1007 def Consume(self, token): |
| 842 """Consumes a piece of text. | 1008 """Consumes a piece of text. |
| 843 | 1009 |
| 844 Args: | 1010 Args: |
| 845 token: Text to consume. | 1011 token: Text to consume. |
| 846 | 1012 |
| 847 Raises: | 1013 Raises: |
| 848 ParseError: If the text couldn't be consumed. | 1014 ParseError: If the text couldn't be consumed. |
| 849 """ | 1015 """ |
| 850 if not self.TryConsume(token): | 1016 if not self.TryConsume(token): |
| 851 raise self._ParseError('Expected "%s".' % token) | 1017 raise self.ParseError('Expected "%s".' % token) |
| 1018 |
| 1019 def ConsumeComment(self): |
| 1020 result = self.token |
| 1021 if not self._COMMENT.match(result): |
| 1022 raise self.ParseError('Expected comment.') |
| 1023 self.NextToken() |
| 1024 return result |
| 852 | 1025 |
| 853 def TryConsumeIdentifier(self): | 1026 def TryConsumeIdentifier(self): |
| 854 try: | 1027 try: |
| 855 self.ConsumeIdentifier() | 1028 self.ConsumeIdentifier() |
| 856 return True | 1029 return True |
| 857 except ParseError: | 1030 except ParseError: |
| 858 return False | 1031 return False |
| 859 | 1032 |
| 860 def ConsumeIdentifier(self): | 1033 def ConsumeIdentifier(self): |
| 861 """Consumes protocol message field identifier. | 1034 """Consumes protocol message field identifier. |
| 862 | 1035 |
| 863 Returns: | 1036 Returns: |
| 864 Identifier string. | 1037 Identifier string. |
| 865 | 1038 |
| 866 Raises: | 1039 Raises: |
| 867 ParseError: If an identifier couldn't be consumed. | 1040 ParseError: If an identifier couldn't be consumed. |
| 868 """ | 1041 """ |
| 869 result = self.token | 1042 result = self.token |
| 870 if not self._IDENTIFIER.match(result): | 1043 if not self._IDENTIFIER.match(result): |
| 871 raise self._ParseError('Expected identifier.') | 1044 raise self.ParseError('Expected identifier.') |
| 872 self.NextToken() | 1045 self.NextToken() |
| 873 return result | 1046 return result |
| 874 | 1047 |
| 875 def ConsumeInt32(self): | 1048 def TryConsumeIdentifierOrNumber(self): |
| 876 """Consumes a signed 32bit integer number. | 1049 try: |
| 1050 self.ConsumeIdentifierOrNumber() |
| 1051 return True |
| 1052 except ParseError: |
| 1053 return False |
| 877 | 1054 |
| 1055 def ConsumeIdentifierOrNumber(self): |
| 1056 """Consumes protocol message field identifier. |
| 1057 |
| 1058 Returns: |
| 1059 Identifier string. |
| 1060 |
| 1061 Raises: |
| 1062 ParseError: If an identifier couldn't be consumed. |
| 1063 """ |
| 1064 result = self.token |
| 1065 if not self._IDENTIFIER_OR_NUMBER.match(result): |
| 1066 raise self.ParseError('Expected identifier or number.') |
| 1067 self.NextToken() |
| 1068 return result |
| 1069 |
| 1070 def TryConsumeInteger(self): |
| 1071 try: |
| 1072 # Note: is_long only affects value type, not whether an error is raised. |
| 1073 self.ConsumeInteger() |
| 1074 return True |
| 1075 except ParseError: |
| 1076 return False |
| 1077 |
| 1078 def ConsumeInteger(self, is_long=False): |
| 1079 """Consumes an integer number. |
| 1080 |
| 1081 Args: |
| 1082 is_long: True if the value should be returned as a long integer. |
| 878 Returns: | 1083 Returns: |
| 879 The integer parsed. | 1084 The integer parsed. |
| 880 | 1085 |
| 881 Raises: | 1086 Raises: |
| 882 ParseError: If a signed 32bit integer couldn't be consumed. | 1087 ParseError: If an integer couldn't be consumed. |
| 883 """ | 1088 """ |
| 884 try: | 1089 try: |
| 885 result = ParseInteger(self.token, is_signed=True, is_long=False) | 1090 result = _ParseAbstractInteger(self.token, is_long=is_long) |
| 886 except ValueError as e: | 1091 except ValueError as e: |
| 887 raise self._ParseError(str(e)) | 1092 raise self.ParseError(str(e)) |
| 888 self.NextToken() | 1093 self.NextToken() |
| 889 return result | 1094 return result |
| 890 | 1095 |
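As the note above says, the reworked ConsumeInteger defers to _ParseAbstractInteger, so the tokenizer-level call no longer enforces signedness or width; is_long only changes the Python type returned (visible only on Python 2). A sketch, again with the construction assumed:

    tok = Tokenizer(['4294967296'])   # assumed constructor; token is 2**32
    value = tok.ConsumeInteger()      # 4294967296 -- no 32/64-bit or sign
                                      # check happens at this level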
| 891 def ConsumeUint32(self): | |
| 892 """Consumes an unsigned 32bit integer number. | |
| 893 | |
| 894 Returns: | |
| 895 The integer parsed. | |
| 896 | |
| 897 Raises: | |
| 898 ParseError: If an unsigned 32bit integer couldn't be consumed. | |
| 899 """ | |
| 900 try: | |
| 901 result = ParseInteger(self.token, is_signed=False, is_long=False) | |
| 902 except ValueError as e: | |
| 903 raise self._ParseError(str(e)) | |
| 904 self.NextToken() | |
| 905 return result | |
| 906 | |
| 907 def TryConsumeInt64(self): | |
| 908 try: | |
| 909 self.ConsumeInt64() | |
| 910 return True | |
| 911 except ParseError: | |
| 912 return False | |
| 913 | |
| 914 def ConsumeInt64(self): | |
| 915 """Consumes a signed 64bit integer number. | |
| 916 | |
| 917 Returns: | |
| 918 The integer parsed. | |
| 919 | |
| 920 Raises: | |
| 921 ParseError: If a signed 64bit integer couldn't be consumed. | |
| 922 """ | |
| 923 try: | |
| 924 result = ParseInteger(self.token, is_signed=True, is_long=True) | |
| 925 except ValueError as e: | |
| 926 raise self._ParseError(str(e)) | |
| 927 self.NextToken() | |
| 928 return result | |
| 929 | |
| 930 def TryConsumeUint64(self): | |
| 931 try: | |
| 932 self.ConsumeUint64() | |
| 933 return True | |
| 934 except ParseError: | |
| 935 return False | |
| 936 | |
| 937 def ConsumeUint64(self): | |
| 938 """Consumes an unsigned 64bit integer number. | |
| 939 | |
| 940 Returns: | |
| 941 The integer parsed. | |
| 942 | |
| 943 Raises: | |
| 944 ParseError: If an unsigned 64bit integer couldn't be consumed. | |
| 945 """ | |
| 946 try: | |
| 947 result = ParseInteger(self.token, is_signed=False, is_long=True) | |
| 948 except ValueError as e: | |
| 949 raise self._ParseError(str(e)) | |
| 950 self.NextToken() | |
| 951 return result | |
| 952 | |
| 953 def TryConsumeFloat(self): | 1096 def TryConsumeFloat(self): |
| 954 try: | 1097 try: |
| 955 self.ConsumeFloat() | 1098 self.ConsumeFloat() |
| 956 return True | 1099 return True |
| 957 except ParseError: | 1100 except ParseError: |
| 958 return False | 1101 return False |
| 959 | 1102 |
| 960 def ConsumeFloat(self): | 1103 def ConsumeFloat(self): |
| 961 """Consumes an floating point number. | 1104 """Consumes an floating point number. |
| 962 | 1105 |
| 963 Returns: | 1106 Returns: |
| 964 The number parsed. | 1107 The number parsed. |
| 965 | 1108 |
| 966 Raises: | 1109 Raises: |
| 967 ParseError: If a floating point number couldn't be consumed. | 1110 ParseError: If a floating point number couldn't be consumed. |
| 968 """ | 1111 """ |
| 969 try: | 1112 try: |
| 970 result = ParseFloat(self.token) | 1113 result = ParseFloat(self.token) |
| 971 except ValueError as e: | 1114 except ValueError as e: |
| 972 raise self._ParseError(str(e)) | 1115 raise self.ParseError(str(e)) |
| 973 self.NextToken() | 1116 self.NextToken() |
| 974 return result | 1117 return result |
| 975 | 1118 |
| 976 def ConsumeBool(self): | 1119 def ConsumeBool(self): |
| 977 """Consumes a boolean value. | 1120 """Consumes a boolean value. |
| 978 | 1121 |
| 979 Returns: | 1122 Returns: |
| 980 The bool parsed. | 1123 The bool parsed. |
| 981 | 1124 |
| 982 Raises: | 1125 Raises: |
| 983 ParseError: If a boolean value couldn't be consumed. | 1126 ParseError: If a boolean value couldn't be consumed. |
| 984 """ | 1127 """ |
| 985 try: | 1128 try: |
| 986 result = ParseBool(self.token) | 1129 result = ParseBool(self.token) |
| 987 except ValueError as e: | 1130 except ValueError as e: |
| 988 raise self._ParseError(str(e)) | 1131 raise self.ParseError(str(e)) |
| 989 self.NextToken() | 1132 self.NextToken() |
| 990 return result | 1133 return result |
| 991 | 1134 |
| 992 def TryConsumeByteString(self): | 1135 def TryConsumeByteString(self): |
| 993 try: | 1136 try: |
| 994 self.ConsumeByteString() | 1137 self.ConsumeByteString() |
| 995 return True | 1138 return True |
| 996 except ParseError: | 1139 except ParseError: |
| 997 return False | 1140 return False |
| 998 | 1141 |
| (...skipping 33 matching lines...) |
| 1032 tokens which are automatically concatenated, like in C or Python. This | 1175 tokens which are automatically concatenated, like in C or Python. This |
| 1033 method only consumes one token. | 1176 method only consumes one token. |
| 1034 | 1177 |
| 1035 Returns: | 1178 Returns: |
| 1036 The token parsed. | 1179 The token parsed. |
| 1037 Raises: | 1180 Raises: |
| 1038 ParseError: If a malformed string is found. | 1181 ParseError: If a malformed string is found. |
| 1039 """ | 1182 """ |
| 1040 text = self.token | 1183 text = self.token |
| 1041 if len(text) < 1 or text[0] not in _QUOTES: | 1184 if len(text) < 1 or text[0] not in _QUOTES: |
| 1042 raise self._ParseError('Expected string but found: %r' % (text,)) | 1185 raise self.ParseError('Expected string but found: %r' % (text,)) |
| 1043 | 1186 |
| 1044 if len(text) < 2 or text[-1] != text[0]: | 1187 if len(text) < 2 or text[-1] != text[0]: |
| 1045 raise self._ParseError('String missing ending quote: %r' % (text,)) | 1188 raise self.ParseError('String missing ending quote: %r' % (text,)) |
| 1046 | 1189 |
| 1047 try: | 1190 try: |
| 1048 result = text_encoding.CUnescape(text[1:-1]) | 1191 result = text_encoding.CUnescape(text[1:-1]) |
| 1049 except ValueError as e: | 1192 except ValueError as e: |
| 1050 raise self._ParseError(str(e)) | 1193 raise self.ParseError(str(e)) |
| 1051 self.NextToken() | 1194 self.NextToken() |
| 1052 return result | 1195 return result |
| 1053 | 1196 |
| 1054 def ConsumeEnum(self, field): | 1197 def ConsumeEnum(self, field): |
| 1055 try: | 1198 try: |
| 1056 result = ParseEnum(field, self.token) | 1199 result = ParseEnum(field, self.token) |
| 1057 except ValueError as e: | 1200 except ValueError as e: |
| 1058 raise self._ParseError(str(e)) | 1201 raise self.ParseError(str(e)) |
| 1059 self.NextToken() | 1202 self.NextToken() |
| 1060 return result | 1203 return result |
| 1061 | 1204 |
| 1062 def ParseErrorPreviousToken(self, message): | 1205 def ParseErrorPreviousToken(self, message): |
| 1063 """Creates and *returns* a ParseError for the previously read token. | 1206 """Creates and *returns* a ParseError for the previously read token. |
| 1064 | 1207 |
| 1065 Args: | 1208 Args: |
| 1066 message: A message to set for the exception. | 1209 message: A message to set for the exception. |
| 1067 | 1210 |
| 1068 Returns: | 1211 Returns: |
| 1069 A ParseError instance. | 1212 A ParseError instance. |
| 1070 """ | 1213 """ |
| 1071 return ParseError('%d:%d : %s' % ( | 1214 return ParseError(message, self._previous_line + 1, |
| 1072 self._previous_line + 1, self._previous_column + 1, message)) | 1215 self._previous_column + 1) |
| 1073 | 1216 |
| 1074 def _ParseError(self, message): | 1217 def ParseError(self, message): |
| 1075 """Creates and *returns* a ParseError for the current token.""" | 1218 """Creates and *returns* a ParseError for the current token.""" |
| 1076 return ParseError('%d:%d : %s' % ( | 1219 return ParseError(message, self._line + 1, self._column + 1) |
| 1077 self._line + 1, self._column + 1, message)) | |
| 1078 | 1220 |
| 1079 def _StringParseError(self, e): | 1221 def _StringParseError(self, e): |
| 1080 return self._ParseError('Couldn\'t parse string: ' + str(e)) | 1222 return self.ParseError('Couldn\'t parse string: ' + str(e)) |
| 1081 | 1223 |
| 1082 def NextToken(self): | 1224 def NextToken(self): |
| 1083 """Reads the next meaningful token.""" | 1225 """Reads the next meaningful token.""" |
| 1084 self._previous_line = self._line | 1226 self._previous_line = self._line |
| 1085 self._previous_column = self._column | 1227 self._previous_column = self._column |
| 1086 | 1228 |
| 1087 self._column += len(self.token) | 1229 self._column += len(self.token) |
| 1088 self._SkipWhitespace() | 1230 self._SkipWhitespace() |
| 1089 | 1231 |
| 1090 if not self._more_lines: | 1232 if not self._more_lines: |
| 1091 self.token = '' | 1233 self.token = '' |
| 1092 return | 1234 return |
| 1093 | 1235 |
| 1094 match = self._TOKEN.match(self._current_line, self._column) | 1236 match = self._TOKEN.match(self._current_line, self._column) |
| 1237 if not match and not self._skip_comments: |
| 1238 match = self._COMMENT.match(self._current_line, self._column) |
| 1095 if match: | 1239 if match: |
| 1096 token = match.group(0) | 1240 token = match.group(0) |
| 1097 self.token = token | 1241 self.token = token |
| 1098 else: | 1242 else: |
| 1099 self.token = self._current_line[self._column] | 1243 self.token = self._current_line[self._column] |
| 1100 | 1244 |
| 1245 # Aliased so it can still be accessed by current visibility violators. |
| 1246 # TODO(dbarnett): Migrate violators to textformat_tokenizer. |
| 1247 _Tokenizer = Tokenizer # pylint: disable=invalid-name |
| 1248 |
| 1249 |
| 1250 def _ConsumeInt32(tokenizer): |
| 1251 """Consumes a signed 32bit integer number from tokenizer. |
| 1252 |
| 1253 Args: |
| 1254 tokenizer: A tokenizer used to parse the number. |
| 1255 |
| 1256 Returns: |
| 1257 The integer parsed. |
| 1258 |
| 1259 Raises: |
| 1260 ParseError: If a signed 32bit integer couldn't be consumed. |
| 1261 """ |
| 1262 return _ConsumeInteger(tokenizer, is_signed=True, is_long=False) |
| 1263 |
| 1264 |
| 1265 def _ConsumeUint32(tokenizer): |
| 1266 """Consumes an unsigned 32bit integer number from tokenizer. |
| 1267 |
| 1268 Args: |
| 1269 tokenizer: A tokenizer used to parse the number. |
| 1270 |
| 1271 Returns: |
| 1272 The integer parsed. |
| 1273 |
| 1274 Raises: |
| 1275 ParseError: If an unsigned 32bit integer couldn't be consumed. |
| 1276 """ |
| 1277 return _ConsumeInteger(tokenizer, is_signed=False, is_long=False) |
| 1278 |
| 1279 |
| 1280 def _TryConsumeInt64(tokenizer): |
| 1281 try: |
| 1282 _ConsumeInt64(tokenizer) |
| 1283 return True |
| 1284 except ParseError: |
| 1285 return False |
| 1286 |
| 1287 |
| 1288 def _ConsumeInt64(tokenizer): |
| 1289 """Consumes a signed 32bit integer number from tokenizer. |
| 1290 |
| 1291 Args: |
| 1292 tokenizer: A tokenizer used to parse the number. |
| 1293 |
| 1294 Returns: |
| 1295 The integer parsed. |
| 1296 |
| 1297 Raises: |
| 1298 ParseError: If a signed 32bit integer couldn't be consumed. |
| 1299 """ |
| 1300 return _ConsumeInteger(tokenizer, is_signed=True, is_long=True) |
| 1301 |
| 1302 |
| 1303 def _TryConsumeUint64(tokenizer): |
| 1304 try: |
| 1305 _ConsumeUint64(tokenizer) |
| 1306 return True |
| 1307 except ParseError: |
| 1308 return False |
| 1309 |
| 1310 |
| 1311 def _ConsumeUint64(tokenizer): |
| 1312 """Consumes an unsigned 64bit integer number from tokenizer. |
| 1313 |
| 1314 Args: |
| 1315 tokenizer: A tokenizer used to parse the number. |
| 1316 |
| 1317 Returns: |
| 1318 The integer parsed. |
| 1319 |
| 1320 Raises: |
| 1321 ParseError: If an unsigned 64bit integer couldn't be consumed. |
| 1322 """ |
| 1323 return _ConsumeInteger(tokenizer, is_signed=False, is_long=True) |
| 1324 |
| 1325 |
| 1326 def _TryConsumeInteger(tokenizer, is_signed=False, is_long=False): |
| 1327 try: |
| 1328 _ConsumeInteger(tokenizer, is_signed=is_signed, is_long=is_long) |
| 1329 return True |
| 1330 except ParseError: |
| 1331 return False |
| 1332 |
| 1333 |
| 1334 def _ConsumeInteger(tokenizer, is_signed=False, is_long=False): |
| 1335 """Consumes an integer number from tokenizer. |
| 1336 |
| 1337 Args: |
| 1338 tokenizer: A tokenizer used to parse the number. |
| 1339 is_signed: True if a signed integer must be parsed. |
| 1340 is_long: True if a long integer must be parsed. |
| 1341 |
| 1342 Returns: |
| 1343 The integer parsed. |
| 1344 |
| 1345 Raises: |
| 1346 ParseError: If an integer with given characteristics couldn't be consumed. |
| 1347 """ |
| 1348 try: |
| 1349 result = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long) |
| 1350 except ValueError as e: |
| 1351 raise tokenizer.ParseError(str(e)) |
| 1352 tokenizer.NextToken() |
| 1353 return result |
| 1354 |
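The width- and sign-specific consumers now live at module level and simply wrap _ConsumeInteger with the right flags, replacing the removed Tokenizer.ConsumeInt32/Uint32/Int64/Uint64 methods. A usage sketch (tokenizer construction assumed as before):

    tok = Tokenizer(['7', '18446744073709551615'])
    a = _ConsumeInt32(tok)     # 7, validated against the int32 range
    b = _ConsumeUint64(tok)    # 18446744073709551615 (2**64 - 1), uint64 max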
| 1101 | 1355 |
| 1102 def ParseInteger(text, is_signed=False, is_long=False): | 1356 def ParseInteger(text, is_signed=False, is_long=False): |
| 1103 """Parses an integer. | 1357 """Parses an integer. |
| 1104 | 1358 |
| 1105 Args: | 1359 Args: |
| 1106 text: The text to parse. | 1360 text: The text to parse. |
| 1107 is_signed: True if a signed integer must be parsed. | 1361 is_signed: True if a signed integer must be parsed. |
| 1108 is_long: True if a long integer must be parsed. | 1362 is_long: True if a long integer must be parsed. |
| 1109 | 1363 |
| 1110 Returns: | 1364 Returns: |
| 1111 The integer value. | 1365 The integer value. |
| 1112 | 1366 |
| 1113 Raises: | 1367 Raises: |
| 1114 ValueError: Thrown iff the text is not a valid integer. | 1368 ValueError: Thrown iff the text is not a valid integer. |
| 1115 """ | 1369 """ |
| 1116 # Do the actual parsing. Exception handling is propagated to caller. | 1370 # Do the actual parsing. Exception handling is propagated to caller. |
| 1371 result = _ParseAbstractInteger(text, is_long=is_long) |
| 1372 |
| 1373 # Check if the integer is sane. Exceptions handled by callers. |
| 1374 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)] |
| 1375 checker.CheckValue(result) |
| 1376 return result |
| 1377 |
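ParseInteger now splits the work: _ParseAbstractInteger does the raw base-0 conversion, and the checker picked from _INTEGER_CHECKERS (indexed by 2 * is_long + is_signed) enforces the range. Illustrative calls:

    ParseInteger('0x7fffffff', is_signed=True, is_long=False)   # 2147483647
    ParseInteger('-1', is_signed=True, is_long=False)           # -1
    ParseInteger('-1', is_signed=False, is_long=False)          # raises ValueError
                                                                # (fails uint32 check)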
| 1378 |
| 1379 def _ParseAbstractInteger(text, is_long=False): |
| 1380 """Parses an integer without checking size/signedness. |
| 1381 |
| 1382 Args: |
| 1383 text: The text to parse. |
| 1384 is_long: True if the value should be returned as a long integer. |
| 1385 |
| 1386 Returns: |
| 1387 The integer value. |
| 1388 |
| 1389 Raises: |
| 1390 ValueError: Thrown iff the text is not a valid integer. |
| 1391 """ |
| 1392 # Do the actual parsing. Exception handling is propagated to caller. |
| 1117 try: | 1393 try: |
| 1118 # We force 32-bit values to int and 64-bit values to long to make | 1394 # We force 32-bit values to int and 64-bit values to long to make |
| 1119 # alternate implementations where the distinction is more significant | 1395 # alternate implementations where the distinction is more significant |
| 1120 # (e.g. the C++ implementation) simpler. | 1396 # (e.g. the C++ implementation) simpler. |
| 1121 if is_long: | 1397 if is_long: |
| 1122 result = long(text, 0) | 1398 return long(text, 0) |
| 1123 else: | 1399 else: |
| 1124 result = int(text, 0) | 1400 return int(text, 0) |
| 1125 except ValueError: | 1401 except ValueError: |
| 1126 raise ValueError('Couldn\'t parse integer: %s' % text) | 1402 raise ValueError('Couldn\'t parse integer: %s' % text) |
| 1127 | 1403 |
| 1128 # Check if the integer is sane. Exceptions handled by callers. | |
| 1129 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)] | |
| 1130 checker.CheckValue(result) | |
| 1131 return result | |
| 1132 | |
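_ParseAbstractInteger accepts any literal that int(text, 0) (or long on Python 2) understands, without any width or sign restriction; range enforcement stays in ParseInteger. For example:

    _ParseAbstractInteger('0x10')          # 16 -- base-0 parsing accepts hex
    _ParseAbstractInteger('4294967296')    # 2**32, no width check here
    _ParseAbstractInteger('ten')           # raises ValueError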
| 1133 | 1404 |
| 1134 def ParseFloat(text): | 1405 def ParseFloat(text): |
| 1135 """Parse a floating point number. | 1406 """Parse a floating point number. |
| 1136 | 1407 |
| 1137 Args: | 1408 Args: |
| 1138 text: Text to parse. | 1409 text: Text to parse. |
| 1139 | 1410 |
| 1140 Returns: | 1411 Returns: |
| 1141 The number parsed. | 1412 The number parsed. |
| 1142 | 1413 |
| (...skipping 25 matching lines...) |
| 1168 | 1439 |
| 1169 Args: | 1440 Args: |
| 1170 text: Text to parse. | 1441 text: Text to parse. |
| 1171 | 1442 |
| 1172 Returns: | 1443 Returns: |
| 1173 Boolean value parsed. | 1444 Boolean value parsed. |
| 1174 | 1445 |
| 1175 Raises: | 1446 Raises: |
| 1176 ValueError: If text is not a valid boolean. | 1447 ValueError: If text is not a valid boolean. |
| 1177 """ | 1448 """ |
| 1178 if text in ('true', 't', '1'): | 1449 if text in ('true', 't', '1', 'True'): |
| 1179 return True | 1450 return True |
| 1180 elif text in ('false', 'f', '0'): | 1451 elif text in ('false', 'f', '0', 'False'): |
| 1181 return False | 1452 return False |
| 1182 else: | 1453 else: |
| 1183 raise ValueError('Expected "true" or "false".') | 1454 raise ValueError('Expected "true" or "false".') |
| 1184 | 1455 |
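ParseBool now also accepts the capitalized Python-style literals alongside the existing lowercase forms. For example:

    ParseBool('True')   # True  (newly accepted)
    ParseBool('t')      # True
    ParseBool('0')      # False
    ParseBool('yes')    # raises ValueError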
| 1185 | 1456 |
| 1186 def ParseEnum(field, value): | 1457 def ParseEnum(field, value): |
| 1187 """Parse an enum value. | 1458 """Parse an enum value. |
| 1188 | 1459 |
| 1189 The value can be specified by a number (the enum value), or by | 1460 The value can be specified by a number (the enum value), or by |
| 1190 a string literal (the enum name). | 1461 a string literal (the enum name). |
| 1191 | 1462 |
| 1192 Args: | 1463 Args: |
| 1193 field: Enum field descriptor. | 1464 field: Enum field descriptor. |
| 1194 value: String value. | 1465 value: String value. |
| 1195 | 1466 |
| 1196 Returns: | 1467 Returns: |
| 1197 Enum value number. | 1468 Enum value number. |
| 1198 | 1469 |
| 1199 Raises: | 1470 Raises: |
| 1200 ValueError: If the enum value could not be parsed. | 1471 ValueError: If the enum value could not be parsed. |
| 1201 """ | 1472 """ |
| 1202 enum_descriptor = field.enum_type | 1473 enum_descriptor = field.enum_type |
| 1203 try: | 1474 try: |
| 1204 number = int(value, 0) | 1475 number = int(value, 0) |
| 1205 except ValueError: | 1476 except ValueError: |
| 1206 # Identifier. | 1477 # Identifier. |
| 1207 enum_value = enum_descriptor.values_by_name.get(value, None) | 1478 enum_value = enum_descriptor.values_by_name.get(value, None) |
| 1208 if enum_value is None: | 1479 if enum_value is None: |
| 1209 raise ValueError( | 1480 raise ValueError('Enum type "%s" has no value named %s.' % |
| 1210 'Enum type "%s" has no value named %s.' % ( | 1481 (enum_descriptor.full_name, value)) |
| 1211 enum_descriptor.full_name, value)) | |
| 1212 else: | 1482 else: |
| 1213 # Numeric value. | 1483 # Numeric value. |
| 1214 enum_value = enum_descriptor.values_by_number.get(number, None) | 1484 enum_value = enum_descriptor.values_by_number.get(number, None) |
| 1215 if enum_value is None: | 1485 if enum_value is None: |
| 1216 raise ValueError( | 1486 raise ValueError('Enum type "%s" has no value with number %d.' % |
| 1217 'Enum type "%s" has no value with number %d.' % ( | 1487 (enum_descriptor.full_name, number)) |
| 1218 enum_descriptor.full_name, number)) | |
| 1219 return enum_value.number | 1488 return enum_value.number |
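ParseEnum resolves the value either by name or by base-0 number against the field's enum descriptor. A sketch using a hypothetical generated module my_pb2 whose Sample message has an enum field color defining RED = 1 (these names are illustrative only):

    field = my_pb2.Sample.DESCRIPTOR.fields_by_name['color']
    ParseEnum(field, 'RED')    # 1, resolved via values_by_name
    ParseEnum(field, '1')      # 1, resolved via values_by_number
    ParseEnum(field, 'TEAL')   # raises ValueError if TEAL is not defined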