Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(186)

Side by Side Diff: third_party/protobuf/python/google/protobuf/text_format.py

Issue 2600753002: Reverts third_party/protobuf: Update to HEAD (f52e188fe4) (Closed)
Patch Set: Created 3 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Protocol Buffers - Google's data interchange format 1 # Protocol Buffers - Google's data interchange format
2 # Copyright 2008 Google Inc. All rights reserved. 2 # Copyright 2008 Google Inc. All rights reserved.
3 # https://developers.google.com/protocol-buffers/ 3 # https://developers.google.com/protocol-buffers/
4 # 4 #
5 # Redistribution and use in source and binary forms, with or without 5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are 6 # modification, are permitted provided that the following conditions are
7 # met: 7 # met:
8 # 8 #
9 # * Redistributions of source code must retain the above copyright 9 # * Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer. 10 # notice, this list of conditions and the following disclaimer.
(...skipping 30 matching lines...) Expand all
41 """ 41 """
42 42
43 __author__ = 'kenton@google.com (Kenton Varda)' 43 __author__ = 'kenton@google.com (Kenton Varda)'
44 44
45 import io 45 import io
46 import re 46 import re
47 47
48 import six 48 import six
49 49
50 if six.PY3: 50 if six.PY3:
51 long = int # pylint: disable=redefined-builtin,invalid-name 51 long = int
52 52
53 # pylint: disable=g-import-not-at-top
54 from google.protobuf.internal import type_checkers 53 from google.protobuf.internal import type_checkers
55 from google.protobuf import descriptor 54 from google.protobuf import descriptor
56 from google.protobuf import text_encoding 55 from google.protobuf import text_encoding
57 56
58 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', 'PrintFieldValue', 57 __all__ = ['MessageToString', 'PrintMessage', 'PrintField',
59 'Merge'] 58 'PrintFieldValue', 'Merge']
59
60 60
# Value checkers used to validate parsed integers, one per proto integer
# width/signedness combination.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Case-insensitive spellings of float infinity and NaN (e.g. "inf",
# "-Infinity", "nanf").
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# C++ field types that hold floating point values.
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
# Characters accepted as string-literal quotes.
_QUOTES = frozenset(("'", '"'))
# Full name of the well-known Any type, which gets special printing support.
_ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
71 70
72 71
class Error(Exception):
  """Top-level module error for text_format."""


class ParseError(Error):
  """Thrown in case of text parsing or tokenizing error."""

  def __init__(self, message=None, line=None, column=None):
    """Creates a ParseError.

    Args:
      message: Human-readable description of the error, or None.
      line: Line number where the error occurred, or None if unknown.
      column: Column where the error occurred, or None if unknown.
    """
    if message is not None and line is not None:
      # Prefix the message with its "line[:column] : " location.
      location = str(line)
      if column is not None:
        location = '{0}:{1}'.format(location, column)
      message = '{0} : {1}'.format(location, message)
    if message is None:
      super(ParseError, self).__init__()
    else:
      super(ParseError, self).__init__(message)
    self._line = line
    self._column = column

  def GetLine(self):
    """Returns the line number where the error occurred, or None."""
    return self._line

  def GetColumn(self):
    """Returns the column where the error occurred, or None."""
    return self._column
98 78
99 79
class TextWriter(object):
  """In-memory text sink that hides the Python 2/3 str-vs-bytes split."""

  def __init__(self, as_utf8):
    # On Python 2 accumulate UTF-8 encoded bytes; on Python 3 accumulate text.
    # NOTE(review): as_utf8 is accepted but never read here — TODO confirm
    # it is intentionally unused.
    self._writer = io.BytesIO() if six.PY2 else io.StringIO()

  def write(self, val):
    if six.PY2 and isinstance(val, six.text_type):
      val = val.encode('utf-8')
    return self._writer.write(val)

  def close(self):
    return self._writer.close()

  def getvalue(self):
    return self._writer.getvalue()
119 98
120 99
def MessageToString(message,
                    as_utf8=False,
                    as_one_line=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None,
                    use_field_number=False,
                    descriptor_pool=None,
                    indent=0):
  """Converts a protobuf message to its text format representation.

  Floating point values can be formatted compactly with 15 digits of
  precision (which is the most that IEEE 754 "double" can guarantee)
  using float_format='.15g'. To ensure that converting to text and back to a
  proto will result in an identical value, float_format='.17g' should be used.

  Args:
    message: The protocol buffers message.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    indent: The indent level, in terms of spaces, for pretty print.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  writer = TextWriter(as_utf8)
  printer = _Printer(writer, indent, as_utf8, as_one_line, pointy_brackets,
                     use_index_order, float_format, use_field_number,
                     descriptor_pool)
  printer.PrintMessage(message)
  result = writer.getvalue()
  writer.close()
  # One-line output leaves a trailing separator after the last field.
  return result.rstrip() if as_one_line else result
165 136
166 137
def _IsMapEntry(field):
  """Returns True if the field is a map's auto-generated entry message."""
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  message_type = field.message_type
  return message_type.has_options and message_type.GetOptions().map_entry
171 142
172 143
def PrintMessage(message,
                 out,
                 indent=0,
                 as_utf8=False,
                 as_one_line=False,
                 pointy_brackets=False,
                 use_index_order=False,
                 float_format=None,
                 use_field_number=False,
                 descriptor_pool=None):
  """Prints the text format representation of a message to a writer.

  Args:
    message: The protocol buffers message to print.
    out: A writer-like object that the output is written to.
    indent: The initial indent level, in spaces.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces.
    use_index_order: If True, order fields by source-code index.
    float_format: Optional format spec for floating point numbers.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
  """
  _Printer(out, indent, as_utf8, as_one_line, pointy_brackets,
           use_index_order, float_format, use_field_number,
           descriptor_pool).PrintMessage(message)
187 151
188 152
def PrintField(field,
               value,
               out,
               indent=0,
               as_utf8=False,
               as_one_line=False,
               pointy_brackets=False,
               use_index_order=False,
               float_format=None):
  """Prints a single field name/value pair to a writer."""
  _Printer(out, indent, as_utf8, as_one_line, pointy_brackets,
           use_index_order, float_format).PrintField(field, value)
202 159
203 160
def PrintFieldValue(field,
                    value,
                    out,
                    indent=0,
                    as_utf8=False,
                    as_one_line=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None):
  """Prints a single field value (not including name) to a writer."""
  _Printer(out, indent, as_utf8, as_one_line, pointy_brackets,
           use_index_order, float_format).PrintFieldValue(field, value)
217 169
218 170
def _BuildMessageFromTypeName(type_name, descriptor_pool):
  """Returns a new protobuf message instance for the given type name.

  Args:
    type_name: Fully-qualified protobuf message type name string.
    descriptor_pool: DescriptorPool instance.

  Returns:
    A Message instance of type matching type_name, or None if no Descriptor
    matching type_name was found in the pool.
  """
  # pylint: disable=g-import-not-at-top
  from google.protobuf import symbol_database
  database = symbol_database.Default()
  try:
    message_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
  except KeyError:
    # Unknown type: the caller decides how to handle an unresolvable Any.
    return None
  message_type = database.GetPrototype(message_descriptor)
  return message_type()
239
240
class _Printer(object):
  """Text format printer for protocol message."""

  def __init__(self, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False, use_index_order=False,
               float_format=None, use_field_number=False,
               descriptor_pool=None):
    """Initializes the Printer.

    Floating point values can be formatted compactly with 15 digits of
    precision (which is the most that IEEE 754 "double" can guarantee)
    using float_format='.15g'. To ensure that converting to text and back
    to a proto will result in an identical value, float_format='.17g'
    should be used.

    Args:
      out: To record the text format result.
      indent: The indent level for pretty print.
      as_utf8: Produce text output in UTF8 format.
      as_one_line: Don't introduce newlines between fields.
      pointy_brackets: If True, use angle brackets instead of curly braces
        for nesting.
      use_index_order: If True, print fields of a proto message using the
        order defined in source code instead of the field number. By
        default, use the field number order.
      float_format: If set, use this to specify floating point number
        formatting (per the "Format Specification Mini-Language");
        otherwise, str() is used.
      use_field_number: If True, print field numbers instead of names.
      descriptor_pool: A DescriptorPool used to resolve Any types.
    """
    self.out = out
    self.indent = indent
    self.as_utf8 = as_utf8
    self.as_one_line = as_one_line
    self.pointy_brackets = pointy_brackets
    self.use_index_order = use_index_order
    self.float_format = float_format
    self.use_field_number = use_field_number
    self.descriptor_pool = descriptor_pool
286
287 def _TryPrintAsAnyMessage(self, message):
288 """Serializes if message is a google.protobuf.Any field."""
289 packed_message = _BuildMessageFromTypeName(message.TypeName(),
290 self.descriptor_pool)
291 if packed_message:
292 packed_message.MergeFromString(message.value)
293 self.out.write('%s[%s]' % (self.indent * ' ', message.type_url))
294 self._PrintMessageFieldValue(packed_message)
295 self.out.write(' ' if self.as_one_line else '\n')
296 return True
297 else:
298 return False
299 207
300 def PrintMessage(self, message): 208 def PrintMessage(self, message):
301 """Convert protobuf message to text format. 209 """Convert protobuf message to text format.
302 210
303 Args: 211 Args:
304 message: The protocol buffers message. 212 message: The protocol buffers message.
305 """ 213 """
306 if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
307 self.descriptor_pool and self._TryPrintAsAnyMessage(message)):
308 return
309 fields = message.ListFields() 214 fields = message.ListFields()
310 if self.use_index_order: 215 if self.use_index_order:
311 fields.sort(key=lambda x: x[0].index) 216 fields.sort(key=lambda x: x[0].index)
312 for field, value in fields: 217 for field, value in fields:
313 if _IsMapEntry(field): 218 if _IsMapEntry(field):
314 for key in sorted(value): 219 for key in sorted(value):
315 # This is slow for maps with submessage entires because it copies the 220 # This is slow for maps with submessage entires because it copies the
316 # entire tree. Unfortunately this would take significant refactoring 221 # entire tree. Unfortunately this would take significant refactoring
317 # of this file to work around. 222 # of this file to work around.
318 # 223 #
319 # TODO(haberman): refactor and optimize if this becomes an issue. 224 # TODO(haberman): refactor and optimize if this becomes an issue.
320 entry_submsg = value.GetEntryClass()(key=key, value=value[key]) 225 entry_submsg = field.message_type._concrete_class(
226 key=key, value=value[key])
321 self.PrintField(field, entry_submsg) 227 self.PrintField(field, entry_submsg)
322 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: 228 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
323 for element in value: 229 for element in value:
324 self.PrintField(field, element) 230 self.PrintField(field, element)
325 else: 231 else:
326 self.PrintField(field, value) 232 self.PrintField(field, value)
327 233
328 def PrintField(self, field, value): 234 def PrintField(self, field, value):
329 """Print a single field name/value pair.""" 235 """Print a single field name/value pair."""
330 out = self.out 236 out = self.out
(...skipping 20 matching lines...) Expand all
351 # The colon is optional in this case, but our cross-language golden files 257 # The colon is optional in this case, but our cross-language golden files
352 # don't include it. 258 # don't include it.
353 out.write(': ') 259 out.write(': ')
354 260
355 self.PrintFieldValue(field, value) 261 self.PrintFieldValue(field, value)
356 if self.as_one_line: 262 if self.as_one_line:
357 out.write(' ') 263 out.write(' ')
358 else: 264 else:
359 out.write('\n') 265 out.write('\n')
360 266
361 def _PrintMessageFieldValue(self, value):
362 if self.pointy_brackets:
363 openb = '<'
364 closeb = '>'
365 else:
366 openb = '{'
367 closeb = '}'
368
369 if self.as_one_line:
370 self.out.write(' %s ' % openb)
371 self.PrintMessage(value)
372 self.out.write(closeb)
373 else:
374 self.out.write(' %s\n' % openb)
375 self.indent += 2
376 self.PrintMessage(value)
377 self.indent -= 2
378 self.out.write(' ' * self.indent + closeb)
379
380 def PrintFieldValue(self, field, value): 267 def PrintFieldValue(self, field, value):
381 """Print a single field value (not including name). 268 """Print a single field value (not including name).
382 269
383 For repeated fields, the value should be a single element. 270 For repeated fields, the value should be a single element.
384 271
385 Args: 272 Args:
386 field: The descriptor of the field to be printed. 273 field: The descriptor of the field to be printed.
387 value: The value of the field. 274 value: The value of the field.
388 """ 275 """
389 out = self.out 276 out = self.out
277 if self.pointy_brackets:
278 openb = '<'
279 closeb = '>'
280 else:
281 openb = '{'
282 closeb = '}'
283
390 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 284 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
391 self._PrintMessageFieldValue(value) 285 if self.as_one_line:
286 out.write(' %s ' % openb)
287 self.PrintMessage(value)
288 out.write(closeb)
289 else:
290 out.write(' %s\n' % openb)
291 self.indent += 2
292 self.PrintMessage(value)
293 self.indent -= 2
294 out.write(' ' * self.indent + closeb)
392 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: 295 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
393 enum_value = field.enum_type.values_by_number.get(value, None) 296 enum_value = field.enum_type.values_by_number.get(value, None)
394 if enum_value is not None: 297 if enum_value is not None:
395 out.write(enum_value.name) 298 out.write(enum_value.name)
396 else: 299 else:
397 out.write(str(value)) 300 out.write(str(value))
398 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: 301 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
399 out.write('\"') 302 out.write('\"')
400 if isinstance(value, six.text_type): 303 if isinstance(value, six.text_type):
401 out_value = value.encode('utf-8') 304 out_value = value.encode('utf-8')
(...skipping 10 matching lines...) Expand all
412 if value: 315 if value:
413 out.write('true') 316 out.write('true')
414 else: 317 else:
415 out.write('false') 318 out.write('false')
416 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: 319 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None:
417 out.write('{1:{0}}'.format(self.float_format, value)) 320 out.write('{1:{0}}'.format(self.float_format, value))
418 else: 321 else:
419 out.write(str(value)) 322 out.write(str(value))
420 323
421 324
def Parse(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False):
  """Parses a text representation of a protocol message into a message.

  Args:
    text: Message text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  if not isinstance(text, str):
    # Accept UTF-8 encoded bytes in addition to native strings.
    text = text.decode('utf-8')
  return ParseLines(text.split('\n'),
                    message,
                    allow_unknown_extension,
                    allow_field_number)
445 346
446 347
def Merge(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None):
  """Parses a text representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.

  Args:
    text: Message text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Accept UTF-8 encoded bytes, mirroring Parse(); without this, a bytes
  # argument fails when split against a text separator.
  if not isinstance(text, str):
    text = text.decode('utf-8')
  return MergeLines(
      text.split('\n'),
      message,
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool)
477 370
478 371
def ParseLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None):
  """Parses a text representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # descriptor_pool was documented but not accepted before; it is now
  # forwarded to the parser, matching MergeLines().
  parser = _Parser(allow_unknown_extension,
                   allow_field_number,
                   descriptor_pool=descriptor_pool)
  return parser.ParseLines(lines, message)
501 391
502 392
def MergeLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None):
  """Parses a text representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  return _Parser(allow_unknown_extension,
                 allow_field_number,
                 descriptor_pool=descriptor_pool).MergeLines(lines, message)
527 412
528 413
class _Parser(object):
  """Text format parser for protocol message."""

  def __init__(self,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None):
    """Initializes the parser.

    Args:
      allow_unknown_extension: if True, skip over missing extensions and
        keep parsing.
      allow_field_number: if True, both field number and field name are
        allowed when identifying a field.
      descriptor_pool: A DescriptorPool used to resolve Any types.
    """
    self.allow_unknown_extension = allow_unknown_extension
    self.allow_field_number = allow_field_number
    self.descriptor_pool = descriptor_pool
539 420
540 def ParseFromString(self, text, message): 421 def ParseFromString(self, text, message):
541 """Parses a text representation of a protocol message into a message.""" 422 """Parses an text representation of a protocol message into a message."""
542 if not isinstance(text, str): 423 if not isinstance(text, str):
543 text = text.decode('utf-8') 424 text = text.decode('utf-8')
544 return self.ParseLines(text.split('\n'), message) 425 return self.ParseLines(text.split('\n'), message)
545 426
546 def ParseLines(self, lines, message): 427 def ParseLines(self, lines, message):
547 """Parses a text representation of a protocol message into a message.""" 428 """Parses an text representation of a protocol message into a message."""
548 self._allow_multiple_scalars = False 429 self._allow_multiple_scalars = False
549 self._ParseOrMerge(lines, message) 430 self._ParseOrMerge(lines, message)
550 return message 431 return message
551 432
552 def MergeFromString(self, text, message): 433 def MergeFromString(self, text, message):
553 """Merges a text representation of a protocol message into a message.""" 434 """Merges an text representation of a protocol message into a message."""
554 return self._MergeLines(text.split('\n'), message) 435 return self._MergeLines(text.split('\n'), message)
555 436
556 def MergeLines(self, lines, message): 437 def MergeLines(self, lines, message):
557 """Merges a text representation of a protocol message into a message.""" 438 """Merges an text representation of a protocol message into a message."""
558 self._allow_multiple_scalars = True 439 self._allow_multiple_scalars = True
559 self._ParseOrMerge(lines, message) 440 self._ParseOrMerge(lines, message)
560 return message 441 return message
561 442
562 def _ParseOrMerge(self, lines, message): 443 def _ParseOrMerge(self, lines, message):
563 """Converts a text representation of a protocol message into a message. 444 """Converts an text representation of a protocol message into a message.
564 445
565 Args: 446 Args:
566 lines: Lines of a message's text representation. 447 lines: Lines of a message's text representation.
567 message: A protocol buffer message to merge into. 448 message: A protocol buffer message to merge into.
568 449
569 Raises: 450 Raises:
570 ParseError: On text parsing problems. 451 ParseError: On text parsing problems.
571 """ 452 """
572 tokenizer = Tokenizer(lines) 453 tokenizer = _Tokenizer(lines)
573 while not tokenizer.AtEnd(): 454 while not tokenizer.AtEnd():
574 self._MergeField(tokenizer, message) 455 self._MergeField(tokenizer, message)
575 456
576 def _MergeField(self, tokenizer, message): 457 def _MergeField(self, tokenizer, message):
577 """Merges a single protocol message field into a message. 458 """Merges a single protocol message field into a message.
578 459
579 Args: 460 Args:
580 tokenizer: A tokenizer to parse the field name and values. 461 tokenizer: A tokenizer to parse the field name and values.
581 message: A protocol message to record the data. 462 message: A protocol message to record the data.
582 463
(...skipping 20 matching lines...) Expand all
603 field = message.Extensions._FindExtensionByName(name) 484 field = message.Extensions._FindExtensionByName(name)
604 # pylint: enable=protected-access 485 # pylint: enable=protected-access
605 if not field: 486 if not field:
606 if self.allow_unknown_extension: 487 if self.allow_unknown_extension:
607 field = None 488 field = None
608 else: 489 else:
609 raise tokenizer.ParseErrorPreviousToken( 490 raise tokenizer.ParseErrorPreviousToken(
610 'Extension "%s" not registered.' % name) 491 'Extension "%s" not registered.' % name)
611 elif message_descriptor != field.containing_type: 492 elif message_descriptor != field.containing_type:
612 raise tokenizer.ParseErrorPreviousToken( 493 raise tokenizer.ParseErrorPreviousToken(
613 'Extension "%s" does not extend message type "%s".' % 494 'Extension "%s" does not extend message type "%s".' % (
614 (name, message_descriptor.full_name)) 495 name, message_descriptor.full_name))
615 496
616 tokenizer.Consume(']') 497 tokenizer.Consume(']')
617 498
618 else: 499 else:
619 name = tokenizer.ConsumeIdentifierOrNumber() 500 name = tokenizer.ConsumeIdentifier()
620 if self.allow_field_number and name.isdigit(): 501 if self.allow_field_number and name.isdigit():
621 number = ParseInteger(name, True, True) 502 number = ParseInteger(name, True, True)
622 field = message_descriptor.fields_by_number.get(number, None) 503 field = message_descriptor.fields_by_number.get(number, None)
623 if not field and message_descriptor.is_extendable: 504 if not field and message_descriptor.is_extendable:
624 field = message.Extensions._FindExtensionByNumber(number) 505 field = message.Extensions._FindExtensionByNumber(number)
625 else: 506 else:
626 field = message_descriptor.fields_by_name.get(name, None) 507 field = message_descriptor.fields_by_name.get(name, None)
627 508
628 # Group names are expected to be capitalized as they appear in the 509 # Group names are expected to be capitalized as they appear in the
629 # .proto file, which actually matches their type names, not their field 510 # .proto file, which actually matches their type names, not their field
630 # names. 511 # names.
631 if not field: 512 if not field:
632 field = message_descriptor.fields_by_name.get(name.lower(), None) 513 field = message_descriptor.fields_by_name.get(name.lower(), None)
633 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: 514 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
634 field = None 515 field = None
635 516
636 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and 517 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
637 field.message_type.name != name): 518 field.message_type.name != name):
638 field = None 519 field = None
639 520
640 if not field: 521 if not field:
641 raise tokenizer.ParseErrorPreviousToken( 522 raise tokenizer.ParseErrorPreviousToken(
642 'Message type "%s" has no field named "%s".' % 523 'Message type "%s" has no field named "%s".' % (
643 (message_descriptor.full_name, name)) 524 message_descriptor.full_name, name))
644 525
645 if field: 526 if field:
646 if not self._allow_multiple_scalars and field.containing_oneof: 527 if not self._allow_multiple_scalars and field.containing_oneof:
647 # Check if there's a different field set in this oneof. 528 # Check if there's a different field set in this oneof.
648 # Note that we ignore the case if the same field was set before, and we 529 # Note that we ignore the case if the same field was set before, and we
649 # apply _allow_multiple_scalars to non-scalar fields as well. 530 # apply _allow_multiple_scalars to non-scalar fields as well.
650 which_oneof = message.WhichOneof(field.containing_oneof.name) 531 which_oneof = message.WhichOneof(field.containing_oneof.name)
651 if which_oneof is not None and which_oneof != field.name: 532 if which_oneof is not None and which_oneof != field.name:
652 raise tokenizer.ParseErrorPreviousToken( 533 raise tokenizer.ParseErrorPreviousToken(
653 'Field "%s" is specified along with field "%s", another member ' 534 'Field "%s" is specified along with field "%s", another member '
654 'of oneof "%s" for message type "%s".' % 535 'of oneof "%s" for message type "%s".' % (
655 (field.name, which_oneof, field.containing_oneof.name, 536 field.name, which_oneof, field.containing_oneof.name,
656 message_descriptor.full_name)) 537 message_descriptor.full_name))
657 538
658 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 539 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
659 tokenizer.TryConsume(':') 540 tokenizer.TryConsume(':')
660 merger = self._MergeMessageField 541 merger = self._MergeMessageField
661 else: 542 else:
662 tokenizer.Consume(':') 543 tokenizer.Consume(':')
663 merger = self._MergeScalarField 544 merger = self._MergeScalarField
664 545
665 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and 546 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED
666 tokenizer.TryConsume('[')): 547 and tokenizer.TryConsume('[')):
667 # Short repeated format, e.g. "foo: [1, 2, 3]" 548 # Short repeated format, e.g. "foo: [1, 2, 3]"
668 while True: 549 while True:
669 merger(tokenizer, message, field) 550 merger(tokenizer, message, field)
670 if tokenizer.TryConsume(']'): 551 if tokenizer.TryConsume(']'): break
671 break
672 tokenizer.Consume(',') 552 tokenizer.Consume(',')
673 553
674 else: 554 else:
675 merger(tokenizer, message, field) 555 merger(tokenizer, message, field)
676 556
677 else: # Proto field is unknown. 557 else: # Proto field is unknown.
678 assert self.allow_unknown_extension 558 assert self.allow_unknown_extension
679 _SkipFieldContents(tokenizer) 559 _SkipFieldContents(tokenizer)
680 560
681 # For historical reasons, fields may optionally be separated by commas or 561 # For historical reasons, fields may optionally be separated by commas or
682 # semicolons. 562 # semicolons.
683 if not tokenizer.TryConsume(','): 563 if not tokenizer.TryConsume(','):
684 tokenizer.TryConsume(';') 564 tokenizer.TryConsume(';')
685 565
686 def _ConsumeAnyTypeUrl(self, tokenizer):
687 """Consumes a google.protobuf.Any type URL and returns the type name."""
688 # Consume "type.googleapis.com/".
689 tokenizer.ConsumeIdentifier()
690 tokenizer.Consume('.')
691 tokenizer.ConsumeIdentifier()
692 tokenizer.Consume('.')
693 tokenizer.ConsumeIdentifier()
694 tokenizer.Consume('/')
695 # Consume the fully-qualified type name.
696 name = [tokenizer.ConsumeIdentifier()]
697 while tokenizer.TryConsume('.'):
698 name.append(tokenizer.ConsumeIdentifier())
699 return '.'.join(name)
700
701 def _MergeMessageField(self, tokenizer, message, field): 566 def _MergeMessageField(self, tokenizer, message, field):
702 """Merges a single scalar field into a message. 567 """Merges a single scalar field into a message.
703 568
704 Args: 569 Args:
705 tokenizer: A tokenizer to parse the field value. 570 tokenizer: A tokenizer to parse the field value.
706 message: The message of which field is a member. 571 message: The message of which field is a member.
707 field: The descriptor of the field to be merged. 572 field: The descriptor of the field to be merged.
708 573
709 Raises: 574 Raises:
710 ParseError: In case of text parsing problems. 575 ParseError: In case of text parsing problems.
711 """ 576 """
712 is_map_entry = _IsMapEntry(field) 577 is_map_entry = _IsMapEntry(field)
713 578
714 if tokenizer.TryConsume('<'): 579 if tokenizer.TryConsume('<'):
715 end_token = '>' 580 end_token = '>'
716 else: 581 else:
717 tokenizer.Consume('{') 582 tokenizer.Consume('{')
718 end_token = '}' 583 end_token = '}'
719 584
720 if (field.message_type.full_name == _ANY_FULL_TYPE_NAME and 585 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
721 tokenizer.TryConsume('[')):
722 packed_type_name = self._ConsumeAnyTypeUrl(tokenizer)
723 tokenizer.Consume(']')
724 tokenizer.TryConsume(':')
725 if tokenizer.TryConsume('<'):
726 expanded_any_end_token = '>'
727 else:
728 tokenizer.Consume('{')
729 expanded_any_end_token = '}'
730 if not self.descriptor_pool:
731 raise ParseError('Descriptor pool required to parse expanded Any field')
732 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name,
733 self.descriptor_pool)
734 if not expanded_any_sub_message:
735 raise ParseError('Type %s not found in descriptor pool' %
736 packed_type_name)
737 while not tokenizer.TryConsume(expanded_any_end_token):
738 if tokenizer.AtEnd():
739 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' %
740 (expanded_any_end_token,))
741 self._MergeField(tokenizer, expanded_any_sub_message)
742 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
743 any_message = getattr(message, field.name).add()
744 else:
745 any_message = getattr(message, field.name)
746 any_message.Pack(expanded_any_sub_message)
747 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
748 if field.is_extension: 586 if field.is_extension:
749 sub_message = message.Extensions[field].add() 587 sub_message = message.Extensions[field].add()
750 elif is_map_entry: 588 elif is_map_entry:
751 sub_message = getattr(message, field.name).GetEntryClass()() 589 # pylint: disable=protected-access
590 sub_message = field.message_type._concrete_class()
752 else: 591 else:
753 sub_message = getattr(message, field.name).add() 592 sub_message = getattr(message, field.name).add()
754 else: 593 else:
755 if field.is_extension: 594 if field.is_extension:
756 sub_message = message.Extensions[field] 595 sub_message = message.Extensions[field]
757 else: 596 else:
758 sub_message = getattr(message, field.name) 597 sub_message = getattr(message, field.name)
759 sub_message.SetInParent() 598 sub_message.SetInParent()
760 599
761 while not tokenizer.TryConsume(end_token): 600 while not tokenizer.TryConsume(end_token):
(...skipping 20 matching lines...) Expand all
782 Raises: 621 Raises:
783 ParseError: In case of text parsing problems. 622 ParseError: In case of text parsing problems.
784 RuntimeError: On runtime errors. 623 RuntimeError: On runtime errors.
785 """ 624 """
786 _ = self.allow_unknown_extension 625 _ = self.allow_unknown_extension
787 value = None 626 value = None
788 627
789 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, 628 if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
790 descriptor.FieldDescriptor.TYPE_SINT32, 629 descriptor.FieldDescriptor.TYPE_SINT32,
791 descriptor.FieldDescriptor.TYPE_SFIXED32): 630 descriptor.FieldDescriptor.TYPE_SFIXED32):
792 value = _ConsumeInt32(tokenizer) 631 value = tokenizer.ConsumeInt32()
793 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, 632 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
794 descriptor.FieldDescriptor.TYPE_SINT64, 633 descriptor.FieldDescriptor.TYPE_SINT64,
795 descriptor.FieldDescriptor.TYPE_SFIXED64): 634 descriptor.FieldDescriptor.TYPE_SFIXED64):
796 value = _ConsumeInt64(tokenizer) 635 value = tokenizer.ConsumeInt64()
797 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, 636 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
798 descriptor.FieldDescriptor.TYPE_FIXED32): 637 descriptor.FieldDescriptor.TYPE_FIXED32):
799 value = _ConsumeUint32(tokenizer) 638 value = tokenizer.ConsumeUint32()
800 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, 639 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
801 descriptor.FieldDescriptor.TYPE_FIXED64): 640 descriptor.FieldDescriptor.TYPE_FIXED64):
802 value = _ConsumeUint64(tokenizer) 641 value = tokenizer.ConsumeUint64()
803 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, 642 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
804 descriptor.FieldDescriptor.TYPE_DOUBLE): 643 descriptor.FieldDescriptor.TYPE_DOUBLE):
805 value = tokenizer.ConsumeFloat() 644 value = tokenizer.ConsumeFloat()
806 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: 645 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
807 value = tokenizer.ConsumeBool() 646 value = tokenizer.ConsumeBool()
808 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: 647 elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
809 value = tokenizer.ConsumeString() 648 value = tokenizer.ConsumeString()
810 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: 649 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
811 value = tokenizer.ConsumeByteString() 650 value = tokenizer.ConsumeByteString()
812 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: 651 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
907 ParseError: In case an invalid field value is found. 746 ParseError: In case an invalid field value is found.
908 """ 747 """
909 # String/bytes tokens can come in multiple adjacent string literals. 748 # String/bytes tokens can come in multiple adjacent string literals.
910 # If we can consume one, consume as many as we can. 749 # If we can consume one, consume as many as we can.
911 if tokenizer.TryConsumeByteString(): 750 if tokenizer.TryConsumeByteString():
912 while tokenizer.TryConsumeByteString(): 751 while tokenizer.TryConsumeByteString():
913 pass 752 pass
914 return 753 return
915 754
916 if (not tokenizer.TryConsumeIdentifier() and 755 if (not tokenizer.TryConsumeIdentifier() and
917 not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and 756 not tokenizer.TryConsumeInt64() and
757 not tokenizer.TryConsumeUint64() and
918 not tokenizer.TryConsumeFloat()): 758 not tokenizer.TryConsumeFloat()):
919 raise ParseError('Invalid field value: ' + tokenizer.token) 759 raise ParseError('Invalid field value: ' + tokenizer.token)
920 760
921 761
class Tokenizer(object):
  """Protocol buffer text representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  # Runs of whitespace (and, when comment-skipping is enabled, '#' comments)
  # that separate tokens.
  _WHITESPACE = re.compile(r'\s+')
  _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
  _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
  # A single token: an identifier, a number, or a quoted string literal.
  _TOKEN = re.compile('|'.join([
      r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
      r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
  ] + [  # quoted str for each quote mark
      r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES
  ]))

  # A proto identifier: a word that does not start with a digit.
  _IDENTIFIER = re.compile(r'[^\d\W]\w*')
  # A field name on the wire may also be a raw field number.
  _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')
943 780
  def __init__(self, lines, skip_comments=True):
    """Initializes the tokenizer over an iterable of text lines.

    Args:
      lines: An iterable of strings making up the text representation.
      skip_comments: If True (the default), '#' comments are treated as
        whitespace and skipped; otherwise they are surfaced as tokens.
    """
    self._position = 0
    self._line = -1  # Incremented to 0 by the first _PopLine() call.
    self._column = 0
    self._token_start = None
    self.token = ''
    self._lines = iter(lines)
    self._current_line = ''
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._skip_comments = skip_comments
    # Choose the whitespace pattern once so _SkipWhitespace need not branch.
    self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
                                or self._WHITESPACE)
    self._SkipWhitespace()
    self.NextToken()
960 794
961 def LookingAt(self, token): 795 def LookingAt(self, token):
962 return self.token == token 796 return self.token == token
963 797
964 def AtEnd(self): 798 def AtEnd(self):
965 """Checks the end of the text was reached. 799 """Checks the end of the text was reached.
966 800
967 Returns: 801 Returns:
968 True iff the end was reached. 802 True iff the end was reached.
969 """ 803 """
970 return not self.token 804 return not self.token
971 805
  def _PopLine(self):
    """Advances to the next input line once the current one is exhausted.

    Sets self._more_lines to False when the underlying iterator runs out.
    """
    # A while loop because input lines may be empty strings.
    while len(self._current_line) <= self._column:
      try:
        self._current_line = next(self._lines)
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0
983 817
  def _SkipWhitespace(self):
    """Skips whitespace (and comments, if configured) before the next token."""
    while True:
      self._PopLine()
      match = self._whitespace_pattern.match(self._current_line, self._column)
      if not match:
        break
      length = len(match.group(0))
      self._column += length
992 826
993 def TryConsume(self, token): 827 def TryConsume(self, token):
994 """Tries to consume a given piece of text. 828 """Tries to consume a given piece of text.
995 829
996 Args: 830 Args:
997 token: Text to consume. 831 token: Text to consume.
998 832
999 Returns: 833 Returns:
1000 True iff the text was consumed. 834 True iff the text was consumed.
1001 """ 835 """
1002 if self.token == token: 836 if self.token == token:
1003 self.NextToken() 837 self.NextToken()
1004 return True 838 return True
1005 return False 839 return False
1006 840
1007 def Consume(self, token): 841 def Consume(self, token):
1008 """Consumes a piece of text. 842 """Consumes a piece of text.
1009 843
1010 Args: 844 Args:
1011 token: Text to consume. 845 token: Text to consume.
1012 846
1013 Raises: 847 Raises:
1014 ParseError: If the text couldn't be consumed. 848 ParseError: If the text couldn't be consumed.
1015 """ 849 """
1016 if not self.TryConsume(token): 850 if not self.TryConsume(token):
1017 raise self.ParseError('Expected "%s".' % token) 851 raise self._ParseError('Expected "%s".' % token)
1018
  def ConsumeComment(self):
    """Consumes the current token as a '#' comment and returns its text.

    Raises:
      ParseError: If the current token is not a comment.
    """
    result = self.token
    if not self._COMMENT.match(result):
      raise self.ParseError('Expected comment.')
    self.NextToken()
    return result
1025 852
1026 def TryConsumeIdentifier(self): 853 def TryConsumeIdentifier(self):
1027 try: 854 try:
1028 self.ConsumeIdentifier() 855 self.ConsumeIdentifier()
1029 return True 856 return True
1030 except ParseError: 857 except ParseError:
1031 return False 858 return False
1032 859
1033 def ConsumeIdentifier(self): 860 def ConsumeIdentifier(self):
1034 """Consumes protocol message field identifier. 861 """Consumes protocol message field identifier.
1035 862
1036 Returns: 863 Returns:
1037 Identifier string. 864 Identifier string.
1038 865
1039 Raises: 866 Raises:
1040 ParseError: If an identifier couldn't be consumed. 867 ParseError: If an identifier couldn't be consumed.
1041 """ 868 """
1042 result = self.token 869 result = self.token
1043 if not self._IDENTIFIER.match(result): 870 if not self._IDENTIFIER.match(result):
1044 raise self.ParseError('Expected identifier.') 871 raise self._ParseError('Expected identifier.')
1045 self.NextToken() 872 self.NextToken()
1046 return result 873 return result
1047 874
1048 def TryConsumeIdentifierOrNumber(self): 875 def ConsumeInt32(self):
876 """Consumes a signed 32bit integer number.
877
878 Returns:
879 The integer parsed.
880
881 Raises:
882 ParseError: If a signed 32bit integer couldn't be consumed.
883 """
1049 try: 884 try:
1050 self.ConsumeIdentifierOrNumber() 885 result = ParseInteger(self.token, is_signed=True, is_long=False)
886 except ValueError as e:
887 raise self._ParseError(str(e))
888 self.NextToken()
889 return result
890
891 def ConsumeUint32(self):
892 """Consumes an unsigned 32bit integer number.
893
894 Returns:
895 The integer parsed.
896
897 Raises:
898 ParseError: If an unsigned 32bit integer couldn't be consumed.
899 """
900 try:
901 result = ParseInteger(self.token, is_signed=False, is_long=False)
902 except ValueError as e:
903 raise self._ParseError(str(e))
904 self.NextToken()
905 return result
906
907 def TryConsumeInt64(self):
908 try:
909 self.ConsumeInt64()
1051 return True 910 return True
1052 except ParseError: 911 except ParseError:
1053 return False 912 return False
1054 913
1055 def ConsumeIdentifierOrNumber(self): 914 def ConsumeInt64(self):
1056 """Consumes protocol message field identifier. 915 """Consumes a signed 64bit integer number.
1057 916
1058 Returns: 917 Returns:
1059 Identifier string. 918 The integer parsed.
1060 919
1061 Raises: 920 Raises:
1062 ParseError: If an identifier couldn't be consumed. 921 ParseError: If a signed 64bit integer couldn't be consumed.
1063 """ 922 """
1064 result = self.token 923 try:
1065 if not self._IDENTIFIER_OR_NUMBER.match(result): 924 result = ParseInteger(self.token, is_signed=True, is_long=True)
1066 raise self.ParseError('Expected identifier or number.') 925 except ValueError as e:
926 raise self._ParseError(str(e))
1067 self.NextToken() 927 self.NextToken()
1068 return result 928 return result
1069 929
1070 def TryConsumeInteger(self): 930 def TryConsumeUint64(self):
1071 try: 931 try:
1072 # Note: is_long only affects value type, not whether an error is raised. 932 self.ConsumeUint64()
1073 self.ConsumeInteger()
1074 return True 933 return True
1075 except ParseError: 934 except ParseError:
1076 return False 935 return False
1077 936
1078 def ConsumeInteger(self, is_long=False): 937 def ConsumeUint64(self):
1079 """Consumes an integer number. 938 """Consumes an unsigned 64bit integer number.
1080 939
1081 Args:
1082 is_long: True if the value should be returned as a long integer.
1083 Returns: 940 Returns:
1084 The integer parsed. 941 The integer parsed.
1085 942
1086 Raises: 943 Raises:
1087 ParseError: If an integer couldn't be consumed. 944 ParseError: If an unsigned 64bit integer couldn't be consumed.
1088 """ 945 """
1089 try: 946 try:
1090 result = _ParseAbstractInteger(self.token, is_long=is_long) 947 result = ParseInteger(self.token, is_signed=False, is_long=True)
1091 except ValueError as e: 948 except ValueError as e:
1092 raise self.ParseError(str(e)) 949 raise self._ParseError(str(e))
1093 self.NextToken() 950 self.NextToken()
1094 return result 951 return result
1095 952
1096 def TryConsumeFloat(self): 953 def TryConsumeFloat(self):
1097 try: 954 try:
1098 self.ConsumeFloat() 955 self.ConsumeFloat()
1099 return True 956 return True
1100 except ParseError: 957 except ParseError:
1101 return False 958 return False
1102 959
1103 def ConsumeFloat(self): 960 def ConsumeFloat(self):
1104 """Consumes an floating point number. 961 """Consumes an floating point number.
1105 962
1106 Returns: 963 Returns:
1107 The number parsed. 964 The number parsed.
1108 965
1109 Raises: 966 Raises:
1110 ParseError: If a floating point number couldn't be consumed. 967 ParseError: If a floating point number couldn't be consumed.
1111 """ 968 """
1112 try: 969 try:
1113 result = ParseFloat(self.token) 970 result = ParseFloat(self.token)
1114 except ValueError as e: 971 except ValueError as e:
1115 raise self.ParseError(str(e)) 972 raise self._ParseError(str(e))
1116 self.NextToken() 973 self.NextToken()
1117 return result 974 return result
1118 975
1119 def ConsumeBool(self): 976 def ConsumeBool(self):
1120 """Consumes a boolean value. 977 """Consumes a boolean value.
1121 978
1122 Returns: 979 Returns:
1123 The bool parsed. 980 The bool parsed.
1124 981
1125 Raises: 982 Raises:
1126 ParseError: If a boolean value couldn't be consumed. 983 ParseError: If a boolean value couldn't be consumed.
1127 """ 984 """
1128 try: 985 try:
1129 result = ParseBool(self.token) 986 result = ParseBool(self.token)
1130 except ValueError as e: 987 except ValueError as e:
1131 raise self.ParseError(str(e)) 988 raise self._ParseError(str(e))
1132 self.NextToken() 989 self.NextToken()
1133 return result 990 return result
1134 991
1135 def TryConsumeByteString(self): 992 def TryConsumeByteString(self):
1136 try: 993 try:
1137 self.ConsumeByteString() 994 self.ConsumeByteString()
1138 return True 995 return True
1139 except ParseError: 996 except ParseError:
1140 return False 997 return False
1141 998
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
1175 tokens which are automatically concatenated, like in C or Python. This 1032 tokens which are automatically concatenated, like in C or Python. This
1176 method only consumes one token. 1033 method only consumes one token.
1177 1034
1178 Returns: 1035 Returns:
1179 The token parsed. 1036 The token parsed.
1180 Raises: 1037 Raises:
1181 ParseError: When the wrong format data is found. 1038 ParseError: When the wrong format data is found.
1182 """ 1039 """
1183 text = self.token 1040 text = self.token
1184 if len(text) < 1 or text[0] not in _QUOTES: 1041 if len(text) < 1 or text[0] not in _QUOTES:
1185 raise self.ParseError('Expected string but found: %r' % (text,)) 1042 raise self._ParseError('Expected string but found: %r' % (text,))
1186 1043
1187 if len(text) < 2 or text[-1] != text[0]: 1044 if len(text) < 2 or text[-1] != text[0]:
1188 raise self.ParseError('String missing ending quote: %r' % (text,)) 1045 raise self._ParseError('String missing ending quote: %r' % (text,))
1189 1046
1190 try: 1047 try:
1191 result = text_encoding.CUnescape(text[1:-1]) 1048 result = text_encoding.CUnescape(text[1:-1])
1192 except ValueError as e: 1049 except ValueError as e:
1193 raise self.ParseError(str(e)) 1050 raise self._ParseError(str(e))
1194 self.NextToken() 1051 self.NextToken()
1195 return result 1052 return result
1196 1053
1197 def ConsumeEnum(self, field): 1054 def ConsumeEnum(self, field):
1198 try: 1055 try:
1199 result = ParseEnum(field, self.token) 1056 result = ParseEnum(field, self.token)
1200 except ValueError as e: 1057 except ValueError as e:
1201 raise self.ParseError(str(e)) 1058 raise self._ParseError(str(e))
1202 self.NextToken() 1059 self.NextToken()
1203 return result 1060 return result
1204 1061
1205 def ParseErrorPreviousToken(self, message): 1062 def ParseErrorPreviousToken(self, message):
1206 """Creates and *returns* a ParseError for the previously read token. 1063 """Creates and *returns* a ParseError for the previously read token.
1207 1064
1208 Args: 1065 Args:
1209 message: A message to set for the exception. 1066 message: A message to set for the exception.
1210 1067
1211 Returns: 1068 Returns:
1212 A ParseError instance. 1069 A ParseError instance.
1213 """ 1070 """
1214 return ParseError(message, self._previous_line + 1, 1071 return ParseError('%d:%d : %s' % (
1215 self._previous_column + 1) 1072 self._previous_line + 1, self._previous_column + 1, message))
1216 1073
1217 def ParseError(self, message): 1074 def _ParseError(self, message):
1218 """Creates and *returns* a ParseError for the current token.""" 1075 """Creates and *returns* a ParseError for the current token."""
1219 return ParseError(message, self._line + 1, self._column + 1) 1076 return ParseError('%d:%d : %s' % (
1077 self._line + 1, self._column + 1, message))
1220 1078
1221 def _StringParseError(self, e): 1079 def _StringParseError(self, e):
1222 return self.ParseError('Couldn\'t parse string: ' + str(e)) 1080 return self._ParseError('Couldn\'t parse string: ' + str(e))
1223 1081
  def NextToken(self):
    """Reads the next meaningful token."""
    # Remember where the current token started so error messages can point
    # at the token that was just consumed.
    self._previous_line = self._line
    self._previous_column = self._column

    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    # When comments are not skipped as whitespace, they are themselves tokens.
    if not match and not self._skip_comments:
      match = self._COMMENT.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # Fall back to a single character (e.g. punctuation like '{' or ':').
      self.token = self._current_line[self._column]
1244 1100
# Aliased so it can still be accessed by current visibility violators.
# TODO(dbarnett): Migrate violators to textformat_tokenizer.
_Tokenizer = Tokenizer  # pylint: disable=invalid-name
1248
1249
def _ConsumeInt32(tokenizer):
  """Reads a signed 32bit integer token from the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 32bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_long=False, is_signed=True)
1263
1264
def _ConsumeUint32(tokenizer):
  """Reads an unsigned 32bit integer token from the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 32bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_long=False, is_signed=False)
1278
1279
def _TryConsumeInt64(tokenizer):
  """Returns True and advances iff a signed 64bit integer was consumed."""
  try:
    _ConsumeInt64(tokenizer)
  except ParseError:
    return False
  return True
1286
1287
def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)
1301
1302
def _TryConsumeUint64(tokenizer):
  """Returns True and advances iff an unsigned 64bit integer was consumed."""
  try:
    _ConsumeUint64(tokenizer)
  except ParseError:
    return False
  return True
1309
1310
def _ConsumeUint64(tokenizer):
  """Reads an unsigned 64bit integer token from the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_long=True, is_signed=False)
1324
1325
def _TryConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Returns True and advances iff an integer of the given kind was consumed."""
  try:
    _ConsumeInteger(tokenizer, is_signed=is_signed, is_long=is_long)
  except ParseError:
    return False
  return True
1332
1333
def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  try:
    value = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long)
  except ValueError as e:
    raise tokenizer.ParseError(str(e))
  tokenizer.NextToken()
  return value
1354
1355 1101
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Parse the raw value first; any ValueError propagates to the caller.
  value = _ParseAbstractInteger(text, is_long=is_long)

  # Range-check the value; _INTEGER_CHECKERS is indexed by the two flags
  # packed into two bits. Exceptions are handled by callers.
  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
  checker.CheckValue(value)
  return value
1377
1378
1379 def _ParseAbstractInteger(text, is_long=False):
1380 """Parses an integer without checking size/signedness.
1381
1382 Args:
1383 text: The text to parse.
1384 is_long: True if the value should be returned as a long integer.
1385
1386 Returns:
1387 The integer value.
1388
1389 Raises:
1390 ValueError: Thrown Iff the text is not a valid integer.
1391 """
1392 # Do the actual parsing. Exception handling is propagated to caller.
1393 try: 1117 try:
1394 # We force 32-bit values to int and 64-bit values to long to make 1118 # We force 32-bit values to int and 64-bit values to long to make
1395 # alternate implementations where the distinction is more significant 1119 # alternate implementations where the distinction is more significant
1396 # (e.g. the C++ implementation) simpler. 1120 # (e.g. the C++ implementation) simpler.
1397 if is_long: 1121 if is_long:
1398 return long(text, 0) 1122 result = long(text, 0)
1399 else: 1123 else:
1400 return int(text, 0) 1124 result = int(text, 0)
1401 except ValueError: 1125 except ValueError:
1402 raise ValueError('Couldn\'t parse integer: %s' % text) 1126 raise ValueError('Couldn\'t parse integer: %s' % text)
1403 1127
1128 # Check if the integer is sane. Exceptions handled by callers.
1129 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
1130 checker.CheckValue(result)
1131 return result
1132
1404 1133
1405 def ParseFloat(text): 1134 def ParseFloat(text):
1406 """Parse a floating point number. 1135 """Parse a floating point number.
1407 1136
1408 Args: 1137 Args:
1409 text: Text to parse. 1138 text: Text to parse.
1410 1139
1411 Returns: 1140 Returns:
1412 The number parsed. 1141 The number parsed.
1413 1142
(...skipping 25 matching lines...) Expand all
1439 1168
1440 Args: 1169 Args:
1441 text: Text to parse. 1170 text: Text to parse.
1442 1171
1443 Returns: 1172 Returns:
1444 Boolean values parsed 1173 Boolean values parsed
1445 1174
1446 Raises: 1175 Raises:
1447 ValueError: If text is not a valid boolean. 1176 ValueError: If text is not a valid boolean.
1448 """ 1177 """
1449 if text in ('true', 't', '1', 'True'): 1178 if text in ('true', 't', '1'):
1450 return True 1179 return True
1451 elif text in ('false', 'f', '0', 'False'): 1180 elif text in ('false', 'f', '0'):
1452 return False 1181 return False
1453 else: 1182 else:
1454 raise ValueError('Expected "true" or "false".') 1183 raise ValueError('Expected "true" or "false".')
1455 1184
1456 1185
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not numeric, so treat the text as an enum identifier.
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (enum_descriptor.full_name, value))
    return enum_value.number
  # Numeric value: resolve it through the number index.
  enum_value = enum_descriptor.values_by_number.get(number, None)
  if enum_value is None:
    raise ValueError('Enum type "%s" has no value with number %d.' %
                     (enum_descriptor.full_name, number))
  return enum_value.number
OLDNEW
« no previous file with comments | « third_party/protobuf/python/google/protobuf/symbol_database.py ('k') | third_party/protobuf/python/setup.cfg » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698