OLD | NEW |
| (Empty) |
1 # Protocol Buffers - Google's data interchange format | |
2 # Copyright 2008 Google Inc. All rights reserved. | |
3 # http://code.google.com/p/protobuf/ | |
4 # | |
5 # Redistribution and use in source and binary forms, with or without | |
6 # modification, are permitted provided that the following conditions are | |
7 # met: | |
8 # | |
9 # * Redistributions of source code must retain the above copyright | |
10 # notice, this list of conditions and the following disclaimer. | |
11 # * Redistributions in binary form must reproduce the above | |
12 # copyright notice, this list of conditions and the following disclaimer | |
13 # in the documentation and/or other materials provided with the | |
14 # distribution. | |
15 # * Neither the name of Google Inc. nor the names of its | |
16 # contributors may be used to endorse or promote products derived from | |
17 # this software without specific prior written permission. | |
18 # | |
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
30 | |
# PY25 compatible for Google App Engine (GAE): avoids syntax newer than Python 2.5.
32 # | |
33 # Copyright 2007 Google Inc. All Rights Reserved. | |
34 | |
35 """Contains routines for printing protocol messages in text format.""" | |
36 | |
37 __author__ = 'kenton@google.com (Kenton Varda)' | |
38 | |
39 import cStringIO | |
40 import re | |
41 | |
42 from google.protobuf.internal import type_checkers | |
43 from google.protobuf import descriptor | |
44 from google.protobuf import text_encoding | |
45 | |
__all__ = ['MessageToString', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge']


# Value-range checkers indexed by 2 * int(is_long) + int(is_signed);
# see ParseInteger for the lookup.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Alternative infinity/NaN spellings emitted by other protobuf
# implementations (e.g. "inf", "-Infinity", "nanf").
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# cpp_types whose printed values honor the float_format option.
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])
58 | |
59 | |
class Error(Exception):
  """Top-level module error for text_format.

  Base class of this module's exception hierarchy.
  """
62 | |
63 | |
class ParseError(Error):
  """Thrown in case of ASCII parsing error (by Parse/Merge and helpers)."""
66 | |
67 | |
def MessageToString(message, as_utf8=False, as_one_line=False,
                    pointy_brackets=False, use_index_order=False,
                    float_format=None):
  """Convert protobuf message to text format.

  Floating point values can be formatted compactly with 15 digits of
  precision (which is the most that IEEE 754 "double" can guarantee)
  using float_format='.15g'.

  Args:
    message: The protocol buffers message.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.

  Returns:
    A string of the text formatted protocol buffer message.
  """
  buf = cStringIO.StringIO()
  PrintMessage(message, buf, as_utf8=as_utf8, as_one_line=as_one_line,
               pointy_brackets=pointy_brackets,
               use_index_order=use_index_order,
               float_format=float_format)
  text = buf.getvalue()
  buf.close()
  # Single-line output ends with a trailing separator; drop it.
  return text.rstrip() if as_one_line else text
102 | |
103 | |
def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False,
                 pointy_brackets=False, use_index_order=False,
                 float_format=None):
  """Prints every set field of a message to a file-like object.

  Args:
    message: The protocol buffers message to print.
    out: A file-like object the text output is written to.
    indent: Number of spaces each emitted line is indented by.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields using the order defined in source
      code instead of the field number order.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.
  """
  fields = message.ListFields()
  if use_index_order:
    # FieldDescriptor.index is the declaration order in the .proto file.
    fields.sort(key=lambda x: x[0].index)
  for field, value in fields:
    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      # Repeated fields print one name/value pair per element.
      for element in value:
        PrintField(field, element, out, indent, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   float_format=float_format)
    else:
      PrintField(field, value, out, indent, as_utf8, as_one_line,
                 pointy_brackets=pointy_brackets,
                 float_format=float_format)
120 | |
121 | |
def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False,
               pointy_brackets=False, float_format=None):
  """Print a single field name/value pair.  For repeated fields, the value
  should be a single element.

  Args:
    field: The FieldDescriptor of the field to print.
    value: The field value (a single element for repeated fields).
    out: A file-like object the text output is written to.
    indent: Number of spaces of leading indentation.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.
  """

  out.write(' ' * indent)
  if field.is_extension:
    # Extensions are printed with their full name in square brackets.
    out.write('[')
    if (field.containing_type.GetOptions().message_set_wire_format and
        field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
        field.message_type == field.extension_scope and
        field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
      # MessageSet-style extensions print the message type's full name.
      out.write(field.message_type.full_name)
    else:
      out.write(field.full_name)
    out.write(']')
  elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
    # For groups, use the capitalized name.
    out.write(field.message_type.name)
  else:
    out.write(field.name)

  if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # The colon is optional in this case, but our cross-language golden files
    # don't include it.
    out.write(': ')

  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line,
                  pointy_brackets=pointy_brackets,
                  float_format=float_format)
  if as_one_line:
    out.write(' ')
  else:
    out.write('\n')
156 | |
157 | |
def PrintFieldValue(field, value, out, indent=0, as_utf8=False,
                    as_one_line=False, pointy_brackets=False,
                    float_format=None):
  """Print a single field value (not including name).  For repeated fields,
  the value should be a single element.

  Args:
    field: The FieldDescriptor of the field whose value is printed.
    value: The value to print (a single element for repeated fields).
    out: A file-like object the text output is written to.
    indent: Indentation level used when printing nested messages.
    as_utf8: Produce text output in UTF8 format.
    as_one_line: Don't introduce newlines between fields.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    float_format: If set, use this to specify floating point number formatting
      (per the "Format Specification Mini-Language"); otherwise, str() is used.
  """

  if pointy_brackets:
    openb = '<'
    closeb = '>'
  else:
    openb = '{'
    closeb = '}'

  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    if as_one_line:
      out.write(' %s ' % openb)
      PrintMessage(value, out, indent, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   float_format=float_format)
      out.write(closeb)
    else:
      # Nested messages indent their contents by two extra spaces.
      out.write(' %s\n' % openb)
      PrintMessage(value, out, indent + 2, as_utf8, as_one_line,
                   pointy_brackets=pointy_brackets,
                   float_format=float_format)
      out.write(' ' * indent + closeb)
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
    enum_value = field.enum_type.values_by_number.get(value, None)
    if enum_value is not None:
      out.write(enum_value.name)
    else:
      # Unknown enum numbers are printed as their raw value.
      out.write(str(value))
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
    out.write('\"')
    if isinstance(value, unicode):
      out_value = value.encode('utf-8')
    else:
      out_value = value
    if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
      # We need to escape non-UTF8 chars in TYPE_BYTES field.
      out_as_utf8 = False
    else:
      out_as_utf8 = as_utf8
    out.write(text_encoding.CEscape(out_value, out_as_utf8))
    out.write('\"')
  elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
    if value:
      out.write('true')
    else:
      out.write('false')
  elif field.cpp_type in _FLOAT_TYPES and float_format is not None:
    # Apply the caller-supplied format spec to the float value.
    out.write('{1:{0}}'.format(float_format, value))
  else:
    out.write(str(value))
212 | |
213 | |
def _ParseOrMerge(lines, message, allow_multiple_scalars):
  """Converts an ASCII representation of a protocol message into a message.

  Args:
    lines: Lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: On ASCII parsing problems.
  """
  tok = _Tokenizer(lines)
  # Consume one top-level field per iteration until the input is exhausted.
  while not tok.AtEnd():
    _MergeField(tok, message, allow_multiple_scalars)
230 | |
231 | |
def Parse(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Repeated values for a non-repeated field are rejected; use Merge() to
  accept the last value instead.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  if not isinstance(text, str):
    # Byte input: decode to text before splitting into lines.
    text = text.decode('utf-8')
  return ParseLines(text.split('\n'), message)
247 | |
248 | |
def Merge(text, message):
  """Parses an ASCII representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  lines = text.split('\n')
  return MergeLines(lines, message)
266 | |
267 | |
def ParseLines(lines, message):
  """Parses an ASCII representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  # Strict mode: duplicate scalar values for a singular field are an error.
  _ParseOrMerge(lines, message, False)
  return message
283 | |
284 | |
def MergeLines(lines, message):
  """Parses an ASCII representation of a protocol message into a message.

  Args:
    lines: An iterable of lines of a message's ASCII representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  # Lenient mode: duplicate scalar values overwrite, keeping the last one.
  _ParseOrMerge(lines, message, True)
  return message
300 | |
301 | |
def _MergeField(tokenizer, message, allow_multiple_scalars):
  """Merges a single protocol message field into a message.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
  """
  message_descriptor = message.DESCRIPTOR
  if tokenizer.TryConsume('['):
    # Extension field, written as "[package.name]".
    name = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      name.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(name)

    if not message_descriptor.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          message_descriptor.full_name)
    # pylint: disable=protected-access
    field = message.Extensions._FindExtensionByName(name)
    # pylint: enable=protected-access
    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" not registered.' % name)
    elif message_descriptor != field.containing_type:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" does not extend message type "%s".' % (
              name, message_descriptor.full_name))
    tokenizer.Consume(']')
  else:
    name = tokenizer.ConsumeIdentifier()
    field = message_descriptor.fields_by_name.get(name, None)

    # Group names are expected to be capitalized as they appear in the
    # .proto file, which actually matches their type names, not their field
    # names.
    if not field:
      field = message_descriptor.fields_by_name.get(name.lower(), None)
      if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
        field = None

    if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
        field.message_type.name != name):
      field = None

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' % (
              message_descriptor.full_name, name))

  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # Sub-message: the colon is optional; body is delimited by {} or <>.
    tokenizer.TryConsume(':')

    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        sub_message = message.Extensions[field].add()
      else:
        sub_message = getattr(message, field.name).add()
    else:
      if field.is_extension:
        sub_message = message.Extensions[field]
      else:
        sub_message = getattr(message, field.name)
      # Mark the singular sub-message present even if its body is empty.
      sub_message.SetInParent()

    # Recursively merge fields until the matching close delimiter.
    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
      _MergeField(tokenizer, sub_message, allow_multiple_scalars)
  else:
    _MergeScalarField(tokenizer, message, field, allow_multiple_scalars)

  # For historical reasons, fields may optionally be separated by commas or
  # semicolons.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')
390 | |
391 | |
def _MergeScalarField(tokenizer, message, field, allow_multiple_scalars):
  """Merges a single protocol message scalar field into a message.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: A protocol message to record the data.
    field: The descriptor of the field to be merged.
    allow_multiple_scalars: Determines if repeated values for a non-repeated
      field are permitted, e.g., the string "foo: 1 foo: 2" for a
      required/optional field named "foo".

  Raises:
    ParseError: In case of ASCII parsing problems.
    RuntimeError: On runtime errors.
  """
  tokenizer.Consume(':')
  fd = descriptor.FieldDescriptor
  ftype = field.type

  # Consume the value token(s) according to the field's wire type.
  if ftype in (fd.TYPE_INT32, fd.TYPE_SINT32, fd.TYPE_SFIXED32):
    value = tokenizer.ConsumeInt32()
  elif ftype in (fd.TYPE_INT64, fd.TYPE_SINT64, fd.TYPE_SFIXED64):
    value = tokenizer.ConsumeInt64()
  elif ftype in (fd.TYPE_UINT32, fd.TYPE_FIXED32):
    value = tokenizer.ConsumeUint32()
  elif ftype in (fd.TYPE_UINT64, fd.TYPE_FIXED64):
    value = tokenizer.ConsumeUint64()
  elif ftype in (fd.TYPE_FLOAT, fd.TYPE_DOUBLE):
    value = tokenizer.ConsumeFloat()
  elif ftype == fd.TYPE_BOOL:
    value = tokenizer.ConsumeBool()
  elif ftype == fd.TYPE_STRING:
    value = tokenizer.ConsumeString()
  elif ftype == fd.TYPE_BYTES:
    value = tokenizer.ConsumeByteString()
  elif ftype == fd.TYPE_ENUM:
    value = tokenizer.ConsumeEnum(field)
  else:
    raise RuntimeError('Unknown field type %d' % ftype)

  # Store the value on the message.
  if field.label == fd.LABEL_REPEATED:
    # Repeated fields simply accumulate values.
    if field.is_extension:
      message.Extensions[field].append(value)
    else:
      getattr(message, field.name).append(value)
  elif field.is_extension:
    if not allow_multiple_scalars and message.HasExtension(field):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" extensions.' %
          (message.DESCRIPTOR.full_name, field.full_name))
    message.Extensions[field] = value
  else:
    if not allow_multiple_scalars and message.HasField(field.name):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" fields.' %
          (message.DESCRIPTOR.full_name, field.name))
    setattr(message, field.name, value)
458 | |
459 | |
class _Tokenizer(object):
  """Protocol buffer ASCII representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  # Runs of whitespace and '#'-comments (which extend to end of line).
  _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
  _TOKEN = re.compile(
      '[a-zA-Z_][0-9a-zA-Z_+-]*|'  # an identifier
      '[0-9+-][0-9a-zA-Z_.+-]*|'  # a number
      '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
      '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
  _IDENTIFIER = re.compile(r'\w+')

  def __init__(self, lines):
    self._position = 0
    self._line = -1
    self._column = 0
    self._token_start = None
    # The current token; the empty string signals end of input.
    self.token = ''
    self._lines = iter(lines)
    self._current_line = ''
    # Line/column of the previously read token, for error reporting.
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._SkipWhitespace()
    self.NextToken()

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    # Advances to the next input line whenever the current one is exhausted.
    while len(self._current_line) <= self._column:
      try:
        self._current_line = self._lines.next()
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0

  def _SkipWhitespace(self):
    # Skips whitespace and comments, pulling in new lines as needed.
    while True:
      self._PopLine()
      match = self._WHITESPACE.match(self._current_line, self._column)
      if not match:
        break
      length = len(match.group(0))
      self._column += length

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self._ParseError('Expected "%s".' % token)

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self._ParseError('Expected identifier.')
    self.NextToken()
    return result

  def ConsumeInt32(self):
    """Consumes a signed 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 32bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=True, is_long=False)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUint32(self):
    """Consumes an unsigned 32bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 32bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=False, is_long=False)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeInt64(self):
    """Consumes a signed 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If a signed 64bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=True, is_long=True)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUint64(self):
    """Consumes an unsigned 64bit integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an unsigned 64bit integer couldn't be consumed.
    """
    try:
      result = ParseInteger(self.token, is_signed=False, is_long=True)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeFloat(self):
    """Consumes an floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      return unicode(the_bytes, 'utf-8')
    except UnicodeDecodeError, e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    # Adjacent string literals are concatenated, like in C or Python source.
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in ('\'', '"'):
      the_list.append(self._ConsumeSingleByteString())
    return ''.encode('latin1').join(the_list)  ##PY25
##!PY25    return b''.join(the_list)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python.  This
    method only consumes one token.
    """
    text = self.token
    if len(text) < 1 or text[0] not in ('\'', '"'):
      raise self._ParseError('Expected string.')

    if len(text) < 2 or text[-1] != text[0]:
      raise self._ParseError('String missing ending quote.')

    try:
      result = text_encoding.CUnescape(text[1:-1])
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    """Consumes an enum value (given by name or number) for field.

    Args:
      field: Enum field descriptor whose values the token is checked against.

    Returns:
      Enum value number.

    Raises:
      ParseError: If the current token is not a valid value of the enum.
    """
    try:
      result = ParseEnum(field, self.token)
    except ValueError, e:
      raise self._ParseError(str(e))
    self.NextToken()
    return result

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    return ParseError('%d:%d : %s' % (
        self._previous_line + 1, self._previous_column + 1, message))

  def _ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    return ParseError('%d:%d : %s' % (
        self._line + 1, self._column + 1, message))

  def _StringParseError(self, e):
    # Wraps a UnicodeDecodeError into a positioned ParseError.
    return self._ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    self._previous_line = self._line
    self._previous_column = self._column

    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # No regex match: fall back to a single-character token (punctuation).
      self.token = self._current_line[self._column]
754 | |
755 | |
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # We force 32-bit values to int and 64-bit values to long to make
  # alternate implementations where the distinction is more significant
  # (e.g. the C++ implementation) simpler.
  parse = long if is_long else int
  try:
    result = parse(text, 0)
  except ValueError:
    raise ValueError('Couldn\'t parse integer: %s' % text)

  # Range-check the value against the matching signed/unsigned 32/64-bit
  # checker.  Exceptions handled by callers.
  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
  checker.CheckValue(result)
  return result
786 | |
787 | |
def ParseFloat(text):
  """Parse a floating point number.

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  try:
    # Assume Python compatible syntax first.
    return float(text)
  except ValueError:
    pass

  # Check alternative spellings used by other protobuf implementations.
  if _FLOAT_INFINITY.match(text):
    return float('-inf') if text[0] == '-' else float('inf')
  if _FLOAT_NAN.match(text):
    return float('nan')
  # Assume C-style '1.0f' format.
  try:
    return float(text.rstrip('f'))
  except ValueError:
    raise ValueError('Couldn\'t parse float: %s' % text)
818 | |
819 | |
def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    Boolean values parsed

  Raises:
    ValueError: If text is not a valid boolean.
  """
  if text in ('true', 't', '1'):
    return True
  if text in ('false', 'f', '0'):
    return False
  raise ValueError('Expected "true" or "false".')
838 | |
839 | |
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not numeric: treat the token as an enum value name.
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError(
          'Enum type "%s" has no value named %s.' % (
              enum_descriptor.full_name, value))
    return enum_value.number

  # Numeric value: it must correspond to a declared enum value.
  enum_value = enum_descriptor.values_by_number.get(number, None)
  if enum_value is None:
    raise ValueError(
        'Enum type "%s" has no value with number %d.' % (
            enum_descriptor.full_name, number))
  return enum_value.number
OLD | NEW |