OLD | NEW |
1 # Protocol Buffers - Google's data interchange format | 1 # Protocol Buffers - Google's data interchange format |
2 # Copyright 2008 Google Inc. All rights reserved. | 2 # Copyright 2008 Google Inc. All rights reserved. |
3 # https://developers.google.com/protocol-buffers/ | 3 # https://developers.google.com/protocol-buffers/ |
4 # | 4 # |
5 # Redistribution and use in source and binary forms, with or without | 5 # Redistribution and use in source and binary forms, with or without |
6 # modification, are permitted provided that the following conditions are | 6 # modification, are permitted provided that the following conditions are |
7 # met: | 7 # met: |
8 # | 8 # |
9 # * Redistributions of source code must retain the above copyright | 9 # * Redistributions of source code must retain the above copyright |
10 # notice, this list of conditions and the following disclaimer. | 10 # notice, this list of conditions and the following disclaimer. |
(...skipping 30 matching lines...) Expand all Loading... |
41 """ | 41 """ |
42 | 42 |
43 __author__ = 'kenton@google.com (Kenton Varda)' | 43 __author__ = 'kenton@google.com (Kenton Varda)' |
44 | 44 |
45 import io | 45 import io |
46 import re | 46 import re |
47 | 47 |
48 import six | 48 import six |
49 | 49 |
50 if six.PY3: | 50 if six.PY3: |
51 long = int # pylint: disable=redefined-builtin,invalid-name | 51 long = int |
52 | 52 |
53 # pylint: disable=g-import-not-at-top | |
54 from google.protobuf.internal import type_checkers | 53 from google.protobuf.internal import type_checkers |
55 from google.protobuf import descriptor | 54 from google.protobuf import descriptor |
56 from google.protobuf import text_encoding | 55 from google.protobuf import text_encoding |
57 | 56 |
58 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', 'PrintFieldValue', | 57 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', |
59 'Merge'] | 58 'PrintFieldValue', 'Merge'] |
| 59 |
60 | 60 |
61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), | 61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), |
62 type_checkers.Int32ValueChecker(), | 62 type_checkers.Int32ValueChecker(), |
63 type_checkers.Uint64ValueChecker(), | 63 type_checkers.Uint64ValueChecker(), |
64 type_checkers.Int64ValueChecker()) | 64 type_checkers.Int64ValueChecker()) |
65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE) | 65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE) |
66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE) | 66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE) |
67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT, | 67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT, |
68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE]) | 68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE]) |
69 _QUOTES = frozenset(("'", '"')) | 69 _QUOTES = frozenset(("'", '"')) |
70 _ANY_FULL_TYPE_NAME = 'google.protobuf.Any' | |
71 | 70 |
72 | 71 |
73 class Error(Exception): | 72 class Error(Exception): |
74 """Top-level module error for text_format.""" | 73 """Top-level module error for text_format.""" |
75 | 74 |
76 | 75 |
77 class ParseError(Error): | 76 class ParseError(Error): |
78 """Thrown in case of text parsing or tokenizing error.""" | 77 """Thrown in case of text parsing error.""" |
79 | |
80 def __init__(self, message=None, line=None, column=None): | |
81 if message is not None and line is not None: | |
82 loc = str(line) | |
83 if column is not None: | |
84 loc += ':{0}'.format(column) | |
85 message = '{0} : {1}'.format(loc, message) | |
86 if message is not None: | |
87 super(ParseError, self).__init__(message) | |
88 else: | |
89 super(ParseError, self).__init__() | |
90 self._line = line | |
91 self._column = column | |
92 | |
93 def GetLine(self): | |
94 return self._line | |
95 | |
96 def GetColumn(self): | |
97 return self._column | |
98 | 78 |
99 | 79 |
100 class TextWriter(object): | 80 class TextWriter(object): |
101 | |
102 def __init__(self, as_utf8): | 81 def __init__(self, as_utf8): |
103 if six.PY2: | 82 if six.PY2: |
104 self._writer = io.BytesIO() | 83 self._writer = io.BytesIO() |
105 else: | 84 else: |
106 self._writer = io.StringIO() | 85 self._writer = io.StringIO() |
107 | 86 |
108 def write(self, val): | 87 def write(self, val): |
109 if six.PY2: | 88 if six.PY2: |
110 if isinstance(val, six.text_type): | 89 if isinstance(val, six.text_type): |
111 val = val.encode('utf-8') | 90 val = val.encode('utf-8') |
112 return self._writer.write(val) | 91 return self._writer.write(val) |
113 | 92 |
114 def close(self): | 93 def close(self): |
115 return self._writer.close() | 94 return self._writer.close() |
116 | 95 |
117 def getvalue(self): | 96 def getvalue(self): |
118 return self._writer.getvalue() | 97 return self._writer.getvalue() |
119 | 98 |
120 | 99 |
121 def MessageToString(message, | 100 def MessageToString(message, as_utf8=False, as_one_line=False, |
122 as_utf8=False, | 101 pointy_brackets=False, use_index_order=False, |
123 as_one_line=False, | 102 float_format=None, use_field_number=False): |
124 pointy_brackets=False, | |
125 use_index_order=False, | |
126 float_format=None, | |
127 use_field_number=False, | |
128 descriptor_pool=None, | |
129 indent=0): | |
130 """Convert protobuf message to text format. | 103 """Convert protobuf message to text format. |
131 | 104 |
132 Floating point values can be formatted compactly with 15 digits of | 105 Floating point values can be formatted compactly with 15 digits of |
133 precision (which is the most that IEEE 754 "double" can guarantee) | 106 precision (which is the most that IEEE 754 "double" can guarantee) |
134 using float_format='.15g'. To ensure that converting to text and back to a | 107 using float_format='.15g'. To ensure that converting to text and back to a |
135 proto will result in an identical value, float_format='.17g' should be used. | 108 proto will result in an identical value, float_format='.17g' should be used. |
136 | 109 |
137 Args: | 110 Args: |
138 message: The protocol buffers message. | 111 message: The protocol buffers message. |
139 as_utf8: Produce text output in UTF8 format. | 112 as_utf8: Produce text output in UTF8 format. |
140 as_one_line: Don't introduce newlines between fields. | 113 as_one_line: Don't introduce newlines between fields. |
141 pointy_brackets: If True, use angle brackets instead of curly braces for | 114 pointy_brackets: If True, use angle brackets instead of curly braces for |
142 nesting. | 115 nesting. |
143 use_index_order: If True, print fields of a proto message using the order | 116 use_index_order: If True, print fields of a proto message using the order |
144 defined in source code instead of the field number. By default, use the | 117 defined in source code instead of the field number. By default, use the |
145 field number order. | 118 field number order. |
146 float_format: If set, use this to specify floating point number formatting | 119 float_format: If set, use this to specify floating point number formatting |
147 (per the "Format Specification Mini-Language"); otherwise, str() is used. | 120 (per the "Format Specification Mini-Language"); otherwise, str() is used. |
148 use_field_number: If True, print field numbers instead of names. | 121 use_field_number: If True, print field numbers instead of names. |
149 descriptor_pool: A DescriptorPool used to resolve Any types. | |
150 indent: The indent level, in terms of spaces, for pretty print. | |
151 | 122 |
152 Returns: | 123 Returns: |
153 A string of the text formatted protocol buffer message. | 124 A string of the text formatted protocol buffer message. |
154 """ | 125 """ |
155 out = TextWriter(as_utf8) | 126 out = TextWriter(as_utf8) |
156 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, | 127 printer = _Printer(out, 0, as_utf8, as_one_line, |
157 use_index_order, float_format, use_field_number, | 128 pointy_brackets, use_index_order, float_format, |
158 descriptor_pool) | 129 use_field_number) |
159 printer.PrintMessage(message) | 130 printer.PrintMessage(message) |
160 result = out.getvalue() | 131 result = out.getvalue() |
161 out.close() | 132 out.close() |
162 if as_one_line: | 133 if as_one_line: |
163 return result.rstrip() | 134 return result.rstrip() |
164 return result | 135 return result |
165 | 136 |
166 | 137 |
167 def _IsMapEntry(field): | 138 def _IsMapEntry(field): |
168 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and | 139 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and |
169 field.message_type.has_options and | 140 field.message_type.has_options and |
170 field.message_type.GetOptions().map_entry) | 141 field.message_type.GetOptions().map_entry) |
171 | 142 |
172 | 143 |
173 def PrintMessage(message, | 144 def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False, |
174 out, | 145 pointy_brackets=False, use_index_order=False, |
175 indent=0, | 146 float_format=None, use_field_number=False): |
176 as_utf8=False, | 147 printer = _Printer(out, indent, as_utf8, as_one_line, |
177 as_one_line=False, | 148 pointy_brackets, use_index_order, float_format, |
178 pointy_brackets=False, | 149 use_field_number) |
179 use_index_order=False, | |
180 float_format=None, | |
181 use_field_number=False, | |
182 descriptor_pool=None): | |
183 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, | |
184 use_index_order, float_format, use_field_number, | |
185 descriptor_pool) | |
186 printer.PrintMessage(message) | 150 printer.PrintMessage(message) |
187 | 151 |
188 | 152 |
189 def PrintField(field, | 153 def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False, |
190 value, | 154 pointy_brackets=False, use_index_order=False, float_format=None): |
191 out, | |
192 indent=0, | |
193 as_utf8=False, | |
194 as_one_line=False, | |
195 pointy_brackets=False, | |
196 use_index_order=False, | |
197 float_format=None): | |
198 """Print a single field name/value pair.""" | 155 """Print a single field name/value pair.""" |
199 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, | 156 printer = _Printer(out, indent, as_utf8, as_one_line, |
200 use_index_order, float_format) | 157 pointy_brackets, use_index_order, float_format) |
201 printer.PrintField(field, value) | 158 printer.PrintField(field, value) |
202 | 159 |
203 | 160 |
204 def PrintFieldValue(field, | 161 def PrintFieldValue(field, value, out, indent=0, as_utf8=False, |
205 value, | 162 as_one_line=False, pointy_brackets=False, |
206 out, | |
207 indent=0, | |
208 as_utf8=False, | |
209 as_one_line=False, | |
210 pointy_brackets=False, | |
211 use_index_order=False, | 163 use_index_order=False, |
212 float_format=None): | 164 float_format=None): |
213 """Print a single field value (not including name).""" | 165 """Print a single field value (not including name).""" |
214 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, | 166 printer = _Printer(out, indent, as_utf8, as_one_line, |
215 use_index_order, float_format) | 167 pointy_brackets, use_index_order, float_format) |
216 printer.PrintFieldValue(field, value) | 168 printer.PrintFieldValue(field, value) |
217 | 169 |
218 | 170 |
219 def _BuildMessageFromTypeName(type_name, descriptor_pool): | |
220 """Returns a protobuf message instance. | |
221 | |
222 Args: | |
223 type_name: Fully-qualified protobuf message type name string. | |
224 descriptor_pool: DescriptorPool instance. | |
225 | |
226 Returns: | |
227 A Message instance of type matching type_name, or None if the a Descriptor | |
228 wasn't found matching type_name. | |
229 """ | |
230 # pylint: disable=g-import-not-at-top | |
231 from google.protobuf import symbol_database | |
232 database = symbol_database.Default() | |
233 try: | |
234 message_descriptor = descriptor_pool.FindMessageTypeByName(type_name) | |
235 except KeyError: | |
236 return None | |
237 message_type = database.GetPrototype(message_descriptor) | |
238 return message_type() | |
239 | |
240 | |
241 class _Printer(object): | 171 class _Printer(object): |
242 """Text format printer for protocol message.""" | 172 """Text format printer for protocol message.""" |
243 | 173 |
244 def __init__(self, | 174 def __init__(self, out, indent=0, as_utf8=False, as_one_line=False, |
245 out, | 175 pointy_brackets=False, use_index_order=False, float_format=None, |
246 indent=0, | 176 use_field_number=False): |
247 as_utf8=False, | |
248 as_one_line=False, | |
249 pointy_brackets=False, | |
250 use_index_order=False, | |
251 float_format=None, | |
252 use_field_number=False, | |
253 descriptor_pool=None): | |
254 """Initialize the Printer. | 177 """Initialize the Printer. |
255 | 178 |
256 Floating point values can be formatted compactly with 15 digits of | 179 Floating point values can be formatted compactly with 15 digits of |
257 precision (which is the most that IEEE 754 "double" can guarantee) | 180 precision (which is the most that IEEE 754 "double" can guarantee) |
258 using float_format='.15g'. To ensure that converting to text and back to a | 181 using float_format='.15g'. To ensure that converting to text and back to a |
259 proto will result in an identical value, float_format='.17g' should be used. | 182 proto will result in an identical value, float_format='.17g' should be used. |
260 | 183 |
261 Args: | 184 Args: |
262 out: To record the text format result. | 185 out: To record the text format result. |
263 indent: The indent level for pretty print. | 186 indent: The indent level for pretty print. |
264 as_utf8: Produce text output in UTF8 format. | 187 as_utf8: Produce text output in UTF8 format. |
265 as_one_line: Don't introduce newlines between fields. | 188 as_one_line: Don't introduce newlines between fields. |
266 pointy_brackets: If True, use angle brackets instead of curly braces for | 189 pointy_brackets: If True, use angle brackets instead of curly braces for |
267 nesting. | 190 nesting. |
268 use_index_order: If True, print fields of a proto message using the order | 191 use_index_order: If True, print fields of a proto message using the order |
269 defined in source code instead of the field number. By default, use the | 192 defined in source code instead of the field number. By default, use the |
270 field number order. | 193 field number order. |
271 float_format: If set, use this to specify floating point number formatting | 194 float_format: If set, use this to specify floating point number formatting |
272 (per the "Format Specification Mini-Language"); otherwise, str() is | 195 (per the "Format Specification Mini-Language"); otherwise, str() is |
273 used. | 196 used. |
274 use_field_number: If True, print field numbers instead of names. | 197 use_field_number: If True, print field numbers instead of names. |
275 descriptor_pool: A DescriptorPool used to resolve Any types. | |
276 """ | 198 """ |
277 self.out = out | 199 self.out = out |
278 self.indent = indent | 200 self.indent = indent |
279 self.as_utf8 = as_utf8 | 201 self.as_utf8 = as_utf8 |
280 self.as_one_line = as_one_line | 202 self.as_one_line = as_one_line |
281 self.pointy_brackets = pointy_brackets | 203 self.pointy_brackets = pointy_brackets |
282 self.use_index_order = use_index_order | 204 self.use_index_order = use_index_order |
283 self.float_format = float_format | 205 self.float_format = float_format |
284 self.use_field_number = use_field_number | 206 self.use_field_number = use_field_number |
285 self.descriptor_pool = descriptor_pool | |
286 | |
287 def _TryPrintAsAnyMessage(self, message): | |
288 """Serializes if message is a google.protobuf.Any field.""" | |
289 packed_message = _BuildMessageFromTypeName(message.TypeName(), | |
290 self.descriptor_pool) | |
291 if packed_message: | |
292 packed_message.MergeFromString(message.value) | |
293 self.out.write('%s[%s]' % (self.indent * ' ', message.type_url)) | |
294 self._PrintMessageFieldValue(packed_message) | |
295 self.out.write(' ' if self.as_one_line else '\n') | |
296 return True | |
297 else: | |
298 return False | |
299 | 207 |
300 def PrintMessage(self, message): | 208 def PrintMessage(self, message): |
301 """Convert protobuf message to text format. | 209 """Convert protobuf message to text format. |
302 | 210 |
303 Args: | 211 Args: |
304 message: The protocol buffers message. | 212 message: The protocol buffers message. |
305 """ | 213 """ |
306 if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and | |
307 self.descriptor_pool and self._TryPrintAsAnyMessage(message)): | |
308 return | |
309 fields = message.ListFields() | 214 fields = message.ListFields() |
310 if self.use_index_order: | 215 if self.use_index_order: |
311 fields.sort(key=lambda x: x[0].index) | 216 fields.sort(key=lambda x: x[0].index) |
312 for field, value in fields: | 217 for field, value in fields: |
313 if _IsMapEntry(field): | 218 if _IsMapEntry(field): |
314 for key in sorted(value): | 219 for key in sorted(value): |
315 # This is slow for maps with submessage entires because it copies the | 220 # This is slow for maps with submessage entires because it copies the |
316 # entire tree. Unfortunately this would take significant refactoring | 221 # entire tree. Unfortunately this would take significant refactoring |
317 # of this file to work around. | 222 # of this file to work around. |
318 # | 223 # |
319 # TODO(haberman): refactor and optimize if this becomes an issue. | 224 # TODO(haberman): refactor and optimize if this becomes an issue. |
320 entry_submsg = value.GetEntryClass()(key=key, value=value[key]) | 225 entry_submsg = field.message_type._concrete_class( |
| 226 key=key, value=value[key]) |
321 self.PrintField(field, entry_submsg) | 227 self.PrintField(field, entry_submsg) |
322 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | 228 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
323 for element in value: | 229 for element in value: |
324 self.PrintField(field, element) | 230 self.PrintField(field, element) |
325 else: | 231 else: |
326 self.PrintField(field, value) | 232 self.PrintField(field, value) |
327 | 233 |
328 def PrintField(self, field, value): | 234 def PrintField(self, field, value): |
329 """Print a single field name/value pair.""" | 235 """Print a single field name/value pair.""" |
330 out = self.out | 236 out = self.out |
(...skipping 20 matching lines...) Expand all Loading... |
351 # The colon is optional in this case, but our cross-language golden files | 257 # The colon is optional in this case, but our cross-language golden files |
352 # don't include it. | 258 # don't include it. |
353 out.write(': ') | 259 out.write(': ') |
354 | 260 |
355 self.PrintFieldValue(field, value) | 261 self.PrintFieldValue(field, value) |
356 if self.as_one_line: | 262 if self.as_one_line: |
357 out.write(' ') | 263 out.write(' ') |
358 else: | 264 else: |
359 out.write('\n') | 265 out.write('\n') |
360 | 266 |
361 def _PrintMessageFieldValue(self, value): | |
362 if self.pointy_brackets: | |
363 openb = '<' | |
364 closeb = '>' | |
365 else: | |
366 openb = '{' | |
367 closeb = '}' | |
368 | |
369 if self.as_one_line: | |
370 self.out.write(' %s ' % openb) | |
371 self.PrintMessage(value) | |
372 self.out.write(closeb) | |
373 else: | |
374 self.out.write(' %s\n' % openb) | |
375 self.indent += 2 | |
376 self.PrintMessage(value) | |
377 self.indent -= 2 | |
378 self.out.write(' ' * self.indent + closeb) | |
379 | |
380 def PrintFieldValue(self, field, value): | 267 def PrintFieldValue(self, field, value): |
381 """Print a single field value (not including name). | 268 """Print a single field value (not including name). |
382 | 269 |
383 For repeated fields, the value should be a single element. | 270 For repeated fields, the value should be a single element. |
384 | 271 |
385 Args: | 272 Args: |
386 field: The descriptor of the field to be printed. | 273 field: The descriptor of the field to be printed. |
387 value: The value of the field. | 274 value: The value of the field. |
388 """ | 275 """ |
389 out = self.out | 276 out = self.out |
| 277 if self.pointy_brackets: |
| 278 openb = '<' |
| 279 closeb = '>' |
| 280 else: |
| 281 openb = '{' |
| 282 closeb = '}' |
| 283 |
390 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: | 284 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: |
391 self._PrintMessageFieldValue(value) | 285 if self.as_one_line: |
| 286 out.write(' %s ' % openb) |
| 287 self.PrintMessage(value) |
| 288 out.write(closeb) |
| 289 else: |
| 290 out.write(' %s\n' % openb) |
| 291 self.indent += 2 |
| 292 self.PrintMessage(value) |
| 293 self.indent -= 2 |
| 294 out.write(' ' * self.indent + closeb) |
392 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: | 295 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: |
393 enum_value = field.enum_type.values_by_number.get(value, None) | 296 enum_value = field.enum_type.values_by_number.get(value, None) |
394 if enum_value is not None: | 297 if enum_value is not None: |
395 out.write(enum_value.name) | 298 out.write(enum_value.name) |
396 else: | 299 else: |
397 out.write(str(value)) | 300 out.write(str(value)) |
398 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: | 301 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: |
399 out.write('\"') | 302 out.write('\"') |
400 if isinstance(value, six.text_type): | 303 if isinstance(value, six.text_type): |
401 out_value = value.encode('utf-8') | 304 out_value = value.encode('utf-8') |
(...skipping 10 matching lines...) Expand all Loading... |
412 if value: | 315 if value: |
413 out.write('true') | 316 out.write('true') |
414 else: | 317 else: |
415 out.write('false') | 318 out.write('false') |
416 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: | 319 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: |
417 out.write('{1:{0}}'.format(self.float_format, value)) | 320 out.write('{1:{0}}'.format(self.float_format, value)) |
418 else: | 321 else: |
419 out.write(str(value)) | 322 out.write(str(value)) |
420 | 323 |
421 | 324 |
422 def Parse(text, | 325 def Parse(text, message, |
423 message, | 326 allow_unknown_extension=False, allow_field_number=False): |
424 allow_unknown_extension=False, | 327 """Parses an text representation of a protocol message into a message. |
425 allow_field_number=False): | |
426 """Parses a text representation of a protocol message into a message. | |
427 | 328 |
428 Args: | 329 Args: |
429 text: Message text representation. | 330 text: Message text representation. |
430 message: A protocol buffer message to merge into. | 331 message: A protocol buffer message to merge into. |
431 allow_unknown_extension: if True, skip over missing extensions and keep | 332 allow_unknown_extension: if True, skip over missing extensions and keep |
432 parsing | 333 parsing |
433 allow_field_number: if True, both field number and field name are allowed. | 334 allow_field_number: if True, both field number and field name are allowed. |
434 | 335 |
435 Returns: | 336 Returns: |
436 The same message passed as argument. | 337 The same message passed as argument. |
437 | 338 |
438 Raises: | 339 Raises: |
439 ParseError: On text parsing problems. | 340 ParseError: On text parsing problems. |
440 """ | 341 """ |
441 if not isinstance(text, str): | 342 if not isinstance(text, str): |
442 text = text.decode('utf-8') | 343 text = text.decode('utf-8') |
443 return ParseLines( | 344 return ParseLines(text.split('\n'), message, allow_unknown_extension, |
444 text.split('\n'), message, allow_unknown_extension, allow_field_number) | 345 allow_field_number) |
445 | 346 |
446 | 347 |
447 def Merge(text, | 348 def Merge(text, message, allow_unknown_extension=False, |
448 message, | 349 allow_field_number=False): |
449 allow_unknown_extension=False, | 350 """Parses an text representation of a protocol message into a message. |
450 allow_field_number=False, | |
451 descriptor_pool=None): | |
452 """Parses a text representation of a protocol message into a message. | |
453 | 351 |
454 Like Parse(), but allows repeated values for a non-repeated field, and uses | 352 Like Parse(), but allows repeated values for a non-repeated field, and uses |
455 the last one. | 353 the last one. |
456 | 354 |
457 Args: | 355 Args: |
458 text: Message text representation. | 356 text: Message text representation. |
459 message: A protocol buffer message to merge into. | 357 message: A protocol buffer message to merge into. |
460 allow_unknown_extension: if True, skip over missing extensions and keep | 358 allow_unknown_extension: if True, skip over missing extensions and keep |
461 parsing | 359 parsing |
462 allow_field_number: if True, both field number and field name are allowed. | 360 allow_field_number: if True, both field number and field name are allowed. |
463 descriptor_pool: A DescriptorPool used to resolve Any types. | |
464 | 361 |
465 Returns: | 362 Returns: |
466 The same message passed as argument. | 363 The same message passed as argument. |
467 | 364 |
468 Raises: | 365 Raises: |
469 ParseError: On text parsing problems. | 366 ParseError: On text parsing problems. |
470 """ | 367 """ |
471 return MergeLines( | 368 return MergeLines(text.split('\n'), message, allow_unknown_extension, |
472 text.split('\n'), | 369 allow_field_number) |
473 message, | |
474 allow_unknown_extension, | |
475 allow_field_number, | |
476 descriptor_pool=descriptor_pool) | |
477 | 370 |
478 | 371 |
479 def ParseLines(lines, | 372 def ParseLines(lines, message, allow_unknown_extension=False, |
480 message, | |
481 allow_unknown_extension=False, | |
482 allow_field_number=False): | 373 allow_field_number=False): |
483 """Parses a text representation of a protocol message into a message. | 374 """Parses an text representation of a protocol message into a message. |
484 | 375 |
485 Args: | 376 Args: |
486 lines: An iterable of lines of a message's text representation. | 377 lines: An iterable of lines of a message's text representation. |
487 message: A protocol buffer message to merge into. | 378 message: A protocol buffer message to merge into. |
488 allow_unknown_extension: if True, skip over missing extensions and keep | 379 allow_unknown_extension: if True, skip over missing extensions and keep |
489 parsing | 380 parsing |
490 allow_field_number: if True, both field number and field name are allowed. | 381 allow_field_number: if True, both field number and field name are allowed. |
491 descriptor_pool: A DescriptorPool used to resolve Any types. | |
492 | 382 |
493 Returns: | 383 Returns: |
494 The same message passed as argument. | 384 The same message passed as argument. |
495 | 385 |
496 Raises: | 386 Raises: |
497 ParseError: On text parsing problems. | 387 ParseError: On text parsing problems. |
498 """ | 388 """ |
499 parser = _Parser(allow_unknown_extension, allow_field_number) | 389 parser = _Parser(allow_unknown_extension, allow_field_number) |
500 return parser.ParseLines(lines, message) | 390 return parser.ParseLines(lines, message) |
501 | 391 |
502 | 392 |
503 def MergeLines(lines, | 393 def MergeLines(lines, message, allow_unknown_extension=False, |
504 message, | 394 allow_field_number=False): |
505 allow_unknown_extension=False, | 395 """Parses an text representation of a protocol message into a message. |
506 allow_field_number=False, | |
507 descriptor_pool=None): | |
508 """Parses a text representation of a protocol message into a message. | |
509 | 396 |
510 Args: | 397 Args: |
511 lines: An iterable of lines of a message's text representation. | 398 lines: An iterable of lines of a message's text representation. |
512 message: A protocol buffer message to merge into. | 399 message: A protocol buffer message to merge into. |
513 allow_unknown_extension: if True, skip over missing extensions and keep | 400 allow_unknown_extension: if True, skip over missing extensions and keep |
514 parsing | 401 parsing |
515 allow_field_number: if True, both field number and field name are allowed. | 402 allow_field_number: if True, both field number and field name are allowed. |
516 | 403 |
517 Returns: | 404 Returns: |
518 The same message passed as argument. | 405 The same message passed as argument. |
519 | 406 |
520 Raises: | 407 Raises: |
521 ParseError: On text parsing problems. | 408 ParseError: On text parsing problems. |
522 """ | 409 """ |
523 parser = _Parser(allow_unknown_extension, | 410 parser = _Parser(allow_unknown_extension, allow_field_number) |
524 allow_field_number, | |
525 descriptor_pool=descriptor_pool) | |
526 return parser.MergeLines(lines, message) | 411 return parser.MergeLines(lines, message) |
527 | 412 |
528 | 413 |
529 class _Parser(object): | 414 class _Parser(object): |
530 """Text format parser for protocol message.""" | 415 """Text format parser for protocol message.""" |
531 | 416 |
532 def __init__(self, | 417 def __init__(self, allow_unknown_extension=False, allow_field_number=False): |
533 allow_unknown_extension=False, | |
534 allow_field_number=False, | |
535 descriptor_pool=None): | |
536 self.allow_unknown_extension = allow_unknown_extension | 418 self.allow_unknown_extension = allow_unknown_extension |
537 self.allow_field_number = allow_field_number | 419 self.allow_field_number = allow_field_number |
538 self.descriptor_pool = descriptor_pool | |
539 | 420 |
540 def ParseFromString(self, text, message): | 421 def ParseFromString(self, text, message): |
541 """Parses a text representation of a protocol message into a message.""" | 422 """Parses an text representation of a protocol message into a message.""" |
542 if not isinstance(text, str): | 423 if not isinstance(text, str): |
543 text = text.decode('utf-8') | 424 text = text.decode('utf-8') |
544 return self.ParseLines(text.split('\n'), message) | 425 return self.ParseLines(text.split('\n'), message) |
545 | 426 |
546 def ParseLines(self, lines, message): | 427 def ParseLines(self, lines, message): |
547 """Parses a text representation of a protocol message into a message.""" | 428 """Parses an text representation of a protocol message into a message.""" |
548 self._allow_multiple_scalars = False | 429 self._allow_multiple_scalars = False |
549 self._ParseOrMerge(lines, message) | 430 self._ParseOrMerge(lines, message) |
550 return message | 431 return message |
551 | 432 |
552 def MergeFromString(self, text, message): | 433 def MergeFromString(self, text, message): |
553 """Merges a text representation of a protocol message into a message.""" | 434 """Merges an text representation of a protocol message into a message.""" |
554 return self._MergeLines(text.split('\n'), message) | 435 return self._MergeLines(text.split('\n'), message) |
555 | 436 |
556 def MergeLines(self, lines, message): | 437 def MergeLines(self, lines, message): |
557 """Merges a text representation of a protocol message into a message.""" | 438 """Merges an text representation of a protocol message into a message.""" |
558 self._allow_multiple_scalars = True | 439 self._allow_multiple_scalars = True |
559 self._ParseOrMerge(lines, message) | 440 self._ParseOrMerge(lines, message) |
560 return message | 441 return message |
561 | 442 |
562 def _ParseOrMerge(self, lines, message): | 443 def _ParseOrMerge(self, lines, message): |
563 """Converts a text representation of a protocol message into a message. | 444 """Converts an text representation of a protocol message into a message. |
564 | 445 |
565 Args: | 446 Args: |
566 lines: Lines of a message's text representation. | 447 lines: Lines of a message's text representation. |
567 message: A protocol buffer message to merge into. | 448 message: A protocol buffer message to merge into. |
568 | 449 |
569 Raises: | 450 Raises: |
570 ParseError: On text parsing problems. | 451 ParseError: On text parsing problems. |
571 """ | 452 """ |
572 tokenizer = Tokenizer(lines) | 453 tokenizer = _Tokenizer(lines) |
573 while not tokenizer.AtEnd(): | 454 while not tokenizer.AtEnd(): |
574 self._MergeField(tokenizer, message) | 455 self._MergeField(tokenizer, message) |
575 | 456 |
576 def _MergeField(self, tokenizer, message): | 457 def _MergeField(self, tokenizer, message): |
577 """Merges a single protocol message field into a message. | 458 """Merges a single protocol message field into a message. |
578 | 459 |
579 Args: | 460 Args: |
580 tokenizer: A tokenizer to parse the field name and values. | 461 tokenizer: A tokenizer to parse the field name and values. |
581 message: A protocol message to record the data. | 462 message: A protocol message to record the data. |
582 | 463 |
(...skipping 20 matching lines...) Expand all Loading... |
603 field = message.Extensions._FindExtensionByName(name) | 484 field = message.Extensions._FindExtensionByName(name) |
604 # pylint: enable=protected-access | 485 # pylint: enable=protected-access |
605 if not field: | 486 if not field: |
606 if self.allow_unknown_extension: | 487 if self.allow_unknown_extension: |
607 field = None | 488 field = None |
608 else: | 489 else: |
609 raise tokenizer.ParseErrorPreviousToken( | 490 raise tokenizer.ParseErrorPreviousToken( |
610 'Extension "%s" not registered.' % name) | 491 'Extension "%s" not registered.' % name) |
611 elif message_descriptor != field.containing_type: | 492 elif message_descriptor != field.containing_type: |
612 raise tokenizer.ParseErrorPreviousToken( | 493 raise tokenizer.ParseErrorPreviousToken( |
613 'Extension "%s" does not extend message type "%s".' % | 494 'Extension "%s" does not extend message type "%s".' % ( |
614 (name, message_descriptor.full_name)) | 495 name, message_descriptor.full_name)) |
615 | 496 |
616 tokenizer.Consume(']') | 497 tokenizer.Consume(']') |
617 | 498 |
618 else: | 499 else: |
619 name = tokenizer.ConsumeIdentifierOrNumber() | 500 name = tokenizer.ConsumeIdentifier() |
620 if self.allow_field_number and name.isdigit(): | 501 if self.allow_field_number and name.isdigit(): |
621 number = ParseInteger(name, True, True) | 502 number = ParseInteger(name, True, True) |
622 field = message_descriptor.fields_by_number.get(number, None) | 503 field = message_descriptor.fields_by_number.get(number, None) |
623 if not field and message_descriptor.is_extendable: | 504 if not field and message_descriptor.is_extendable: |
624 field = message.Extensions._FindExtensionByNumber(number) | 505 field = message.Extensions._FindExtensionByNumber(number) |
625 else: | 506 else: |
626 field = message_descriptor.fields_by_name.get(name, None) | 507 field = message_descriptor.fields_by_name.get(name, None) |
627 | 508 |
628 # Group names are expected to be capitalized as they appear in the | 509 # Group names are expected to be capitalized as they appear in the |
629 # .proto file, which actually matches their type names, not their field | 510 # .proto file, which actually matches their type names, not their field |
630 # names. | 511 # names. |
631 if not field: | 512 if not field: |
632 field = message_descriptor.fields_by_name.get(name.lower(), None) | 513 field = message_descriptor.fields_by_name.get(name.lower(), None) |
633 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: | 514 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: |
634 field = None | 515 field = None |
635 | 516 |
636 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and | 517 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and |
637 field.message_type.name != name): | 518 field.message_type.name != name): |
638 field = None | 519 field = None |
639 | 520 |
640 if not field: | 521 if not field: |
641 raise tokenizer.ParseErrorPreviousToken( | 522 raise tokenizer.ParseErrorPreviousToken( |
642 'Message type "%s" has no field named "%s".' % | 523 'Message type "%s" has no field named "%s".' % ( |
643 (message_descriptor.full_name, name)) | 524 message_descriptor.full_name, name)) |
644 | 525 |
645 if field: | 526 if field: |
646 if not self._allow_multiple_scalars and field.containing_oneof: | 527 if not self._allow_multiple_scalars and field.containing_oneof: |
647 # Check if there's a different field set in this oneof. | 528 # Check if there's a different field set in this oneof. |
648 # Note that we ignore the case if the same field was set before, and we | 529 # Note that we ignore the case if the same field was set before, and we |
649 # apply _allow_multiple_scalars to non-scalar fields as well. | 530 # apply _allow_multiple_scalars to non-scalar fields as well. |
650 which_oneof = message.WhichOneof(field.containing_oneof.name) | 531 which_oneof = message.WhichOneof(field.containing_oneof.name) |
651 if which_oneof is not None and which_oneof != field.name: | 532 if which_oneof is not None and which_oneof != field.name: |
652 raise tokenizer.ParseErrorPreviousToken( | 533 raise tokenizer.ParseErrorPreviousToken( |
653 'Field "%s" is specified along with field "%s", another member ' | 534 'Field "%s" is specified along with field "%s", another member ' |
654 'of oneof "%s" for message type "%s".' % | 535 'of oneof "%s" for message type "%s".' % ( |
655 (field.name, which_oneof, field.containing_oneof.name, | 536 field.name, which_oneof, field.containing_oneof.name, |
656 message_descriptor.full_name)) | 537 message_descriptor.full_name)) |
657 | 538 |
658 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: | 539 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: |
659 tokenizer.TryConsume(':') | 540 tokenizer.TryConsume(':') |
660 merger = self._MergeMessageField | 541 merger = self._MergeMessageField |
661 else: | 542 else: |
662 tokenizer.Consume(':') | 543 tokenizer.Consume(':') |
663 merger = self._MergeScalarField | 544 merger = self._MergeScalarField |
664 | 545 |
665 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and | 546 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED |
666 tokenizer.TryConsume('[')): | 547 and tokenizer.TryConsume('[')): |
667 # Short repeated format, e.g. "foo: [1, 2, 3]" | 548 # Short repeated format, e.g. "foo: [1, 2, 3]" |
668 while True: | 549 while True: |
669 merger(tokenizer, message, field) | 550 merger(tokenizer, message, field) |
670 if tokenizer.TryConsume(']'): | 551 if tokenizer.TryConsume(']'): break |
671 break | |
672 tokenizer.Consume(',') | 552 tokenizer.Consume(',') |
673 | 553 |
674 else: | 554 else: |
675 merger(tokenizer, message, field) | 555 merger(tokenizer, message, field) |
676 | 556 |
677 else: # Proto field is unknown. | 557 else: # Proto field is unknown. |
678 assert self.allow_unknown_extension | 558 assert self.allow_unknown_extension |
679 _SkipFieldContents(tokenizer) | 559 _SkipFieldContents(tokenizer) |
680 | 560 |
681 # For historical reasons, fields may optionally be separated by commas or | 561 # For historical reasons, fields may optionally be separated by commas or |
682 # semicolons. | 562 # semicolons. |
683 if not tokenizer.TryConsume(','): | 563 if not tokenizer.TryConsume(','): |
684 tokenizer.TryConsume(';') | 564 tokenizer.TryConsume(';') |
685 | 565 |
686 def _ConsumeAnyTypeUrl(self, tokenizer): | |
687 """Consumes a google.protobuf.Any type URL and returns the type name.""" | |
688 # Consume "type.googleapis.com/". | |
689 tokenizer.ConsumeIdentifier() | |
690 tokenizer.Consume('.') | |
691 tokenizer.ConsumeIdentifier() | |
692 tokenizer.Consume('.') | |
693 tokenizer.ConsumeIdentifier() | |
694 tokenizer.Consume('/') | |
695 # Consume the fully-qualified type name. | |
696 name = [tokenizer.ConsumeIdentifier()] | |
697 while tokenizer.TryConsume('.'): | |
698 name.append(tokenizer.ConsumeIdentifier()) | |
699 return '.'.join(name) | |
700 | |
701 def _MergeMessageField(self, tokenizer, message, field): | 566 def _MergeMessageField(self, tokenizer, message, field): |
702 """Merges a single scalar field into a message. | 567 """Merges a single scalar field into a message. |
703 | 568 |
704 Args: | 569 Args: |
705 tokenizer: A tokenizer to parse the field value. | 570 tokenizer: A tokenizer to parse the field value. |
706 message: The message of which field is a member. | 571 message: The message of which field is a member. |
707 field: The descriptor of the field to be merged. | 572 field: The descriptor of the field to be merged. |
708 | 573 |
709 Raises: | 574 Raises: |
710 ParseError: In case of text parsing problems. | 575 ParseError: In case of text parsing problems. |
711 """ | 576 """ |
712 is_map_entry = _IsMapEntry(field) | 577 is_map_entry = _IsMapEntry(field) |
713 | 578 |
714 if tokenizer.TryConsume('<'): | 579 if tokenizer.TryConsume('<'): |
715 end_token = '>' | 580 end_token = '>' |
716 else: | 581 else: |
717 tokenizer.Consume('{') | 582 tokenizer.Consume('{') |
718 end_token = '}' | 583 end_token = '}' |
719 | 584 |
720 if (field.message_type.full_name == _ANY_FULL_TYPE_NAME and | 585 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
721 tokenizer.TryConsume('[')): | |
722 packed_type_name = self._ConsumeAnyTypeUrl(tokenizer) | |
723 tokenizer.Consume(']') | |
724 tokenizer.TryConsume(':') | |
725 if tokenizer.TryConsume('<'): | |
726 expanded_any_end_token = '>' | |
727 else: | |
728 tokenizer.Consume('{') | |
729 expanded_any_end_token = '}' | |
730 if not self.descriptor_pool: | |
731 raise ParseError('Descriptor pool required to parse expanded Any field') | |
732 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name, | |
733 self.descriptor_pool) | |
734 if not expanded_any_sub_message: | |
735 raise ParseError('Type %s not found in descriptor pool' % | |
736 packed_type_name) | |
737 while not tokenizer.TryConsume(expanded_any_end_token): | |
738 if tokenizer.AtEnd(): | |
739 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % | |
740 (expanded_any_end_token,)) | |
741 self._MergeField(tokenizer, expanded_any_sub_message) | |
742 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | |
743 any_message = getattr(message, field.name).add() | |
744 else: | |
745 any_message = getattr(message, field.name) | |
746 any_message.Pack(expanded_any_sub_message) | |
747 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | |
748 if field.is_extension: | 586 if field.is_extension: |
749 sub_message = message.Extensions[field].add() | 587 sub_message = message.Extensions[field].add() |
750 elif is_map_entry: | 588 elif is_map_entry: |
751 sub_message = getattr(message, field.name).GetEntryClass()() | 589 # pylint: disable=protected-access |
| 590 sub_message = field.message_type._concrete_class() |
752 else: | 591 else: |
753 sub_message = getattr(message, field.name).add() | 592 sub_message = getattr(message, field.name).add() |
754 else: | 593 else: |
755 if field.is_extension: | 594 if field.is_extension: |
756 sub_message = message.Extensions[field] | 595 sub_message = message.Extensions[field] |
757 else: | 596 else: |
758 sub_message = getattr(message, field.name) | 597 sub_message = getattr(message, field.name) |
759 sub_message.SetInParent() | 598 sub_message.SetInParent() |
760 | 599 |
761 while not tokenizer.TryConsume(end_token): | 600 while not tokenizer.TryConsume(end_token): |
(...skipping 20 matching lines...) Expand all Loading... |
782 Raises: | 621 Raises: |
783 ParseError: In case of text parsing problems. | 622 ParseError: In case of text parsing problems. |
784 RuntimeError: On runtime errors. | 623 RuntimeError: On runtime errors. |
785 """ | 624 """ |
786 _ = self.allow_unknown_extension | 625 _ = self.allow_unknown_extension |
787 value = None | 626 value = None |
788 | 627 |
789 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, | 628 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, |
790 descriptor.FieldDescriptor.TYPE_SINT32, | 629 descriptor.FieldDescriptor.TYPE_SINT32, |
791 descriptor.FieldDescriptor.TYPE_SFIXED32): | 630 descriptor.FieldDescriptor.TYPE_SFIXED32): |
792 value = _ConsumeInt32(tokenizer) | 631 value = tokenizer.ConsumeInt32() |
793 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, | 632 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, |
794 descriptor.FieldDescriptor.TYPE_SINT64, | 633 descriptor.FieldDescriptor.TYPE_SINT64, |
795 descriptor.FieldDescriptor.TYPE_SFIXED64): | 634 descriptor.FieldDescriptor.TYPE_SFIXED64): |
796 value = _ConsumeInt64(tokenizer) | 635 value = tokenizer.ConsumeInt64() |
797 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, | 636 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, |
798 descriptor.FieldDescriptor.TYPE_FIXED32): | 637 descriptor.FieldDescriptor.TYPE_FIXED32): |
799 value = _ConsumeUint32(tokenizer) | 638 value = tokenizer.ConsumeUint32() |
800 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, | 639 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, |
801 descriptor.FieldDescriptor.TYPE_FIXED64): | 640 descriptor.FieldDescriptor.TYPE_FIXED64): |
802 value = _ConsumeUint64(tokenizer) | 641 value = tokenizer.ConsumeUint64() |
803 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, | 642 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, |
804 descriptor.FieldDescriptor.TYPE_DOUBLE): | 643 descriptor.FieldDescriptor.TYPE_DOUBLE): |
805 value = tokenizer.ConsumeFloat() | 644 value = tokenizer.ConsumeFloat() |
806 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: | 645 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: |
807 value = tokenizer.ConsumeBool() | 646 value = tokenizer.ConsumeBool() |
808 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: | 647 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: |
809 value = tokenizer.ConsumeString() | 648 value = tokenizer.ConsumeString() |
810 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: | 649 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: |
811 value = tokenizer.ConsumeByteString() | 650 value = tokenizer.ConsumeByteString() |
812 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: | 651 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
907 ParseError: In case an invalid field value is found. | 746 ParseError: In case an invalid field value is found. |
908 """ | 747 """ |
909 # String/bytes tokens can come in multiple adjacent string literals. | 748 # String/bytes tokens can come in multiple adjacent string literals. |
910 # If we can consume one, consume as many as we can. | 749 # If we can consume one, consume as many as we can. |
911 if tokenizer.TryConsumeByteString(): | 750 if tokenizer.TryConsumeByteString(): |
912 while tokenizer.TryConsumeByteString(): | 751 while tokenizer.TryConsumeByteString(): |
913 pass | 752 pass |
914 return | 753 return |
915 | 754 |
916 if (not tokenizer.TryConsumeIdentifier() and | 755 if (not tokenizer.TryConsumeIdentifier() and |
917 not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and | 756 not tokenizer.TryConsumeInt64() and |
| 757 not tokenizer.TryConsumeUint64() and |
918 not tokenizer.TryConsumeFloat()): | 758 not tokenizer.TryConsumeFloat()): |
919 raise ParseError('Invalid field value: ' + tokenizer.token) | 759 raise ParseError('Invalid field value: ' + tokenizer.token) |
920 | 760 |
921 | 761 |
class Tokenizer(object):
  """Protocol buffer text representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  # Whitespace only (used when comments are surfaced as tokens).
  _WHITESPACE = re.compile(r'\s+')
  # A '#' comment running to end of line, with optional leading whitespace.
  _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
  # Whitespace and/or comments (used when comments are skipped, the default).
  _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
  # One token: an identifier, a number, or a quoted string (one alternative
  # per quote mark in the module-level _QUOTES).
  _TOKEN = re.compile('|'.join([
      r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
      r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
  ] + [  # quoted str for each quote mark
      r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES
  ]))

  _IDENTIFIER = re.compile(r'[^\d\W]\w*')  # identifier: no leading digit
  _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')  # field name or field number

  def __init__(self, lines, skip_comments=True):
    # lines: iterable of input lines to tokenize.
    # skip_comments: when True (default), comments are treated as whitespace;
    # when False, they are returned as tokens (see NextToken).
    self._position = 0
    self._line = -1  # 0-based index of the current line (-1 = before input)
    self._column = 0  # 0-based column of the next unread character
    self._token_start = None
    self.token = ''  # current token; '' means end of input (see AtEnd)
    self._lines = iter(lines)
    self._current_line = ''
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._skip_comments = skip_comments
    self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
                                or self._WHITESPACE)
    # Position on the first token so callers can start consuming immediately.
    self._SkipWhitespace()
    self.NextToken()
960 | 794 |
961 def LookingAt(self, token): | 795 def LookingAt(self, token): |
962 return self.token == token | 796 return self.token == token |
963 | 797 |
964 def AtEnd(self): | 798 def AtEnd(self): |
965 """Checks the end of the text was reached. | 799 """Checks the end of the text was reached. |
966 | 800 |
967 Returns: | 801 Returns: |
968 True iff the end was reached. | 802 True iff the end was reached. |
969 """ | 803 """ |
970 return not self.token | 804 return not self.token |
971 | 805 |
972 def _PopLine(self): | 806 def _PopLine(self): |
973 while len(self._current_line) <= self._column: | 807 while len(self._current_line) <= self._column: |
974 try: | 808 try: |
975 self._current_line = next(self._lines) | 809 self._current_line = next(self._lines) |
976 except StopIteration: | 810 except StopIteration: |
977 self._current_line = '' | 811 self._current_line = '' |
978 self._more_lines = False | 812 self._more_lines = False |
979 return | 813 return |
980 else: | 814 else: |
981 self._line += 1 | 815 self._line += 1 |
982 self._column = 0 | 816 self._column = 0 |
983 | 817 |
984 def _SkipWhitespace(self): | 818 def _SkipWhitespace(self): |
985 while True: | 819 while True: |
986 self._PopLine() | 820 self._PopLine() |
987 match = self._whitespace_pattern.match(self._current_line, self._column) | 821 match = self._WHITESPACE.match(self._current_line, self._column) |
988 if not match: | 822 if not match: |
989 break | 823 break |
990 length = len(match.group(0)) | 824 length = len(match.group(0)) |
991 self._column += length | 825 self._column += length |
992 | 826 |
993 def TryConsume(self, token): | 827 def TryConsume(self, token): |
994 """Tries to consume a given piece of text. | 828 """Tries to consume a given piece of text. |
995 | 829 |
996 Args: | 830 Args: |
997 token: Text to consume. | 831 token: Text to consume. |
998 | 832 |
999 Returns: | 833 Returns: |
1000 True iff the text was consumed. | 834 True iff the text was consumed. |
1001 """ | 835 """ |
1002 if self.token == token: | 836 if self.token == token: |
1003 self.NextToken() | 837 self.NextToken() |
1004 return True | 838 return True |
1005 return False | 839 return False |
1006 | 840 |
1007 def Consume(self, token): | 841 def Consume(self, token): |
1008 """Consumes a piece of text. | 842 """Consumes a piece of text. |
1009 | 843 |
1010 Args: | 844 Args: |
1011 token: Text to consume. | 845 token: Text to consume. |
1012 | 846 |
1013 Raises: | 847 Raises: |
1014 ParseError: If the text couldn't be consumed. | 848 ParseError: If the text couldn't be consumed. |
1015 """ | 849 """ |
1016 if not self.TryConsume(token): | 850 if not self.TryConsume(token): |
1017 raise self.ParseError('Expected "%s".' % token) | 851 raise self._ParseError('Expected "%s".' % token) |
1018 | |
1019 def ConsumeComment(self): | |
1020 result = self.token | |
1021 if not self._COMMENT.match(result): | |
1022 raise self.ParseError('Expected comment.') | |
1023 self.NextToken() | |
1024 return result | |
1025 | 852 |
1026 def TryConsumeIdentifier(self): | 853 def TryConsumeIdentifier(self): |
1027 try: | 854 try: |
1028 self.ConsumeIdentifier() | 855 self.ConsumeIdentifier() |
1029 return True | 856 return True |
1030 except ParseError: | 857 except ParseError: |
1031 return False | 858 return False |
1032 | 859 |
1033 def ConsumeIdentifier(self): | 860 def ConsumeIdentifier(self): |
1034 """Consumes protocol message field identifier. | 861 """Consumes protocol message field identifier. |
1035 | 862 |
1036 Returns: | 863 Returns: |
1037 Identifier string. | 864 Identifier string. |
1038 | 865 |
1039 Raises: | 866 Raises: |
1040 ParseError: If an identifier couldn't be consumed. | 867 ParseError: If an identifier couldn't be consumed. |
1041 """ | 868 """ |
1042 result = self.token | 869 result = self.token |
1043 if not self._IDENTIFIER.match(result): | 870 if not self._IDENTIFIER.match(result): |
1044 raise self.ParseError('Expected identifier.') | 871 raise self._ParseError('Expected identifier.') |
1045 self.NextToken() | 872 self.NextToken() |
1046 return result | 873 return result |
1047 | 874 |
1048 def TryConsumeIdentifierOrNumber(self): | 875 def ConsumeInt32(self): |
| 876 """Consumes a signed 32bit integer number. |
| 877 |
| 878 Returns: |
| 879 The integer parsed. |
| 880 |
| 881 Raises: |
| 882 ParseError: If a signed 32bit integer couldn't be consumed. |
| 883 """ |
1049 try: | 884 try: |
1050 self.ConsumeIdentifierOrNumber() | 885 result = ParseInteger(self.token, is_signed=True, is_long=False) |
| 886 except ValueError as e: |
| 887 raise self._ParseError(str(e)) |
| 888 self.NextToken() |
| 889 return result |
| 890 |
| 891 def ConsumeUint32(self): |
| 892 """Consumes an unsigned 32bit integer number. |
| 893 |
| 894 Returns: |
| 895 The integer parsed. |
| 896 |
| 897 Raises: |
| 898 ParseError: If an unsigned 32bit integer couldn't be consumed. |
| 899 """ |
| 900 try: |
| 901 result = ParseInteger(self.token, is_signed=False, is_long=False) |
| 902 except ValueError as e: |
| 903 raise self._ParseError(str(e)) |
| 904 self.NextToken() |
| 905 return result |
| 906 |
| 907 def TryConsumeInt64(self): |
| 908 try: |
| 909 self.ConsumeInt64() |
1051 return True | 910 return True |
1052 except ParseError: | 911 except ParseError: |
1053 return False | 912 return False |
1054 | 913 |
1055 def ConsumeIdentifierOrNumber(self): | 914 def ConsumeInt64(self): |
1056 """Consumes protocol message field identifier. | 915 """Consumes a signed 64bit integer number. |
1057 | 916 |
1058 Returns: | 917 Returns: |
1059 Identifier string. | 918 The integer parsed. |
1060 | 919 |
1061 Raises: | 920 Raises: |
1062 ParseError: If an identifier couldn't be consumed. | 921 ParseError: If a signed 64bit integer couldn't be consumed. |
1063 """ | 922 """ |
1064 result = self.token | 923 try: |
1065 if not self._IDENTIFIER_OR_NUMBER.match(result): | 924 result = ParseInteger(self.token, is_signed=True, is_long=True) |
1066 raise self.ParseError('Expected identifier or number.') | 925 except ValueError as e: |
| 926 raise self._ParseError(str(e)) |
1067 self.NextToken() | 927 self.NextToken() |
1068 return result | 928 return result |
1069 | 929 |
1070 def TryConsumeInteger(self): | 930 def TryConsumeUint64(self): |
1071 try: | 931 try: |
1072 # Note: is_long only affects value type, not whether an error is raised. | 932 self.ConsumeUint64() |
1073 self.ConsumeInteger() | |
1074 return True | 933 return True |
1075 except ParseError: | 934 except ParseError: |
1076 return False | 935 return False |
1077 | 936 |
1078 def ConsumeInteger(self, is_long=False): | 937 def ConsumeUint64(self): |
1079 """Consumes an integer number. | 938 """Consumes an unsigned 64bit integer number. |
1080 | 939 |
1081 Args: | |
1082 is_long: True if the value should be returned as a long integer. | |
1083 Returns: | 940 Returns: |
1084 The integer parsed. | 941 The integer parsed. |
1085 | 942 |
1086 Raises: | 943 Raises: |
1087 ParseError: If an integer couldn't be consumed. | 944 ParseError: If an unsigned 64bit integer couldn't be consumed. |
1088 """ | 945 """ |
1089 try: | 946 try: |
1090 result = _ParseAbstractInteger(self.token, is_long=is_long) | 947 result = ParseInteger(self.token, is_signed=False, is_long=True) |
1091 except ValueError as e: | 948 except ValueError as e: |
1092 raise self.ParseError(str(e)) | 949 raise self._ParseError(str(e)) |
1093 self.NextToken() | 950 self.NextToken() |
1094 return result | 951 return result |
1095 | 952 |
1096 def TryConsumeFloat(self): | 953 def TryConsumeFloat(self): |
1097 try: | 954 try: |
1098 self.ConsumeFloat() | 955 self.ConsumeFloat() |
1099 return True | 956 return True |
1100 except ParseError: | 957 except ParseError: |
1101 return False | 958 return False |
1102 | 959 |
1103 def ConsumeFloat(self): | 960 def ConsumeFloat(self): |
1104 """Consumes an floating point number. | 961 """Consumes an floating point number. |
1105 | 962 |
1106 Returns: | 963 Returns: |
1107 The number parsed. | 964 The number parsed. |
1108 | 965 |
1109 Raises: | 966 Raises: |
1110 ParseError: If a floating point number couldn't be consumed. | 967 ParseError: If a floating point number couldn't be consumed. |
1111 """ | 968 """ |
1112 try: | 969 try: |
1113 result = ParseFloat(self.token) | 970 result = ParseFloat(self.token) |
1114 except ValueError as e: | 971 except ValueError as e: |
1115 raise self.ParseError(str(e)) | 972 raise self._ParseError(str(e)) |
1116 self.NextToken() | 973 self.NextToken() |
1117 return result | 974 return result |
1118 | 975 |
1119 def ConsumeBool(self): | 976 def ConsumeBool(self): |
1120 """Consumes a boolean value. | 977 """Consumes a boolean value. |
1121 | 978 |
1122 Returns: | 979 Returns: |
1123 The bool parsed. | 980 The bool parsed. |
1124 | 981 |
1125 Raises: | 982 Raises: |
1126 ParseError: If a boolean value couldn't be consumed. | 983 ParseError: If a boolean value couldn't be consumed. |
1127 """ | 984 """ |
1128 try: | 985 try: |
1129 result = ParseBool(self.token) | 986 result = ParseBool(self.token) |
1130 except ValueError as e: | 987 except ValueError as e: |
1131 raise self.ParseError(str(e)) | 988 raise self._ParseError(str(e)) |
1132 self.NextToken() | 989 self.NextToken() |
1133 return result | 990 return result |
1134 | 991 |
1135 def TryConsumeByteString(self): | 992 def TryConsumeByteString(self): |
1136 try: | 993 try: |
1137 self.ConsumeByteString() | 994 self.ConsumeByteString() |
1138 return True | 995 return True |
1139 except ParseError: | 996 except ParseError: |
1140 return False | 997 return False |
1141 | 998 |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1175 tokens which are automatically concatenated, like in C or Python. This | 1032 tokens which are automatically concatenated, like in C or Python. This |
1176 method only consumes one token. | 1033 method only consumes one token. |
1177 | 1034 |
1178 Returns: | 1035 Returns: |
1179 The token parsed. | 1036 The token parsed. |
1180 Raises: | 1037 Raises: |
1181 ParseError: When the wrong format data is found. | 1038 ParseError: When the wrong format data is found. |
1182 """ | 1039 """ |
1183 text = self.token | 1040 text = self.token |
1184 if len(text) < 1 or text[0] not in _QUOTES: | 1041 if len(text) < 1 or text[0] not in _QUOTES: |
1185 raise self.ParseError('Expected string but found: %r' % (text,)) | 1042 raise self._ParseError('Expected string but found: %r' % (text,)) |
1186 | 1043 |
1187 if len(text) < 2 or text[-1] != text[0]: | 1044 if len(text) < 2 or text[-1] != text[0]: |
1188 raise self.ParseError('String missing ending quote: %r' % (text,)) | 1045 raise self._ParseError('String missing ending quote: %r' % (text,)) |
1189 | 1046 |
1190 try: | 1047 try: |
1191 result = text_encoding.CUnescape(text[1:-1]) | 1048 result = text_encoding.CUnescape(text[1:-1]) |
1192 except ValueError as e: | 1049 except ValueError as e: |
1193 raise self.ParseError(str(e)) | 1050 raise self._ParseError(str(e)) |
1194 self.NextToken() | 1051 self.NextToken() |
1195 return result | 1052 return result |
1196 | 1053 |
1197 def ConsumeEnum(self, field): | 1054 def ConsumeEnum(self, field): |
1198 try: | 1055 try: |
1199 result = ParseEnum(field, self.token) | 1056 result = ParseEnum(field, self.token) |
1200 except ValueError as e: | 1057 except ValueError as e: |
1201 raise self.ParseError(str(e)) | 1058 raise self._ParseError(str(e)) |
1202 self.NextToken() | 1059 self.NextToken() |
1203 return result | 1060 return result |
1204 | 1061 |
1205 def ParseErrorPreviousToken(self, message): | 1062 def ParseErrorPreviousToken(self, message): |
1206 """Creates and *returns* a ParseError for the previously read token. | 1063 """Creates and *returns* a ParseError for the previously read token. |
1207 | 1064 |
1208 Args: | 1065 Args: |
1209 message: A message to set for the exception. | 1066 message: A message to set for the exception. |
1210 | 1067 |
1211 Returns: | 1068 Returns: |
1212 A ParseError instance. | 1069 A ParseError instance. |
1213 """ | 1070 """ |
1214 return ParseError(message, self._previous_line + 1, | 1071 return ParseError('%d:%d : %s' % ( |
1215 self._previous_column + 1) | 1072 self._previous_line + 1, self._previous_column + 1, message)) |
1216 | 1073 |
1217 def ParseError(self, message): | 1074 def _ParseError(self, message): |
1218 """Creates and *returns* a ParseError for the current token.""" | 1075 """Creates and *returns* a ParseError for the current token.""" |
1219 return ParseError(message, self._line + 1, self._column + 1) | 1076 return ParseError('%d:%d : %s' % ( |
| 1077 self._line + 1, self._column + 1, message)) |
1220 | 1078 |
1221 def _StringParseError(self, e): | 1079 def _StringParseError(self, e): |
1222 return self.ParseError('Couldn\'t parse string: ' + str(e)) | 1080 return self._ParseError('Couldn\'t parse string: ' + str(e)) |
1223 | 1081 |
1224 def NextToken(self): | 1082 def NextToken(self): |
1225 """Reads the next meaningful token.""" | 1083 """Reads the next meaningful token.""" |
1226 self._previous_line = self._line | 1084 self._previous_line = self._line |
1227 self._previous_column = self._column | 1085 self._previous_column = self._column |
1228 | 1086 |
1229 self._column += len(self.token) | 1087 self._column += len(self.token) |
1230 self._SkipWhitespace() | 1088 self._SkipWhitespace() |
1231 | 1089 |
1232 if not self._more_lines: | 1090 if not self._more_lines: |
1233 self.token = '' | 1091 self.token = '' |
1234 return | 1092 return |
1235 | 1093 |
1236 match = self._TOKEN.match(self._current_line, self._column) | 1094 match = self._TOKEN.match(self._current_line, self._column) |
1237 if not match and not self._skip_comments: | |
1238 match = self._COMMENT.match(self._current_line, self._column) | |
1239 if match: | 1095 if match: |
1240 token = match.group(0) | 1096 token = match.group(0) |
1241 self.token = token | 1097 self.token = token |
1242 else: | 1098 else: |
1243 self.token = self._current_line[self._column] | 1099 self.token = self._current_line[self._column] |
1244 | 1100 |
# Aliased so it can still be accessed by current visibility violators.
# TODO(dbarnett): Migrate violators to textformat_tokenizer.
_Tokenizer = Tokenizer  # pylint: disable=invalid-name
1248 | |
1249 | |
def _ConsumeInt32(tokenizer):
  """Reads a signed 32bit integer token and advances the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The parsed integer value.

  Raises:
    ParseError: If the current token is not a valid signed 32bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=False)
1263 | |
1264 | |
def _ConsumeUint32(tokenizer):
  """Reads an unsigned 32bit integer token and advances the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The parsed integer value.

  Raises:
    ParseError: If the current token is not a valid unsigned 32bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=False)
1278 | |
1279 | |
def _TryConsumeInt64(tokenizer):
  """Returns True iff a signed 64bit integer was consumed."""
  # Best-effort variant of _ConsumeInt64: swallow the ParseError instead
  # of propagating it.
  try:
    _ConsumeInt64(tokenizer)
  except ParseError:
    return False
  return True
1286 | |
1287 | |
def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  (The previous docstring said "32bit", a copy-paste error: this consumes
  with is_long=True, i.e. a 64bit value.)

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)
1301 | |
1302 | |
def _TryConsumeUint64(tokenizer):
  """Returns True iff an unsigned 64bit integer was consumed."""
  # Best-effort variant of _ConsumeUint64: swallow the ParseError instead
  # of propagating it.
  try:
    _ConsumeUint64(tokenizer)
  except ParseError:
    return False
  return True
1309 | |
1310 | |
def _ConsumeUint64(tokenizer):
  """Reads an unsigned 64bit integer token and advances the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The parsed integer value.

  Raises:
    ParseError: If the current token is not a valid unsigned 64bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=True)
1324 | |
1325 | |
def _TryConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Returns True iff an integer with the given traits was consumed."""
  # Non-raising wrapper around _ConsumeInteger.
  try:
    _ConsumeInteger(tokenizer, is_signed=is_signed, is_long=is_long)
  except ParseError:
    return False
  return True
1332 | |
1333 | |
def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  token = tokenizer.token
  try:
    value = ParseInteger(token, is_signed=is_signed, is_long=is_long)
  except ValueError as e:
    # Re-raise with the tokenizer's position information attached.
    raise tokenizer.ParseError(str(e))
  tokenizer.NextToken()
  return value
1354 | |
1355 | 1101 |
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Parse first; a ValueError from parsing propagates to the caller.
  value = _ParseAbstractInteger(text, is_long=is_long)

  # Range-check with the matching checker: index = 2*is_long + is_signed.
  # Exceptions from the checker are handled by callers.
  checker_index = 2 * int(is_long) + int(is_signed)
  _INTEGER_CHECKERS[checker_index].CheckValue(value)
  return value
1377 | |
1378 | |
1379 def _ParseAbstractInteger(text, is_long=False): | |
1380 """Parses an integer without checking size/signedness. | |
1381 | |
1382 Args: | |
1383 text: The text to parse. | |
1384 is_long: True if the value should be returned as a long integer. | |
1385 | |
1386 Returns: | |
1387 The integer value. | |
1388 | |
1389 Raises: | |
1390 ValueError: Thrown Iff the text is not a valid integer. | |
1391 """ | |
1392 # Do the actual parsing. Exception handling is propagated to caller. | |
1393 try: | 1117 try: |
1394 # We force 32-bit values to int and 64-bit values to long to make | 1118 # We force 32-bit values to int and 64-bit values to long to make |
1395 # alternate implementations where the distinction is more significant | 1119 # alternate implementations where the distinction is more significant |
1396 # (e.g. the C++ implementation) simpler. | 1120 # (e.g. the C++ implementation) simpler. |
1397 if is_long: | 1121 if is_long: |
1398 return long(text, 0) | 1122 result = long(text, 0) |
1399 else: | 1123 else: |
1400 return int(text, 0) | 1124 result = int(text, 0) |
1401 except ValueError: | 1125 except ValueError: |
1402 raise ValueError('Couldn\'t parse integer: %s' % text) | 1126 raise ValueError('Couldn\'t parse integer: %s' % text) |
1403 | 1127 |
| 1128 # Check if the integer is sane. Exceptions handled by callers. |
| 1129 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)] |
| 1130 checker.CheckValue(result) |
| 1131 return result |
| 1132 |
1404 | 1133 |
1405 def ParseFloat(text): | 1134 def ParseFloat(text): |
1406 """Parse a floating point number. | 1135 """Parse a floating point number. |
1407 | 1136 |
1408 Args: | 1137 Args: |
1409 text: Text to parse. | 1138 text: Text to parse. |
1410 | 1139 |
1411 Returns: | 1140 Returns: |
1412 The number parsed. | 1141 The number parsed. |
1413 | 1142 |
(...skipping 25 matching lines...) Expand all Loading... |
1439 | 1168 |
1440 Args: | 1169 Args: |
1441 text: Text to parse. | 1170 text: Text to parse. |
1442 | 1171 |
1443 Returns: | 1172 Returns: |
1444 Boolean values parsed | 1173 Boolean values parsed |
1445 | 1174 |
1446 Raises: | 1175 Raises: |
1447 ValueError: If text is not a valid boolean. | 1176 ValueError: If text is not a valid boolean. |
1448 """ | 1177 """ |
1449 if text in ('true', 't', '1', 'True'): | 1178 if text in ('true', 't', '1'): |
1450 return True | 1179 return True |
1451 elif text in ('false', 'f', '0', 'False'): | 1180 elif text in ('false', 'f', '0'): |
1452 return False | 1181 return False |
1453 else: | 1182 else: |
1454 raise ValueError('Expected "true" or "false".') | 1183 raise ValueError('Expected "true" or "false".') |
1455 | 1184 |
1456 | 1185 |
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  descriptor = field.enum_type
  try:
    numeric = int(value, 0)
  except ValueError:
    numeric = None

  if numeric is None:
    # The text is an identifier: look it up by enum value name.
    enum_value = descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (descriptor.full_name, value))
  else:
    # The text is numeric: look it up by enum value number.
    enum_value = descriptor.values_by_number.get(numeric, None)
    if enum_value is None:
      raise ValueError('Enum type "%s" has no value with number %d.' %
                       (descriptor.full_name, numeric))
  return enum_value.number
OLD | NEW |