OLD | NEW |
1 # Protocol Buffers - Google's data interchange format | 1 # Protocol Buffers - Google's data interchange format |
2 # Copyright 2008 Google Inc. All rights reserved. | 2 # Copyright 2008 Google Inc. All rights reserved. |
3 # https://developers.google.com/protocol-buffers/ | 3 # https://developers.google.com/protocol-buffers/ |
4 # | 4 # |
5 # Redistribution and use in source and binary forms, with or without | 5 # Redistribution and use in source and binary forms, with or without |
6 # modification, are permitted provided that the following conditions are | 6 # modification, are permitted provided that the following conditions are |
7 # met: | 7 # met: |
8 # | 8 # |
9 # * Redistributions of source code must retain the above copyright | 9 # * Redistributions of source code must retain the above copyright |
10 # notice, this list of conditions and the following disclaimer. | 10 # notice, this list of conditions and the following disclaimer. |
(...skipping 30 matching lines...) Expand all Loading... |
41 """ | 41 """ |
42 | 42 |
43 __author__ = 'kenton@google.com (Kenton Varda)' | 43 __author__ = 'kenton@google.com (Kenton Varda)' |
44 | 44 |
45 import io | 45 import io |
46 import re | 46 import re |
47 | 47 |
48 import six | 48 import six |
49 | 49 |
50 if six.PY3: | 50 if six.PY3: |
51 long = int | 51 long = int # pylint: disable=redefined-builtin,invalid-name |
52 | 52 |
| 53 # pylint: disable=g-import-not-at-top |
53 from google.protobuf.internal import type_checkers | 54 from google.protobuf.internal import type_checkers |
54 from google.protobuf import descriptor | 55 from google.protobuf import descriptor |
55 from google.protobuf import text_encoding | 56 from google.protobuf import text_encoding |
56 | 57 |
57 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', | 58 __all__ = ['MessageToString', 'PrintMessage', 'PrintField', 'PrintFieldValue', |
58 'PrintFieldValue', 'Merge'] | 59 'Merge'] |
59 | |
60 | 60 |
61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), | 61 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), |
62 type_checkers.Int32ValueChecker(), | 62 type_checkers.Int32ValueChecker(), |
63 type_checkers.Uint64ValueChecker(), | 63 type_checkers.Uint64ValueChecker(), |
64 type_checkers.Int64ValueChecker()) | 64 type_checkers.Int64ValueChecker()) |
65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE) | 65 _FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE) |
66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE) | 66 _FLOAT_NAN = re.compile('nanf?', re.IGNORECASE) |
67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT, | 67 _FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT, |
68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE]) | 68 descriptor.FieldDescriptor.CPPTYPE_DOUBLE]) |
69 _QUOTES = frozenset(("'", '"')) | 69 _QUOTES = frozenset(("'", '"')) |
| 70 _ANY_FULL_TYPE_NAME = 'google.protobuf.Any' |
70 | 71 |
71 | 72 |
72 class Error(Exception): | 73 class Error(Exception): |
73 """Top-level module error for text_format.""" | 74 """Top-level module error for text_format.""" |
74 | 75 |
75 | 76 |
76 class ParseError(Error): | 77 class ParseError(Error): |
77 """Thrown in case of text parsing error.""" | 78 """Thrown in case of text parsing or tokenizing error.""" |
| 79 |
| 80 def __init__(self, message=None, line=None, column=None): |
| 81 if message is not None and line is not None: |
| 82 loc = str(line) |
| 83 if column is not None: |
| 84 loc += ':{0}'.format(column) |
| 85 message = '{0} : {1}'.format(loc, message) |
| 86 if message is not None: |
| 87 super(ParseError, self).__init__(message) |
| 88 else: |
| 89 super(ParseError, self).__init__() |
| 90 self._line = line |
| 91 self._column = column |
| 92 |
| 93 def GetLine(self): |
| 94 return self._line |
| 95 |
| 96 def GetColumn(self): |
| 97 return self._column |
78 | 98 |
79 | 99 |
80 class TextWriter(object): | 100 class TextWriter(object): |
| 101 |
81 def __init__(self, as_utf8): | 102 def __init__(self, as_utf8): |
82 if six.PY2: | 103 if six.PY2: |
83 self._writer = io.BytesIO() | 104 self._writer = io.BytesIO() |
84 else: | 105 else: |
85 self._writer = io.StringIO() | 106 self._writer = io.StringIO() |
86 | 107 |
87 def write(self, val): | 108 def write(self, val): |
88 if six.PY2: | 109 if six.PY2: |
89 if isinstance(val, six.text_type): | 110 if isinstance(val, six.text_type): |
90 val = val.encode('utf-8') | 111 val = val.encode('utf-8') |
91 return self._writer.write(val) | 112 return self._writer.write(val) |
92 | 113 |
93 def close(self): | 114 def close(self): |
94 return self._writer.close() | 115 return self._writer.close() |
95 | 116 |
96 def getvalue(self): | 117 def getvalue(self): |
97 return self._writer.getvalue() | 118 return self._writer.getvalue() |
98 | 119 |
99 | 120 |
100 def MessageToString(message, as_utf8=False, as_one_line=False, | 121 def MessageToString(message, |
101 pointy_brackets=False, use_index_order=False, | 122 as_utf8=False, |
102 float_format=None, use_field_number=False): | 123 as_one_line=False, |
| 124 pointy_brackets=False, |
| 125 use_index_order=False, |
| 126 float_format=None, |
| 127 use_field_number=False, |
| 128 descriptor_pool=None, |
| 129 indent=0): |
103 """Convert protobuf message to text format. | 130 """Convert protobuf message to text format. |
104 | 131 |
105 Floating point values can be formatted compactly with 15 digits of | 132 Floating point values can be formatted compactly with 15 digits of |
106 precision (which is the most that IEEE 754 "double" can guarantee) | 133 precision (which is the most that IEEE 754 "double" can guarantee) |
107 using float_format='.15g'. To ensure that converting to text and back to a | 134 using float_format='.15g'. To ensure that converting to text and back to a |
108 proto will result in an identical value, float_format='.17g' should be used. | 135 proto will result in an identical value, float_format='.17g' should be used. |
109 | 136 |
110 Args: | 137 Args: |
111 message: The protocol buffers message. | 138 message: The protocol buffers message. |
112 as_utf8: Produce text output in UTF8 format. | 139 as_utf8: Produce text output in UTF8 format. |
113 as_one_line: Don't introduce newlines between fields. | 140 as_one_line: Don't introduce newlines between fields. |
114 pointy_brackets: If True, use angle brackets instead of curly braces for | 141 pointy_brackets: If True, use angle brackets instead of curly braces for |
115 nesting. | 142 nesting. |
116 use_index_order: If True, print fields of a proto message using the order | 143 use_index_order: If True, print fields of a proto message using the order |
117 defined in source code instead of the field number. By default, use the | 144 defined in source code instead of the field number. By default, use the |
118 field number order. | 145 field number order. |
119 float_format: If set, use this to specify floating point number formatting | 146 float_format: If set, use this to specify floating point number formatting |
120 (per the "Format Specification Mini-Language"); otherwise, str() is used. | 147 (per the "Format Specification Mini-Language"); otherwise, str() is used. |
121 use_field_number: If True, print field numbers instead of names. | 148 use_field_number: If True, print field numbers instead of names. |
| 149 descriptor_pool: A DescriptorPool used to resolve Any types. |
| 150 indent: The indent level, in terms of spaces, for pretty print. |
122 | 151 |
123 Returns: | 152 Returns: |
124 A string of the text formatted protocol buffer message. | 153 A string of the text formatted protocol buffer message. |
125 """ | 154 """ |
126 out = TextWriter(as_utf8) | 155 out = TextWriter(as_utf8) |
127 printer = _Printer(out, 0, as_utf8, as_one_line, | 156 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
128 pointy_brackets, use_index_order, float_format, | 157 use_index_order, float_format, use_field_number, |
129 use_field_number) | 158 descriptor_pool) |
130 printer.PrintMessage(message) | 159 printer.PrintMessage(message) |
131 result = out.getvalue() | 160 result = out.getvalue() |
132 out.close() | 161 out.close() |
133 if as_one_line: | 162 if as_one_line: |
134 return result.rstrip() | 163 return result.rstrip() |
135 return result | 164 return result |
136 | 165 |
137 | 166 |
138 def _IsMapEntry(field): | 167 def _IsMapEntry(field): |
139 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and | 168 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and |
140 field.message_type.has_options and | 169 field.message_type.has_options and |
141 field.message_type.GetOptions().map_entry) | 170 field.message_type.GetOptions().map_entry) |
142 | 171 |
143 | 172 |
144 def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False, | 173 def PrintMessage(message, |
145 pointy_brackets=False, use_index_order=False, | 174 out, |
146 float_format=None, use_field_number=False): | 175 indent=0, |
147 printer = _Printer(out, indent, as_utf8, as_one_line, | 176 as_utf8=False, |
148 pointy_brackets, use_index_order, float_format, | 177 as_one_line=False, |
149 use_field_number) | 178 pointy_brackets=False, |
| 179 use_index_order=False, |
| 180 float_format=None, |
| 181 use_field_number=False, |
| 182 descriptor_pool=None): |
| 183 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
| 184 use_index_order, float_format, use_field_number, |
| 185 descriptor_pool) |
150 printer.PrintMessage(message) | 186 printer.PrintMessage(message) |
151 | 187 |
152 | 188 |
153 def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False, | 189 def PrintField(field, |
154 pointy_brackets=False, use_index_order=False, float_format=None): | 190 value, |
| 191 out, |
| 192 indent=0, |
| 193 as_utf8=False, |
| 194 as_one_line=False, |
| 195 pointy_brackets=False, |
| 196 use_index_order=False, |
| 197 float_format=None): |
155 """Print a single field name/value pair.""" | 198 """Print a single field name/value pair.""" |
156 printer = _Printer(out, indent, as_utf8, as_one_line, | 199 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
157 pointy_brackets, use_index_order, float_format) | 200 use_index_order, float_format) |
158 printer.PrintField(field, value) | 201 printer.PrintField(field, value) |
159 | 202 |
160 | 203 |
161 def PrintFieldValue(field, value, out, indent=0, as_utf8=False, | 204 def PrintFieldValue(field, |
162 as_one_line=False, pointy_brackets=False, | 205 value, |
| 206 out, |
| 207 indent=0, |
| 208 as_utf8=False, |
| 209 as_one_line=False, |
| 210 pointy_brackets=False, |
163 use_index_order=False, | 211 use_index_order=False, |
164 float_format=None): | 212 float_format=None): |
165 """Print a single field value (not including name).""" | 213 """Print a single field value (not including name).""" |
166 printer = _Printer(out, indent, as_utf8, as_one_line, | 214 printer = _Printer(out, indent, as_utf8, as_one_line, pointy_brackets, |
167 pointy_brackets, use_index_order, float_format) | 215 use_index_order, float_format) |
168 printer.PrintFieldValue(field, value) | 216 printer.PrintFieldValue(field, value) |
169 | 217 |
170 | 218 |
| 219 def _BuildMessageFromTypeName(type_name, descriptor_pool): |
| 220 """Returns a protobuf message instance. |
| 221 |
| 222 Args: |
| 223 type_name: Fully-qualified protobuf message type name string. |
| 224 descriptor_pool: DescriptorPool instance. |
| 225 |
| 226 Returns: |
| 227 A Message instance of type matching type_name, or None if the a Descriptor |
| 228 wasn't found matching type_name. |
| 229 """ |
| 230 # pylint: disable=g-import-not-at-top |
| 231 from google.protobuf import symbol_database |
| 232 database = symbol_database.Default() |
| 233 try: |
| 234 message_descriptor = descriptor_pool.FindMessageTypeByName(type_name) |
| 235 except KeyError: |
| 236 return None |
| 237 message_type = database.GetPrototype(message_descriptor) |
| 238 return message_type() |
| 239 |
| 240 |
171 class _Printer(object): | 241 class _Printer(object): |
172 """Text format printer for protocol message.""" | 242 """Text format printer for protocol message.""" |
173 | 243 |
174 def __init__(self, out, indent=0, as_utf8=False, as_one_line=False, | 244 def __init__(self, |
175 pointy_brackets=False, use_index_order=False, float_format=None, | 245 out, |
176 use_field_number=False): | 246 indent=0, |
| 247 as_utf8=False, |
| 248 as_one_line=False, |
| 249 pointy_brackets=False, |
| 250 use_index_order=False, |
| 251 float_format=None, |
| 252 use_field_number=False, |
| 253 descriptor_pool=None): |
177 """Initialize the Printer. | 254 """Initialize the Printer. |
178 | 255 |
179 Floating point values can be formatted compactly with 15 digits of | 256 Floating point values can be formatted compactly with 15 digits of |
180 precision (which is the most that IEEE 754 "double" can guarantee) | 257 precision (which is the most that IEEE 754 "double" can guarantee) |
181 using float_format='.15g'. To ensure that converting to text and back to a | 258 using float_format='.15g'. To ensure that converting to text and back to a |
182 proto will result in an identical value, float_format='.17g' should be used. | 259 proto will result in an identical value, float_format='.17g' should be used. |
183 | 260 |
184 Args: | 261 Args: |
185 out: To record the text format result. | 262 out: To record the text format result. |
186 indent: The indent level for pretty print. | 263 indent: The indent level for pretty print. |
187 as_utf8: Produce text output in UTF8 format. | 264 as_utf8: Produce text output in UTF8 format. |
188 as_one_line: Don't introduce newlines between fields. | 265 as_one_line: Don't introduce newlines between fields. |
189 pointy_brackets: If True, use angle brackets instead of curly braces for | 266 pointy_brackets: If True, use angle brackets instead of curly braces for |
190 nesting. | 267 nesting. |
191 use_index_order: If True, print fields of a proto message using the order | 268 use_index_order: If True, print fields of a proto message using the order |
192 defined in source code instead of the field number. By default, use the | 269 defined in source code instead of the field number. By default, use the |
193 field number order. | 270 field number order. |
194 float_format: If set, use this to specify floating point number formatting | 271 float_format: If set, use this to specify floating point number formatting |
195 (per the "Format Specification Mini-Language"); otherwise, str() is | 272 (per the "Format Specification Mini-Language"); otherwise, str() is |
196 used. | 273 used. |
197 use_field_number: If True, print field numbers instead of names. | 274 use_field_number: If True, print field numbers instead of names. |
| 275 descriptor_pool: A DescriptorPool used to resolve Any types. |
198 """ | 276 """ |
199 self.out = out | 277 self.out = out |
200 self.indent = indent | 278 self.indent = indent |
201 self.as_utf8 = as_utf8 | 279 self.as_utf8 = as_utf8 |
202 self.as_one_line = as_one_line | 280 self.as_one_line = as_one_line |
203 self.pointy_brackets = pointy_brackets | 281 self.pointy_brackets = pointy_brackets |
204 self.use_index_order = use_index_order | 282 self.use_index_order = use_index_order |
205 self.float_format = float_format | 283 self.float_format = float_format |
206 self.use_field_number = use_field_number | 284 self.use_field_number = use_field_number |
| 285 self.descriptor_pool = descriptor_pool |
| 286 |
| 287 def _TryPrintAsAnyMessage(self, message): |
| 288 """Serializes if message is a google.protobuf.Any field.""" |
| 289 packed_message = _BuildMessageFromTypeName(message.TypeName(), |
| 290 self.descriptor_pool) |
| 291 if packed_message: |
| 292 packed_message.MergeFromString(message.value) |
| 293 self.out.write('%s[%s]' % (self.indent * ' ', message.type_url)) |
| 294 self._PrintMessageFieldValue(packed_message) |
| 295 self.out.write(' ' if self.as_one_line else '\n') |
| 296 return True |
| 297 else: |
| 298 return False |
207 | 299 |
208 def PrintMessage(self, message): | 300 def PrintMessage(self, message): |
209 """Convert protobuf message to text format. | 301 """Convert protobuf message to text format. |
210 | 302 |
211 Args: | 303 Args: |
212 message: The protocol buffers message. | 304 message: The protocol buffers message. |
213 """ | 305 """ |
| 306 if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and |
| 307 self.descriptor_pool and self._TryPrintAsAnyMessage(message)): |
| 308 return |
214 fields = message.ListFields() | 309 fields = message.ListFields() |
215 if self.use_index_order: | 310 if self.use_index_order: |
216 fields.sort(key=lambda x: x[0].index) | 311 fields.sort(key=lambda x: x[0].index) |
217 for field, value in fields: | 312 for field, value in fields: |
218 if _IsMapEntry(field): | 313 if _IsMapEntry(field): |
219 for key in sorted(value): | 314 for key in sorted(value): |
220 # This is slow for maps with submessage entires because it copies the | 315 # This is slow for maps with submessage entires because it copies the |
221 # entire tree. Unfortunately this would take significant refactoring | 316 # entire tree. Unfortunately this would take significant refactoring |
222 # of this file to work around. | 317 # of this file to work around. |
223 # | 318 # |
224 # TODO(haberman): refactor and optimize if this becomes an issue. | 319 # TODO(haberman): refactor and optimize if this becomes an issue. |
225 entry_submsg = field.message_type._concrete_class( | 320 entry_submsg = value.GetEntryClass()(key=key, value=value[key]) |
226 key=key, value=value[key]) | |
227 self.PrintField(field, entry_submsg) | 321 self.PrintField(field, entry_submsg) |
228 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | 322 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
229 for element in value: | 323 for element in value: |
230 self.PrintField(field, element) | 324 self.PrintField(field, element) |
231 else: | 325 else: |
232 self.PrintField(field, value) | 326 self.PrintField(field, value) |
233 | 327 |
234 def PrintField(self, field, value): | 328 def PrintField(self, field, value): |
235 """Print a single field name/value pair.""" | 329 """Print a single field name/value pair.""" |
236 out = self.out | 330 out = self.out |
(...skipping 20 matching lines...) Expand all Loading... |
257 # The colon is optional in this case, but our cross-language golden files | 351 # The colon is optional in this case, but our cross-language golden files |
258 # don't include it. | 352 # don't include it. |
259 out.write(': ') | 353 out.write(': ') |
260 | 354 |
261 self.PrintFieldValue(field, value) | 355 self.PrintFieldValue(field, value) |
262 if self.as_one_line: | 356 if self.as_one_line: |
263 out.write(' ') | 357 out.write(' ') |
264 else: | 358 else: |
265 out.write('\n') | 359 out.write('\n') |
266 | 360 |
| 361 def _PrintMessageFieldValue(self, value): |
| 362 if self.pointy_brackets: |
| 363 openb = '<' |
| 364 closeb = '>' |
| 365 else: |
| 366 openb = '{' |
| 367 closeb = '}' |
| 368 |
| 369 if self.as_one_line: |
| 370 self.out.write(' %s ' % openb) |
| 371 self.PrintMessage(value) |
| 372 self.out.write(closeb) |
| 373 else: |
| 374 self.out.write(' %s\n' % openb) |
| 375 self.indent += 2 |
| 376 self.PrintMessage(value) |
| 377 self.indent -= 2 |
| 378 self.out.write(' ' * self.indent + closeb) |
| 379 |
267 def PrintFieldValue(self, field, value): | 380 def PrintFieldValue(self, field, value): |
268 """Print a single field value (not including name). | 381 """Print a single field value (not including name). |
269 | 382 |
270 For repeated fields, the value should be a single element. | 383 For repeated fields, the value should be a single element. |
271 | 384 |
272 Args: | 385 Args: |
273 field: The descriptor of the field to be printed. | 386 field: The descriptor of the field to be printed. |
274 value: The value of the field. | 387 value: The value of the field. |
275 """ | 388 """ |
276 out = self.out | 389 out = self.out |
277 if self.pointy_brackets: | |
278 openb = '<' | |
279 closeb = '>' | |
280 else: | |
281 openb = '{' | |
282 closeb = '}' | |
283 | |
284 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: | 390 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: |
285 if self.as_one_line: | 391 self._PrintMessageFieldValue(value) |
286 out.write(' %s ' % openb) | |
287 self.PrintMessage(value) | |
288 out.write(closeb) | |
289 else: | |
290 out.write(' %s\n' % openb) | |
291 self.indent += 2 | |
292 self.PrintMessage(value) | |
293 self.indent -= 2 | |
294 out.write(' ' * self.indent + closeb) | |
295 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: | 392 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM: |
296 enum_value = field.enum_type.values_by_number.get(value, None) | 393 enum_value = field.enum_type.values_by_number.get(value, None) |
297 if enum_value is not None: | 394 if enum_value is not None: |
298 out.write(enum_value.name) | 395 out.write(enum_value.name) |
299 else: | 396 else: |
300 out.write(str(value)) | 397 out.write(str(value)) |
301 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: | 398 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING: |
302 out.write('\"') | 399 out.write('\"') |
303 if isinstance(value, six.text_type): | 400 if isinstance(value, six.text_type): |
304 out_value = value.encode('utf-8') | 401 out_value = value.encode('utf-8') |
(...skipping 10 matching lines...) Expand all Loading... |
315 if value: | 412 if value: |
316 out.write('true') | 413 out.write('true') |
317 else: | 414 else: |
318 out.write('false') | 415 out.write('false') |
319 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: | 416 elif field.cpp_type in _FLOAT_TYPES and self.float_format is not None: |
320 out.write('{1:{0}}'.format(self.float_format, value)) | 417 out.write('{1:{0}}'.format(self.float_format, value)) |
321 else: | 418 else: |
322 out.write(str(value)) | 419 out.write(str(value)) |
323 | 420 |
324 | 421 |
325 def Parse(text, message, | 422 def Parse(text, |
326 allow_unknown_extension=False, allow_field_number=False): | 423 message, |
327 """Parses an text representation of a protocol message into a message. | 424 allow_unknown_extension=False, |
| 425 allow_field_number=False): |
| 426 """Parses a text representation of a protocol message into a message. |
328 | 427 |
329 Args: | 428 Args: |
330 text: Message text representation. | 429 text: Message text representation. |
331 message: A protocol buffer message to merge into. | 430 message: A protocol buffer message to merge into. |
332 allow_unknown_extension: if True, skip over missing extensions and keep | 431 allow_unknown_extension: if True, skip over missing extensions and keep |
333 parsing | 432 parsing |
334 allow_field_number: if True, both field number and field name are allowed. | 433 allow_field_number: if True, both field number and field name are allowed. |
335 | 434 |
336 Returns: | 435 Returns: |
337 The same message passed as argument. | 436 The same message passed as argument. |
338 | 437 |
339 Raises: | 438 Raises: |
340 ParseError: On text parsing problems. | 439 ParseError: On text parsing problems. |
341 """ | 440 """ |
342 if not isinstance(text, str): | 441 if not isinstance(text, str): |
343 text = text.decode('utf-8') | 442 text = text.decode('utf-8') |
344 return ParseLines(text.split('\n'), message, allow_unknown_extension, | 443 return ParseLines( |
345 allow_field_number) | 444 text.split('\n'), message, allow_unknown_extension, allow_field_number) |
346 | 445 |
347 | 446 |
348 def Merge(text, message, allow_unknown_extension=False, | 447 def Merge(text, |
349 allow_field_number=False): | 448 message, |
350 """Parses an text representation of a protocol message into a message. | 449 allow_unknown_extension=False, |
| 450 allow_field_number=False, |
| 451 descriptor_pool=None): |
| 452 """Parses a text representation of a protocol message into a message. |
351 | 453 |
352 Like Parse(), but allows repeated values for a non-repeated field, and uses | 454 Like Parse(), but allows repeated values for a non-repeated field, and uses |
353 the last one. | 455 the last one. |
354 | 456 |
355 Args: | 457 Args: |
356 text: Message text representation. | 458 text: Message text representation. |
357 message: A protocol buffer message to merge into. | 459 message: A protocol buffer message to merge into. |
358 allow_unknown_extension: if True, skip over missing extensions and keep | 460 allow_unknown_extension: if True, skip over missing extensions and keep |
359 parsing | 461 parsing |
360 allow_field_number: if True, both field number and field name are allowed. | 462 allow_field_number: if True, both field number and field name are allowed. |
| 463 descriptor_pool: A DescriptorPool used to resolve Any types. |
361 | 464 |
362 Returns: | 465 Returns: |
363 The same message passed as argument. | 466 The same message passed as argument. |
364 | 467 |
365 Raises: | 468 Raises: |
366 ParseError: On text parsing problems. | 469 ParseError: On text parsing problems. |
367 """ | 470 """ |
368 return MergeLines(text.split('\n'), message, allow_unknown_extension, | 471 return MergeLines( |
369 allow_field_number) | 472 text.split('\n'), |
| 473 message, |
| 474 allow_unknown_extension, |
| 475 allow_field_number, |
| 476 descriptor_pool=descriptor_pool) |
370 | 477 |
371 | 478 |
372 def ParseLines(lines, message, allow_unknown_extension=False, | 479 def ParseLines(lines, |
| 480 message, |
| 481 allow_unknown_extension=False, |
373 allow_field_number=False): | 482 allow_field_number=False): |
374 """Parses an text representation of a protocol message into a message. | 483 """Parses a text representation of a protocol message into a message. |
375 | 484 |
376 Args: | 485 Args: |
377 lines: An iterable of lines of a message's text representation. | 486 lines: An iterable of lines of a message's text representation. |
378 message: A protocol buffer message to merge into. | 487 message: A protocol buffer message to merge into. |
379 allow_unknown_extension: if True, skip over missing extensions and keep | 488 allow_unknown_extension: if True, skip over missing extensions and keep |
380 parsing | 489 parsing |
381 allow_field_number: if True, both field number and field name are allowed. | 490 allow_field_number: if True, both field number and field name are allowed. |
| 491 descriptor_pool: A DescriptorPool used to resolve Any types. |
382 | 492 |
383 Returns: | 493 Returns: |
384 The same message passed as argument. | 494 The same message passed as argument. |
385 | 495 |
386 Raises: | 496 Raises: |
387 ParseError: On text parsing problems. | 497 ParseError: On text parsing problems. |
388 """ | 498 """ |
389 parser = _Parser(allow_unknown_extension, allow_field_number) | 499 parser = _Parser(allow_unknown_extension, allow_field_number) |
390 return parser.ParseLines(lines, message) | 500 return parser.ParseLines(lines, message) |
391 | 501 |
392 | 502 |
393 def MergeLines(lines, message, allow_unknown_extension=False, | 503 def MergeLines(lines, |
394 allow_field_number=False): | 504 message, |
395 """Parses an text representation of a protocol message into a message. | 505 allow_unknown_extension=False, |
| 506 allow_field_number=False, |
| 507 descriptor_pool=None): |
| 508 """Parses a text representation of a protocol message into a message. |
396 | 509 |
397 Args: | 510 Args: |
398 lines: An iterable of lines of a message's text representation. | 511 lines: An iterable of lines of a message's text representation. |
399 message: A protocol buffer message to merge into. | 512 message: A protocol buffer message to merge into. |
400 allow_unknown_extension: if True, skip over missing extensions and keep | 513 allow_unknown_extension: if True, skip over missing extensions and keep |
401 parsing | 514 parsing |
402 allow_field_number: if True, both field number and field name are allowed. | 515 allow_field_number: if True, both field number and field name are allowed. |
403 | 516 |
404 Returns: | 517 Returns: |
405 The same message passed as argument. | 518 The same message passed as argument. |
406 | 519 |
407 Raises: | 520 Raises: |
408 ParseError: On text parsing problems. | 521 ParseError: On text parsing problems. |
409 """ | 522 """ |
410 parser = _Parser(allow_unknown_extension, allow_field_number) | 523 parser = _Parser(allow_unknown_extension, |
| 524 allow_field_number, |
| 525 descriptor_pool=descriptor_pool) |
411 return parser.MergeLines(lines, message) | 526 return parser.MergeLines(lines, message) |
412 | 527 |
413 | 528 |
414 class _Parser(object): | 529 class _Parser(object): |
415 """Text format parser for protocol message.""" | 530 """Text format parser for protocol message.""" |
416 | 531 |
417 def __init__(self, allow_unknown_extension=False, allow_field_number=False): | 532 def __init__(self, |
| 533 allow_unknown_extension=False, |
| 534 allow_field_number=False, |
| 535 descriptor_pool=None): |
418 self.allow_unknown_extension = allow_unknown_extension | 536 self.allow_unknown_extension = allow_unknown_extension |
419 self.allow_field_number = allow_field_number | 537 self.allow_field_number = allow_field_number |
| 538 self.descriptor_pool = descriptor_pool |
420 | 539 |
421 def ParseFromString(self, text, message): | 540 def ParseFromString(self, text, message): |
422 """Parses an text representation of a protocol message into a message.""" | 541 """Parses a text representation of a protocol message into a message.""" |
423 if not isinstance(text, str): | 542 if not isinstance(text, str): |
424 text = text.decode('utf-8') | 543 text = text.decode('utf-8') |
425 return self.ParseLines(text.split('\n'), message) | 544 return self.ParseLines(text.split('\n'), message) |
426 | 545 |
427 def ParseLines(self, lines, message): | 546 def ParseLines(self, lines, message): |
428 """Parses an text representation of a protocol message into a message.""" | 547 """Parses a text representation of a protocol message into a message.""" |
429 self._allow_multiple_scalars = False | 548 self._allow_multiple_scalars = False |
430 self._ParseOrMerge(lines, message) | 549 self._ParseOrMerge(lines, message) |
431 return message | 550 return message |
432 | 551 |
433 def MergeFromString(self, text, message): | 552 def MergeFromString(self, text, message): |
434 """Merges an text representation of a protocol message into a message.""" | 553 """Merges a text representation of a protocol message into a message.""" |
435 return self._MergeLines(text.split('\n'), message) | 554 return self._MergeLines(text.split('\n'), message) |
436 | 555 |
437 def MergeLines(self, lines, message): | 556 def MergeLines(self, lines, message): |
438 """Merges an text representation of a protocol message into a message.""" | 557 """Merges a text representation of a protocol message into a message.""" |
439 self._allow_multiple_scalars = True | 558 self._allow_multiple_scalars = True |
440 self._ParseOrMerge(lines, message) | 559 self._ParseOrMerge(lines, message) |
441 return message | 560 return message |
442 | 561 |
443 def _ParseOrMerge(self, lines, message): | 562 def _ParseOrMerge(self, lines, message): |
444 """Converts an text representation of a protocol message into a message. | 563 """Converts a text representation of a protocol message into a message. |
445 | 564 |
446 Args: | 565 Args: |
447 lines: Lines of a message's text representation. | 566 lines: Lines of a message's text representation. |
448 message: A protocol buffer message to merge into. | 567 message: A protocol buffer message to merge into. |
449 | 568 |
450 Raises: | 569 Raises: |
451 ParseError: On text parsing problems. | 570 ParseError: On text parsing problems. |
452 """ | 571 """ |
453 tokenizer = _Tokenizer(lines) | 572 tokenizer = Tokenizer(lines) |
454 while not tokenizer.AtEnd(): | 573 while not tokenizer.AtEnd(): |
455 self._MergeField(tokenizer, message) | 574 self._MergeField(tokenizer, message) |
456 | 575 |
457 def _MergeField(self, tokenizer, message): | 576 def _MergeField(self, tokenizer, message): |
458 """Merges a single protocol message field into a message. | 577 """Merges a single protocol message field into a message. |
459 | 578 |
460 Args: | 579 Args: |
461 tokenizer: A tokenizer to parse the field name and values. | 580 tokenizer: A tokenizer to parse the field name and values. |
462 message: A protocol message to record the data. | 581 message: A protocol message to record the data. |
463 | 582 |
(...skipping 20 matching lines...) Expand all Loading... |
484 field = message.Extensions._FindExtensionByName(name) | 603 field = message.Extensions._FindExtensionByName(name) |
485 # pylint: enable=protected-access | 604 # pylint: enable=protected-access |
486 if not field: | 605 if not field: |
487 if self.allow_unknown_extension: | 606 if self.allow_unknown_extension: |
488 field = None | 607 field = None |
489 else: | 608 else: |
490 raise tokenizer.ParseErrorPreviousToken( | 609 raise tokenizer.ParseErrorPreviousToken( |
491 'Extension "%s" not registered.' % name) | 610 'Extension "%s" not registered.' % name) |
492 elif message_descriptor != field.containing_type: | 611 elif message_descriptor != field.containing_type: |
493 raise tokenizer.ParseErrorPreviousToken( | 612 raise tokenizer.ParseErrorPreviousToken( |
494 'Extension "%s" does not extend message type "%s".' % ( | 613 'Extension "%s" does not extend message type "%s".' % |
495 name, message_descriptor.full_name)) | 614 (name, message_descriptor.full_name)) |
496 | 615 |
497 tokenizer.Consume(']') | 616 tokenizer.Consume(']') |
498 | 617 |
499 else: | 618 else: |
500 name = tokenizer.ConsumeIdentifier() | 619 name = tokenizer.ConsumeIdentifierOrNumber() |
501 if self.allow_field_number and name.isdigit(): | 620 if self.allow_field_number and name.isdigit(): |
502 number = ParseInteger(name, True, True) | 621 number = ParseInteger(name, True, True) |
503 field = message_descriptor.fields_by_number.get(number, None) | 622 field = message_descriptor.fields_by_number.get(number, None) |
504 if not field and message_descriptor.is_extendable: | 623 if not field and message_descriptor.is_extendable: |
505 field = message.Extensions._FindExtensionByNumber(number) | 624 field = message.Extensions._FindExtensionByNumber(number) |
506 else: | 625 else: |
507 field = message_descriptor.fields_by_name.get(name, None) | 626 field = message_descriptor.fields_by_name.get(name, None) |
508 | 627 |
509 # Group names are expected to be capitalized as they appear in the | 628 # Group names are expected to be capitalized as they appear in the |
510 # .proto file, which actually matches their type names, not their field | 629 # .proto file, which actually matches their type names, not their field |
511 # names. | 630 # names. |
512 if not field: | 631 if not field: |
513 field = message_descriptor.fields_by_name.get(name.lower(), None) | 632 field = message_descriptor.fields_by_name.get(name.lower(), None) |
514 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: | 633 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: |
515 field = None | 634 field = None |
516 | 635 |
517 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and | 636 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and |
518 field.message_type.name != name): | 637 field.message_type.name != name): |
519 field = None | 638 field = None |
520 | 639 |
521 if not field: | 640 if not field: |
522 raise tokenizer.ParseErrorPreviousToken( | 641 raise tokenizer.ParseErrorPreviousToken( |
523 'Message type "%s" has no field named "%s".' % ( | 642 'Message type "%s" has no field named "%s".' % |
524 message_descriptor.full_name, name)) | 643 (message_descriptor.full_name, name)) |
525 | 644 |
526 if field: | 645 if field: |
527 if not self._allow_multiple_scalars and field.containing_oneof: | 646 if not self._allow_multiple_scalars and field.containing_oneof: |
528 # Check if there's a different field set in this oneof. | 647 # Check if there's a different field set in this oneof. |
529 # Note that we ignore the case if the same field was set before, and we | 648 # Note that we ignore the case if the same field was set before, and we |
530 # apply _allow_multiple_scalars to non-scalar fields as well. | 649 # apply _allow_multiple_scalars to non-scalar fields as well. |
531 which_oneof = message.WhichOneof(field.containing_oneof.name) | 650 which_oneof = message.WhichOneof(field.containing_oneof.name) |
532 if which_oneof is not None and which_oneof != field.name: | 651 if which_oneof is not None and which_oneof != field.name: |
533 raise tokenizer.ParseErrorPreviousToken( | 652 raise tokenizer.ParseErrorPreviousToken( |
534 'Field "%s" is specified along with field "%s", another member ' | 653 'Field "%s" is specified along with field "%s", another member ' |
535 'of oneof "%s" for message type "%s".' % ( | 654 'of oneof "%s" for message type "%s".' % |
536 field.name, which_oneof, field.containing_oneof.name, | 655 (field.name, which_oneof, field.containing_oneof.name, |
537 message_descriptor.full_name)) | 656 message_descriptor.full_name)) |
538 | 657 |
539 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: | 658 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: |
540 tokenizer.TryConsume(':') | 659 tokenizer.TryConsume(':') |
541 merger = self._MergeMessageField | 660 merger = self._MergeMessageField |
542 else: | 661 else: |
543 tokenizer.Consume(':') | 662 tokenizer.Consume(':') |
544 merger = self._MergeScalarField | 663 merger = self._MergeScalarField |
545 | 664 |
546 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED | 665 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and |
547 and tokenizer.TryConsume('[')): | 666 tokenizer.TryConsume('[')): |
548 # Short repeated format, e.g. "foo: [1, 2, 3]" | 667 # Short repeated format, e.g. "foo: [1, 2, 3]" |
549 while True: | 668 while True: |
550 merger(tokenizer, message, field) | 669 merger(tokenizer, message, field) |
551 if tokenizer.TryConsume(']'): break | 670 if tokenizer.TryConsume(']'): |
| 671 break |
552 tokenizer.Consume(',') | 672 tokenizer.Consume(',') |
553 | 673 |
554 else: | 674 else: |
555 merger(tokenizer, message, field) | 675 merger(tokenizer, message, field) |
556 | 676 |
557 else: # Proto field is unknown. | 677 else: # Proto field is unknown. |
558 assert self.allow_unknown_extension | 678 assert self.allow_unknown_extension |
559 _SkipFieldContents(tokenizer) | 679 _SkipFieldContents(tokenizer) |
560 | 680 |
561 # For historical reasons, fields may optionally be separated by commas or | 681 # For historical reasons, fields may optionally be separated by commas or |
562 # semicolons. | 682 # semicolons. |
563 if not tokenizer.TryConsume(','): | 683 if not tokenizer.TryConsume(','): |
564 tokenizer.TryConsume(';') | 684 tokenizer.TryConsume(';') |
565 | 685 |
| 686 def _ConsumeAnyTypeUrl(self, tokenizer): |
| 687 """Consumes a google.protobuf.Any type URL and returns the type name.""" |
| 688 # Consume "type.googleapis.com/". |
| 689 tokenizer.ConsumeIdentifier() |
| 690 tokenizer.Consume('.') |
| 691 tokenizer.ConsumeIdentifier() |
| 692 tokenizer.Consume('.') |
| 693 tokenizer.ConsumeIdentifier() |
| 694 tokenizer.Consume('/') |
| 695 # Consume the fully-qualified type name. |
| 696 name = [tokenizer.ConsumeIdentifier()] |
| 697 while tokenizer.TryConsume('.'): |
| 698 name.append(tokenizer.ConsumeIdentifier()) |
| 699 return '.'.join(name) |
| 700 |
566 def _MergeMessageField(self, tokenizer, message, field): | 701 def _MergeMessageField(self, tokenizer, message, field): |
567 """Merges a single scalar field into a message. | 702 """Merges a single scalar field into a message. |
568 | 703 |
569 Args: | 704 Args: |
570 tokenizer: A tokenizer to parse the field value. | 705 tokenizer: A tokenizer to parse the field value. |
571 message: The message of which field is a member. | 706 message: The message of which field is a member. |
572 field: The descriptor of the field to be merged. | 707 field: The descriptor of the field to be merged. |
573 | 708 |
574 Raises: | 709 Raises: |
575 ParseError: In case of text parsing problems. | 710 ParseError: In case of text parsing problems. |
576 """ | 711 """ |
577 is_map_entry = _IsMapEntry(field) | 712 is_map_entry = _IsMapEntry(field) |
578 | 713 |
579 if tokenizer.TryConsume('<'): | 714 if tokenizer.TryConsume('<'): |
580 end_token = '>' | 715 end_token = '>' |
581 else: | 716 else: |
582 tokenizer.Consume('{') | 717 tokenizer.Consume('{') |
583 end_token = '}' | 718 end_token = '}' |
584 | 719 |
585 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: | 720 if (field.message_type.full_name == _ANY_FULL_TYPE_NAME and |
| 721 tokenizer.TryConsume('[')): |
| 722 packed_type_name = self._ConsumeAnyTypeUrl(tokenizer) |
| 723 tokenizer.Consume(']') |
| 724 tokenizer.TryConsume(':') |
| 725 if tokenizer.TryConsume('<'): |
| 726 expanded_any_end_token = '>' |
| 727 else: |
| 728 tokenizer.Consume('{') |
| 729 expanded_any_end_token = '}' |
| 730 if not self.descriptor_pool: |
| 731 raise ParseError('Descriptor pool required to parse expanded Any field') |
| 732 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name, |
| 733 self.descriptor_pool) |
| 734 if not expanded_any_sub_message: |
| 735 raise ParseError('Type %s not found in descriptor pool' % |
| 736 packed_type_name) |
| 737 while not tokenizer.TryConsume(expanded_any_end_token): |
| 738 if tokenizer.AtEnd(): |
| 739 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % |
| 740 (expanded_any_end_token,)) |
| 741 self._MergeField(tokenizer, expanded_any_sub_message) |
| 742 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
| 743 any_message = getattr(message, field.name).add() |
| 744 else: |
| 745 any_message = getattr(message, field.name) |
| 746 any_message.Pack(expanded_any_sub_message) |
| 747 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED: |
586 if field.is_extension: | 748 if field.is_extension: |
587 sub_message = message.Extensions[field].add() | 749 sub_message = message.Extensions[field].add() |
588 elif is_map_entry: | 750 elif is_map_entry: |
589 # pylint: disable=protected-access | 751 sub_message = getattr(message, field.name).GetEntryClass()() |
590 sub_message = field.message_type._concrete_class() | |
591 else: | 752 else: |
592 sub_message = getattr(message, field.name).add() | 753 sub_message = getattr(message, field.name).add() |
593 else: | 754 else: |
594 if field.is_extension: | 755 if field.is_extension: |
595 sub_message = message.Extensions[field] | 756 sub_message = message.Extensions[field] |
596 else: | 757 else: |
597 sub_message = getattr(message, field.name) | 758 sub_message = getattr(message, field.name) |
598 sub_message.SetInParent() | 759 sub_message.SetInParent() |
599 | 760 |
600 while not tokenizer.TryConsume(end_token): | 761 while not tokenizer.TryConsume(end_token): |
(...skipping 20 matching lines...) Expand all Loading... |
621 Raises: | 782 Raises: |
622 ParseError: In case of text parsing problems. | 783 ParseError: In case of text parsing problems. |
623 RuntimeError: On runtime errors. | 784 RuntimeError: On runtime errors. |
624 """ | 785 """ |
625 _ = self.allow_unknown_extension | 786 _ = self.allow_unknown_extension |
626 value = None | 787 value = None |
627 | 788 |
628 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, | 789 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, |
629 descriptor.FieldDescriptor.TYPE_SINT32, | 790 descriptor.FieldDescriptor.TYPE_SINT32, |
630 descriptor.FieldDescriptor.TYPE_SFIXED32): | 791 descriptor.FieldDescriptor.TYPE_SFIXED32): |
631 value = tokenizer.ConsumeInt32() | 792 value = _ConsumeInt32(tokenizer) |
632 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, | 793 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, |
633 descriptor.FieldDescriptor.TYPE_SINT64, | 794 descriptor.FieldDescriptor.TYPE_SINT64, |
634 descriptor.FieldDescriptor.TYPE_SFIXED64): | 795 descriptor.FieldDescriptor.TYPE_SFIXED64): |
635 value = tokenizer.ConsumeInt64() | 796 value = _ConsumeInt64(tokenizer) |
636 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, | 797 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, |
637 descriptor.FieldDescriptor.TYPE_FIXED32): | 798 descriptor.FieldDescriptor.TYPE_FIXED32): |
638 value = tokenizer.ConsumeUint32() | 799 value = _ConsumeUint32(tokenizer) |
639 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, | 800 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, |
640 descriptor.FieldDescriptor.TYPE_FIXED64): | 801 descriptor.FieldDescriptor.TYPE_FIXED64): |
641 value = tokenizer.ConsumeUint64() | 802 value = _ConsumeUint64(tokenizer) |
642 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, | 803 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, |
643 descriptor.FieldDescriptor.TYPE_DOUBLE): | 804 descriptor.FieldDescriptor.TYPE_DOUBLE): |
644 value = tokenizer.ConsumeFloat() | 805 value = tokenizer.ConsumeFloat() |
645 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: | 806 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: |
646 value = tokenizer.ConsumeBool() | 807 value = tokenizer.ConsumeBool() |
647 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: | 808 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: |
648 value = tokenizer.ConsumeString() | 809 value = tokenizer.ConsumeString() |
649 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: | 810 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: |
650 value = tokenizer.ConsumeByteString() | 811 value = tokenizer.ConsumeByteString() |
651 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: | 812 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
746 ParseError: In case an invalid field value is found. | 907 ParseError: In case an invalid field value is found. |
747 """ | 908 """ |
748 # String/bytes tokens can come in multiple adjacent string literals. | 909 # String/bytes tokens can come in multiple adjacent string literals. |
749 # If we can consume one, consume as many as we can. | 910 # If we can consume one, consume as many as we can. |
750 if tokenizer.TryConsumeByteString(): | 911 if tokenizer.TryConsumeByteString(): |
751 while tokenizer.TryConsumeByteString(): | 912 while tokenizer.TryConsumeByteString(): |
752 pass | 913 pass |
753 return | 914 return |
754 | 915 |
755 if (not tokenizer.TryConsumeIdentifier() and | 916 if (not tokenizer.TryConsumeIdentifier() and |
756 not tokenizer.TryConsumeInt64() and | 917 not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and |
757 not tokenizer.TryConsumeUint64() and | |
758 not tokenizer.TryConsumeFloat()): | 918 not tokenizer.TryConsumeFloat()): |
759 raise ParseError('Invalid field value: ' + tokenizer.token) | 919 raise ParseError('Invalid field value: ' + tokenizer.token) |
760 | 920 |
761 | 921 |
class Tokenizer(object):
  """Protocol buffer text representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  _WHITESPACE = re.compile(r'\s+')
  _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
  _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
  _TOKEN = re.compile('|'.join([
      r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
      r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
  ] + [  # quoted str for each quote mark
      r'{qt}([^{qt}\n\\]|\\.)*({qt}|\\?$)'.format(qt=mark) for mark in _QUOTES
  ]))

  _IDENTIFIER = re.compile(r'[^\d\W]\w*')
  _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')

  def __init__(self, lines, skip_comments=True):
    """Initializes the tokenizer and advances to the first token.

    Args:
      lines: An iterable of input lines to tokenize.
      skip_comments: If True (the default), '#' comments are treated as
          whitespace; if False, each comment is returned as its own token.
    """
    self._position = 0
    self._line = -1
    self._column = 0
    self._token_start = None
    self.token = ''
    self._lines = iter(lines)
    self._current_line = ''
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._skip_comments = skip_comments
    # Use a conditional expression rather than the fragile `cond and a or b`
    # idiom; both compiled patterns are truthy, so behavior is unchanged.
    self._whitespace_pattern = (
        self._WHITESPACE_OR_COMMENT if skip_comments else self._WHITESPACE)
    self._SkipWhitespace()
    self.NextToken()
794 | 960 |
795 def LookingAt(self, token): | 961 def LookingAt(self, token): |
796 return self.token == token | 962 return self.token == token |
797 | 963 |
798 def AtEnd(self): | 964 def AtEnd(self): |
799 """Checks the end of the text was reached. | 965 """Checks the end of the text was reached. |
800 | 966 |
801 Returns: | 967 Returns: |
802 True iff the end was reached. | 968 True iff the end was reached. |
803 """ | 969 """ |
804 return not self.token | 970 return not self.token |
805 | 971 |
806 def _PopLine(self): | 972 def _PopLine(self): |
807 while len(self._current_line) <= self._column: | 973 while len(self._current_line) <= self._column: |
808 try: | 974 try: |
809 self._current_line = next(self._lines) | 975 self._current_line = next(self._lines) |
810 except StopIteration: | 976 except StopIteration: |
811 self._current_line = '' | 977 self._current_line = '' |
812 self._more_lines = False | 978 self._more_lines = False |
813 return | 979 return |
814 else: | 980 else: |
815 self._line += 1 | 981 self._line += 1 |
816 self._column = 0 | 982 self._column = 0 |
817 | 983 |
818 def _SkipWhitespace(self): | 984 def _SkipWhitespace(self): |
819 while True: | 985 while True: |
820 self._PopLine() | 986 self._PopLine() |
821 match = self._WHITESPACE.match(self._current_line, self._column) | 987 match = self._whitespace_pattern.match(self._current_line, self._column) |
822 if not match: | 988 if not match: |
823 break | 989 break |
824 length = len(match.group(0)) | 990 length = len(match.group(0)) |
825 self._column += length | 991 self._column += length |
826 | 992 |
827 def TryConsume(self, token): | 993 def TryConsume(self, token): |
828 """Tries to consume a given piece of text. | 994 """Tries to consume a given piece of text. |
829 | 995 |
830 Args: | 996 Args: |
831 token: Text to consume. | 997 token: Text to consume. |
832 | 998 |
833 Returns: | 999 Returns: |
834 True iff the text was consumed. | 1000 True iff the text was consumed. |
835 """ | 1001 """ |
836 if self.token == token: | 1002 if self.token == token: |
837 self.NextToken() | 1003 self.NextToken() |
838 return True | 1004 return True |
839 return False | 1005 return False |
840 | 1006 |
841 def Consume(self, token): | 1007 def Consume(self, token): |
842 """Consumes a piece of text. | 1008 """Consumes a piece of text. |
843 | 1009 |
844 Args: | 1010 Args: |
845 token: Text to consume. | 1011 token: Text to consume. |
846 | 1012 |
847 Raises: | 1013 Raises: |
848 ParseError: If the text couldn't be consumed. | 1014 ParseError: If the text couldn't be consumed. |
849 """ | 1015 """ |
850 if not self.TryConsume(token): | 1016 if not self.TryConsume(token): |
851 raise self._ParseError('Expected "%s".' % token) | 1017 raise self.ParseError('Expected "%s".' % token) |
| 1018 |
| 1019 def ConsumeComment(self): |
| 1020 result = self.token |
| 1021 if not self._COMMENT.match(result): |
| 1022 raise self.ParseError('Expected comment.') |
| 1023 self.NextToken() |
| 1024 return result |
852 | 1025 |
853 def TryConsumeIdentifier(self): | 1026 def TryConsumeIdentifier(self): |
854 try: | 1027 try: |
855 self.ConsumeIdentifier() | 1028 self.ConsumeIdentifier() |
856 return True | 1029 return True |
857 except ParseError: | 1030 except ParseError: |
858 return False | 1031 return False |
859 | 1032 |
860 def ConsumeIdentifier(self): | 1033 def ConsumeIdentifier(self): |
861 """Consumes protocol message field identifier. | 1034 """Consumes protocol message field identifier. |
862 | 1035 |
863 Returns: | 1036 Returns: |
864 Identifier string. | 1037 Identifier string. |
865 | 1038 |
866 Raises: | 1039 Raises: |
867 ParseError: If an identifier couldn't be consumed. | 1040 ParseError: If an identifier couldn't be consumed. |
868 """ | 1041 """ |
869 result = self.token | 1042 result = self.token |
870 if not self._IDENTIFIER.match(result): | 1043 if not self._IDENTIFIER.match(result): |
871 raise self._ParseError('Expected identifier.') | 1044 raise self.ParseError('Expected identifier.') |
872 self.NextToken() | 1045 self.NextToken() |
873 return result | 1046 return result |
874 | 1047 |
875 def ConsumeInt32(self): | 1048 def TryConsumeIdentifierOrNumber(self): |
876 """Consumes a signed 32bit integer number. | 1049 try: |
| 1050 self.ConsumeIdentifierOrNumber() |
| 1051 return True |
| 1052 except ParseError: |
| 1053 return False |
877 | 1054 |
| 1055 def ConsumeIdentifierOrNumber(self): |
| 1056 """Consumes protocol message field identifier. |
| 1057 |
| 1058 Returns: |
| 1059 Identifier string. |
| 1060 |
| 1061 Raises: |
| 1062 ParseError: If an identifier couldn't be consumed. |
| 1063 """ |
| 1064 result = self.token |
| 1065 if not self._IDENTIFIER_OR_NUMBER.match(result): |
| 1066 raise self.ParseError('Expected identifier or number.') |
| 1067 self.NextToken() |
| 1068 return result |
| 1069 |
| 1070 def TryConsumeInteger(self): |
| 1071 try: |
| 1072 # Note: is_long only affects value type, not whether an error is raised. |
| 1073 self.ConsumeInteger() |
| 1074 return True |
| 1075 except ParseError: |
| 1076 return False |
| 1077 |
| 1078 def ConsumeInteger(self, is_long=False): |
| 1079 """Consumes an integer number. |
| 1080 |
| 1081 Args: |
| 1082 is_long: True if the value should be returned as a long integer. |
878 Returns: | 1083 Returns: |
879 The integer parsed. | 1084 The integer parsed. |
880 | 1085 |
881 Raises: | 1086 Raises: |
882 ParseError: If a signed 32bit integer couldn't be consumed. | 1087 ParseError: If an integer couldn't be consumed. |
883 """ | 1088 """ |
884 try: | 1089 try: |
885 result = ParseInteger(self.token, is_signed=True, is_long=False) | 1090 result = _ParseAbstractInteger(self.token, is_long=is_long) |
886 except ValueError as e: | 1091 except ValueError as e: |
887 raise self._ParseError(str(e)) | 1092 raise self.ParseError(str(e)) |
888 self.NextToken() | 1093 self.NextToken() |
889 return result | 1094 return result |
890 | 1095 |
891 def ConsumeUint32(self): | |
892 """Consumes an unsigned 32bit integer number. | |
893 | |
894 Returns: | |
895 The integer parsed. | |
896 | |
897 Raises: | |
898 ParseError: If an unsigned 32bit integer couldn't be consumed. | |
899 """ | |
900 try: | |
901 result = ParseInteger(self.token, is_signed=False, is_long=False) | |
902 except ValueError as e: | |
903 raise self._ParseError(str(e)) | |
904 self.NextToken() | |
905 return result | |
906 | |
907 def TryConsumeInt64(self): | |
908 try: | |
909 self.ConsumeInt64() | |
910 return True | |
911 except ParseError: | |
912 return False | |
913 | |
914 def ConsumeInt64(self): | |
915 """Consumes a signed 64bit integer number. | |
916 | |
917 Returns: | |
918 The integer parsed. | |
919 | |
920 Raises: | |
921 ParseError: If a signed 64bit integer couldn't be consumed. | |
922 """ | |
923 try: | |
924 result = ParseInteger(self.token, is_signed=True, is_long=True) | |
925 except ValueError as e: | |
926 raise self._ParseError(str(e)) | |
927 self.NextToken() | |
928 return result | |
929 | |
930 def TryConsumeUint64(self): | |
931 try: | |
932 self.ConsumeUint64() | |
933 return True | |
934 except ParseError: | |
935 return False | |
936 | |
937 def ConsumeUint64(self): | |
938 """Consumes an unsigned 64bit integer number. | |
939 | |
940 Returns: | |
941 The integer parsed. | |
942 | |
943 Raises: | |
944 ParseError: If an unsigned 64bit integer couldn't be consumed. | |
945 """ | |
946 try: | |
947 result = ParseInteger(self.token, is_signed=False, is_long=True) | |
948 except ValueError as e: | |
949 raise self._ParseError(str(e)) | |
950 self.NextToken() | |
951 return result | |
952 | |
953 def TryConsumeFloat(self): | 1096 def TryConsumeFloat(self): |
954 try: | 1097 try: |
955 self.ConsumeFloat() | 1098 self.ConsumeFloat() |
956 return True | 1099 return True |
957 except ParseError: | 1100 except ParseError: |
958 return False | 1101 return False |
959 | 1102 |
960 def ConsumeFloat(self): | 1103 def ConsumeFloat(self): |
961 """Consumes an floating point number. | 1104 """Consumes an floating point number. |
962 | 1105 |
963 Returns: | 1106 Returns: |
964 The number parsed. | 1107 The number parsed. |
965 | 1108 |
966 Raises: | 1109 Raises: |
967 ParseError: If a floating point number couldn't be consumed. | 1110 ParseError: If a floating point number couldn't be consumed. |
968 """ | 1111 """ |
969 try: | 1112 try: |
970 result = ParseFloat(self.token) | 1113 result = ParseFloat(self.token) |
971 except ValueError as e: | 1114 except ValueError as e: |
972 raise self._ParseError(str(e)) | 1115 raise self.ParseError(str(e)) |
973 self.NextToken() | 1116 self.NextToken() |
974 return result | 1117 return result |
975 | 1118 |
976 def ConsumeBool(self): | 1119 def ConsumeBool(self): |
977 """Consumes a boolean value. | 1120 """Consumes a boolean value. |
978 | 1121 |
979 Returns: | 1122 Returns: |
980 The bool parsed. | 1123 The bool parsed. |
981 | 1124 |
982 Raises: | 1125 Raises: |
983 ParseError: If a boolean value couldn't be consumed. | 1126 ParseError: If a boolean value couldn't be consumed. |
984 """ | 1127 """ |
985 try: | 1128 try: |
986 result = ParseBool(self.token) | 1129 result = ParseBool(self.token) |
987 except ValueError as e: | 1130 except ValueError as e: |
988 raise self._ParseError(str(e)) | 1131 raise self.ParseError(str(e)) |
989 self.NextToken() | 1132 self.NextToken() |
990 return result | 1133 return result |
991 | 1134 |
992 def TryConsumeByteString(self): | 1135 def TryConsumeByteString(self): |
993 try: | 1136 try: |
994 self.ConsumeByteString() | 1137 self.ConsumeByteString() |
995 return True | 1138 return True |
996 except ParseError: | 1139 except ParseError: |
997 return False | 1140 return False |
998 | 1141 |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1032 tokens which are automatically concatenated, like in C or Python. This | 1175 tokens which are automatically concatenated, like in C or Python. This |
1033 method only consumes one token. | 1176 method only consumes one token. |
1034 | 1177 |
1035 Returns: | 1178 Returns: |
1036 The token parsed. | 1179 The token parsed. |
1037 Raises: | 1180 Raises: |
1038 ParseError: When the wrong format data is found. | 1181 ParseError: When the wrong format data is found. |
1039 """ | 1182 """ |
1040 text = self.token | 1183 text = self.token |
1041 if len(text) < 1 or text[0] not in _QUOTES: | 1184 if len(text) < 1 or text[0] not in _QUOTES: |
1042 raise self._ParseError('Expected string but found: %r' % (text,)) | 1185 raise self.ParseError('Expected string but found: %r' % (text,)) |
1043 | 1186 |
1044 if len(text) < 2 or text[-1] != text[0]: | 1187 if len(text) < 2 or text[-1] != text[0]: |
1045 raise self._ParseError('String missing ending quote: %r' % (text,)) | 1188 raise self.ParseError('String missing ending quote: %r' % (text,)) |
1046 | 1189 |
1047 try: | 1190 try: |
1048 result = text_encoding.CUnescape(text[1:-1]) | 1191 result = text_encoding.CUnescape(text[1:-1]) |
1049 except ValueError as e: | 1192 except ValueError as e: |
1050 raise self._ParseError(str(e)) | 1193 raise self.ParseError(str(e)) |
1051 self.NextToken() | 1194 self.NextToken() |
1052 return result | 1195 return result |
1053 | 1196 |
1054 def ConsumeEnum(self, field): | 1197 def ConsumeEnum(self, field): |
1055 try: | 1198 try: |
1056 result = ParseEnum(field, self.token) | 1199 result = ParseEnum(field, self.token) |
1057 except ValueError as e: | 1200 except ValueError as e: |
1058 raise self._ParseError(str(e)) | 1201 raise self.ParseError(str(e)) |
1059 self.NextToken() | 1202 self.NextToken() |
1060 return result | 1203 return result |
1061 | 1204 |
1062 def ParseErrorPreviousToken(self, message): | 1205 def ParseErrorPreviousToken(self, message): |
1063 """Creates and *returns* a ParseError for the previously read token. | 1206 """Creates and *returns* a ParseError for the previously read token. |
1064 | 1207 |
1065 Args: | 1208 Args: |
1066 message: A message to set for the exception. | 1209 message: A message to set for the exception. |
1067 | 1210 |
1068 Returns: | 1211 Returns: |
1069 A ParseError instance. | 1212 A ParseError instance. |
1070 """ | 1213 """ |
1071 return ParseError('%d:%d : %s' % ( | 1214 return ParseError(message, self._previous_line + 1, |
1072 self._previous_line + 1, self._previous_column + 1, message)) | 1215 self._previous_column + 1) |
1073 | 1216 |
1074 def _ParseError(self, message): | 1217 def ParseError(self, message): |
1075 """Creates and *returns* a ParseError for the current token.""" | 1218 """Creates and *returns* a ParseError for the current token.""" |
1076 return ParseError('%d:%d : %s' % ( | 1219 return ParseError(message, self._line + 1, self._column + 1) |
1077 self._line + 1, self._column + 1, message)) | |
1078 | 1220 |
1079 def _StringParseError(self, e): | 1221 def _StringParseError(self, e): |
1080 return self._ParseError('Couldn\'t parse string: ' + str(e)) | 1222 return self.ParseError('Couldn\'t parse string: ' + str(e)) |
1081 | 1223 |
1082 def NextToken(self): | 1224 def NextToken(self): |
1083 """Reads the next meaningful token.""" | 1225 """Reads the next meaningful token.""" |
1084 self._previous_line = self._line | 1226 self._previous_line = self._line |
1085 self._previous_column = self._column | 1227 self._previous_column = self._column |
1086 | 1228 |
1087 self._column += len(self.token) | 1229 self._column += len(self.token) |
1088 self._SkipWhitespace() | 1230 self._SkipWhitespace() |
1089 | 1231 |
1090 if not self._more_lines: | 1232 if not self._more_lines: |
1091 self.token = '' | 1233 self.token = '' |
1092 return | 1234 return |
1093 | 1235 |
1094 match = self._TOKEN.match(self._current_line, self._column) | 1236 match = self._TOKEN.match(self._current_line, self._column) |
| 1237 if not match and not self._skip_comments: |
| 1238 match = self._COMMENT.match(self._current_line, self._column) |
1095 if match: | 1239 if match: |
1096 token = match.group(0) | 1240 token = match.group(0) |
1097 self.token = token | 1241 self.token = token |
1098 else: | 1242 else: |
1099 self.token = self._current_line[self._column] | 1243 self.token = self._current_line[self._column] |
1100 | 1244 |
# Backwards-compatible alias: external code that reaches into this module's
# privates still accesses the tokenizer under its old underscored name.
# TODO(dbarnett): Migrate violators to textformat_tokenizer.
_Tokenizer = Tokenizer  # pylint: disable=invalid-name
| 1248 |
| 1249 |
def _ConsumeInt32(tokenizer):
  """Reads a signed 32bit integer token and advances the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 32bit integer couldn't be consumed.
  """
  # Delegate to the generic consumer with the 32-bit signed flavor.
  return _ConsumeInteger(tokenizer, is_long=False, is_signed=True)
| 1263 |
| 1264 |
def _ConsumeUint32(tokenizer):
  """Reads an unsigned 32bit integer token and advances the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 32bit integer couldn't be consumed.
  """
  # Delegate to the generic consumer with the 32-bit unsigned flavor.
  return _ConsumeInteger(tokenizer, is_long=False, is_signed=False)
| 1278 |
| 1279 |
def _TryConsumeInt64(tokenizer):
  """Attempts to consume a signed 64bit integer; returns True on success."""
  try:
    _ConsumeInt64(tokenizer)
  except ParseError:
    return False
  return True
| 1286 |
| 1287 |
def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)
| 1301 |
| 1302 |
def _TryConsumeUint64(tokenizer):
  """Attempts to consume an unsigned 64bit integer; True on success."""
  try:
    _ConsumeUint64(tokenizer)
  except ParseError:
    return False
  return True
| 1309 |
| 1310 |
def _ConsumeUint64(tokenizer):
  """Reads an unsigned 64bit integer token and advances the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 64bit integer couldn't be consumed.
  """
  # Delegate to the generic consumer with the 64-bit unsigned flavor.
  return _ConsumeInteger(tokenizer, is_long=True, is_signed=False)
| 1324 |
| 1325 |
def _TryConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Attempts to consume an integer of the given flavor; True on success."""
  try:
    _ConsumeInteger(tokenizer, is_signed=is_signed, is_long=is_long)
  except ParseError:
    return False
  return True
| 1332 |
| 1333 |
def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  token_text = tokenizer.token
  try:
    value = ParseInteger(token_text, is_signed=is_signed, is_long=is_long)
  except ValueError as e:
    # Re-raise with the tokenizer's position information attached.
    raise tokenizer.ParseError(str(e))
  tokenizer.NextToken()
  return value
| 1354 |
1101 | 1355 |
def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Parse first; a malformed literal propagates ValueError to the caller.
  value = _ParseAbstractInteger(text, is_long=is_long)

  # Range-check with the matching checker (index = 2*is_long + is_signed).
  # Out-of-range values raise from CheckValue; callers handle that.
  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
  checker.CheckValue(value)
  return value
| 1377 |
| 1378 |
| 1379 def _ParseAbstractInteger(text, is_long=False): |
| 1380 """Parses an integer without checking size/signedness. |
| 1381 |
| 1382 Args: |
| 1383 text: The text to parse. |
| 1384 is_long: True if the value should be returned as a long integer. |
| 1385 |
| 1386 Returns: |
| 1387 The integer value. |
| 1388 |
| 1389 Raises: |
| 1390 ValueError: Thrown Iff the text is not a valid integer. |
| 1391 """ |
| 1392 # Do the actual parsing. Exception handling is propagated to caller. |
1117 try: | 1393 try: |
1118 # We force 32-bit values to int and 64-bit values to long to make | 1394 # We force 32-bit values to int and 64-bit values to long to make |
1119 # alternate implementations where the distinction is more significant | 1395 # alternate implementations where the distinction is more significant |
1120 # (e.g. the C++ implementation) simpler. | 1396 # (e.g. the C++ implementation) simpler. |
1121 if is_long: | 1397 if is_long: |
1122 result = long(text, 0) | 1398 return long(text, 0) |
1123 else: | 1399 else: |
1124 result = int(text, 0) | 1400 return int(text, 0) |
1125 except ValueError: | 1401 except ValueError: |
1126 raise ValueError('Couldn\'t parse integer: %s' % text) | 1402 raise ValueError('Couldn\'t parse integer: %s' % text) |
1127 | 1403 |
1128 # Check if the integer is sane. Exceptions handled by callers. | |
1129 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)] | |
1130 checker.CheckValue(result) | |
1131 return result | |
1132 | |
1133 | 1404 |
1134 def ParseFloat(text): | 1405 def ParseFloat(text): |
1135 """Parse a floating point number. | 1406 """Parse a floating point number. |
1136 | 1407 |
1137 Args: | 1408 Args: |
1138 text: Text to parse. | 1409 text: Text to parse. |
1139 | 1410 |
1140 Returns: | 1411 Returns: |
1141 The number parsed. | 1412 The number parsed. |
1142 | 1413 |
(...skipping 25 matching lines...) Expand all Loading... |
1168 | 1439 |
1169 Args: | 1440 Args: |
1170 text: Text to parse. | 1441 text: Text to parse. |
1171 | 1442 |
1172 Returns: | 1443 Returns: |
1173 Boolean values parsed | 1444 Boolean values parsed |
1174 | 1445 |
1175 Raises: | 1446 Raises: |
1176 ValueError: If text is not a valid boolean. | 1447 ValueError: If text is not a valid boolean. |
1177 """ | 1448 """ |
1178 if text in ('true', 't', '1'): | 1449 if text in ('true', 't', '1', 'True'): |
1179 return True | 1450 return True |
1180 elif text in ('false', 'f', '0'): | 1451 elif text in ('false', 'f', '0', 'False'): |
1181 return False | 1452 return False |
1182 else: | 1453 else: |
1183 raise ValueError('Expected "true" or "false".') | 1454 raise ValueError('Expected "true" or "false".') |
1184 | 1455 |
1185 | 1456 |
def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # The text is not numeric, so treat it as an enum value name.
    found = descriptor.values_by_name.get(value, None)
    if found is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (descriptor.full_name, value))
    return found.number
  # Numeric form: the number must match a declared value.
  found = descriptor.values_by_number.get(number, None)
  if found is None:
    raise ValueError('Enum type "%s" has no value with number %d.' %
                     (descriptor.full_name, number))
  return found.number
OLD | NEW |