| Index: third_party/protobuf/python/google/protobuf/text_format.py
|
| diff --git a/third_party/protobuf/python/google/protobuf/text_format.py b/third_party/protobuf/python/google/protobuf/text_format.py
|
| index cc6ac90262c3c09269ca5d46d8c9062c3e23f8e5..c3a1cf602daed3437d7a08b4aaaa6058e879ff0b 100755
|
| --- a/third_party/protobuf/python/google/protobuf/text_format.py
|
| +++ b/third_party/protobuf/python/google/protobuf/text_format.py
|
| @@ -53,24 +53,26 @@ class ParseError(Exception):
|
| """Thrown in case of ASCII parsing error."""
|
|
|
|
|
| -def MessageToString(message):
|
| +def MessageToString(message, as_utf8=False, as_one_line=False):
|
| out = cStringIO.StringIO()
|
| - PrintMessage(message, out)
|
| + PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line)
|
| result = out.getvalue()
|
| out.close()
|
| + if as_one_line:
|
| + return result.rstrip()
|
| return result
|
|
|
|
|
| -def PrintMessage(message, out, indent = 0):
|
| +def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False):
|
| for field, value in message.ListFields():
|
| if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
|
| for element in value:
|
| - PrintField(field, element, out, indent)
|
| + PrintField(field, element, out, indent, as_utf8, as_one_line)
|
| else:
|
| - PrintField(field, value, out, indent)
|
| + PrintField(field, value, out, indent, as_utf8, as_one_line)
|
|
|
|
|
| -def PrintField(field, value, out, indent = 0):
|
| +def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False):
|
| """Print a single field name/value pair. For repeated fields, the value
|
| should be a single element."""
|
|
|
| @@ -96,23 +98,35 @@ def PrintField(field, value, out, indent = 0):
|
| # don't include it.
|
| out.write(': ')
|
|
|
| - PrintFieldValue(field, value, out, indent)
|
| - out.write('\n')
|
| + PrintFieldValue(field, value, out, indent, as_utf8, as_one_line)
|
| + if as_one_line:
|
| + out.write(' ')
|
| + else:
|
| + out.write('\n')
|
|
|
|
|
| -def PrintFieldValue(field, value, out, indent = 0):
|
| +def PrintFieldValue(field, value, out, indent=0,
|
| + as_utf8=False, as_one_line=False):
|
| """Print a single field value (not including name). For repeated fields,
|
| the value should be a single element."""
|
|
|
| if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
|
| - out.write(' {\n')
|
| - PrintMessage(value, out, indent + 2)
|
| - out.write(' ' * indent + '}')
|
| + if as_one_line:
|
| + out.write(' { ')
|
| + PrintMessage(value, out, indent, as_utf8, as_one_line)
|
| + out.write('}')
|
| + else:
|
| + out.write(' {\n')
|
| + PrintMessage(value, out, indent + 2, as_utf8, as_one_line)
|
| + out.write(' ' * indent + '}')
|
| elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
|
| out.write(field.enum_type.values_by_number[value].name)
|
| elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
|
| out.write('\"')
|
| - out.write(_CEscape(value))
|
| + if type(value) is unicode:
|
| + out.write(_CEscape(value.encode('utf-8'), as_utf8))
|
| + else:
|
| + out.write(_CEscape(value, as_utf8))
|
| out.write('\"')
|
| elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
|
| if value:
|
| @@ -208,7 +222,7 @@ def _MergeField(tokenizer, message):
|
| sub_message = message.Extensions[field]
|
| else:
|
| sub_message = getattr(message, field.name)
|
| - sub_message.SetInParent()
|
| + sub_message.SetInParent()
|
|
|
| while not tokenizer.TryConsume(end_token):
|
| if tokenizer.AtEnd():
|
| @@ -334,10 +348,10 @@ class _Tokenizer(object):
|
| Returns:
|
| True iff the end was reached.
|
| """
|
| - return not self._lines and not self._current_line
|
| + return self.token == ''
|
|
|
| def _PopLine(self):
|
| - while not self._current_line:
|
| + while len(self._current_line) <= self._column:
|
| if not self._lines:
|
| self._current_line = ''
|
| return
|
| @@ -348,11 +362,10 @@ class _Tokenizer(object):
|
| def _SkipWhitespace(self):
|
| while True:
|
| self._PopLine()
|
| - match = re.match(self._WHITESPACE, self._current_line)
|
| + match = self._WHITESPACE.match(self._current_line, self._column)
|
| if not match:
|
| break
|
| length = len(match.group(0))
|
| - self._current_line = self._current_line[length:]
|
| self._column += length
|
|
|
| def TryConsume(self, token):
|
| @@ -402,7 +415,7 @@ class _Tokenizer(object):
|
| ParseError: If an identifier couldn't be consumed.
|
| """
|
| result = self.token
|
| - if not re.match(self._IDENTIFIER, result):
|
| + if not self._IDENTIFIER.match(result):
|
| raise self._ParseError('Expected identifier.')
|
| self.NextToken()
|
| return result
|
| @@ -481,13 +494,13 @@ class _Tokenizer(object):
|
| ParseError: If a floating point number couldn't be consumed.
|
| """
|
| text = self.token
|
| - if re.match(self._FLOAT_INFINITY, text):
|
| + if self._FLOAT_INFINITY.match(text):
|
| self.NextToken()
|
| if text.startswith('-'):
|
| return -_INFINITY
|
| return _INFINITY
|
|
|
| - if re.match(self._FLOAT_NAN, text):
|
| + if self._FLOAT_NAN.match(text):
|
| self.NextToken()
|
| return _NAN
|
|
|
| @@ -507,10 +520,10 @@ class _Tokenizer(object):
|
| Raises:
|
| ParseError: If a boolean value couldn't be consumed.
|
| """
|
| - if self.token == 'true':
|
| + if self.token in ('true', 't', '1'):
|
| self.NextToken()
|
| return True
|
| - elif self.token == 'false':
|
| + elif self.token in ('false', 'f', '0'):
|
| self.NextToken()
|
| return False
|
| else:
|
| @@ -525,7 +538,11 @@ class _Tokenizer(object):
|
| Raises:
|
| ParseError: If a string value couldn't be consumed.
|
| """
|
| - return unicode(self.ConsumeByteString(), 'utf-8')
|
| + bytes = self.ConsumeByteString()
|
| + try:
|
| + return unicode(bytes, 'utf-8')
|
| + except UnicodeDecodeError, e:
|
| + raise self._StringParseError(e)
|
|
|
| def ConsumeByteString(self):
|
| """Consumes a byte array value.
|
| @@ -609,7 +626,7 @@ class _Tokenizer(object):
|
| def _ParseError(self, message):
|
| """Creates and *returns* a ParseError for the current token."""
|
| return ParseError('%d:%d : %s' % (
|
| - self._line + 1, self._column + 1, message))
|
| + self._line + 1, self._column - len(self.token) + 1, message))
|
|
|
| def _IntegerParseError(self, e):
|
| return self._ParseError('Couldn\'t parse integer: ' + str(e))
|
| @@ -617,27 +634,27 @@ class _Tokenizer(object):
|
| def _FloatParseError(self, e):
|
| return self._ParseError('Couldn\'t parse number: ' + str(e))
|
|
|
| + def _StringParseError(self, e):
|
| + return self._ParseError('Couldn\'t parse string: ' + str(e))
|
| +
|
| def NextToken(self):
|
| """Reads the next meaningful token."""
|
| self._previous_line = self._line
|
| self._previous_column = self._column
|
| - if self.AtEnd():
|
| - self.token = ''
|
| - return
|
| +
|
| self._column += len(self.token)
|
| + self._SkipWhitespace()
|
|
|
| - # Make sure there is data to work on.
|
| - self._PopLine()
|
| + if not self._lines and len(self._current_line) <= self._column:
|
| + self.token = ''
|
| + return
|
|
|
| - match = re.match(self._TOKEN, self._current_line)
|
| + match = self._TOKEN.match(self._current_line, self._column)
|
| if match:
|
| token = match.group(0)
|
| - self._current_line = self._current_line[len(token):]
|
| self.token = token
|
| else:
|
| - self.token = self._current_line[0]
|
| - self._current_line = self._current_line[1:]
|
| - self._SkipWhitespace()
|
| + self.token = self._current_line[self._column]
|
|
|
|
|
| # text.encode('string_escape') does not seem to satisfy our needs as it
|
| @@ -645,7 +662,7 @@ class _Tokenizer(object):
|
| # C++ unescaping function allows hex escapes to be any length. So,
|
| # "\0011".encode('string_escape') ends up being "\\x011", which will be
|
| # decoded in C++ as a single-character string with char code 0x11.
|
| -def _CEscape(text):
|
| +def _CEscape(text, as_utf8):
|
| def escape(c):
|
| o = ord(c)
|
| if o == 10: return r"\n" # optional escape
|
| @@ -656,12 +673,13 @@ def _CEscape(text):
|
| if o == 34: return r'\"' # necessary escape
|
| if o == 92: return r"\\" # necessary escape
|
|
|
| - if o >= 127 or o < 32: return "\\%03o" % o # necessary escapes
|
| + # necessary escapes
|
| + if not as_utf8 and (o >= 127 or o < 32): return "\\%03o" % o
|
| return c
|
| return "".join([escape(c) for c in text])
|
|
|
|
|
| -_CUNESCAPE_HEX = re.compile('\\\\x([0-9a-fA-F]{2}|[0-9a-f-A-F])')
|
| +_CUNESCAPE_HEX = re.compile('\\\\x([0-9a-fA-F]{2}|[0-9a-fA-F])')
|
|
|
|
|
| def _CUnescape(text):
|
|
|