third_party/protobuf/python/google/protobuf/text_format.py - Issue 6737030: third_party/protobuf: update to upstream r371

Unified Diff: third_party/protobuf/python/google/protobuf/text_format.py

Issue 6737030: third_party/protobuf: update to upstream r371 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 9 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: third_party/protobuf/python/google/protobuf/text_format.py

diff --git a/third_party/protobuf/python/google/protobuf/text_format.py b/third_party/protobuf/python/google/protobuf/text_format.py

index cc6ac90262c3c09269ca5d46d8c9062c3e23f8e5..c3a1cf602daed3437d7a08b4aaaa6058e879ff0b 100755

--- a/third_party/protobuf/python/google/protobuf/text_format.py

+++ b/third_party/protobuf/python/google/protobuf/text_format.py

@@ -53,24 +53,26 @@ class ParseError(Exception):

"""Thrown in case of ASCII parsing error."""

-def MessageToString(message):

+def MessageToString(message, as_utf8=False, as_one_line=False):

out = cStringIO.StringIO()

- PrintMessage(message, out)

+ PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line)

result = out.getvalue()

out.close()

+ if as_one_line:

+ return result.rstrip()

return result

-def PrintMessage(message, out, indent = 0):

+def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False):

for field, value in message.ListFields():

if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:

for element in value:

- PrintField(field, element, out, indent)

+ PrintField(field, element, out, indent, as_utf8, as_one_line)

else:

- PrintField(field, value, out, indent)

+ PrintField(field, value, out, indent, as_utf8, as_one_line)

-def PrintField(field, value, out, indent = 0):

+def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False):

"""Print a single field name/value pair. For repeated fields, the value

should be a single element."""

@@ -96,23 +98,35 @@ def PrintField(field, value, out, indent = 0):

# don't include it.

out.write(': ')

- PrintFieldValue(field, value, out, indent)

- out.write('\n')

+ PrintFieldValue(field, value, out, indent, as_utf8, as_one_line)

+ if as_one_line:

+ out.write(' ')

+ else:

+ out.write('\n')

-def PrintFieldValue(field, value, out, indent = 0):

+def PrintFieldValue(field, value, out, indent=0,

+ as_utf8=False, as_one_line=False):

"""Print a single field value (not including name). For repeated fields,

the value should be a single element."""

if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:

- out.write(' {\n')

- PrintMessage(value, out, indent + 2)

- out.write(' ' * indent + '}')

+ if as_one_line:

+ out.write(' { ')

+ PrintMessage(value, out, indent, as_utf8, as_one_line)

+ out.write('}')

+ else:

+ out.write(' {\n')

+ PrintMessage(value, out, indent + 2, as_utf8, as_one_line)

+ out.write(' ' * indent + '}')

elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:

out.write(field.enum_type.values_by_number[value].name)

elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:

out.write('\"')

- out.write(_CEscape(value))

+ if type(value) is unicode:

+ out.write(_CEscape(value.encode('utf-8'), as_utf8))

+ else:

+ out.write(_CEscape(value, as_utf8))

out.write('\"')

elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:

if value:

@@ -208,7 +222,7 @@ def _MergeField(tokenizer, message):

sub_message = message.Extensions[field]

else:

sub_message = getattr(message, field.name)

- sub_message.SetInParent()

+ sub_message.SetInParent()

while not tokenizer.TryConsume(end_token):

if tokenizer.AtEnd():

@@ -334,10 +348,10 @@ class _Tokenizer(object):

Returns:

True iff the end was reached.

"""

- return not self._lines and not self._current_line

+ return self.token == ''

def _PopLine(self):

- while not self._current_line:

+ while len(self._current_line) <= self._column:

if not self._lines:

self._current_line = ''

return

@@ -348,11 +362,10 @@ class _Tokenizer(object):

def _SkipWhitespace(self):

while True:

self._PopLine()

- match = re.match(self._WHITESPACE, self._current_line)

+ match = self._WHITESPACE.match(self._current_line, self._column)

if not match:

break

length = len(match.group(0))

- self._current_line = self._current_line[length:]

self._column += length

def TryConsume(self, token):

@@ -402,7 +415,7 @@ class _Tokenizer(object):

ParseError: If an identifier couldn't be consumed.

"""

result = self.token

- if not re.match(self._IDENTIFIER, result):

+ if not self._IDENTIFIER.match(result):

raise self._ParseError('Expected identifier.')

self.NextToken()

return result

@@ -481,13 +494,13 @@ class _Tokenizer(object):

ParseError: If a floating point number couldn't be consumed.

"""

text = self.token

- if re.match(self._FLOAT_INFINITY, text):

+ if self._FLOAT_INFINITY.match(text):

self.NextToken()

if text.startswith('-'):

return -_INFINITY

return _INFINITY

- if re.match(self._FLOAT_NAN, text):

+ if self._FLOAT_NAN.match(text):

self.NextToken()

return _NAN

@@ -507,10 +520,10 @@ class _Tokenizer(object):

Raises:

ParseError: If a boolean value couldn't be consumed.

"""

- if self.token == 'true':

+ if self.token in ('true', 't', '1'):

self.NextToken()

return True

- elif self.token == 'false':

+ elif self.token in ('false', 'f', '0'):

self.NextToken()

return False

else:

@@ -525,7 +538,11 @@ class _Tokenizer(object):

Raises:

ParseError: If a string value couldn't be consumed.

"""

- return unicode(self.ConsumeByteString(), 'utf-8')

+ bytes = self.ConsumeByteString()

+ try:

+ return unicode(bytes, 'utf-8')

+ except UnicodeDecodeError, e:

+ raise self._StringParseError(e)

def ConsumeByteString(self):

"""Consumes a byte array value.

@@ -609,7 +626,7 @@ class _Tokenizer(object):

def _ParseError(self, message):

"""Creates and *returns* a ParseError for the current token."""

return ParseError('%d:%d : %s' % (

- self._line + 1, self._column + 1, message))

+ self._line + 1, self._column - len(self.token) + 1, message))

def _IntegerParseError(self, e):

return self._ParseError('Couldn\'t parse integer: ' + str(e))

@@ -617,27 +634,27 @@ class _Tokenizer(object):

def _FloatParseError(self, e):

return self._ParseError('Couldn\'t parse number: ' + str(e))

+ def _StringParseError(self, e):

+ return self._ParseError('Couldn\'t parse string: ' + str(e))

def NextToken(self):

"""Reads the next meaningful token."""

self._previous_line = self._line

self._previous_column = self._column

- if self.AtEnd():

- self.token = ''

- return

self._column += len(self.token)

+ self._SkipWhitespace()

- # Make sure there is data to work on.

- self._PopLine()

+ if not self._lines and len(self._current_line) <= self._column:

+ self.token = ''

+ return

- match = re.match(self._TOKEN, self._current_line)

+ match = self._TOKEN.match(self._current_line, self._column)

if match:

token = match.group(0)

- self._current_line = self._current_line[len(token):]

self.token = token

else:

- self.token = self._current_line[0]

- self._current_line = self._current_line[1:]

- self._SkipWhitespace()

+ self.token = self._current_line[self._column]

# text.encode('string_escape') does not seem to satisfy our needs as it

@@ -645,7 +662,7 @@ class _Tokenizer(object):

# C++ unescaping function allows hex escapes to be any length. So,

# "\0011".encode('string_escape') ends up being "\\x011", which will be

# decoded in C++ as a single-character string with char code 0x11.

-def _CEscape(text):

+def _CEscape(text, as_utf8):

def escape(c):

o = ord(c)

if o == 10: return r"\n" # optional escape

@@ -656,12 +673,13 @@ def _CEscape(text):

if o == 34: return r'\"' # necessary escape

if o == 92: return r"\\" # necessary escape

- if o >= 127 or o < 32: return "\\%03o" % o # necessary escapes

+ # necessary escapes

+ if not as_utf8 and (o >= 127 or o < 32): return "\\%03o" % o

return c

return "".join([escape(c) for c in text])

-_CUNESCAPE_HEX = re.compile('\\\\x([0-9a-fA-F]{2}|[0-9a-f-A-F])')

+_CUNESCAPE_HEX = re.compile('\\\\x([0-9a-fA-F]{2}|[0-9a-fA-F])')

def _CUnescape(text):

« no previous file with comments | « third_party/protobuf/python/google/protobuf/reflection.py ('k') | third_party/protobuf/python/setup.py » ('j') | no next file with comments »