third_party/google/protobuf/text_encoding.py - Issue 1162993005: Renamed google.protobuf to protobuf26

Side by Side Diff: third_party/google/protobuf/text_encoding.py

Issue 1162993005: Renamed google.protobuf to protobuf26 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools

Patch Set: Addressed comments Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 # Protocol Buffers - Google's data interchange format

2 # Copyright 2008 Google Inc. All rights reserved.

3 # http://code.google.com/p/protobuf/

4 #

5 # Redistribution and use in source and binary forms, with or without

6 # modification, are permitted provided that the following conditions are

7 # met:

8 #

9 # * Redistributions of source code must retain the above copyright

10 # notice, this list of conditions and the following disclaimer.

11 # * Redistributions in binary form must reproduce the above

12 # copyright notice, this list of conditions and the following disclaimer

13 # in the documentation and/or other materials provided with the

14 # distribution.

15 # * Neither the name of Google Inc. nor the names of its

16 # contributors may be used to endorse or promote products derived from

17 # this software without specific prior written permission.

18 #

19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

30

31 #PY25 compatible for GAE.

32 #

33 """Encoding related utilities."""

34

35 import re

36 import sys ##PY25

37

# Lookup table for utf8: every byte value maps to the character with the
# same code point, except for the escapes overridden below.
# NOTE: built with range() rather than xrange() so the module can be
# imported on Python 3 as well as Python 2 (xrange no longer exists there).
_cescape_utf8_to_str = [chr(i) for i in range(0, 256)]
_cescape_utf8_to_str[9] = r'\t'  # optional escape
_cescape_utf8_to_str[10] = r'\n'  # optional escape
_cescape_utf8_to_str[13] = r'\r'  # optional escape
_cescape_utf8_to_str[39] = r"\'"  # optional escape

_cescape_utf8_to_str[34] = r'\"'  # necessary escape
_cescape_utf8_to_str[92] = r'\\'  # necessary escape

# Lookup table for non-utf8, with necessary escapes at (o >= 127 or o < 32).
# Those byte values are rendered as three-digit octal escapes; printable
# ASCII (32..126) maps to itself unless overridden below.
_cescape_byte_to_str = ([r'\%03o' % i for i in range(0, 32)] +
                        [chr(i) for i in range(32, 127)] +
                        [r'\%03o' % i for i in range(127, 256)])
_cescape_byte_to_str[9] = r'\t'  # optional escape
_cescape_byte_to_str[10] = r'\n'  # optional escape
_cescape_byte_to_str[13] = r'\r'  # optional escape
_cescape_byte_to_str[39] = r"\'"  # optional escape

_cescape_byte_to_str[34] = r'\"'  # necessary escape
_cescape_byte_to_str[92] = r'\\'  # necessary escape

59

60

def CEscape(text, as_utf8):
  r"""Escape a bytes string for use in an ascii protocol buffer.

  text.encode('string_escape') does not seem to satisfy our needs as it
  encodes unprintable characters using two-digit hex escapes whereas our
  C++ unescaping function allows hex escapes to be any length.  So,
  "\0011".encode('string_escape') ends up being "\\x011", which will be
  decoded in C++ as a single-character string with char code 0x11.

  Args:
    text: A byte string to be escaped.  Text strings are accepted as well;
      every element is looked up in a 256-entry table by its ordinal.
    as_utf8: Specifies if result should be returned in UTF-8 encoding.  When
      true, only the quote/backslash/whitespace escapes are applied; when
      false, values below 32 and from 127 up are also octal-escaped.
  Returns:
    Escaped string
  """
  # Iterating a text string yields 1-character strings that need ord(),
  # whereas iterating Python 3 bytes already yields ints.  `basestring`
  # (str + unicode) only exists on Python 2, so fall back to `str` on
  # Python 3 instead of raising NameError.
  try:
    text_types = basestring  # PY2: covers both str and unicode.
  except NameError:
    text_types = str  # PY3: bytes fall through to the identity function.
  Ord = ord if isinstance(text, text_types) else lambda x: x
  table = _cescape_utf8_to_str if as_utf8 else _cescape_byte_to_str
  return ''.join(table[Ord(c)] for c in text)

82

83

# Matches a single-digit hex escape such as '\xf' that is not followed by a
# second hex digit.  Group 1 is the run of backslashes preceding the 'x',
# group 2 is the lone hex digit.
_CUNESCAPE_HEX = re.compile(r'(\\+)x([0-9a-fA-F])(?![0-9a-fA-F])')
# Maps code points 0..126 to themselves and 127..255 to octal escapes.
# CUnescape no longer consults this table (it encodes through UTF-8 instead);
# the name is kept so that any existing reference to it still resolves.
_cescape_highbit_to_str = ([chr(i) for i in range(0, 127)] +
                           [r'\%03o' % i for i in range(127, 256)])


def CUnescape(text):
  """Unescape a text string with C-style escape sequences to UTF-8 bytes.

  Args:
    text: A text string that may contain C-style escape sequences
      (including single-digit hex escapes such as a backslash followed
      by 'xf').
  Returns:
    A byte string with the escapes resolved.  On Python 3, non-ASCII
    characters in `text` are emitted as their UTF-8 encoding.
  """

  def ReplaceHex(m):
    # Only replace the match if the number of leading back slashes is odd. i.e.
    # the slash itself is not escaped.
    if len(m.group(1)) & 1:
      return m.group(1) + 'x0' + m.group(2)
    return m.group(0)

  # This is required because the 'string_escape' encoding doesn't
  # allow single-digit hex escapes (like '\xf').
  result = _CUNESCAPE_HEX.sub(ReplaceHex, text)

  if sys.version_info[0] < 3:  ##PY25
##!PY25  if str is bytes:  # PY2
    return result.decode('string_escape')
  # Encode through UTF-8 rather than ASCII: 'unicode_escape' treats every
  # non-escape byte as Latin-1 and 'raw_unicode_escape' writes those code
  # points back as the same bytes, so the UTF-8 sequences survive the round
  # trip intact.  Mapping characters through a 256-entry table instead would
  # emit Latin-1 bytes for 128..255 and fail for anything above 255.
  return (result.encode('utf-8')  # Make it bytes to allow decode.
          .decode('unicode_escape')
          # Make it bytes again to return the proper type.
          .encode('raw_unicode_escape'))

OLD	NEW

« no previous file with comments | « third_party/google/protobuf/symbol_database.py ('k') | third_party/google/protobuf/text_format.py » ('j') | no next file with comments »