third_party/google-endpoints/future/backports/email/quoprimime.py - Issue 2666783008: Add google-endpoints to third_party/.

Side by Side Diff: third_party/google-endpoints/future/backports/email/quoprimime.py

Issue 2666783008: Add google-endpoints to third_party/. (Closed)

Patch Set: Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « third_party/google-endpoints/future/backports/email/policy.py ('k') | third_party/google-endpoints/future/backports/email/utils.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 # Copyright (C) 2001-2006 Python Software Foundation

	2 # Author: Ben Gertzfield

	3 # Contact: email-sig@python.org

	4

	5 """Quoted-printable content transfer encoding per RFCs 2045-2047.

	6

	7 This module handles the content transfer encoding method defined in RFC 2045

	8 to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to

	9 safely encode text that is in a character set similar to the 7-bit US ASCII

	10 character set, but that includes some 8-bit characters that are normally not

	11 allowed in email bodies or headers.

	12

	13 Quoted-printable is very space-inefficient for encoding binary files; use the

	14 email.base64mime module for that instead.

	15

	16 This module provides an interface to encode and decode both headers and bodies

	17 with quoted-printable encoding.

	18

	19 RFC 2045 defines a method for including character set information in an

	20 `encoded-word' in a header. This method is commonly used for 8-bit real names

	21 in To:/From:/Cc: etc. fields, as well as Subject: lines.

	22

	23 This module does not do the line wrapping or end-of-line character

	24 conversion necessary for proper internationalized headers; it only

	25 does dumb encoding and decoding. To deal with the various line

	26 wrapping issues, use the email.header module.

	27 """

	28 from __future__ import unicode_literals

	29 from __future__ import division

	30 from __future__ import absolute_import

	31 from future.builtins import bytes, chr, dict, int, range, super

	32

	33 __all__ = [

	34 'body_decode',

	35 'body_encode',

	36 'body_length',

	37 'decode',

	38 'decodestring',

	39 'header_decode',

	40 'header_encode',

	41 'header_length',

	42 'quote',

	43 'unquote',

	44 ]

	45

	46 import re

	47 import io

	48

	49 from string import ascii_letters, digits, hexdigits

	50

	51 CRLF = '\r\n'

	52 NL = '\n'

	53 EMPTYSTRING = ''

	54

	55 # Build a mapping of octets to the expansion of that octet. Since we're only

	56 # going to have 256 of these things, this isn't terribly inefficient

	57 # space-wise. Remember that headers and bodies have different sets of safe

	58 # characters. Initialize both maps with the full expansion, and then override

	59 # the safe bytes with the more compact form.

	60 _QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))

	61 _QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()

	62

	63 # Safe header bytes which need no encoding.

	64 for c in bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii') ):

	65 _QUOPRI_HEADER_MAP[c] = chr(c)

	66 # Headers have one other special encoding; spaces become underscores.

	67 _QUOPRI_HEADER_MAP[ord(' ')] = '_'

	68

	69 # Safe body bytes which need no encoding.

	70 for c in bytes(b' !"#$%&\'()*+,-./0123456789:;<>'

	71 b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'

	72 b'abcdefghijklmnopqrstuvwxyz{\|}~\t'):

	73 _QUOPRI_BODY_MAP[c] = chr(c)

	74

	75

	76

	77 # Helpers

	78 def header_check(octet):

	79 """Return True if the octet should be escaped with header quopri."""

	80 return chr(octet) != _QUOPRI_HEADER_MAP[octet]

	81

	82

	83 def body_check(octet):

	84 """Return True if the octet should be escaped with body quopri."""

	85 return chr(octet) != _QUOPRI_BODY_MAP[octet]

	86

	87

	88 def header_length(bytearray):

	89 """Return a header quoted-printable encoding length.

	90

	91 Note that this does not include any RFC 2047 chrome added by

	92 `header_encode()`.

	93

	94 :param bytearray: An array of bytes (a.k.a. octets).

	95 :return: The length in bytes of the byte array when it is encoded with

	96 quoted-printable for headers.

	97 """

	98 return sum(len(_QUOPRI_HEADER_MAP[octet]) for octet in bytearray)

	99

	100

	101 def body_length(bytearray):

	102 """Return a body quoted-printable encoding length.

	103

	104 :param bytearray: An array of bytes (a.k.a. octets).

	105 :return: The length in bytes of the byte array when it is encoded with

	106 quoted-printable for bodies.

	107 """

	108 return sum(len(_QUOPRI_BODY_MAP[octet]) for octet in bytearray)

	109

	110

	111 def _max_append(L, s, maxlen, extra=''):

	112 if not isinstance(s, str):

	113 s = chr(s)

	114 if not L:

	115 L.append(s.lstrip())

	116 elif len(L[-1]) + len(s) <= maxlen:

	117 L[-1] += extra + s

	118 else:

	119 L.append(s.lstrip())

	120

	121

	122 def unquote(s):

	123 """Turn a string in the form =AB to the ASCII character with value 0xab"""

	124 return chr(int(s[1:3], 16))

	125

	126

	127 def quote(c):

	128 return '=%02X' % ord(c)

	129

	130

	131

	132 def header_encode(header_bytes, charset='iso-8859-1'):

	133 """Encode a single header line with quoted-printable (like) encoding.

	134

	135 Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but

	136 used specifically for email header fields to allow charsets with mostly 7

	137 bit characters (and some 8 bit) to remain more or less readable in non-RFC

	138 2045 aware mail clients.

	139

	140 charset names the character set to use in the RFC 2046 header. It

	141 defaults to iso-8859-1.

	142 """

	143 # Return empty headers as an empty string.

	144 if not header_bytes:

	145 return ''

	146 # Iterate over every byte, encoding if necessary.

	147 encoded = []

	148 for octet in header_bytes:

	149 encoded.append(_QUOPRI_HEADER_MAP[octet])

	150 # Now add the RFC chrome to each encoded chunk and glue the chunks

	151 # together.

	152 return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded))

	153

	154

	155 class _body_accumulator(io.StringIO):

	156

	157 def __init__(self, maxlinelen, eol, args, *kw):

	158 super().__init__(args, *kw)

	159 self.eol = eol

	160 self.maxlinelen = self.room = maxlinelen

	161

	162 def write_str(self, s):

	163 """Add string s to the accumulated body."""

	164 self.write(s)

	165 self.room -= len(s)

	166

	167 def newline(self):

	168 """Write eol, then start new line."""

	169 self.write_str(self.eol)

	170 self.room = self.maxlinelen

	171

	172 def write_soft_break(self):

	173 """Write a soft break, then start a new line."""

	174 self.write_str('=')

	175 self.newline()

	176

	177 def write_wrapped(self, s, extra_room=0):

	178 """Add a soft line break if needed, then write s."""

	179 if self.room < len(s) + extra_room:

	180 self.write_soft_break()

	181 self.write_str(s)

	182

	183 def write_char(self, c, is_last_char):

	184 if not is_last_char:

	185 # Another character follows on this line, so we must leave

	186 # extra room, either for it or a soft break, and whitespace

	187 # need not be quoted.

	188 self.write_wrapped(c, extra_room=1)

	189 elif c not in ' \t':

	190 # For this and remaining cases, no more characters follow,

	191 # so there is no need to reserve extra room (since a hard

	192 # break will immediately follow).

	193 self.write_wrapped(c)

	194 elif self.room >= 3:

	195 # It's a whitespace character at end-of-line, and we have room

	196 # for the three-character quoted encoding.

	197 self.write(quote(c))

	198 elif self.room == 2:

	199 # There's room for the whitespace character and a soft break.

	200 self.write(c)

	201 self.write_soft_break()

	202 else:

	203 # There's room only for a soft break. The quoted whitespace

	204 # will be the only content on the subsequent line.

	205 self.write_soft_break()

	206 self.write(quote(c))

	207

	208

	209 def body_encode(body, maxlinelen=76, eol=NL):

	210 """Encode with quoted-printable, wrapping at maxlinelen characters.

	211

	212 Each line of encoded text will end with eol, which defaults to "\\n". Set

	213 this to "\\r\\n" if you will be using the result of this function directly

	214 in an email.

	215

	216 Each line will be wrapped at, at most, maxlinelen characters before the

	217 eol string (maxlinelen defaults to 76 characters, the maximum value

	218 permitted by RFC 2045). Long lines will have the 'soft line break'

	219 quoted-printable character "=" appended to them, so the decoded text will

	220 be identical to the original text.

	221

	222 The minimum maxlinelen is 4 to have room for a quoted character ("=XX")

	223 followed by a soft line break. Smaller values will generate a

	224 ValueError.

	225

	226 """

	227

	228 if maxlinelen < 4:

	229 raise ValueError("maxlinelen must be at least 4")

	230 if not body:

	231 return body

	232

	233 # The last line may or may not end in eol, but all other lines do.

	234 last_has_eol = (body[-1] in '\r\n')

	235

	236 # This accumulator will make it easier to build the encoded body.

	237 encoded_body = _body_accumulator(maxlinelen, eol)

	238

	239 lines = body.splitlines()

	240 last_line_no = len(lines) - 1

	241 for line_no, line in enumerate(lines):

	242 last_char_index = len(line) - 1

	243 for i, c in enumerate(line):

	244 if body_check(ord(c)):

	245 c = quote(c)

	246 encoded_body.write_char(c, i==last_char_index)

	247 # Add an eol if input line had eol. All input lines have eol except

	248 # possibly the last one.

	249 if line_no < last_line_no or last_has_eol:

	250 encoded_body.newline()

	251

	252 return encoded_body.getvalue()

	253

	254

	255

	256 # BAW: I'm not sure if the intent was for the signature of this function to be

	257 # the same as base64MIME.decode() or not...

	258 def decode(encoded, eol=NL):

	259 """Decode a quoted-printable string.

	260

	261 Lines are separated with eol, which defaults to \\n.

	262 """

	263 if not encoded:

	264 return encoded

	265 # BAW: see comment in encode() above. Again, we're building up the

	266 # decoded string with string concatenation, which could be done much more

	267 # efficiently.

	268 decoded = ''

	269

	270 for line in encoded.splitlines():

	271 line = line.rstrip()

	272 if not line:

	273 decoded += eol

	274 continue

	275

	276 i = 0

	277 n = len(line)

	278 while i < n:

	279 c = line[i]

	280 if c != '=':

	281 decoded += c

	282 i += 1

	283 # Otherwise, c == "=". Are we at the end of the line? If so, add

	284 # a soft line break.

	285 elif i+1 == n:

	286 i += 1

	287 continue

	288 # Decode if in form =AB

	289 elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:

	290 decoded += unquote(line[i:i+3])

	291 i += 3

	292 # Otherwise, not in form =AB, pass literally

	293 else:

	294 decoded += c

	295 i += 1

	296

	297 if i == n:

	298 decoded += eol

	299 # Special case if original string did not end with eol

	300 if encoded[-1] not in '\r\n' and decoded.endswith(eol):

	301 decoded = decoded[:-1]

	302 return decoded

	303

	304

	305 # For convenience and backwards compatibility w/ standard base64 module

	306 body_decode = decode

	307 decodestring = decode

	308

	309

	310

	311 def _unquote_match(match):

	312 """Turn a match in the form =AB to the ASCII character with value 0xab"""

	313 s = match.group(0)

	314 return unquote(s)

	315

	316

	317 # Header decoding is done a bit differently

	318 def header_decode(s):

	319 """Decode a string encoded with RFC 2045 MIME header `Q' encoding.

	320

	321 This function does not parse a full MIME header value encoded with

	322 quoted-printable (like =?iso-8895-1?q?Hello_World?=) -- please use

	323 the high level email.header class for that functionality.

	324 """

	325 s = s.replace('_', ' ')

	326 return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, re.ASCII)

OLD	NEW