OLD | NEW |
(Empty) | |
| 1 # Copyright (C) 2001-2010 Python Software Foundation |
| 2 # Author: Barry Warsaw |
| 3 # Contact: email-sig@python.org |
| 4 |
| 5 """Classes to generate plain text from a message object tree.""" |
| 6 from __future__ import print_function |
| 7 from __future__ import unicode_literals |
| 8 from __future__ import division |
| 9 from __future__ import absolute_import |
| 10 from future.builtins import super |
| 11 from future.builtins import str |
| 12 |
| 13 __all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator'] |
| 14 |
| 15 import re |
| 16 import sys |
| 17 import time |
| 18 import random |
| 19 import warnings |
| 20 |
| 21 from io import StringIO, BytesIO |
| 22 from future.backports.email._policybase import compat32 |
| 23 from future.backports.email.header import Header |
| 24 from future.backports.email.utils import _has_surrogates |
| 25 import future.backports.email.charset as _charset |
| 26 |
# Joins the MIME main type and subtype when building handler method names.
UNDERSCORE = '_'
NL = '\n'  # XXX: no longer used by the code below.

# Matches a "From " that starts any line of a body; used to escape such lines
# (as ">From ") so they cannot be mistaken for a Unix From_ delimiter.
fcre = re.compile(r'^From ', flags=re.M)
| 31 |
| 32 |
class Generator(object):
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=None,
                 **_3to2kwargs):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.

        The policy keyword specifies a policy object that controls a number of
        aspects of the generator's operation.  The default policy maintains
        backward compatibility.

        """
        # 'policy' is keyword-only in the Python 3 signature; on Python 2 it
        # is emulated through **kwargs.  Any other keyword is ignored, exactly
        # as before.
        policy = _3to2kwargs.pop('policy', None)
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.maxheaderlen = maxheaderlen
        self.policy = policy

    def write(self, s):
        """Write s to the output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False, linesep=None):
        r"""Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.

        linesep specifies the characters used to indicate a new line in
        the output.  The default value is determined by the policy.

        """
        # We use the _XXX constants for operating on data that comes directly
        # from the msg, and _encoded_XXX constants for operating on data that
        # has already been converted (to bytes in the BytesGenerator) and
        # inserted into a temporary buffer.
        policy = msg.policy if self.policy is None else self.policy
        if linesep is not None:
            policy = policy.clone(linesep=linesep)
        if self.maxheaderlen is not None:
            policy = policy.clone(max_line_length=self.maxheaderlen)
        self._NL = policy.linesep
        self._encoded_NL = self._encode(self._NL)
        self._EMPTY = ''
        # This attribute used to be misspelled '_encoded_EMTPY', so the value
        # computed here was never the one read back through
        # self._encoded_EMPTY.
        self._encoded_EMPTY = self._encode('')
        # Because we use clone (below) when we recursively process message
        # subparts, and because clone uses the computed policy (not None),
        # submessages will automatically get set to the computed policy when
        # they are processed by this code.
        old_gen_policy = self.policy
        old_msg_policy = msg.policy
        try:
            self.policy = policy
            msg.policy = policy
            if unixfrom:
                ufrom = msg.get_unixfrom()
                if not ufrom:
                    ufrom = 'From nobody ' + time.ctime(time.time())
                self.write(ufrom + self._NL)
            self._write(msg)
        finally:
            # Always restore both policies, even if writing failed.
            self.policy = old_gen_policy
            msg.policy = old_msg_policy

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp,
                              self._mangle_from_,
                              None,  # Use policy setting, which we've adjusted
                              policy=self.policy)

    #
    # Protected interface - undocumented ;/
    #

    # Note that we use 'self.write' when what we are writing is coming from
    # the source, and self._fp.write when what we are writing is coming from a
    # buffer (because the Bytes subclass has already had a chance to transform
    # the data in its write method in that case).  This is an entirely
    # pragmatic split determined by experiment; we could be more general by
    # always using write and having the Bytes subclass write method detect when
    # it has already transformed the input; but, since this whole thing is a
    # hack anyway this seems good enough.

    # Similarly, we have _XXX and _encoded_XXX attributes that are used on
    # source and buffer data, respectively.
    _encoded_EMPTY = ''

    def _new_buffer(self):
        # BytesGenerator overrides this to return BytesIO.
        return StringIO()

    def _encode(self, s):
        # BytesGenerator overrides this to encode strings to bytes.
        return s

    def _write_lines(self, lines):
        """Write the string `lines`, ending each line with self._NL.

        A final line without a line ending is written without one.
        """
        # We have to transform the line endings.
        if not lines:
            return
        lines = lines.splitlines(True)
        for line in lines[:-1]:
            self.write(line.rstrip('\r\n'))
            self.write(self._NL)
        laststripped = lines[-1].rstrip('\r\n')
        self.write(laststripped)
        # Only emit a final newline if the source text ended with one.
        if len(lines[-1]) != len(laststripped):
            self.write(self._NL)

    def _write(self, msg):
        """Write the headers and then the body of msg to the output."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a buffer.  Then we write the
        # headers and the buffer contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = self._new_buffer()
            self._dispatch(msg)
        finally:
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific _handle_*() method available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, folded by the policy, then a blank line."""
        for h, v in msg.raw_items():
            self.write(self.policy.fold(h, v))
        # A blank line always separates headers from body
        self.write(self._NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg.

        Raises TypeError if the payload is neither None nor a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, str):
            raise TypeError('string payload expected: %s' % type(payload))
        if _has_surrogates(msg._payload):
            charset = msg.get_param('charset')
            if charset is not None:
                # Drop the existing Content-Transfer-Encoding header and set
                # the payload again using the declared charset.
                del msg['content-transfer-encoding']
                msg.set_payload(payload, charset)
                payload = msg.get_payload()
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._write_lines(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write a multipart message: preamble, delimited parts, epilogue."""
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, str):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = self._encoded_NL.join(msgtexts)
            boundary = self._make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', msg.preamble)
            else:
                preamble = msg.preamble
            self._write_lines(preamble)
            self.write(self._NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + self._NL)
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            self.write(self._NL + '--' + boundary + self._NL)
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self.write(self._NL + '--' + boundary + '--')
        if msg.epilogue is not None:
            self.write(self._NL)
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', msg.epilogue)
            else:
                epilogue = msg.epilogue
            self._write_lines(epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed message with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        p = self.policy
        self.policy = p.clone(max_line_length=0)
        try:
            self._handle_multipart(msg)
        finally:
            self.policy = p

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status payload."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            text = s.getvalue()
            lines = text.split(self._encoded_NL)
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == self._encoded_EMPTY:
                blocks.append(self._encoded_NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(self._encoded_NL.join(blocks))

    def _handle_message(self, msg):
        """Write the payload of a message/* part."""
        s = self._new_buffer()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg._payload
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
            payload = s.getvalue()
        else:
            payload = self._encode(payload)
        self._fp.write(payload)

    # This used to be a module level function; we use a classmethod for this
    # and _compile_re so we can continue to provide the module level function
    # for backward compatibility by doing
    #   _make_boundary = Generator._make_boundary
    # at the end of the module.  It *is* internal, so we could drop that...
    @classmethod
    def _make_boundary(cls, text=None):
        """Return a random boundary string that does not occur in text."""
        # Craft a random boundary.  If text is given, ensure that the chosen
        # boundary doesn't appear in the text.
        token = random.randrange(sys.maxsize)
        boundary = ('=' * 15) + (_fmt % token) + '=='
        if text is None:
            return boundary
        b = boundary
        counter = 0
        while True:
            cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
            if not cre.search(text):
                break
            # Collision: append a counter suffix and try again.
            b = boundary + '.' + str(counter)
            counter += 1
        return b

    @classmethod
    def _compile_re(cls, s, flags):
        # BytesGenerator overrides this to compile a bytes pattern.
        return re.compile(s, flags)
| 381 |
class BytesGenerator(Generator):
    """Generates a bytes version of a Message object tree.

    Functionally identical to the base Generator except that the output is
    bytes and not string.  When surrogates were used in the input to encode
    bytes, these are decoded back to bytes for output.  If the policy has
    cte_type set to 7bit, then the message is transformed such that the
    non-ASCII bytes are properly content transfer encoded, using the charset
    unknown-8bit.

    The outfp object must accept bytes in its write method.
    """

    # Bytes versions of this constant for use in manipulating data from
    # the BytesIO buffer.
    _encoded_EMPTY = b''

    def write(self, s):
        """Encode s as ASCII (with surrogateescape) and write the bytes."""
        self._fp.write(str(s).encode('ascii', 'surrogateescape'))

    def _new_buffer(self):
        return BytesIO()

    def _encode(self, s):
        return s.encode('ascii')

    def _write_headers(self, msg):
        # This is almost the same as the string version, except for handling
        # strings with 8bit bytes.
        for h, v in msg.raw_items():
            # fold_binary() output goes straight to the file object rather
            # than through self.write(), which would encode it again.
            self._fp.write(self.policy.fold_binary(h, v))
        # A blank line always separates headers from body
        self.write(self._NL)

    def _handle_text(self, msg):
        """Write the text payload of msg as bytes."""
        # If the string has surrogates the original source was bytes, so
        # just write it back out.
        payload = msg._payload
        if payload is None:
            return
        if _has_surrogates(payload) and self.policy.cte_type != '7bit':
            if self._mangle_from_:
                # Mangle a local copy only, as the base class does, so that
                # flattening does not modify the message.
                payload = fcre.sub(">From ", payload)
            self._write_lines(payload)
        else:
            super(BytesGenerator, self)._handle_text(msg)

    # Default body handler
    _writeBody = _handle_text

    @classmethod
    def _compile_re(cls, s, flags):
        # The buffered text searched by _make_boundary() is bytes here, so
        # the pattern must be bytes too.
        return re.compile(s.encode('ascii'), flags)
| 434 |
| 435 |
# Default text substituted for each non-text part by DecodedGenerator.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'


class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None,
                 policy=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the payload of the subpart, as returned
        by get_payload(decode=False).

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]

        Optional policy is passed through to Generator.__init__().
        """
        # Accepting 'policy' is what lets clone() work: the base-class
        # clone() passes policy= to the constructor, which this signature
        # used to reject with a TypeError.
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen,
                           policy=policy)
        self._fmt = _FMT if fmt is None else fmt

    def clone(self, fp):
        """Clone this generator with the exact same options, including fmt."""
        return self.__class__(fp,
                              self._mangle_from_,
                              None,  # Use policy setting, as Generator.clone
                              self._fmt,
                              policy=self.policy)

    def _dispatch(self, msg):
        """Print each text part; print the format string for other parts."""
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'text':
                print(part.get_payload(decode=False), file=self)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                print(self._fmt % {
                    'type'       : part.get_content_type(),
                    'maintype'   : maintype,
                    'subtype'    : part.get_content_subtype(),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }, file=self)
| 491 |
| 492 |
# Helper used by Generator._make_boundary: a zero-padded decimal format as
# wide as the largest token that random.randrange(sys.maxsize) can return.
_width = len(repr(sys.maxsize - 1))
_fmt = '%0{}d'.format(_width)

# Backward compatibility: keep the old module-level function name available.
_make_boundary = Generator._make_boundary
OLD | NEW |