OLD | NEW |
(Empty) | |
| 1 """Representing and manipulating email headers via custom objects. |
| 2 |
| 3 This module provides an implementation of the HeaderRegistry API. |
| 4 The implementation is designed to flexibly follow RFC5322 rules. |
| 5 |
| 6 Eventually HeaderRegistry will be a public API, but it isn't yet, |
| 7 and will probably change some before that happens. |
| 8 |
| 9 """ |
| 10 from __future__ import unicode_literals |
| 11 from __future__ import division |
| 12 from __future__ import absolute_import |
| 13 |
| 14 from future.builtins import super |
| 15 from future.builtins import str |
| 16 from future.utils import text_to_native_str |
| 17 from future.backports.email import utils |
| 18 from future.backports.email import errors |
| 19 from future.backports.email import _header_value_parser as parser |
| 20 |
class Address(object):

    """A single email address: display name plus username and domain.

    Instances expose read-only display_name, username, domain and addr_spec
    attributes.  Two Address objects of the same type compare equal when
    their display_name, username and domain are all equal.
    """

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises TypeError if addr_spec is combined with username or domain,
        ValueError if addr_spec cannot be parsed completely, and the first
        parser defect if the parsed addr_spec has any defects.

        """
        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        return self._display_name

    @property
    def username(self):
        return self._username

    @property
    def domain(self):
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.
        """
        # Quote the local part when it shares at least one character with
        # parser.DOT_ATOM_ENDS.
        nameset = set(self.username)
        if len(nameset) > len(nameset - parser.DOT_ATOM_ENDS):
            lp = parser.quote_string(self.username)
        else:
            lp = self.username
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            # Neither a local part nor a domain: the null address.
            return '<>'
        return lp

    def __repr__(self):
        return "Address(display_name={!r}, username={!r}, domain={!r})".format(
                        self.display_name, self.username, self.domain)

    def __str__(self):
        # Quote the display name when it shares at least one character with
        # parser.SPECIALS.
        nameset = set(self.display_name)
        if len(nameset) > len(nameset - parser.SPECIALS):
            disp = parser.quote_string(self.display_name)
        else:
            disp = self.display_name
        if disp:
            addr_spec = '' if self.addr_spec == '<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) for a foreign type
        # lets the other operand's reflected comparison run; the overall
        # result of '==' is still False when neither side handles it.
        if type(other) is not type(self):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)

    def __ne__(self, other):
        # Python 2 does not derive '!=' from __eq__, so define it explicitly.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
| 109 |
| 110 |
class Group(object):

    """A named group of Address objects, or a wrapper for ungrouped ones."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Stored as a tuple so the addresses attribute is immutable.
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        return self._display_name

    @property
    def addresses(self):
        return self._addresses

    def __repr__(self):
        return "Group(display_name={!r}, addresses={!r})".format(
                 self.display_name, self.addresses)

    def __str__(self):
        # An unnamed group holding exactly one address renders as that
        # address alone.
        if self.display_name is None and len(self.addresses) == 1:
            return str(self.addresses[0])
        disp = self.display_name
        if disp is not None:
            # Quote the display name when it shares at least one character
            # with parser.SPECIALS.
            nameset = set(disp)
            if len(nameset) > len(nameset - parser.SPECIALS):
                disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) for a foreign type
        # lets the other operand's reflected comparison run; the overall
        # result of '==' is still False when neither side handles it.
        if type(other) is not type(self):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)

    def __ne__(self, other):
        # Python 2 does not derive '!=' from __eq__, so define it explicitly.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
| 160 |
| 161 |
| 162 # Header Classes # |
| 163 |
class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary will
    contain one key, 'defects', initialized to an empty list.  After the call
    the dictionary must contain two additional keys: parse_tree, set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate.  The parser should not,
    insofar as practical, raise any errors.  Defects should be added to the
    list instead.  The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary.  In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments.  The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        """Parse value with cls.parse and build the header string from it.

        The string value of the new object is the 'decoded' entry produced
        by parse (sanitized first if it contains surrogates); every entry
        of the parse dictionary is then handed to init as keywords.
        """
        kwds = {'defects': []}
        cls.parse(value, kwds)
        if utils._has_surrogates(kwds['decoded']):
            kwds['decoded'] = utils._sanitize(kwds['decoded'])
        self = str.__new__(cls, kwds['decoded'])
        # 'decoded' stays in kwds; BaseHeader.init() ignores any keyword it
        # does not consume.
        self.init(name, **kwds)
        return self

    def init(self, name, **_3to2kwargs):
        """Store the header name, parse tree and defects on the instance.

        'defects' and 'parse_tree' are required keywords (KeyError if either
        is missing); any other keywords are ignored.
        """
        defects = _3to2kwargs.pop('defects')
        parse_tree = _3to2kwargs.pop('parse_tree')
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        return self._name

    @property
    def defects(self):
        return tuple(self._defects)

    def __reduce__(self):
        # The header class is created dynamically, so pickle the pieces
        # needed to rebuild it (name and bases) rather than the class itself.
        return (
            _reconstruct_header,
            (
                self.__class__.__name__,
                self.__class__.__bases__,
                str(self),
            ),
            self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        # Bypasses __new__ (and therefore parsing); instance attributes are
        # not set here.
        return str.__new__(cls, value)

    def fold(self, **_3to2kwargs):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        'policy' is a required keyword argument (KeyError if it is missing).

        """
        policy = _3to2kwargs.pop('policy')
        # At some point we need to only put fws here if it was in the source.
        header = parser.Header([
            parser.HeaderLabel([
                parser.ValueTerminal(self.name, 'header-name'),
                parser.ValueTerminal(':', 'header-sep')]),
            parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]),
                             self._parse_tree])
        return header.fold(policy=policy)
| 262 |
| 263 |
def _reconstruct_header(cls_name, bases, value):
    """Rebuild a header object from the pieces saved by BaseHeader.__reduce__.

    A class named cls_name with the given bases is created on the fly and
    its _reconstruct classmethod is asked to wrap value.
    """
    header_cls = type(text_to_native_str(cls_name), bases, {})
    return header_cls._reconstruct(value)
| 266 |
| 267 |
class UnstructuredHeader(object):

    """Header whose value is parsed with parser.get_unstructured."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree for value and its string form in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
| 277 |
| 278 |
class UniqueUnstructuredHeader(UnstructuredHeader):

    """UnstructuredHeader variant whose max_count is 1."""

    max_count = 1
| 282 |
| 283 |
class DateHeader(object):

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.

    If the input is empty, or is a string that cannot be parsed as a date,
    a defect is recorded and the datetime attribute is None.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'datetime', 'decoded' and 'parse_tree' for value.

        value may be a date string or a datetime object.  Problems with the
        value are appended to kwds['defects'] instead of being raised.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                value = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # A malformed date must not abort header parsing: record a
                # defect, keep the raw text as the decoded value and leave
                # the datetime unset.
                kwds['defects'].append(
                    errors.InvalidHeaderDefect('Invalid date value or format'))
                kwds['datetime'] = None
                kwds['decoded'] = value
                kwds['parse_tree'] = parser.TokenList()
                return
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        # Consume the extra key added by parse before delegating upward.
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        return self._datetime
| 321 |
| 322 |
class UniqueDateHeader(DateHeader):

    """DateHeader variant whose max_count is 1."""

    max_count = 1
| 326 |
| 327 |
class AddressHeader(object):

    """Header whose value is a list of addresses and/or address groups.

    Exposes the parsed value as 'groups' (a tuple of Group objects) and as
    'addresses' (a flat tuple of every address in every group).
    """

    max_count = None

    @staticmethod
    def value_parser(value):
        """Return the address-list parse tree for value."""
        address_list, remainder = parser.get_address_list(value)
        assert not remainder, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'groups', 'defects', 'decoded' and 'parse_tree'.

        value is either a header string or Address/Group objects (a single
        object or an iterable of them).
        """
        if not isinstance(value, str):
            # Assume it is Address/Group stuff
            items = value if hasattr(value, '__iter__') else [value]
            # Anything without an 'addresses' attribute is treated as a bare
            # address and wrapped in an unnamed Group.
            groups = [item if hasattr(item, 'addresses')
                      else Group(None, [item])
                      for item in items]
            defects = []
        else:
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            address_list = cls.value_parser(value)
            kwds['parse_tree'] = address_list
            groups = [
                Group(entry.display_name,
                      [Address(mb.display_name or '',
                               mb.local_part or '',
                               mb.domain or '')
                       for mb in entry.all_mailboxes])
                for entry in address_list.addresses]
            defects = list(address_list.all_defects)
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join(str(group) for group in groups)
        # Object input has no parse tree yet; derive one from the decoded
        # text.
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        # Consume the extra key added by parse before delegating upward.
        self._groups = tuple(kw.pop('groups'))
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        return self._groups

    @property
    def addresses(self):
        # Flattened lazily on first access and cached afterwards.
        if self._addresses is None:
            flattened = []
            for group in self._groups:
                flattened.extend(group.addresses)
            self._addresses = tuple(flattened)
        return self._addresses
| 381 |
| 382 |
class UniqueAddressHeader(AddressHeader):

    """AddressHeader variant whose max_count is 1."""

    max_count = 1
| 386 |
| 387 |
class SingleAddressHeader(AddressHeader):

    """AddressHeader that additionally exposes a single 'address'."""

    @property
    def address(self):
        """The only address in the header.

        Raises ValueError unless the header holds exactly one address.
        """
        found = self.addresses
        if len(found) == 1:
            return found[0]
        raise ValueError(("value of single address header {} is not "
            "a single address").format(self.name))
| 396 |
| 397 |
class UniqueSingleAddressHeader(SingleAddressHeader):

    """SingleAddressHeader variant whose max_count is 1."""

    max_count = 1
| 401 |
| 402 |
class MIMEVersionHeader(object):

    """Header exposing the parsed 'major', 'minor' and 'version' values."""

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse results plus major, minor and version.

        When the parse tree has no minor number, major and version are
        reported as None as well.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        minor = tree.minor
        has_minor = minor is not None
        kwds['major'] = tree.major if has_minor else None
        kwds['minor'] = minor
        if has_minor:
            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
        else:
            kwds['version'] = None

    def init(self, *args, **kw):
        # Consume the extra keys added by parse before delegating upward.
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        return self._major

    @property
    def minor(self):
        return self._minor

    @property
    def version(self):
        return self._version
| 438 |
| 439 |
class ParameterizedMIMEHeader(object):

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse results and a 'params' dictionary.

        Parameter names are sanitized and lower-cased; parameter values are
        sanitized.  A parse tree whose params is None yields an empty dict.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        if tree.params is not None:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            kwds['params'] = {
                utils._sanitize(name).lower(): utils._sanitize(value)
                for name, value in tree.params}
        else:
            kwds['params'] = {}

    def init(self, *args, **kw):
        # Consume the extra key added by parse before delegating upward.
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        # Hand out a copy so callers cannot alter the stored parameters.
        return self._params.copy()
| 467 |
| 468 |
class ContentTypeHeader(ParameterizedMIMEHeader):

    """Parameterized header exposing maintype, subtype and content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # The parent init has stored the parse tree by this point.
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        return self._maintype

    @property
    def subtype(self):
        return self._subtype

    @property
    def content_type(self):
        """The maintype and subtype joined by '/'."""
        return self.maintype + '/' + self.subtype
| 489 |
| 490 |
class ContentDispositionHeader(ParameterizedMIMEHeader):

    """Parameterized header exposing the content_disposition value."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # A missing disposition stays None; anything else is sanitized.
        disposition = self._parse_tree.content_disposition
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        return self._content_disposition
| 503 |
| 504 |
class ContentTransferEncodingHeader(object):

    """Header exposing the sanitized content transfer encoding as 'cte'."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, its string form and its defects."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # The parent init has stored the parse tree by this point.
        self._cte = utils._sanitize(self._parse_tree.cte)

    @property
    def cte(self):
        return self._cte
| 524 |
| 525 |
| 526 # The header factory # |
| 527 |
# Default registry contents: lower-case header field name -> specialized
# header class.  HeaderRegistry copies this mapping into its own registry
# when it is created with use_default_map=True.
_default_header_map = {
    'subject':                      UniqueUnstructuredHeader,
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    }
| 549 |
class HeaderRegistry(object):

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is the class that will be the last class in the created
        header class's __bases__ list.  default_class is the class that will be
        used if "name" (see __call__) does not appear in the registry.
        use_default_map controls whether or not the default mapping of names to
        specialized classes is copied in to the registry when the factory is
        created.  The default is True.

        """
        self.base_class = base_class
        self.default_class = default_class
        # Each registry gets its own dict so map_to_type never touches the
        # shared default mapping.
        self.registry = dict(_default_header_map) if use_default_map else {}

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        """
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        # Look up the specialized class case-insensitively and combine it
        # with base_class into a fresh class named '_' + specialized name.
        specialized = self.registry.get(name.lower(), self.default_class)
        type_name = text_to_native_str('_' + specialized.__name__)
        return type(type_name, (specialized, self.base_class), {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        Creates a header instance by creating a specialized class for parsing
        and representing the specified header by combining the factory
        base_class with a specialized class from the registry or the
        default_class, and passing the name and value to the constructed
        class's constructor.

        """
        header_class = self[name]
        return header_class(name, value)
OLD | NEW |