OLD | NEW |
1 # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. | 1 # copyright 2003-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr | 2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
3 # | 3 # |
4 # This file is part of logilab-common. | 4 # This file is part of logilab-common. |
5 # | 5 # |
6 # logilab-common is free software: you can redistribute it and/or modify it under | 6 # logilab-common is free software: you can redistribute it and/or modify it under |
7 # the terms of the GNU Lesser General Public License as published by the Free | 7 # the terms of the GNU Lesser General Public License as published by the Free |
8 # Software Foundation, either version 2.1 of the License, or (at your option) any | 8 # Software Foundation, either version 2.1 of the License, or (at your option) any |
9 # later version. | 9 # later version. |
10 # | 10 # |
11 # logilab-common is distributed in the hope that it will be useful, but WITHOUT | 11 # logilab-common is distributed in the hope that it will be useful, but WITHOUT |
12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | 12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
13 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more | 13 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
14 # details. | 14 # details. |
15 # | 15 # |
16 # You should have received a copy of the GNU Lesser General Public License along | 16 # You should have received a copy of the GNU Lesser General Public License along |
17 # with logilab-common. If not, see <http://www.gnu.org/licenses/>. | 17 # with logilab-common. If not, see <http://www.gnu.org/licenses/>. |
18 """Unicode email support (extends email from stdlib). | 18 """Unicode email support (extends email from stdlib)""" |
19 | 19 |
20 | |
21 | |
22 | |
23 """ | |
24 __docformat__ = "restructuredtext en" | 20 __docformat__ = "restructuredtext en" |
25 | 21 |
26 import email | 22 import email |
27 from encodings import search_function | 23 from encodings import search_function |
28 import sys | 24 import sys |
29 if sys.version_info >= (2, 5): | 25 if sys.version_info >= (2, 5): |
30 from email.utils import parseaddr, parsedate | 26 from email.utils import parseaddr, parsedate |
31 from email.header import decode_header | 27 from email.header import decode_header |
32 else: | 28 else: |
33 from email.Utils import parseaddr, parsedate | 29 from email.Utils import parseaddr, parsedate |
34 from email.Header import decode_header | 30 from email.Header import decode_header |
35 | 31 |
36 from datetime import datetime | 32 from datetime import datetime |
37 | 33 |
38 try: | 34 try: |
39 from mx.DateTime import DateTime | 35 from mx.DateTime import DateTime |
40 except ImportError: | 36 except ImportError: |
41 DateTime = datetime | 37 DateTime = datetime |
42 | 38 |
43 import logilab.common as lgc | 39 import logilab.common as lgc |
44 | 40 |
45 | 41 |
46 def decode_QP(string): | 42 def decode_QP(string): |
47 parts = [] | 43 parts = [] |
48 for decoded, charset in decode_header(string): | 44 for decoded, charset in decode_header(string): |
49 if not charset : | 45 if not charset : |
50 charset = 'iso-8859-15' | 46 charset = 'iso-8859-15' |
51 parts.append(unicode(decoded, charset, 'replace')) | 47 parts.append(decoded.decode(charset, 'replace')) |
52 | 48 |
53 return u' '.join(parts) | 49 if sys.version_info < (3, 3): |
| 50 # decoding was non-RFC compliant wrt to whitespace handling |
| 51 # see http://bugs.python.org/issue1079 |
| 52 return u' '.join(parts) |
| 53 return u''.join(parts) |
54 | 54 |
55 def message_from_file(fd): | 55 def message_from_file(fd): |
56 try: | 56 try: |
57 return UMessage(email.message_from_file(fd)) | 57 return UMessage(email.message_from_file(fd)) |
58 except email.Errors.MessageParseError: | 58 except email.Errors.MessageParseError: |
59 return '' | 59 return '' |
60 | 60 |
61 def message_from_string(string): | 61 def message_from_string(string): |
62 try: | 62 try: |
63 return UMessage(email.message_from_string(string)) | 63 return UMessage(email.message_from_string(string)) |
64 except email.Errors.MessageParseError: | 64 except email.Errors.MessageParseError: |
65 return '' | 65 return '' |
66 | 66 |
67 class UMessage: | 67 class UMessage: |
68 """Encapsulates an email.Message instance and returns only unicode objects. | 68 """Encapsulates an email.Message instance and returns only unicode objects. |
69 """ | 69 """ |
70 | 70 |
71 def __init__(self, message): | 71 def __init__(self, message): |
72 self.message = message | 72 self.message = message |
73 | 73 |
74 # email.Message interface ################################################# | 74 # email.Message interface ################################################# |
75 | 75 |
76 def get(self, header, default=None): | 76 def get(self, header, default=None): |
77 value = self.message.get(header, default) | 77 value = self.message.get(header, default) |
78 if value: | 78 if value: |
79 return decode_QP(value) | 79 return decode_QP(value) |
80 return value | 80 return value |
81 | 81 |
| 82 def __getitem__(self, header): |
| 83 return self.get(header) |
| 84 |
82 def get_all(self, header, default=()): | 85 def get_all(self, header, default=()): |
83 return [decode_QP(val) for val in self.message.get_all(header, default) | 86 return [decode_QP(val) for val in self.message.get_all(header, default) |
84 if val is not None] | 87 if val is not None] |
85 | 88 |
86 def get_payload(self, index=None, decode=False): | |
87 message = self.message | |
88 if index is None: | |
89 payload = message.get_payload(index, decode) | |
90 if isinstance(payload, list): | |
91 return [UMessage(msg) for msg in payload] | |
92 if message.get_content_maintype() != 'text': | |
93 return payload | |
94 | |
95 charset = message.get_content_charset() or 'iso-8859-1' | |
96 if search_function(charset) is None: | |
97 charset = 'iso-8859-1' | |
98 return unicode(payload or '', charset, "replace") | |
99 else: | |
100 payload = UMessage(message.get_payload(index, decode)) | |
101 return payload | |
102 | |
103 def is_multipart(self): | 89 def is_multipart(self): |
104 return self.message.is_multipart() | 90 return self.message.is_multipart() |
105 | 91 |
106 def get_boundary(self): | 92 def get_boundary(self): |
107 return self.message.get_boundary() | 93 return self.message.get_boundary() |
108 | 94 |
109 def walk(self): | 95 def walk(self): |
110 for part in self.message.walk(): | 96 for part in self.message.walk(): |
111 yield UMessage(part) | 97 yield UMessage(part) |
112 | 98 |
113 def get_content_maintype(self): | 99 if sys.version_info < (3, 0): |
114 return unicode(self.message.get_content_maintype()) | |
115 | 100 |
116 def get_content_type(self): | 101 def get_payload(self, index=None, decode=False): |
117 return unicode(self.message.get_content_type()) | 102 message = self.message |
| 103 if index is None: |
| 104 payload = message.get_payload(index, decode) |
| 105 if isinstance(payload, list): |
| 106 return [UMessage(msg) for msg in payload] |
| 107 if message.get_content_maintype() != 'text': |
| 108 return payload |
118 | 109 |
119 def get_filename(self, failobj=None): | 110 charset = message.get_content_charset() or 'iso-8859-1' |
120 value = self.message.get_filename(failobj) | 111 if search_function(charset) is None: |
121 if value is failobj: | 112 charset = 'iso-8859-1' |
122 return value | 113 return unicode(payload or '', charset, "replace") |
123 try: | 114 else: |
124 return unicode(value) | 115 payload = UMessage(message.get_payload(index, decode)) |
125 except UnicodeDecodeError: | 116 return payload |
126 return u'error decoding filename' | 117 |
| 118 def get_content_maintype(self): |
| 119 return unicode(self.message.get_content_maintype()) |
| 120 |
| 121 def get_content_type(self): |
| 122 return unicode(self.message.get_content_type()) |
| 123 |
| 124 def get_filename(self, failobj=None): |
| 125 value = self.message.get_filename(failobj) |
| 126 if value is failobj: |
| 127 return value |
| 128 try: |
| 129 return unicode(value) |
| 130 except UnicodeDecodeError: |
| 131 return u'error decoding filename' |
| 132 |
| 133 else: |
| 134 |
| 135 def get_payload(self, index=None, decode=False): |
| 136 message = self.message |
| 137 if index is None: |
| 138 payload = message.get_payload(index, decode) |
| 139 if isinstance(payload, list): |
| 140 return [UMessage(msg) for msg in payload] |
| 141 return payload |
| 142 else: |
| 143 payload = UMessage(message.get_payload(index, decode)) |
| 144 return payload |
| 145 |
| 146 def get_content_maintype(self): |
| 147 return self.message.get_content_maintype() |
| 148 |
| 149 def get_content_type(self): |
| 150 return self.message.get_content_type() |
| 151 |
| 152 def get_filename(self, failobj=None): |
| 153 return self.message.get_filename(failobj) |
127 | 154 |
128 # other convenience methods ############################################### | 155 # other convenience methods ############################################### |
129 | 156 |
130 def headers(self): | 157 def headers(self): |
131 """return an unicode string containing all the message's headers""" | 158 """return an unicode string containing all the message's headers""" |
132 values = [] | 159 values = [] |
133 for header in self.message.keys(): | 160 for header in self.message.keys(): |
134 values.append(u'%s: %s' % (header, self.get(header))) | 161 values.append(u'%s: %s' % (header, self.get(header))) |
135 return '\n'.join(values) | 162 return '\n'.join(values) |
136 | 163 |
(...skipping 21 matching lines...) Expand all Loading... |
158 pass | 185 pass |
159 if value is not None: | 186 if value is not None: |
160 datetuple = parsedate(value) | 187 datetuple = parsedate(value) |
161 if datetuple: | 188 if datetuple: |
162 if lgc.USE_MX_DATETIME: | 189 if lgc.USE_MX_DATETIME: |
163 return DateTime(*datetuple[:6]) | 190 return DateTime(*datetuple[:6]) |
164 return datetime(*datetuple[:6]) | 191 return datetime(*datetuple[:6]) |
165 elif not return_str: | 192 elif not return_str: |
166 return None | 193 return None |
167 return value | 194 return value |
OLD | NEW |